mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-25 20:36:36 +03:00
Compare commits
2 Commits
0f452fe37c
...
v0.3.12
| Author | SHA1 | Date | |
|---|---|---|---|
|
bcf5d4f824
|
|||
|
7ab6119b1f
|
+1
-1
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
# Single source of truth for the version: CI passes -DVMSIG_VERSION=${TAG#v}, so the project
|
||||
# version (-> libvgpu-perception SONAME/.so version) and the .deb version come from one tag.
|
||||
set(VMSIG_VERSION "0.3.11" CACHE STRING "Release version (MAJOR.MINOR.PATCH); CI passes the tag")
|
||||
set(VMSIG_VERSION "0.3.12" CACHE STRING "Release version (MAJOR.MINOR.PATCH); CI passes the tag")
|
||||
project(vmsig VERSION ${VMSIG_VERSION} LANGUAGES C)
|
||||
|
||||
set(CMAKE_C_STANDARD 17)
|
||||
|
||||
@@ -15,6 +15,13 @@ typedef struct {
|
||||
uint32_t fail_boots; /* test-only: fail the first N stub bootstraps before */
|
||||
/* succeeding (drives the retry/backoff path deterministically */
|
||||
/* without timing dependence); 0 in production. stub path only. */
|
||||
const char* persist_path; /* armed: path to the kcr3 cache file (sibling of .slots in the */
|
||||
/* watch dir, tmpfs-local: survives a daemon restart, dies with the */
|
||||
/* RAM file on host reboot). NULL/empty => persist disabled (cold */
|
||||
/* bootstrap only). The boot-session discriminator is the kcr3 */
|
||||
/* itself: on resume it is validated against live RAM via */
|
||||
/* vmie_win32_open_ro_fd (NULL if it no longer resolves the kernel) */
|
||||
/* — a stale kcr3 after a guest reboot is rejected, fail-closed. */
|
||||
} vmsig_memctx_cfg;
|
||||
|
||||
/* Max SRC bytes per atomic gva_write (bounds the worker POD slot; mc_req header + src
|
||||
|
||||
+218
-28
@@ -26,6 +26,7 @@
|
||||
#include <sys/mman.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/timerfd.h> /* one-shot backoff timer for cold-bootstrap retry */
|
||||
#include <sys/stat.h> /* persist file mode bits (0600) */
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
|
||||
@@ -67,7 +68,82 @@ static int memfd_create(const char* name, unsigned int flags) {
|
||||
* eventfd, slot 1 is the one-shot backoff timerfd that re-kicks the bootstrap. */
|
||||
enum { MC_COOKIE_WORKER = 0, MC_COOKIE_RETRY = 1 };
|
||||
|
||||
enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
|
||||
/* MC_JOB_RESUME: fast-path boot-session re-validation. On a daemon restart the cold scan
|
||||
* (host_bootstrap) is slow AND unstable (it hunts the agent beacon across physical RAM); if
|
||||
* the guest did NOT reboot, its System DTB (kcr3) is unchanged and was cached at the last
|
||||
* live scan. RESUME re-opens an O_RDONLY context with that cached kcr3 (vmie_win32_open_ro_fd,
|
||||
* which bypasses the beacon scan) — the boot-session discriminator is the kcr3 ITSELF against
|
||||
* the live RAM: it resolves the kernel (ntoskrnl) only if the guest is the same boot. */
|
||||
enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1, MC_JOB_RESUME = 2 };
|
||||
|
||||
/* ---- kcr3 context persist: a cache of the cold-bootstrap result, mirror of the .slots
|
||||
* idiom in src/discovery/slot.c (magic+version POD, native byte order, atomic tmp+rename,
|
||||
* fail-soft load). Deliberately NOT factored into a shared helper: discovery (vmid<->slot)
|
||||
* and this adapter (kcr3 cache) are different layers with different lifecycles — Rule-of-three
|
||||
* is not reached, and a shared helper would couple the two prematurely.
|
||||
*
|
||||
* We persist the MINIMUM: only {magic, version, kcr3}. NO RAM metadata (st_ino/size/mtime/
|
||||
* btime): those do NOT prove the RAM holds the same boot session (the backing file outlives a
|
||||
* memory overwrite, the inode can be reused). The boot-session discriminator is the kcr3
|
||||
* self-validating against the live RAM at load time (see MC_JOB_RESUME), not file metadata.
|
||||
*
|
||||
* MEMWRITE-target safety: a persisted kcr3 is a READ locator only. The write target (a->kcr3)
|
||||
* is set ONLY by the bootstrap worker after a fresh live scan — never from this file. */
|
||||
#define MC_PERSIST_MAGIC   0x4B435258u /* "KCRX" — kcr3 context cache */
#define MC_PERSIST_VERSION 1u

/* On-disk layout of the kcr3 cache: one fixed-size POD, written and read verbatim in the
 * host's native byte order. */
typedef struct {
    uint32_t magic;   /* MC_PERSIST_MAGIC; anything else => not our file */
    uint32_t version; /* MC_PERSIST_VERSION; any other version is ignored on load */
    uint64_t kcr3;    /* System DTB obtained from a live RAM scan; validated by open_ro_fd */
} mc_persist_blob;

/* Atomic save: write the blob to the sibling "<path>.tmp", then rename it over `path`, so a
 * reader sees either the whole old file or the whole new one. Loop-thread-only.
 *
 *   path  target cache file; NULL or "" => nothing is written.
 *   kcr3  value to cache.
 *
 * Returns 0 on success, -1 otherwise (disabled path, path too long for the temp name, or any
 * open/write/close/rename failure). Best-effort: callers may ignore the result. The temp file
 * is removed on every failure after it was opened. */
static int mc_persist_save(const char* path, uint64_t kcr3) {
    if (!path || !*path) return -1;

    mc_persist_blob b;
    memset(&b, 0, sizeof b); /* zero the whole object so no stray bytes reach the file */
    b.magic   = MC_PERSIST_MAGIC;
    b.version = MC_PERSIST_VERSION;
    b.kcr3    = kcr3;

    char tmp[512];
    int n = snprintf(tmp, sizeof tmp, "%s.tmp", path);
    if (n < 0 || (size_t)n >= sizeof tmp) return -1; /* truncated name would point elsewhere */

    int fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600);
    if (fd < 0) return -1;

    int rc = 0;
    if (write(fd, &b, sizeof b) != (ssize_t)sizeof b) rc = -1; /* short write counts as failure */
    if (close(fd) != 0) rc = -1;
    if (rc == 0 && rename(tmp, path) != 0) rc = -1;
    if (rc != 0) unlink(tmp); /* never leave a half-written temp behind */
    return rc;
}

/* Load + validate the cached blob. Loop-thread-only.
 *
 *   path  cache file; NULL or "" => persist disabled.
 *   out   receives the blob on success; left untouched on failure.
 *
 * Returns 1 if a well-formed blob was read (out filled), 0 otherwise: no file, short read,
 * wrong magic or version, or a zero kcr3 => fail-soft, the caller falls back to a cold
 * bootstrap. No migrations: an old version is ignored and overwritten by the next live scan
 * result.
 *
 * A zero kcr3 is rejected here because it can never pass the RESUME check (the stub path
 * refuses cr3 == 0 outright), so submitting it would only cost a worker round-trip before the
 * same cold fallback. Beyond that this validates only the file SHAPE; the kcr3 itself is
 * validated against live RAM on the worker (MC_JOB_RESUME), which is the real boot-session
 * discriminator. */
static int mc_persist_load(const char* path, mc_persist_blob* out) {
    if (!path || !*path || !out) return 0;

    int fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd < 0) return 0; /* no file => cold bootstrap */

    mc_persist_blob b;
    ssize_t r = read(fd, &b, sizeof b);
    close(fd);

    if (r != (ssize_t)sizeof b) return 0; /* short/unreadable => cold bootstrap */
    if (b.magic != MC_PERSIST_MAGIC || b.version != MC_PERSIST_VERSION)
        return 0; /* corrupt/old => cold bootstrap */
    if (b.kcr3 == 0) return 0; /* well-shaped but unusable => cold bootstrap */

    *out = b;
    return 1;
}

/* Drop the cache on a destructive VM-lifecycle (the RAM may have changed). Best-effort: the
 * unlink result is ignored, and a NULL/empty path is a no-op. Hygiene only: a stale kcr3 is
 * still subject to the RESUME validation, but a known-dead file is not left around.
 * Loop-thread-only. */
static void mc_persist_drop(const char* path) {
    if (path && *path) unlink(path);
}
|
||||
|
||||
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
|
||||
* bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap).
|
||||
@@ -80,8 +156,9 @@ typedef struct {
|
||||
uint32_t attempt; /* MC_JOB_BOOTSTRAP: consecutive-failure index of THIS */
|
||||
/* kick (copy of a->boot_attempts); stub fails while */
|
||||
/* attempt < a->fail_boots. NOT the epoch counter. */
|
||||
/* --- MC_JOB_WRITE --- */
|
||||
uint64_t cr3; /* target AS root; 0 => a->kcr3 (kernel AS), resolved on worker */
|
||||
/* --- MC_JOB_WRITE / MC_JOB_RESUME --- */
|
||||
uint64_t cr3; /* WRITE: target AS root (0 => a->kcr3); RESUME: persisted kcr3 to validate */
|
||||
uint64_t low; /* MC_JOB_RESUME: below-4G split for vmie_win32_open_ro_fd (ignored by others) */
|
||||
uint64_t gva;
|
||||
uint32_t len;
|
||||
uint32_t corr;
|
||||
@@ -100,6 +177,7 @@ struct vmsig_adapter {
|
||||
uint32_t endpoint;
|
||||
int stub;
|
||||
const char* ram_path; /* armed: RAM-backing path (NOT published outward) */
|
||||
const char* persist_path; /* armed: kcr3 cache file path (cfg, loop-thread-only); NULL => persist off */
|
||||
uint64_t low;
|
||||
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (owned by adapter, closed in mc_close); <0 => default */
|
||||
vmsig_emit emit;
|
||||
@@ -232,6 +310,49 @@ static int mc_job(void* user, const void* req, void* res) {
|
||||
#endif
|
||||
}
|
||||
|
||||
if (rq->op == MC_JOB_RESUME) {
|
||||
/* Fast-path boot-session re-validation: open an O_RDONLY context with the PERSISTED
|
||||
* kcr3 and let the engine decide if it still resolves the kernel in the LIVE RAM.
|
||||
* This is purely a READ validation — it NEVER touches a->win/a->mem/a->kcr3 (the
|
||||
* RW write-hold, owned by the bootstrap worker after a fresh live scan). MEMWRITE-
|
||||
* target safety: a persisted kcr3 must never become the gva_write target. */
|
||||
if (a->stub) {
|
||||
/* No VMIE here, so there is no real RAM to validate against: synthetically ACCEPT a
|
||||
* nonzero kcr3 so the stub can exercise the persist MECHANICS (save/load/fast-vs-slow
|
||||
* selection). This is NOT real boot-session validation — that is armed-only. */
|
||||
if (rq->cr3 == 0) return -1;
|
||||
rs->kcr3 = rq->cr3;
|
||||
return 0;
|
||||
}
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
/* fresh O_RDONLY fd over the backing (same source as mc_reg_share_fd: dup the infra
|
||||
* RO-fd, else open ram_path O_RDONLY). The RO context borrows it (dup'd internally),
|
||||
* so we close our copy after open. */
|
||||
int rfd;
|
||||
if (a->cfg_ro_fd >= 0) rfd = fcntl(a->cfg_ro_fd, F_DUPFD_CLOEXEC, 0);
|
||||
else if (a->ram_path) rfd = open(a->ram_path, O_RDONLY | O_CLOEXEC);
|
||||
else return -1;
|
||||
if (rfd < 0) return -1;
|
||||
vmie_win32* v = vmie_win32_open_ro_fd(rfd, rq->low, rq->cr3);
|
||||
close(rfd); /* borrowed by open_ro_fd (dup'd internally) */
|
||||
if (!v) return -1; /* kcr3 no longer resolves the kernel => stale/guest-reboot */
|
||||
/* Second, independent signal: the System process must be present AND its cr3 must equal
|
||||
* the persisted kcr3 (the System DTB by definition). Catches the pathology "kcr3 resolves
|
||||
* a DIFFERENT kernel". Cheap — the RO context is already built. Fail-closed on mismatch. */
|
||||
process procs[16];
|
||||
int n = proc_list(v, 0, procs, 16);
|
||||
int system_ok = 0;
|
||||
for (int i = 0; i < n && i < 16; i++)
|
||||
if (!strcmp(procs[i].name, "System")) { system_ok = (procs[i].cr3 == rq->cr3); break; }
|
||||
vmie_win32_close(v); /* validation-only: the read datum needs no held handle */
|
||||
if (!system_ok) return -1;
|
||||
rs->kcr3 = rq->cr3; /* validated: publish the read datum (NOT a->kcr3) */
|
||||
return 0;
|
||||
#else
|
||||
return -1; /* armed without the build flag: resume impossible -> cold bootstrap */
|
||||
#endif
|
||||
}
|
||||
|
||||
/* MC_JOB_BOOTSTRAP */
|
||||
if (a->stub) {
|
||||
/* test-only: fail the first fail_boots attempts to exercise the retry path
|
||||
@@ -259,6 +380,45 @@ static void mc_kick_bootstrap(struct vmsig_adapter* a) {
|
||||
(void)vmsig_worker_submit(a->worker, &rq, sizeof rq); /* full => drop (rare) */
|
||||
}
|
||||
|
||||
/* Submit the fast-path RESUME (off-loop: open_ro_fd reads image pages, not on the loop thread).
|
||||
* Carries the persisted kcr3 + the cfg low for vmie_win32_open_ro_fd. On miss/validation-fail the
|
||||
* completion handler falls back to a cold bootstrap — the persist never replaces it. */
|
||||
static void mc_kick_resume(struct vmsig_adapter* a, uint64_t kcr3) {
|
||||
mc_req rq;
|
||||
memset(&rq, 0, sizeof rq);
|
||||
rq.op = MC_JOB_RESUME; rq.cr3 = kcr3; rq.low = a->low;
|
||||
(void)vmsig_worker_submit(a->worker, &rq, sizeof rq); /* full => drop (rare) */
|
||||
}
|
||||
|
||||
/* Single publication path for BOTH RESUME and BOOTSTRAP (no two ways to publish a MEMCTX).
|
||||
* Assembles the single-low locator from `kcr3` + a->low, marks have_ctx, and emits the MEMCTX
|
||||
* trigger; the core authoritatively re-describes and stamps the epoch. Loop-thread-only.
|
||||
*
|
||||
* Ownership: this writes kcr3 ONLY into cur_pod.kcr3 (the delivery copy). It does NOT touch
|
||||
* a->kcr3 — that is the gva_write TARGET, owned solely by the bootstrap worker. The difference
|
||||
* between the two callers is only the SOURCE of kcr3 and whether an RW-hold / persist-save
|
||||
* follows; the locator assembly itself is shared here. */
|
||||
static void mc_publish_ctx(struct vmsig_adapter* a, uint64_t kcr3) {
|
||||
memset(&a->cur_pod, 0, sizeof a->cur_pod);
|
||||
a->cur_pod.kcr3 = kcr3;
|
||||
a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE;
|
||||
a->cur_pod.flags = VMSIG_MEMCTX_RDONLY;
|
||||
a->cur_nseg = 1; /* single-low identity (gpa 0 .. low) */
|
||||
a->cur_segs[0].gpa = 0;
|
||||
a->cur_segs[0].len = a->cur_pod.low;
|
||||
a->cur_segs[0].file_off = 0;
|
||||
a->cur_pod.nseg = a->cur_nseg;
|
||||
a->have_ctx = 1;
|
||||
|
||||
/* emit the MEMCTX trigger: the core authoritatively re-describes + stamps the epoch. */
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_MEMCTX; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
|
||||
memcpy(up.inln, &a->cur_pod, sizeof a->cur_pod);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
}
|
||||
|
||||
/* ---- reg hooks (vmsig_memctx_reg.ctx = a; called by the core on the loop thread) ---- */
|
||||
static void mc_reg_describe(void* ctx, vmsig_memctx* out_pod,
|
||||
const vmsig_memseg** out_segs, uint32_t* out_nseg) {
|
||||
@@ -286,6 +446,10 @@ static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
(void)epoch; /* the core owns the epoch; the adapter must re-bootstrap */
|
||||
a->have_ctx = 0; /* the previous context is invalid */
|
||||
/* destructive VM-lifecycle => the RAM may have changed => drop the kcr3 cache so the next
|
||||
* restart cannot fast-path off a now-dead kcr3 (the self-validation would reject it anyway,
|
||||
* but we do not leave a known-stale file). Best-effort, loop-thread-only. */
|
||||
mc_persist_drop(a->persist_path);
|
||||
/* new cycle: drop a stale arm from the previous cycle and restart the failure counter at
|
||||
* zero so this bootstrap's backoff starts fresh (and the first-failure diagnostic re-arms). */
|
||||
a->boot_attempts = 0;
|
||||
@@ -307,6 +471,7 @@ static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
|
||||
a->stub_fd = -1;
|
||||
a->retry_fd = -1;
|
||||
a->fail_boots = c ? c->fail_boots : 0; /* set once; read-only afterwards (worker reads) */
|
||||
a->persist_path = c ? c->persist_path : NULL; /* NULL => persist disabled (cold bootstrap only) */
|
||||
return a;
|
||||
}
|
||||
|
||||
@@ -364,7 +529,16 @@ static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg
|
||||
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
|
||||
mc_kick_bootstrap(a); /* first bootstrap off-loop; assemble the locator on completion */
|
||||
/* Fast-path: if a kcr3 cache exists, try a RESUME (re-validate it against live RAM) BEFORE
|
||||
* the cold scan. On a daemon restart over an unchanged guest this publishes the read datum
|
||||
* in milliseconds instead of minutes of beacon-scan retry. On any miss (persist off / stub /
|
||||
* no file / corrupt) we fall straight into the existing cold bootstrap. The RW-hold for
|
||||
* MEMWRITE is still acquired by a cold bootstrap (kicked in parallel after a RESUME hit). */
|
||||
mc_persist_blob b;
|
||||
if (a->persist_path && *a->persist_path && mc_persist_load(a->persist_path, &b))
|
||||
mc_kick_resume(a, b.kcr3); /* validate the cached kcr3 off-loop; cold fallback on miss */
|
||||
else
|
||||
mc_kick_bootstrap(a); /* first cold bootstrap off-loop; assemble locator on completion */
|
||||
return 2; /* worker eventfd + backoff timerfd */
|
||||
}
|
||||
|
||||
@@ -391,6 +565,27 @@ static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
|
||||
mc_memwrite_ack(a, rs.ok && rc == 0, rs.corr, rs.origin);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rs.op == MC_JOB_RESUME) {
|
||||
/* Fast-path completion. The persisted kcr3 was validated against the LIVE RAM on the
|
||||
* worker (open_ro_fd != NULL [+ System-cr3 match]) — the read datum is safe to publish.
|
||||
* Note: the worker did NOT set a->kcr3/a->win/a->mem (the RW write-hold), so MEMWRITE
|
||||
* stays ok=0 until a cold bootstrap acquires it. */
|
||||
if (rc == 0) {
|
||||
mc_publish_ctx(a, rs.kcr3); /* video lives instantly (read datum), epoch by core */
|
||||
mc_kick_bootstrap(a); /* in parallel: acquire the RW-hold (a->kcr3) for MEMWRITE */
|
||||
/* Do NOT save the persist (the kcr3 came FROM the file) and do NOT arm a retry
|
||||
* (the read datum is up; the parallel bootstrap arms its own retry on failure). */
|
||||
} else {
|
||||
/* validation miss: the persisted kcr3 no longer resolves the kernel (guest rebooted
|
||||
* or corrupt). Fall back to an honest cold scan; on success it rewrites the persist
|
||||
* with a fresh kcr3. Do NOT retry the RESUME — the cache is under suspicion. */
|
||||
mc_kick_bootstrap(a);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* MC_JOB_BOOTSTRAP */
|
||||
if (rc != 0) {
|
||||
/* bootstrap failed: the guest is likely still booting (host_bootstrap found no
|
||||
* System process). This is NOT a control-level error — do NOT emit VMSIG_EV_ERROR
|
||||
@@ -405,34 +600,29 @@ static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
|
||||
mc_arm_retry(a); /* one-shot timer at mc_boot_backoff(boot_attempts) */
|
||||
continue;
|
||||
}
|
||||
/* assemble the locator on the loop thread from rs.kcr3. a->kcr3 is the gva_write
|
||||
* TARGET and is owned SOLELY by the worker thread (set in mc_bootstrap_armed, read by
|
||||
* MC_JOB_WRITE — same thread, FIFO happens-before); the loop must NOT also write it, or
|
||||
* an in-flight write at line ~170 would race it. cur_pod.kcr3 is loop-only (delivery). */
|
||||
|
||||
/* bootstrap succeeded: cancel any pending retry and reset the failure counter BEFORE
|
||||
* publishing, so a stale timer armed by a prior failure cannot fire over a live context. */
|
||||
/* bootstrap succeeded: a->kcr3/a->mem (the gva_write TARGET / RW-hold) were set on the
|
||||
* worker (mc_bootstrap_armed); the loop must NOT also write a->kcr3 (it would race an
|
||||
* in-flight write — same FIFO thread owns it). MEMWRITE is now possible. cur_pod.kcr3 is
|
||||
* loop-only (delivery) and is set inside mc_publish_ctx.
|
||||
*
|
||||
* Cancel any pending retry and reset the failure counter BEFORE publishing, so a stale
|
||||
* timer armed by a prior failure cannot fire over a live context. */
|
||||
a->boot_attempts = 0;
|
||||
mc_disarm_retry(a);
|
||||
|
||||
memset(&a->cur_pod, 0, sizeof a->cur_pod);
|
||||
a->cur_pod.kcr3 = rs.kcr3;
|
||||
a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE;
|
||||
a->cur_pod.flags = VMSIG_MEMCTX_RDONLY;
|
||||
a->cur_nseg = 1; /* single-low identity (gpa 0 .. low) */
|
||||
a->cur_segs[0].gpa = 0;
|
||||
a->cur_segs[0].len = a->cur_pod.low;
|
||||
a->cur_segs[0].file_off = 0;
|
||||
a->cur_pod.nseg = a->cur_nseg;
|
||||
a->have_ctx = 1;
|
||||
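/* Publish only if no context is currently published (have_ctx == 0). After a RESUME hit
|
||||
* have_ctx is already set, so the parallel cold bootstrap acquires the RW-hold WITHOUT
|
||||
* emitting a redundant MEMCTX. First-time publication otherwise. NOTE(review): the guard tests have_ctx only — rs.kcr3 is not compared with the published cur_pod.kcr3; confirm the two cannot differ here. */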
|
||||
if (!a->have_ctx)
|
||||
mc_publish_ctx(a, rs.kcr3);
|
||||
|
||||
/* emit the MEMCTX trigger: the core authoritatively re-describes + stamps the epoch. */
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_MEMCTX; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
|
||||
memcpy(up.inln, &a->cur_pod, sizeof a->cur_pod);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
/* Cache the freshly-scanned kcr3 for the next daemon restart (best-effort; the datum is
|
||||
* already published). Only the cold scan writes the persist — never the RESUME path (its
|
||||
* kcr3 came from the file). Gated on persist_path presence: production stub paths get a
|
||||
* NULL persist_path from discovery, so they never write; a test may supply one to exercise
|
||||
* the persist mechanics (the stub bootstrap yields a synthetic-but-stable kcr3). */
|
||||
if (a->persist_path && *a->persist_path)
|
||||
(void)mc_persist_save(a->persist_path, rs.kcr3);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -121,6 +121,9 @@ static void bridge_id(char* out, size_t cap, uint32_t ep, char ab) {
|
||||
snprintf(out, cap, "vmsig-in-%c-%u", ab, ep);
|
||||
}
|
||||
|
||||
/* Best-effort object-del of a possibly-stale object id (defined below; fwd for bridge_add). */
|
||||
static void bridge_del_fire(struct vmsig_adapter* a, char ab);
|
||||
|
||||
/* Add one input-linux object forwarding an evdev node into the guest. grab_all toggles the
|
||||
* device-grab for every input-linux on this endpoint (set on A only — one is enough). The
|
||||
* reply is correlated through the existing pend[] table under VH_OP_BRIDGE_ADD and consumed
|
||||
@@ -130,6 +133,13 @@ static int bridge_add(struct vmsig_adapter* a, char ab, const char* evdev, int g
|
||||
if (!p) return -1;
|
||||
char id[32];
|
||||
bridge_id(id, sizeof id, a->endpoint, ab);
|
||||
/* Idempotent re-attach: fire object-del for this id FIRST. A prior daemon instance tears the
|
||||
* bridge down best-effort WITHOUT a round-trip (bridge_del_fire in vh_close), and a fast
|
||||
* restart/redeploy can reach here before QEMU processed that del — leaving the object live,
|
||||
* so a bare object-add fails with "duplicate property '<id>'" (observed for device B). QMP is
|
||||
* sequential per connection, so this del is applied before the add below; on a clean first
|
||||
* attach it just no-ops (DeviceNotFound, silently dropped — that frame carries no QMP id). */
|
||||
bridge_del_fire(a, ab);
|
||||
uint32_t qid = ++a->next_id;
|
||||
char line[320];
|
||||
int len = snprintf(line, sizeof line,
|
||||
@@ -144,9 +154,11 @@ static int bridge_add(struct vmsig_adapter* a, char ab, const char* evdev, int g
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Best-effort object_del fired on teardown before the fd closes (see vh_close). No reply is
|
||||
* awaited; QEMU also drops these objects when the VM powers off, so del matters only on a
|
||||
* detach without power-off (daemon restart / endpoint move). */
|
||||
/* Best-effort object-del, no reply awaited. Fired in TWO places: on teardown before the fd
|
||||
* closes (vh_close) AND before every object-add (bridge_add, idempotent re-attach). QEMU drops
|
||||
* these objects when the VM powers off, so del matters on a detach/re-attach without power-off
|
||||
* (daemon restart / endpoint move). A del of an absent id no-ops (DeviceNotFound, silently
|
||||
* dropped — this frame carries no QMP id, so handle_line finds no pend). */
|
||||
static void bridge_del_fire(struct vmsig_adapter* a, char ab) {
|
||||
char id[32];
|
||||
bridge_id(id, sizeof id, a->endpoint, ab);
|
||||
|
||||
@@ -60,6 +60,10 @@ struct vmsig_discovery {
|
||||
* writes these at attach; the vmhost seam borrows them to add input-linux objects. Same
|
||||
* lifetime discipline as ep_facts (outlives the deferred adapter reap). */
|
||||
struct { char evdev_a[64]; char evdev_b[64]; } ep_bridge[VMSIG_SLOT_COUNT];
|
||||
/* Stable per-endpoint home for the memctx kcr3-cache path (sibling of .slots in the watch
|
||||
* dir). The memctx adapter keeps the pointer across its lifetime; same lifetime discipline
|
||||
* as ep_facts/ep_bridge (outlives the deferred adapter reap, overwritten on next attach). */
|
||||
char ep_persist[VMSIG_SLOT_COUNT][DISC_PATH_MAX + 32];
|
||||
};
|
||||
|
||||
static uint64_t now_ns(void) {
|
||||
@@ -269,14 +273,25 @@ static void bootstrap_scan(vmsig_discovery* d) {
|
||||
|
||||
static int default_attach(void* ud, vmsig_core* core, uint32_t vmid, uint32_t endpoint,
|
||||
const vmsig_host_facts* f) {
|
||||
(void)vmid;
|
||||
vmsig_discovery* d = ud; /* default sink carries the discovery handle (ep_bridge home) */
|
||||
char* ev_a = d ? d->ep_bridge[endpoint].evdev_a : NULL;
|
||||
char* ev_b = d ? d->ep_bridge[endpoint].evdev_b : NULL;
|
||||
if (d) { ev_a[0] = '\0'; ev_b[0] = '\0'; } /* clear stale paths from a prior attach */
|
||||
|
||||
/* Form the kcr3-cache path (per-vmid, sibling of .slots/the RAM file in the watch dir).
|
||||
* Gated on d->persist — one policy for all ephemeral watch-dir state. NULL => persist off. */
|
||||
const char* persist_path = NULL;
|
||||
if (d && d->persist) {
|
||||
int pn = snprintf(d->ep_persist[endpoint], sizeof d->ep_persist[endpoint],
|
||||
"%s/.kcr3-vm-%u", d->watch_dir, vmid);
|
||||
/* only enable the cache if the path fit (a truncated path would point elsewhere). */
|
||||
if (pn > 0 && (size_t)pn < sizeof d->ep_persist[endpoint])
|
||||
persist_path = d->ep_persist[endpoint];
|
||||
}
|
||||
|
||||
vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
|
||||
mc.stub = 0; mc.ram_path = f->ram_path; mc.low = f->low; mc.ro_fd = -1;
|
||||
mc.persist_path = persist_path;
|
||||
vmsig_input_cfg in; memset(&in, 0, sizeof in);
|
||||
/* input is uinput; power/lifecycle via the vmhost seam. The adapter publishes its uinput
|
||||
* evdev paths into ep_bridge so the vmhost seam can forward them via input-linux. */
|
||||
|
||||
@@ -428,6 +428,130 @@ static void test_retry(void) {
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ---- 8-11. kcr3-persist MECHANICS (stub) ---------------------------------- *
|
||||
* These exercise the persist MACHINERY only: save/load, corruption fail-soft, drop-on-
|
||||
* invalidate, and the fast-vs-slow path selection. They do NOT exercise the real boot-session
|
||||
* validation (vmie_win32_open_ro_fd rejecting a stale kcr3) — that is VMIE-dependent and is
|
||||
* covered only on the armed stand. Under the stub, MC_JOB_RESUME synthetically ACCEPTS any
|
||||
* nonzero kcr3 (there is no live RAM to validate against), so a successful RESUME here proves
|
||||
* the mechanism wired the cached kcr3 into a publication, NOT that the kcr3 was validated. */
|
||||
|
||||
/* Returns 1 when `path` names an existing filesystem entry (access(F_OK) succeeds), else 0. */
static int file_exists(const char* path) {
    if (access(path, F_OK) != 0) return 0;
    return 1;
}
|
||||
|
||||
/* Run a memctx endpoint to its first MEMCTX (or the ticks failsafe) over a private core. */
|
||||
static void run_once(uint64_t* out_kcr3, int* out_memctx, const char* persist_path,
|
||||
uint32_t fail_boots) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder h; memset(&h, 0, sizeof h);
|
||||
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.stop_epoch = -1;
|
||||
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||
|
||||
vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
|
||||
mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = -1;
|
||||
mc.fail_boots = fail_boots; mc.persist_path = persist_path;
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx");
|
||||
|
||||
vmsig_core_run(core);
|
||||
if (out_kcr3) *out_kcr3 = h.last_kcr3;
|
||||
if (out_memctx) *out_memctx = h.memctx;
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* 8. save-then-resume. run1 does a cold stub bootstrap (fail_boots=0), publishes MEMCTX and
 * writes the cache. run2 reuses the SAME persist_path with fail_boots=1000: a cold bootstrap
 * would keep failing (no MEMCTX inside the loop budget), so a prompt MEMCTX carrying the
 * SAVED kcr3 shows that the RESUME fast-path bypassed the bootstrap. */
static void test_persist_save_then_resume(void) {
    printf("test_persist_save_then_resume\n");

    char path[256];
    snprintf(path, sizeof path, "/tmp/vmsig-kcrx-%d.bin", (int)getpid());
    unlink(path); /* start without a cache */

    /* run1: cold bootstrap, cache gets written */
    uint64_t k1 = 0;
    int m1 = 0;
    run_once(&k1, &m1, path, 0);
    CHECK(m1 >= 1, "run1 published MEMCTX");
    CHECK(k1 != 0, "run1 kcr3 nonzero");
    CHECK(file_exists(path), "run1 wrote the kcr3 cache file");

    /* run2: a cold bootstrap would fail 1000 times — only RESUME can publish promptly. */
    uint64_t k2 = 0;
    int m2 = 0;
    run_once(&k2, &m2, path, 1000);
    CHECK(m2 >= 1, "run2 published MEMCTX via the RESUME fast-path (bootstrap would have failed)");
    CHECK(k2 == k1, "run2 published the SAVED kcr3 (resumed from cache, not a fresh scan)");

    unlink(path);
}
|
||||
|
||||
/* 9. corrupt cache. A one-byte file is planted at the persist path, so the load is a miss
 * (fail-soft) and the cold bootstrap must still bring the context up. */
static void test_persist_corrupt(void) {
    printf("test_persist_corrupt\n");

    char path[256];
    snprintf(path, sizeof path, "/tmp/vmsig-kcrx-corrupt-%d.bin", (int)getpid());

    /* plant the bogus cache: 1 byte, shorter than any valid blob */
    int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
    CHECK(fd >= 0, "created a corrupt cache file");
    if (fd >= 0) {
        (void)!write(fd, "x", 1);
        close(fd);
    }

    /* load miss => cold bootstrap (fail_boots=0 => succeeds) */
    uint64_t k = 0;
    int m = 0;
    run_once(&k, &m, path, 0);
    CHECK(m >= 1, "MEMCTX still published after a corrupt cache (fail-soft load)");
    CHECK(k != 0, "kcr3 nonzero from the cold bootstrap");

    unlink(path);
}
|
||||
|
||||
/* 10. invalidate drops the cache; the re-bootstrap on the new epoch rewrites it fresh. */
|
||||
static void test_persist_invalidate_drop(void) {
|
||||
printf("test_persist_invalidate_drop\n");
|
||||
char path[256];
|
||||
snprintf(path, sizeof path, "/tmp/vmsig-kcrx-inv-%d.bin", (int)getpid());
|
||||
unlink(path);
|
||||
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder h; memset(&h, 0, sizeof h);
|
||||
/* inject a destructive lifecycle on epoch0 (as test_epoch); stop after epoch1. */
|
||||
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.inject_reset = 1; h.stop_epoch = 1;
|
||||
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||
vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
|
||||
mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = -1; mc.persist_path = path;
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx");
|
||||
|
||||
vmsig_core_run(core);
|
||||
|
||||
/* epoch0 bootstrap wrote the cache; invalidate dropped it; epoch1 bootstrap rewrote it. */
|
||||
CHECK(h.invalidated >= 1, "invalidation fired");
|
||||
CHECK(h.last_epoch == 1, "re-published at epoch 1 after invalidate");
|
||||
CHECK(file_exists(path), "cache rewritten by the post-invalidate bootstrap");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
unlink(path);
|
||||
}
|
||||
|
||||
/* 11. persist disabled. With persist_path=NULL the endpoint still publishes MEMCTX and the
 * probe path below stays absent (today's behavior). */
static void test_persist_stub_disabled(void) {
    printf("test_persist_stub_disabled\n");

    char path[256];
    snprintf(path, sizeof path, "/tmp/vmsig-kcrx-off-%d.bin", (int)getpid());
    unlink(path);

    /* persist off */
    uint64_t k = 0;
    int m = 0;
    run_once(&k, &m, NULL, 0);
    CHECK(m >= 1, "MEMCTX published with persist disabled");
    CHECK(!file_exists(path), "no cache file created when persist is disabled");

    unlink(path); /* belt-and-braces */
}
|
||||
|
||||
int main(void) {
|
||||
test_multicast();
|
||||
test_epoch();
|
||||
@@ -436,6 +560,10 @@ int main(void) {
|
||||
test_socket();
|
||||
test_ro_fd_ownership();
|
||||
test_retry();
|
||||
test_persist_save_then_resume();
|
||||
test_persist_corrupt();
|
||||
test_persist_invalidate_drop();
|
||||
test_persist_stub_disabled();
|
||||
printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
|
||||
+12
-2
@@ -5,8 +5,10 @@
|
||||
*
|
||||
* It also verifies the host->guest input bridge: with bridge_evdev_a/b set in cfg, on reaching
|
||||
* READY the seam adds two input-linux objects (A with grab_all, B without) over its own
|
||||
* connection, with neutral per-endpoint ids and the evdev paths from cfg; the bridge replies
|
||||
* never surface as ACK/VM_LIFECYCLE to control; on teardown it fires object_del for both. */
|
||||
* connection, with neutral per-endpoint ids and the evdev paths from cfg; each add is preceded
|
||||
* by an idempotent object-del of the same id (clears a stale object from a crashed/racing prior
|
||||
* daemon); the bridge replies never surface as ACK/VM_LIFECYCLE to control; on teardown it
|
||||
* fires object-del for both. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig.h"
|
||||
#include "vmhost.h" /* private cfg (CMake provides the include path) */
|
||||
@@ -148,6 +150,10 @@ int main(void) {
|
||||
CHECK(srv_expect(c, "\"vmsig-in-b-0\""), "bridge B has neutral per-endpoint id");
|
||||
CHECK(srv_expect(c, EVDEV_A), "bridge A carries the cfg evdev path for A");
|
||||
CHECK(srv_expect(c, EVDEV_B), "bridge B carries the cfg evdev path for B");
|
||||
/* Idempotent re-attach: each add is preceded by an object-del of the same id. The EOF
|
||||
* teardown below skips del (seam DEAD), so this object-del can ONLY originate from the
|
||||
* del-before-add path. */
|
||||
CHECK(srv_expect(c, "object-del"), "bridge fires object-del before add (idempotent re-attach)");
|
||||
srv_send(c, "{\"return\": {}, \"id\": 1}\r\n"); /* ack bridge A (consumed silently) */
|
||||
srv_send(c, "{\"return\": {}, \"id\": 2}\r\n"); /* ack bridge B (consumed silently) */
|
||||
|
||||
@@ -219,6 +225,10 @@ int main(void) {
|
||||
srv_send(c2, "{\"return\": {}, \"id\": 1}\r\n");
|
||||
srv_send(c2, "{\"return\": {}, \"id\": 2}\r\n");
|
||||
|
||||
/* Attach already emitted object-del (del-before-add). Reset the accumulator so the
|
||||
* teardown del below is verified in ISOLATION, not satisfied by the attach del. */
|
||||
rx_reset();
|
||||
|
||||
/* Clean reap WITHOUT EOF: stop the loop then free (vh_close fires del). */
|
||||
vmsig_core_stop(core2);
|
||||
pthread_join(th2, NULL);
|
||||
|
||||
Reference in New Issue
Block a user