/* test_memctx.c — seam for the coherent address-space context (MEMCTX). * * 1) multicast + RO-fd + decode: a holder with CAP_MEMCTX receives MEMCTX, kcr3, * epoch=0, nseg=1; the received fd mmaps PROT_READ, while PROT_WRITE -> EACCES * (RO physically enforced); the vmsig_memctx_segs helper reconstructs segs[]; * a co-resident holder without CAP_MEMCTX does NOT receive it (deny); * 2) epoch: a synthetic destructive VM_LIFECYCLE -> MEMCTX_INVALIDATED -> * re-multicast at epoch+1 with a NEW kcr3; * 3) retain/replay: a LATE subscriber (attached AFTER publication) receives * the retained MEMCTX with a valid re-shared RO-fd (synchronously on add_control); * 4) multi-VM: two endpoints, isolation (a VM holder does not see another's context); * 5) socket E2E: MEMCTX travels as a vmsig_wire frame + RO-fd in cmsg (SCM_RIGHTS), the * client mmaps RO via the received fd. * In-proc (except 5) and under ASAN. SISC: not a single control name in the adapter. */ #define _GNU_SOURCE #include "vmsig.h" #include "vmsig_socket.h" /* vmsig_wire, vmsig_socket_attach */ #include "core_internal.h" /* core_emit_up (synthetic lifecycle injection) */ #include "memctx.h" /* vmsig_memctx_cfg (infra ro_fd ownership test) */ #include #include #include #include #include #include #include #include #include #include #include #include static int g_fail = 0; #define CHECK(cond, msg) do { \ if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \ } while (0) /* ===== in-proc holder ===== */ typedef struct holder holder; struct holder { vmsig_core* core; holder* peer; /* multi-VM: stop when both are ready (or NULL) */ int is_driver; /* stops the loop on a condition */ uint32_t expect_ep; int memctx, invalidated, ticks, bad_ep, errors; uint64_t last_kcr3, kcr3_e0; uint32_t last_epoch, last_nseg; int ro_ok, rw_eacces, seg0_ok; int inject_reset, injected; int stop_epoch; /* stop when last_epoch>=stop_epoch (-1 = else) */ }; static void maybe_stop(holder* h) { if (!h->is_driver) return; if (h->ticks > 30) { vmsig_core_stop(h->core); return; } /* failsafe (vmhost ticks) */ if (h->stop_epoch >= 0) { if ((int)h->last_epoch >= h->stop_epoch && h->memctx >= 1) vmsig_core_stop(h->core); } else if (h->peer) { if (h->memctx >= 1 && h->peer->memctx >= 1) vmsig_core_stop(h->core); } else if (h->memctx >= 1) { vmsig_core_stop(h->core); } } static int h_on_ev(void* u, const vmsig_event* ev) { holder* h = u; if (ev->kind == VMSIG_EV_VM_LIFECYCLE) h->ticks++; else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) h->invalidated++; else if (ev->kind == VMSIG_EV_ERROR) h->errors++; /* no boot-retry ERROR spam */ maybe_stop(h); return 0; } static int h_on_memctx(void* u, const vmsig_event* ev, int fd) { holder* h = u; const vmsig_memctx* m = (const vmsig_memctx*)ev->inln; h->memctx++; if (ev->endpoint != h->expect_ep) h->bad_ep++; h->last_kcr3 = m->kcr3; h->last_epoch = m->epoch; if (m->epoch == 0) h->kcr3_e0 = m->kcr3; uint32_t n = 0; const vmsig_memseg* segs = vmsig_memctx_segs(ev, &n); h->last_nseg = n; if (segs && n >= 1 && segs[0].gpa == 0 && segs[0].len == m->low) h->seg0_ok = 1; if (fd >= 0 && m->low) { void* ro = mmap(NULL, (size_t)m->low, PROT_READ, MAP_SHARED, fd, 0); if (ro != MAP_FAILED) { h->ro_ok = 1; munmap(ro, (size_t)m->low); } void* rw = mmap(NULL, (size_t)m->low, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (rw == MAP_FAILED) h->rw_eacces = 1; else munmap(rw, (size_t)m->low); } /* epoch test: on the first context (epoch0) inject a destructive transition. */ if (h->inject_reset && !h->injected && m->epoch == 0) { h->injected = 1; vmsig_event lc; memset(&lc, 0, sizeof lc); lc.kind = VMSIG_EV_VM_LIFECYCLE; lc.source = VMSIG_SRC_VMHOST; lc.dir = VMSIG_DIR_UP; lc.prio = VMSIG_PRIO_URGENT; lc.endpoint = h->expect_ep; lc.origin = 0; vmsig_vm_state vs = { VMSIG_VM_RESET, 0 }; memcpy(lc.inln, &vs, sizeof vs); core_emit_up(h->core, &lc); /* core: epoch++ + invalidate + re-multicast */ } maybe_stop(h); return 0; } static void add_holder(vmsig_core* core, holder* h, uint32_t cap, uint32_t source_mask, uint64_t endpoint_mask) { vmsig_inproc_cfg cfg; memset(&cfg, 0, sizeof cfg); cfg.on_event = h_on_ev; cfg.on_memctx = h_on_memctx; cfg.user = h; void* ctl = vmsig_inproc_control_new(&cfg); vmsig_grant g; memset(&g, 0, sizeof g); g.principal = 5; g.endpoint_mask = endpoint_mask; g.source_mask = source_mask; g.cap_mask = cap; vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &g); } /* ---- 1. multicast + RO-fd + decode + deny ---------------------------------- */ static void test_multicast(void) { printf("test_multicast\n"); vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_core* core = vmsig_core_new(ctx); holder good; memset(&good, 0, sizeof good); good.core = core; good.is_driver = 1; good.expect_ep = 0; good.stop_epoch = -1; holder deny; memset(&deny, 0, sizeof deny); deny.core = core; deny.expect_ep = 0; deny.stop_epoch = -1; add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); add_holder(core, &deny, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); /* no MEMCTX */ CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)"); CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx"); vmsig_core_run(core); CHECK(good.memctx >= 1, "GOOD received MEMCTX"); CHECK(good.last_kcr3 != 0, "kcr3 nonzero"); CHECK(good.last_epoch == 0, "first publication is epoch 0"); CHECK(good.last_nseg == 1, "nseg=1 (single-low)"); CHECK(good.seg0_ok, "segs[] decoded by the helper (gpa=0,len=low)"); CHECK(good.ro_ok, "RO-fd: mmap(PROT_READ) ok"); CHECK(good.rw_eacces, "RO-fd: mmap(PROT_WRITE) -> EACCES (RO enforced)"); CHECK(good.bad_ep == 0, "delivery endpoint is correct"); CHECK(deny.memctx == 0, "deny without CAP_MEMCTX does NOT receive MEMCTX"); vmsig_core_free(core); vmsig_ctx_free(ctx); } /* ---- 2. epoch: invalidation + re-multicast epoch+1 ------------------------- */ static void test_epoch(void) { printf("test_epoch\n"); vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_core* core = vmsig_core_new(ctx); holder h; memset(&h, 0, sizeof h); h.core = core; h.is_driver = 1; h.expect_ep = 0; h.inject_reset = 1; h.stop_epoch = 1; add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)"); CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx"); vmsig_core_run(core); CHECK(h.memctx >= 2, "contexts for epochs 0 and 1 received"); CHECK(h.invalidated >= 1, "MEMCTX_INVALIDATED delivered on epoch change"); CHECK(h.last_epoch == 1, "re-multicast at epoch+1"); CHECK(h.kcr3_e0 != 0 && h.last_kcr3 != 0 && h.last_kcr3 != h.kcr3_e0, "new kcr3 after re-bootstrap (epoch 1 kcr3 != epoch 0)"); vmsig_core_free(core); vmsig_ctx_free(ctx); } /* ---- 3. retain/replay to a late subscriber --------------------------------- */ static void test_retain(void) { printf("test_retain\n"); vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_core* core = vmsig_core_new(ctx); holder a; memset(&a, 0, sizeof a); a.core = core; a.is_driver = 1; a.expect_ep = 0; a.stop_epoch = -1; add_holder(core, &a, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)"); CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx"); vmsig_core_run(core); /* A receives MEMCTX, loop stopped */ CHECK(a.memctx >= 1, "early subscriber A received MEMCTX"); /* LATE subscriber B: attaches AFTER publication. Replay of the retained context * happens SYNCHRONOUSLY in add_control (cell valid) — without a second loop run. */ holder b; memset(&b, 0, sizeof b); b.core = core; b.expect_ep = 0; b.stop_epoch = -1; add_holder(core, &b, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0); CHECK(b.memctx >= 1, "late subscriber B received the retained MEMCTX (replay)"); CHECK(b.last_kcr3 != 0, "B: kcr3 nonzero in the replay"); CHECK(b.ro_ok, "B: re-shared RO-fd mmaps PROT_READ"); CHECK(b.rw_eacces, "B: re-shared fd is RO (PROT_WRITE -> EACCES)"); vmsig_core_free(core); vmsig_ctx_free(ctx); } /* ---- 4. multi-VM: endpoint isolation --------------------------------------- */ static void test_multivm(void) { printf("test_multivm\n"); vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_core* core = vmsig_core_new(ctx); holder h0; memset(&h0, 0, sizeof h0); holder h1; memset(&h1, 0, sizeof h1); h0.core = core; h0.is_driver = 1; h0.expect_ep = 0; h0.stop_epoch = -1; h0.peer = &h1; h1.core = core; h1.is_driver = 1; h1.expect_ep = 1; h1.stop_epoch = -1; h1.peer = &h0; /* each holder is scoped to its OWN endpoint (+OBSERVE for watchdog lifecycle ticks on ep0). */ add_holder(core, &h0, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); add_holder(core, &h1, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 1); CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost ep0 (watchdog)"); CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx ep0"); CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 1) >= 0, "add memctx ep1"); vmsig_core_run(core); CHECK(h0.memctx >= 1 && h0.bad_ep == 0, "VM0 receives ONLY its own context (ep0)"); CHECK(h1.memctx >= 1 && h1.bad_ep == 0, "VM1 receives ONLY its own context (ep1)"); vmsig_core_free(core); vmsig_ctx_free(ctx); } /* ---- 5. socket end-to-end: MEMCTX frame + fd in cmsg ----------------------- */ #define SOCK_EP 3u static vmsig_grant sock_policy(uint32_t uid, uint32_t pid, void* ud) { (void)pid; (void)ud; vmsig_grant g; memset(&g, 0, sizeof g); g.principal = uid; g.endpoint_mask = 1ull << SOCK_EP; g.source_mask = 0xFFFFFFFFu; g.cap_mask = VMSIG_CAP_MEMCTX; return g; } static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; } static int connect_abstract(const char* name) { int fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) return -1; struct sockaddr_un a; memset(&a, 0, sizeof a); a.sun_family = AF_UNIX; size_t n = strlen(name); a.sun_path[0] = 0; memcpy(a.sun_path + 1, name + 1, n - 1); socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n); if (connect(fd, (struct sockaddr*)&a, alen) < 0) { close(fd); return -1; } return fd; } /* Read ONE 80-byte vmsig_wire frame; the adjacent fd (cmsg) -> into *out_fd. */ static int recv_wire(int fd, vmsig_wire* w, int* out_fd) { *out_fd = -1; struct iovec iov = { .iov_base = w, .iov_len = sizeof *w }; union { char buf[CMSG_SPACE(sizeof(int))]; struct cmsghdr a; } cm; memset(&cm, 0, sizeof cm); struct msghdr mh; memset(&mh, 0, sizeof mh); mh.msg_iov = &iov; mh.msg_iovlen = 1; mh.msg_control = cm.buf; mh.msg_controllen = sizeof cm.buf; size_t got = 0; while (got < sizeof *w) { iov.iov_base = (char*)w + got; iov.iov_len = sizeof *w - got; ssize_t n = recvmsg(fd, &mh, MSG_CMSG_CLOEXEC); if (n <= 0) return (got == 0) ? 0 : -1; for (struct cmsghdr* c = CMSG_FIRSTHDR(&mh); c; c = CMSG_NXTHDR(&mh, c)) if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS) memcpy(out_fd, CMSG_DATA(c), sizeof(int)); got += (size_t)n; mh.msg_control = NULL; mh.msg_controllen = 0; /* fd only on the first recvmsg */ } return 1; } static void test_socket(void) { printf("test_socket\n"); vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_core* core = vmsig_core_new(ctx); CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, SOCK_EP) >= 0, "add memctx"); const char* SOCK = "@vmsig-memctx-e2e"; CHECK(vmsig_socket_attach(core, SOCK, sock_policy, NULL) == 0, "socket attach"); pthread_t th; pthread_create(&th, NULL, loop_main, core); int c = connect_abstract(SOCK); CHECK(c >= 0, "client connected"); if (c < 0) { vmsig_core_stop(core); pthread_join(th, NULL); vmsig_core_free(core); vmsig_ctx_free(ctx); return; } struct timeval tv = { .tv_sec = 3, .tv_usec = 0 }; setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv); int got_ctx = 0, ro_fd = -1, ro_ok = 0, rw_eacces = 0; vmsig_memctx pod; memset(&pod, 0, sizeof pod); for (int iter = 0; iter < 20 && !got_ctx; iter++) { vmsig_wire w; int wfd = -1; int r = recv_wire(c, &w, &wfd); if (r != 1) break; if (w.kind == VMSIG_EV_MEMCTX) { got_ctx = 1; ro_fd = wfd; memcpy(&pod, w.inln, sizeof pod); CHECK(ro_fd >= 0, "MEMCTX frame carries an RO-fd in cmsg"); CHECK(pod.flags & VMSIG_MEMCTX_RDONLY, "RDONLY flag is set"); if (ro_fd >= 0 && pod.low) { void* ro = mmap(NULL, (size_t)pod.low, PROT_READ, MAP_SHARED, ro_fd, 0); if (ro != MAP_FAILED) { ro_ok = 1; munmap(ro, (size_t)pod.low); } void* rw = mmap(NULL, (size_t)pod.low, PROT_READ | PROT_WRITE, MAP_SHARED, ro_fd, 0); if (rw == MAP_FAILED) rw_eacces = 1; else munmap(rw, (size_t)pod.low); } } } CHECK(got_ctx == 1, "MEMCTX frame arrived over the socket (wire framing)"); CHECK(ro_ok, "mmap RO via the received fd"); CHECK(rw_eacces, "write-mmap via the received fd fails (RO)"); if (ro_fd >= 0) close(ro_fd); close(c); vmsig_core_stop(core); pthread_join(th, NULL); vmsig_core_free(core); vmsig_ctx_free(ctx); } /* ---- 6. ro_fd ownership: an infra-supplied RO-fd is closed by the adapter --- * * Regression for the latent leak: cfg.ro_fd ownership transfers to the adapter at * open(); mc_close() must close it, so a re-grant (detach + re-attach with a fresh * infra ro_fd) does not leak the prior one. Only DUPS leave outward (one per share), * so the original stays open across the run and is reaped at adapter close. */ #ifndef MFD_CLOEXEC #include #include static int memfd_create(const char* name, unsigned int flags) { return (int)syscall(SYS_memfd_create, name, flags); } #endif #ifndef MFD_ALLOW_SEALING #define MFD_ALLOW_SEALING 0x0002U #endif #ifndef F_ADD_SEALS #define F_ADD_SEALS (1024 + 9) #define F_SEAL_SHRINK 0x0002 #define F_SEAL_GROW 0x0004 #endif #ifndef F_SEAL_FUTURE_WRITE #define F_SEAL_FUTURE_WRITE 0x0010 #endif static int make_ro_backing(uint32_t size) { int fd = memfd_create("vmsig_test_ro", MFD_CLOEXEC | MFD_ALLOW_SEALING); if (fd < 0) fd = memfd_create("vmsig_test_ro", MFD_CLOEXEC); if (fd < 0) return -1; if (ftruncate(fd, (off_t)size) != 0) { close(fd); return -1; } (void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_FUTURE_WRITE); return fd; } static void test_ro_fd_ownership(void) { printf("test_ro_fd_ownership\n"); int ro = make_ro_backing(0x10000u); /* >= the stub low so the holder can mmap */ CHECK(ro >= 0, "created an RO backing fd"); if (ro < 0) return; vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_core* core = vmsig_core_new(ctx); holder h; memset(&h, 0, sizeof h); h.core = core; h.is_driver = 1; h.expect_ep = 0; h.stop_epoch = -1; add_holder(core, &h, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0); /* stub kcr3 (no VM) but a REAL infra ro_fd handed in for the RO share path. */ vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc); mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = ro; CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx (infra ro_fd)"); vmsig_core_run(core); CHECK(h.memctx >= 1, "holder received MEMCTX over the infra ro_fd"); CHECK(h.ro_ok, "infra ro_fd re-shared and mmaps PROT_READ"); CHECK(fcntl(ro, F_GETFD) >= 0, "infra ro_fd still open before close (no premature close)"); vmsig_core_free(core); /* mc_close closes the owned cfg_ro_fd */ vmsig_ctx_free(ctx); CHECK(fcntl(ro, F_GETFD) == -1, "infra ro_fd closed by mc_close after free (no leak)"); if (fcntl(ro, F_GETFD) >= 0) close(ro); /* belt-and-braces if the assert failed */ } /* ---- 7. cold-bootstrap retry: stub fails N times, then publishes via backoff ----- * * Regression for the cold-bootstrap-while-guest-boots bug: a failed bootstrap must NOT be * terminal nor emit URGENT ERROR — it arms a one-shot backoff timerfd that re-kicks the * bootstrap until it succeeds. fail_boots=3 makes the first three stub bootstraps fail * deterministically (no timing dependence); the real timerfd fires at ~50/100/200ms, so the * 4th kick succeeds sub-second. vmhost is added (as test_multicast) for the ticks failsafe * and a realistic loop; stop on memctx>=1 (stop_epoch=-1). */ static void test_retry(void) { printf("test_retry\n"); vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_core* core = vmsig_core_new(ctx); holder h; memset(&h, 0, sizeof h); h.core = core; h.is_driver = 1; h.expect_ep = 0; h.stop_epoch = -1; /* OBSERVE so vmhost lifecycle ticks reach maybe_stop (ticks>30 failsafe) and ERROR * (if any) is counted; MEMCTX cap to receive the published context. */ add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)"); vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc); mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = -1; mc.fail_boots = 3; CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx (fail_boots=3)"); vmsig_core_run(core); CHECK(h.memctx >= 1, "MEMCTX published after a series of bootstrap failures (retry worked)"); CHECK(h.last_kcr3 != 0, "valid kcr3 after the successful retry"); CHECK(h.errors == 0, "no ERROR spam during boot retries"); vmsig_core_free(core); vmsig_ctx_free(ctx); } int main(void) { test_multicast(); test_epoch(); test_retain(); test_multivm(); test_socket(); test_ro_fd_ownership(); test_retry(); printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS"); return g_fail ? 1 : 0; }