/* Origin: vatrog-vm-signaling/src/test/test_memctx.c
 * (repository file view: 406 lines, 17 KiB, C). */

/* test_memctx.c — seam for the coherent address-space context (MEMCTX).
*
* 1) multicast + RO-fd + decode: a holder with CAP_MEMCTX receives MEMCTX, kcr3,
* epoch=0, nseg=1; the received fd mmaps PROT_READ, while PROT_WRITE -> EACCES
* (RO physically enforced); the vmsig_memctx_segs helper reconstructs segs[];
* a co-resident holder without CAP_MEMCTX does NOT receive it (deny);
* 2) epoch: a synthetic destructive VM_LIFECYCLE -> MEMCTX_INVALIDATED ->
* re-multicast at epoch+1 with a NEW kcr3;
* 3) retain/replay: a LATE subscriber (attached AFTER publication) receives
* the retained MEMCTX with a valid re-shared RO-fd (synchronously on add_control);
* 4) multi-VM: two endpoints, isolation (a VM holder does not see another's context);
* 5) socket E2E: MEMCTX travels as a vmsig_wire frame + RO-fd in cmsg (SCM_RIGHTS), the
* client mmaps RO via the received fd.
* In-proc (except 5) and under ASAN. SISC: not a single control name in the adapter. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "vmsig_socket.h" /* vmsig_wire, vmsig_socket_attach */
#include "core_internal.h" /* core_emit_up (synthetic lifecycle injection) */
#include "memctx.h" /* vmsig_memctx_cfg (infra ro_fd ownership test) */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stddef.h>
/* Sticky failure flag: latched to 1 by the first failing CHECK and never cleared. */
static int g_fail = 0;

/* CHECK(cond, msg): soft assertion. A true cond is a no-op; a false cond prints
 * " FAIL: <msg>" on stdout and latches g_fail. Execution always continues, and
 * msg is evaluated only on the failure path. */
#define CHECK(cond, msg) do {          \
    if (cond) break;                   \
    printf(" FAIL: %s\n", (msg));      \
    g_fail = 1;                        \
} while (0)
/* ===== in-proc holder ===== */
/* Per-subscriber bookkeeping: filled in by the in-proc callbacks (h_on_ev,
 * h_on_memctx) and inspected by the tests once the loop has stopped. The tests
 * zero-fill an instance before setting the configuration fields. */
typedef struct holder holder;
struct holder {
vmsig_core* core; /* core handed to vmsig_core_stop() / core_emit_up() */
holder* peer; /* multi-VM: stop only when this holder AND peer have a context (NULL = no peer) */
int is_driver; /* nonzero: this holder is allowed to stop the loop (see maybe_stop) */
uint32_t expect_ep; /* endpoint every delivered MEMCTX must carry; also used for the injected reset */
/* counters: MEMCTX deliveries, MEMCTX_INVALIDATED events, VM_LIFECYCLE events,
 * and MEMCTX deliveries whose endpoint != expect_ep */
int memctx, invalidated, ticks, bad_ep;
uint64_t last_kcr3, kcr3_e0; /* kcr3 of the latest context / of the epoch-0 context */
uint32_t last_epoch, last_nseg; /* epoch of the latest context / segment count from vmsig_memctx_segs */
/* ro_ok: PROT_READ mmap of the fd succeeded; rw_eacces: PROT_READ|PROT_WRITE mmap
 * failed (errno is not inspected); seg0_ok: segs[0] is {gpa=0, len=low} */
int ro_ok, rw_eacces, seg0_ok;
int inject_reset, injected; /* request a one-shot synthetic VM_RESET on epoch 0 / already done */
int stop_epoch; /* >= 0: stop once last_epoch reaches it; negative: use the peer / first-context rule */
};
/* Stop the core loop once this holder's termination condition is met.
 * Only driver holders may stop the loop. Rules, in priority order:
 *   - more than 30 lifecycle ticks seen: unconditional failsafe;
 *   - stop_epoch >= 0: a context was received and last_epoch reached stop_epoch;
 *   - peer set: both this holder and its peer received a context;
 *   - otherwise: this holder received a context. */
static void maybe_stop(holder* h) {
    int done;

    if (!h->is_driver) {
        return;
    }

    if (h->ticks > 30) {
        done = 1; /* failsafe (vmhost ticks) */
    } else if (h->stop_epoch >= 0) {
        done = ((int)h->last_epoch >= h->stop_epoch) && (h->memctx >= 1);
    } else if (h->peer != NULL) {
        done = (h->memctx >= 1) && (h->peer->memctx >= 1);
    } else {
        done = (h->memctx >= 1);
    }

    if (done) {
        vmsig_core_stop(h->core);
    }
}
/* Generic event callback: counts lifecycle ticks and MEMCTX_INVALIDATED
 * notifications on the holder passed as `u`, then re-evaluates the stop
 * condition. Always returns 0. */
static int h_on_ev(void* u, const vmsig_event* ev) {
    holder* self = u;

    if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
        self->ticks += 1;
    } else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) {
        self->invalidated += 1;
    }

    maybe_stop(self);
    return 0;
}
/* MEMCTX callback: records what was delivered and probes the accompanying fd.
 *
 *   u  - the holder registered as cfg.user
 *   ev - the MEMCTX event; its inline payload starts with a vmsig_memctx
 *   fd - descriptor delivered with the event (negative when none)
 *
 * Recorded on the holder: delivery count, endpoint mismatches, kcr3/epoch of the
 * latest context (plus the epoch-0 kcr3), the segment count reported by
 * vmsig_memctx_segs, and the outcome of a read-only and a read-write mmap probe.
 * When inject_reset is set, the first epoch-0 delivery also emits one synthetic
 * VM_RESET lifecycle event. Always returns 0. */
static int h_on_memctx(void* u, const vmsig_event* ev, int fd) {
    holder* h = u;

    /* Decode by copy rather than by casting ev->inln: no alignment or aliasing
     * assumptions about the inline buffer (same approach as the socket test). */
    vmsig_memctx m;
    memcpy(&m, ev->inln, sizeof m);

    h->memctx++;
    if (ev->endpoint != h->expect_ep) h->bad_ep++;
    h->last_kcr3 = m.kcr3;
    h->last_epoch = m.epoch;
    if (m.epoch == 0) h->kcr3_e0 = m.kcr3;

    uint32_t n = 0;
    const vmsig_memseg* segs = vmsig_memctx_segs(ev, &n);
    h->last_nseg = n;
    if (segs && n >= 1 && segs[0].gpa == 0 && segs[0].len == m.low) h->seg0_ok = 1;

    if (fd >= 0 && m.low) {
        void* ro = mmap(NULL, (size_t)m.low, PROT_READ, MAP_SHARED, fd, 0);
        if (ro != MAP_FAILED) { h->ro_ok = 1; munmap(ro, (size_t)m.low); }

        /* NOTE(review): any failure of the writable mapping sets rw_eacces;
         * errno is not compared against EACCES here. */
        void* rw = mmap(NULL, (size_t)m.low, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (rw == MAP_FAILED) h->rw_eacces = 1; else munmap(rw, (size_t)m.low);
    }

    /* epoch test: on the first context (epoch0) inject a destructive transition. */
    if (h->inject_reset && !h->injected && m.epoch == 0) {
        h->injected = 1; /* one-shot: set before emitting */
        vmsig_event lc; memset(&lc, 0, sizeof lc);
        lc.kind = VMSIG_EV_VM_LIFECYCLE; lc.source = VMSIG_SRC_VMHOST; lc.dir = VMSIG_DIR_UP;
        lc.prio = VMSIG_PRIO_URGENT; lc.endpoint = h->expect_ep; lc.origin = 0;
        vmsig_vm_state vs = { VMSIG_VM_RESET, 0 };
        memcpy(lc.inln, &vs, sizeof vs);
        core_emit_up(h->core, &lc); /* core: epoch++ + invalidate + re-multicast */
    }

    maybe_stop(h);
    return 0;
}
/* Attach `h` to `core` as an in-proc control.
 *
 *   cap           - capability mask placed in the grant
 *   source_mask   - source mask placed in the grant
 *   endpoint_mask - endpoint bitmask placed in the grant
 *
 * The grant principal is fixed at 5. Both callbacks are wired to the holder.
 * NOTE(review): the results of vmsig_inproc_control_new() and
 * vmsig_core_add_control() are not checked here. */
static void add_holder(vmsig_core* core, holder* h, uint32_t cap,
                       uint32_t source_mask, uint64_t endpoint_mask) {
    vmsig_inproc_cfg inproc;
    vmsig_grant grant;
    void* control;

    memset(&inproc, 0, sizeof inproc);
    inproc.user = h;
    inproc.on_memctx = h_on_memctx;
    inproc.on_event = h_on_ev;
    control = vmsig_inproc_control_new(&inproc);

    memset(&grant, 0, sizeof grant);
    grant.cap_mask = cap;
    grant.source_mask = source_mask;
    grant.endpoint_mask = endpoint_mask;
    grant.principal = 5;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), control, &grant);
}
/* ---- 1. multicast + RO-fd + decode + deny ---------------------------------- */
static void test_multicast(void) {
printf("test_multicast\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder good; memset(&good, 0, sizeof good);
good.core = core; good.is_driver = 1; good.expect_ep = 0; good.stop_epoch = -1;
holder deny; memset(&deny, 0, sizeof deny);
deny.core = core; deny.expect_ep = 0; deny.stop_epoch = -1;
add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
add_holder(core, &deny, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); /* no MEMCTX */
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
vmsig_core_run(core);
CHECK(good.memctx >= 1, "GOOD received MEMCTX");
CHECK(good.last_kcr3 != 0, "kcr3 nonzero");
CHECK(good.last_epoch == 0, "first publication is epoch 0");
CHECK(good.last_nseg == 1, "nseg=1 (single-low)");
CHECK(good.seg0_ok, "segs[] decoded by the helper (gpa=0,len=low)");
CHECK(good.ro_ok, "RO-fd: mmap(PROT_READ) ok");
CHECK(good.rw_eacces, "RO-fd: mmap(PROT_WRITE) -> EACCES (RO enforced)");
CHECK(good.bad_ep == 0, "delivery endpoint is correct");
CHECK(deny.memctx == 0, "deny without CAP_MEMCTX does NOT receive MEMCTX");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 2. epoch: invalidation + re-multicast epoch+1 ------------------------- */
static void test_epoch(void) {
printf("test_epoch\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder h; memset(&h, 0, sizeof h);
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.inject_reset = 1; h.stop_epoch = 1;
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
vmsig_core_run(core);
CHECK(h.memctx >= 2, "contexts for epochs 0 and 1 received");
CHECK(h.invalidated >= 1, "MEMCTX_INVALIDATED delivered on epoch change");
CHECK(h.last_epoch == 1, "re-multicast at epoch+1");
CHECK(h.kcr3_e0 != 0 && h.last_kcr3 != 0 && h.last_kcr3 != h.kcr3_e0,
"new kcr3 after re-bootstrap (epoch 1 kcr3 != epoch 0)");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 3. retain/replay to a late subscriber --------------------------------- */
static void test_retain(void) {
printf("test_retain\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder a; memset(&a, 0, sizeof a);
a.core = core; a.is_driver = 1; a.expect_ep = 0; a.stop_epoch = -1;
add_holder(core, &a, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
vmsig_core_run(core); /* A receives MEMCTX, loop stopped */
CHECK(a.memctx >= 1, "early subscriber A received MEMCTX");
/* LATE subscriber B: attaches AFTER publication. Replay of the retained context
* happens SYNCHRONOUSLY in add_control (cell valid) — without a second loop run. */
holder b; memset(&b, 0, sizeof b);
b.core = core; b.expect_ep = 0; b.stop_epoch = -1;
add_holder(core, &b, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0);
CHECK(b.memctx >= 1, "late subscriber B received the retained MEMCTX (replay)");
CHECK(b.last_kcr3 != 0, "B: kcr3 nonzero in the replay");
CHECK(b.ro_ok, "B: re-shared RO-fd mmaps PROT_READ");
CHECK(b.rw_eacces, "B: re-shared fd is RO (PROT_WRITE -> EACCES)");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 4. multi-VM: endpoint isolation --------------------------------------- */
static void test_multivm(void) {
printf("test_multivm\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder h0; memset(&h0, 0, sizeof h0);
holder h1; memset(&h1, 0, sizeof h1);
h0.core = core; h0.is_driver = 1; h0.expect_ep = 0; h0.stop_epoch = -1; h0.peer = &h1;
h1.core = core; h1.is_driver = 1; h1.expect_ep = 1; h1.stop_epoch = -1; h1.peer = &h0;
/* each holder is scoped to its OWN endpoint (+OBSERVE for watchdog lifecycle ticks on ep0). */
add_holder(core, &h0, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
add_holder(core, &h1, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 1);
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost ep0 (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx ep0");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 1) >= 0, "add memctx ep1");
vmsig_core_run(core);
CHECK(h0.memctx >= 1 && h0.bad_ep == 0, "VM0 receives ONLY its own context (ep0)");
CHECK(h1.memctx >= 1 && h1.bad_ep == 0, "VM1 receives ONLY its own context (ep1)");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 5. socket end-to-end: MEMCTX frame + fd in cmsg ----------------------- */
/* Endpoint used by the socket end-to-end test. */
#define SOCK_EP 3u

/* Grant policy passed to vmsig_socket_attach(): every peer gets MEMCTX capability
 * on endpoint SOCK_EP only, for all sources, with its uid as the principal.
 * pid and ud are unused. */
static vmsig_grant sock_policy(uint32_t uid, uint32_t pid, void* ud) {
    vmsig_grant grant;

    (void)pid;
    (void)ud;

    memset(&grant, 0, sizeof grant);
    grant.cap_mask = VMSIG_CAP_MEMCTX;
    grant.source_mask = 0xFFFFFFFFu;
    grant.endpoint_mask = 1ull << SOCK_EP;
    grant.principal = uid;
    return grant;
}
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
/* Connect a stream socket to a Linux abstract-namespace AF_UNIX address.
 *
 *   name - address in "@name" form: the first character is a placeholder that is
 *          replaced by the leading NUL byte of the abstract address; the rest is
 *          the abstract name (not NUL-terminated on the wire).
 *
 * Returns the connected descriptor, or -1 on failure. A NULL or empty name, or
 * one that does not fit in sun_path, is rejected before any socket is created
 * (previously an empty name underflowed the copy length and an over-long name
 * overran the address buffer). */
static int connect_abstract(const char* name) {
    struct sockaddr_un addr;

    if (name == NULL) return -1;
    const size_t n = strlen(name);
    /* The address occupies exactly n bytes of sun_path: 1 NUL + (n - 1) name bytes. */
    if (n == 0 || n > sizeof addr.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&addr, 0, sizeof addr);
    addr.sun_family = AF_UNIX;
    addr.sun_path[0] = 0; /* abstract namespace marker */
    memcpy(addr.sun_path + 1, name + 1, n - 1);

    /* Abstract addresses are length-delimited, so pass the exact length. */
    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    if (connect(fd, (struct sockaddr*)&addr, alen) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
/* Read exactly ONE vmsig_wire frame (sizeof *w bytes; 80 per the original note)
 * from the stream socket `fd`, looping over short reads.
 *
 *   w      - destination for the frame bytes
 *   out_fd - receives a descriptor passed via SCM_RIGHTS with the frame, or -1
 *
 * Returns 1 when a full frame was read, 0 when nothing could be read at all
 * (EOF or error before the first byte), -1 when the stream failed mid-frame.
 * On the -1 path a descriptor that already arrived is closed and *out_fd is
 * reset to -1, so the caller never has to clean up after a failed read. */
static int recv_wire(int fd, vmsig_wire* w, int* out_fd) {
    *out_fd = -1;
    struct iovec iov = { .iov_base = w, .iov_len = sizeof *w };
    /* union forces cmsghdr alignment on the control buffer */
    union { char buf[CMSG_SPACE(sizeof(int))]; struct cmsghdr a; } cm;
    memset(&cm, 0, sizeof cm);
    struct msghdr mh; memset(&mh, 0, sizeof mh);
    mh.msg_iov = &iov; mh.msg_iovlen = 1;
    mh.msg_control = cm.buf; mh.msg_controllen = sizeof cm.buf;
    size_t got = 0;
    while (got < sizeof *w) {
        iov.iov_base = (char*)w + got; iov.iov_len = sizeof *w - got;
        ssize_t n = recvmsg(fd, &mh, MSG_CMSG_CLOEXEC);
        if (n <= 0) {
            if (got == 0) return 0;
            /* truncated frame: do not leak a descriptor that came with its head */
            if (*out_fd >= 0) { close(*out_fd); *out_fd = -1; }
            return -1;
        }
        for (struct cmsghdr* c = CMSG_FIRSTHDR(&mh); c; c = CMSG_NXTHDR(&mh, c)) {
            if (c->cmsg_level != SOL_SOCKET || c->cmsg_type != SCM_RIGHTS) continue;
            if (c->cmsg_len < CMSG_LEN(sizeof(int))) continue; /* no complete fd in this message */
            int passed = -1;
            memcpy(&passed, CMSG_DATA(c), sizeof passed);
            if (*out_fd >= 0) close(*out_fd); /* keep only the latest, never leak the earlier one */
            *out_fd = passed;
        }
        got += (size_t)n;
        mh.msg_control = NULL; mh.msg_controllen = 0; /* fd only on the first recvmsg */
    }
    return 1;
}
/* Scenario 5 (socket end-to-end): a memctx adapter on SOCK_EP publishes through
 * an abstract UNIX socket; a client connects, reads wire frames until a MEMCTX
 * frame shows up, and probes the descriptor that came with it.
 *
 * Resource handling: the loop thread is joined only if it was actually created,
 * and every descriptor received from the socket is closed, including ones that
 * arrive with frames this test does not care about. */
static void test_socket(void) {
    printf("test_socket\n");
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, SOCK_EP) >= 0, "add memctx");
    const char* SOCK = "@vmsig-memctx-e2e";
    CHECK(vmsig_socket_attach(core, SOCK, sock_policy, NULL) == 0, "socket attach");

    /* Without a running loop there is nothing to stop or join: bail out early
     * instead of joining an indeterminate pthread_t. */
    pthread_t th;
    int th_rc = pthread_create(&th, NULL, loop_main, core);
    CHECK(th_rc == 0, "loop thread started");
    if (th_rc != 0) { vmsig_core_free(core); vmsig_ctx_free(ctx); return; }

    int c = connect_abstract(SOCK);
    CHECK(c >= 0, "client connected");
    if (c < 0) { vmsig_core_stop(core); pthread_join(th, NULL); vmsig_core_free(core); vmsig_ctx_free(ctx); return; }

    /* Bound every blocking read so a missing frame fails the test instead of hanging it. */
    struct timeval tv = { .tv_sec = 3, .tv_usec = 0 };
    CHECK(setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv) == 0, "receive timeout set");

    int got_ctx = 0, ro_fd = -1, ro_ok = 0, rw_eacces = 0;
    vmsig_memctx pod; memset(&pod, 0, sizeof pod);
    for (int iter = 0; iter < 20 && !got_ctx; iter++) {
        vmsig_wire w; int wfd = -1;
        int r = recv_wire(c, &w, &wfd);
        if (r != 1) {
            if (wfd >= 0) close(wfd); /* fd that arrived with a truncated frame */
            break;
        }
        if (w.kind != VMSIG_EV_MEMCTX) {
            if (wfd >= 0) close(wfd); /* fd attached to a frame we skip */
            continue;
        }
        got_ctx = 1; ro_fd = wfd;
        memcpy(&pod, w.inln, sizeof pod);
        CHECK(ro_fd >= 0, "MEMCTX frame carries an RO-fd in cmsg");
        CHECK(pod.flags & VMSIG_MEMCTX_RDONLY, "RDONLY flag is set");
        if (ro_fd >= 0 && pod.low) {
            void* ro = mmap(NULL, (size_t)pod.low, PROT_READ, MAP_SHARED, ro_fd, 0);
            if (ro != MAP_FAILED) { ro_ok = 1; munmap(ro, (size_t)pod.low); }
            void* rw = mmap(NULL, (size_t)pod.low, PROT_READ | PROT_WRITE, MAP_SHARED, ro_fd, 0);
            if (rw == MAP_FAILED) rw_eacces = 1; else munmap(rw, (size_t)pod.low);
        }
    }
    CHECK(got_ctx == 1, "MEMCTX frame arrived over the socket (wire framing)");
    CHECK(ro_ok, "mmap RO via the received fd");
    CHECK(rw_eacces, "write-mmap via the received fd fails (RO)");

    if (ro_fd >= 0) close(ro_fd);
    close(c);
    vmsig_core_stop(core);
    pthread_join(th, NULL);
    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* ---- 6. ro_fd ownership: an infra-supplied RO-fd is closed by the adapter --- *
* Regression for the latent leak: cfg.ro_fd ownership transfers to the adapter at
* open(); mc_close() must close it, so a re-grant (detach + re-attach with a fresh
* infra ro_fd) does not leak the prior one. Only DUPS leave outward (one per share),
* so the original stays open across the run and is reaped at adapter close. */
/* Compatibility shims for toolchains whose libc headers lack the memfd / file
 * sealing definitions. Each group is compiled only when its guard macro is
 * missing. */

/* No MFD_CLOEXEC visible: include <linux/memfd.h> (presumably for the MFD_*
 * flag values) and provide memfd_create() as a thin wrapper over the raw
 * SYS_memfd_create syscall. */
#ifndef MFD_CLOEXEC
#include <sys/syscall.h>
#include <linux/memfd.h>
static int memfd_create(const char* name, unsigned int flags) {
return (int)syscall(SYS_memfd_create, name, flags);
}
#endif
/* Flag that lets seals be added to the memfd later on. */
#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif
/* fcntl sealing command and the shrink/grow seal bits; hard-coded numeric
 * fallbacks intended to match the Linux definitions. */
#ifndef F_ADD_SEALS
#define F_ADD_SEALS (1024 + 9)
#define F_SEAL_SHRINK 0x0002
#define F_SEAL_GROW 0x0004
#endif
/* Guarded separately from the older seal bits above. */
#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE 0x0010
#endif
/* Create an anonymous memfd of `size` bytes to serve as the infra-supplied
 * backing descriptor.
 *
 * Sealing is best effort: creation is retried without MFD_ALLOW_SEALING if the
 * first attempt fails, and the outcome of F_ADD_SEALS is deliberately ignored.
 * Returns the descriptor, or -1 if creation or sizing failed. */
static int make_ro_backing(uint32_t size) {
    static const char label[] = "vmsig_test_ro";
    const int seals = F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_FUTURE_WRITE;

    int mfd = memfd_create(label, MFD_CLOEXEC | MFD_ALLOW_SEALING);
    if (mfd < 0) {
        mfd = memfd_create(label, MFD_CLOEXEC);
        if (mfd < 0) {
            return -1;
        }
    }

    if (ftruncate(mfd, (off_t)size) != 0) {
        close(mfd);
        return -1;
    }

    (void)fcntl(mfd, F_ADD_SEALS, seals);
    return mfd;
}
static void test_ro_fd_ownership(void) {
printf("test_ro_fd_ownership\n");
int ro = make_ro_backing(0x10000u); /* >= the stub low so the holder can mmap */
CHECK(ro >= 0, "created an RO backing fd");
if (ro < 0) return;
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder h; memset(&h, 0, sizeof h);
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.stop_epoch = -1;
add_holder(core, &h, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0);
/* stub kcr3 (no VM) but a REAL infra ro_fd handed in for the RO share path. */
vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = ro;
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx (infra ro_fd)");
vmsig_core_run(core);
CHECK(h.memctx >= 1, "holder received MEMCTX over the infra ro_fd");
CHECK(h.ro_ok, "infra ro_fd re-shared and mmaps PROT_READ");
CHECK(fcntl(ro, F_GETFD) >= 0, "infra ro_fd still open before close (no premature close)");
vmsig_core_free(core); /* mc_close closes the owned cfg_ro_fd */
vmsig_ctx_free(ctx);
CHECK(fcntl(ro, F_GETFD) == -1, "infra ro_fd closed by mc_close after free (no leak)");
if (fcntl(ro, F_GETFD) >= 0) close(ro); /* belt-and-braces if the assert failed */
}
/* Runs every scenario in order, prints the overall verdict, and exits with
 * status 1 if any CHECK failed, 0 otherwise. */
int main(void) {
    test_multicast();
    test_epoch();
    test_retain();
    test_multivm();
    test_socket();
    test_ro_fd_ownership();

    const int failed = (g_fail != 0);
    const char* verdict = failed ? "FAIL" : "PASS";
    printf("memctx tests: %s\n", verdict);
    return failed ? 1 : 0;
}