From e6c7aed8eb08ad85e4d547a0bd214579b5da274c Mon Sep 17 00:00:00 2001 From: Gregory Lirent Date: Sat, 20 Jun 2026 21:21:20 +0300 Subject: [PATCH] memwrite: per-process (cr3) target and full-extent socket SRC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CMD_MEMWRITE now carries a target page-table root (cr3) as its first field; cr3 == 0 keeps the kernel address-space default (behaviorally backward-compatible: a zero cr3 writes under the same kernel root the old command used). The header layout itself is NOT binary-compatible: it grows from 16 to 24 bytes, the inline SRC capacity shrinks from 32 to 24 bytes, and the wire version is unchanged, so every producer of CMD_MEMWRITE must be rebuilt against the new header. A control that has discovered a process's cr3 through its own read-only perception can write that process's private memory under the same exclusive write lease. Freshness of the cr3 is the control's responsibility — signaling does not validate it (that is perception, not coherence), mirroring the read side. - A socket control can now carry an SRC larger than the inline frame budget: a length-prefixed SRC tail follows the CMD_MEMWRITE frame (flag SRC_PAYLOAD, the length being the frame's own len). A per-connection two-phase receiver accumulates the tail into a fixed conn-owned buffer up to the extent bound, matching the in-process payload path. A zero or over-bound length is a framing violation that closes the connection: leaving the promised tail unread would desync the stream and draining an arbitrary length would be a denial of service. The capability, exclusive lease, source and extent gates are unchanged and reused; only the event header gained the cr3 field and the socket transport gained the tail receiver. The adapter resolves cr3 == 0 to the kernel root on its worker thread and writes atomically. 
--- CMakeLists.txt | 2 + include/vmsig_event.h | 28 ++++++--- include/vmsig_socket.h | 9 ++- src/adapter/memctx/memctx.c | 14 +++-- src/control/socket.c | 114 +++++++++++++++++++++++++++++----- src/test/test_memwrite.c | 62 ++++++++++++++++--- src/test/test_sock.c | 119 ++++++++++++++++++++++++++++++++++++ 7 files changed, 309 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 82ad6cf..b80f3dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,6 +87,8 @@ add_test(NAME sec COMMAND vmsig_sectest) add_executable(vmsig_socktest src/test/test_sock.c) target_link_libraries(vmsig_socktest PRIVATE vmsig Threads::Threads) +target_include_directories(vmsig_socktest PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/memctx/include) target_compile_options(vmsig_socktest PRIVATE -Wall -Wextra) add_test(NAME sock COMMAND vmsig_socktest) diff --git a/include/vmsig_event.h b/include/vmsig_event.h index 38aa0b8..09fa6e4 100644 --- a/include/vmsig_event.h +++ b/include/vmsig_event.h @@ -168,20 +168,32 @@ typedef struct { } vmsig_input; /* fits in vmsig_event.inln[48] */ /* ===== Memory write (DOWN VMSIG_EV_CMD_MEMWRITE) — NEUTRAL, write-signaled ===== - * control describes an ATOMIC write into guest memory abstractly (guest VA + length), - * WITHOUT knowing vmie/cr3: the memctx adapter resolves it under the held kcr3 and does - * ONE gva_write. Requires CAP_MEMWRITE + an exclusive MEMWRITE lease + an extent check. + * control describes an ATOMIC write into guest memory abstractly: a TARGET address space + * (cr3) + a guest VA + a length, WITHOUT knowing vmie. The memctx adapter does ONE + * gva_write under the requested cr3. Requires CAP_MEMWRITE + an exclusive MEMWRITE lease + * + an extent check. + * + * cr3: the target page-table root (DirectoryTableBase). cr3 == 0 selects the adapter's + * permanent System DTB (kernel AS) — the backward-compatible default. 
A non-zero cr3 + * addresses a PROCESS private AS that the control discovered out-of-band (RO proc_list). + * FRESHNESS of a process cr3 is the CONTROL's responsibility: a process may exit and its + * cr3 (page-table root PFN) be reused, so a stale cr3 would write into an UNRELATED AS. + * signaling does NOT validate cr3 (that is perception, not coherence) — symmetric to the + * read side, where the control already owns cr3 freshness for gva_read. The kernel default + * (cr3 == 0) has no such hazard: the System DTB is permanent for the VM session (epoch). + * * SRC bytes: inline (<= VMSIG_MEMWRITE_INLINE) ride in the inln tail right after this header * (flags & INLINE); larger in-proc writes ride in the borrowed payload (flags & PAYLOAD). */ -#define VMSIG_MEMWRITE_INLINE 32u /* inln tail capacity for SRC (48 - 16 header) */ +#define VMSIG_MEMWRITE_INLINE 24u /* inln tail capacity for SRC (48 - 24 header) */ #define VMSIG_MW_SRC_INLINE 0x1u /* SRC bytes are in inln tail (len<=INLINE) */ #define VMSIG_MW_SRC_PAYLOAD 0x2u /* SRC bytes are in ev->payload.data (in-proc) */ typedef struct { - uint64_t gva; /* guest virtual address to write (resolved under the adapter's kcr3) */ - uint32_t len; /* number of bytes to write (1..VMSIG_MEMWRITE_MAX) */ - uint32_t flags; /* VMSIG_MW_SRC_INLINE | VMSIG_MW_SRC_PAYLOAD */ + uint64_t cr3; /* target page-table root; 0 => the adapter's System DTB (kernel AS) */ + uint64_t gva; /* guest virtual address to write (translated under cr3) */ + uint32_t len; /* number of bytes to write (1..VMSIG_MEMWRITE_MAX) */ + uint32_t flags; /* VMSIG_MW_SRC_INLINE | VMSIG_MW_SRC_PAYLOAD */ /* inline SRC tail (when VMSIG_MW_SRC_INLINE): up to VMSIG_MEMWRITE_INLINE bytes follow */ -} vmsig_memwrite; /* header = 8+4+4 = 16 bytes; +32 tail = 48 (exactly inln[48]) */ +} vmsig_memwrite; /* header = 8+8+4+4 = 24 bytes; +24 tail = 48 (exactly inln[48]) */ /* ===== Cursor (UP VMSIG_EV_CURSOR_STATE, in inln) — NEUTRAL ===== * Cursor position from the SCREEN 
sensor (vgpu). NEUTRAL payload format only: emitted by the diff --git a/include/vmsig_socket.h b/include/vmsig_socket.h index 1390bec..7511d46 100644 --- a/include/vmsig_socket.h +++ b/include/vmsig_socket.h @@ -10,7 +10,14 @@ /* Wire format: fixed-size, pointer-free — the same contract on the external * poller. Single host (unix socket) => native byte order. Only the event's - * inline part is serialized (payload pointers do not go on the wire). */ + * inline part is serialized (payload pointers do not go on the wire). + * + * Exception (DOWN only): a CMD_MEMWRITE frame with VMSIG_MW_SRC_PAYLOAD is followed + * on the stream by exactly vmsig_memwrite.len SRC bytes (length-prefixed by the + * contract's mw.len, no separate wire prefix). A client writes the 80-byte frame, + * then the len SRC bytes (1..VMSIG_MEMWRITE_MAX). For len <= VMSIG_MEMWRITE_INLINE the + * client uses VMSIG_MW_SRC_INLINE instead (SRC rides in the inln tail, no trailing + * bytes). All other DOWN frames and all UP deliveries are a single fixed frame. */ #define VMSIG_WIRE_MAGIC 0x47495356u /* 'VSIG' */ #define VMSIG_WIRE_VERSION 1u typedef struct { diff --git a/src/adapter/memctx/memctx.c b/src/adapter/memctx/memctx.c index 089a1b9..f7dad37 100644 --- a/src/adapter/memctx/memctx.c +++ b/src/adapter/memctx/memctx.c @@ -59,11 +59,13 @@ enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 }; /* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold * bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap). * boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend - * on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src. */ + * on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src plus the + * target cr3 (0 => System DTB; resolved on the worker against a->kcr3). 
*/ typedef struct { uint32_t op; /* MC_JOB_* */ uint32_t boot_count; /* MC_JOB_BOOTSTRAP */ /* --- MC_JOB_WRITE --- */ + uint64_t cr3; /* target AS root; 0 => a->kcr3 (kernel AS), resolved on worker */ uint64_t gva; uint32_t len; uint32_t corr; @@ -168,8 +170,10 @@ static int mc_job(void* user, const void* req, void* res) { if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */ #ifdef VMSIG_WITH_VMIE /* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it): - * the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. */ - rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)a->kcr3, (uintptr_t)rq->gva, + * the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. + * cr3 resolve is on the worker (sole owner of a->kcr3): 0 => kernel AS (System DTB). */ + uint64_t target = rq->cr3 ? rq->cr3 : a->kcr3; + rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)target, (uintptr_t)rq->gva, rq->src, rq->len) == 0); return rs->ok ? 0 : -1; #else @@ -363,13 +367,13 @@ static int mc_submit(vmsig_adapter* a, const vmsig_event* ev) { return 0; } mc_req rq; memset(&rq, 0, sizeof rq); - rq.op = MC_JOB_WRITE; rq.gva = mw->gva; rq.len = len; + rq.op = MC_JOB_WRITE; rq.cr3 = mw->cr3; rq.gva = mw->gva; rq.len = len; rq.corr = ev->corr; rq.origin = ev->origin; /* copy SRC into the worker req (off-loop gva_write reads from rq.src). 
*/ if (mw->flags & VMSIG_MW_SRC_INLINE) { if (len > VMSIG_MEMWRITE_INLINE) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; } - memcpy(rq.src, ev->inln + sizeof *mw, len); /* inln tail after the 16-byte header */ + memcpy(rq.src, ev->inln + sizeof *mw, len); /* inln tail after the 24-byte header */ } else if (mw->flags & VMSIG_MW_SRC_PAYLOAD) { if (!ev->payload.data || ev->payload.len < len) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; } memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */ diff --git a/src/control/socket.c b/src/control/socket.c index 074621f..4044ba7 100644 --- a/src/control/socket.c +++ b/src/control/socket.c @@ -7,12 +7,20 @@ * dispatched through emit_down (enforced by the grant), UP events are serialized into * a frame. On EOF — deferred reap. * + * DOWN framing: every kind is a single fixed vmsig_wire frame, EXCEPT a CMD_MEMWRITE + * carrying VMSIG_MW_SRC_PAYLOAD — then mw.len SRC bytes follow the frame (length-prefixed + * by the contract's mw.len, no separate wire prefix). The per-conn receiver is a 2-phase + * state machine (FRAME -> TAIL): it accumulates the frame, and for a PAYLOAD MEMWRITE it + * accumulates the SRC tail into a fixed conn-owned blob, then emits a BORROWED-payload + * event. The blob lives in the conn so it outlives the DOWN queue until pump_down copies it. + * * DoS protection: per-uid limit of concurrent connections (against eviction of - * legitimate ones); a janitor timerfd detaches "stuck" partial frames (slowloris). - * The global ceiling and slot reuse live in the core. */ + * legitimate ones); a janitor timerfd detaches "stuck" partial frames / SRC tails + * (slowloris). The global ceiling and slot reuse live in the core. 
*/ #define _GNU_SOURCE #include "vmsig_socket.h" #include "core_internal.h" /* core_add_source, core_request_drop, add_control */ +#include "memctx.h" /* VMSIG_MEMWRITE_MAX: SRC-tail bound (one source of truth) */ #include #include #include @@ -57,6 +65,10 @@ int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev) { } /* ===== per-conn control ===== */ +/* DOWN receive phases: read the fixed frame, then (only for a PAYLOAD MEMWRITE) the + * length-prefixed SRC tail. State persists in the conn across EPOLLIN (partial recv). */ +typedef enum { CONN_RX_FRAME = 0, CONN_RX_TAIL = 1 } conn_rx_phase; + typedef struct sock_conn { int fd; vmsig_core* core; @@ -69,6 +81,11 @@ typedef struct sock_conn { void* token; uint8_t buf[sizeof(vmsig_wire)]; size_t buflen; + conn_rx_phase phase; /* FRAME: read vmsig_wire; TAIL: read SRC tail */ + vmsig_event pend; /* decoded frame awaiting its SRC tail */ + uint32_t need; /* expected tail length (= mw.len) */ + uint32_t got; /* tail bytes already accumulated */ + uint8_t blob[VMSIG_MEMWRITE_MAX]; /* SRC tail (BORROWED payload; lives in conn) */ } sock_conn; static int conn_fd(void* ctl) { return ((sock_conn*)ctl)->fd; } @@ -90,26 +107,86 @@ static void conn_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*) sock_conn* c = ctl; c->emit_down = emit; c->token = token; } +/* Does this decoded frame pull a length-prefixed SRC tail? Only a CMD_MEMWRITE that + * advertises VMSIG_MW_SRC_PAYLOAD. The tail length is mw.len from the contract (already on + * the wire in inln) — no separate wire prefix. INLINE / other kinds carry no tail. 
*/ +static int frame_pulls_tail(const vmsig_event* ev, uint32_t* need) { + if (ev->kind != VMSIG_EV_CMD_MEMWRITE) return 0; + const vmsig_memwrite* mw = (const vmsig_memwrite*)ev->inln; + if (!(mw->flags & VMSIG_MW_SRC_PAYLOAD)) return 0; + *need = mw->len; + return 1; +} + static int conn_on_readable(void* ctl) { sock_conn* c = ctl; for (;;) { - ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen); - if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */ + if (c->phase == CONN_RX_FRAME) { + ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen); + if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */ + if (n < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) break; + core_request_drop(c->core, c->id); + return 0; + } + c->last_ns = now_ns(); + c->buflen += (size_t)n; + if (c->buflen != sizeof c->buf) continue; /* frame not whole yet */ + c->buflen = 0; + + vmsig_event ev; + if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) != 0) + continue; /* bad magic/ver — drop frame */ + ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */ + + uint32_t need = 0; + if (!frame_pulls_tail(&ev, &need)) { /* variant A / other kinds */ + if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */ + continue; + } + /* Cap BEFORE reading the tail. A PAYLOAD frame promises EXACTLY mw.len tail bytes + * with 1 <= mw.len <= MAX. A zero or over-cap length is a framing-contract + * violation: the promised tail cannot be safely consumed (draining an + * attacker-chosen length is a DoS) and leaving it unread would desync the stream + * (the SRC bytes would be misread as the next frame). Close the connection — a + * conformant poller never requests a tail outside [1, MAX]. 
*/ + if (need == 0 || need > VMSIG_MEMWRITE_MAX) { + core_request_drop(c->core, c->id); + return 0; + } + c->pend = ev; c->need = need; c->got = 0; + c->phase = CONN_RX_TAIL; /* fall through to read the tail */ + continue; + } + + /* CONN_RX_TAIL: accumulate exactly c->need SRC bytes into the conn-owned blob. */ + ssize_t n = read(c->fd, c->blob + c->got, c->need - c->got); + if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */ if (n < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) break; core_request_drop(c->core, c->id); return 0; } c->last_ns = now_ns(); - c->buflen += (size_t)n; - if (c->buflen == sizeof c->buf) { - vmsig_event ev; - if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) == 0) { - ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */ - if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */ - } - c->buflen = 0; - } + c->got += (uint32_t)n; + if (c->got != c->need) continue; /* tail not whole yet */ + + /* Tail complete: borrow it through the payload. The blob lives in the conn and thus + * outlives the DOWN queue until pump_down copies it (mc_submit). release=NULL — the + * body belongs to the conn; the adapter copies synchronously on the loop thread. */ + c->pend.payload.data = c->blob; + c->pend.payload.len = c->need; + c->pend.payload.codec = VMSIG_CODEC_MEMCTX; + c->pend.payload.flags = VMSIG_PL_BORROWED; + c->pend.payload.release= NULL; + c->pend.payload.owner = NULL; + if (c->emit_down) c->emit_down(c->token, &c->pend); + c->phase = CONN_RX_FRAME; c->got = 0; c->need = 0; + + /* Do NOT reuse c->blob until pump_down has copied it. conn-fd is LEVEL-triggered + * (EPOLLIN without EPOLLET), so any remaining bytes re-fire EPOLLIN on the next + * pass — break out and let pump_down run first. 
*/ + break; } return 0; } @@ -244,16 +321,21 @@ static void on_accept(void* user, uint32_t events) { } } -/* janitor: detach connections with a stuck partial frame (slowloris) */ +/* janitor: detach connections with a stuck partial frame OR a stuck partial SRC tail + * (slowloris). The blob is a conn field, so teardown (free(c) in conn_close) needs no + * extra cleanup; a partial tail never emitted an event, so no dangling payload either. */ static void on_janitor(void* user, uint32_t events) { (void)events; sock_listener* L = user; uint64_t v; while (read(L->janitor_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain */ } uint64_t now = now_ns(); - for (sock_conn* c = L->conns; c; c = c->lnext) - if (c->buflen > 0 && now - c->last_ns > VMSIG_SOCK_IDLE_NS) + for (sock_conn* c = L->conns; c; c = c->lnext) { + int stuck_frame = (c->buflen > 0); /* partial vmsig_wire */ + int stuck_tail = (c->phase == CONN_RX_TAIL && c->got < c->need);/* partial SRC tail */ + if ((stuck_frame || stuck_tail) && now - c->last_ns > VMSIG_SOCK_IDLE_NS) core_request_drop(c->core, c->id); + } } /* listener cleanup on core_free (owner = the core, via on_free of the first source) */ diff --git a/src/test/test_memwrite.c b/src/test/test_memwrite.c index 420a45f..04a787b 100644 --- a/src/test/test_memwrite.c +++ b/src/test/test_memwrite.c @@ -90,13 +90,14 @@ static int acquire_mw(void* ctl) { return vmsig_inproc_send(ctl, &d); } -/* CMD_MEMWRITE with inline SRC; corr for tracking. flags: VMSIG_MW_SRC_* (0 => no SRC). */ -static int send_write(void* ctl, uint64_t gva, uint32_t len, uint32_t flags, +/* CMD_MEMWRITE with inline SRC; corr for tracking. flags: VMSIG_MW_SRC_* (0 => no SRC). + * cr3 selects the target AS: 0 => kernel default (System DTB), non-zero => a process AS. 
*/ +static int send_write(void* ctl, uint64_t cr3, uint64_t gva, uint32_t len, uint32_t flags, const void* src, uint32_t corr) { vmsig_event d; memset(&d, 0, sizeof d); d.kind = VMSIG_EV_CMD_MEMWRITE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN; d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH; d.corr = corr; - vmsig_memwrite mw = { gva, len, flags }; + vmsig_memwrite mw = { cr3, gva, len, flags }; memcpy(d.inln, &mw, sizeof mw); if ((flags & VMSIG_MW_SRC_INLINE) && src && len <= VMSIG_MEMWRITE_INLINE) memcpy(d.inln + sizeof mw, src, len); @@ -123,21 +124,21 @@ static void test_path_and_deny(void) { /* 3) lease gate: without ACQUIRE the write is dropped at the gate (-1, no actuation). */ uint8_t pat[8] = { 0xDE, 0xAD, 0xBE, 0xEF, 1, 2, 3, 4 }; - CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 99) == -1, + CHECK(send_write(A, 0, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 99) == -1, "3: CMD_MEMWRITE without a lease is dropped by the gate"); /* acquire the MEMWRITE lease (synchronous intercept; UP reply paced by ctx). */ CHECK(acquire_mw(A) == 0, "acquire submitted"); /* 1) happy path: inline write -> queued -> ACT_ACK{ok=1}. Also drains the GRANTED reply. */ - CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 11) == 0, + CHECK(send_write(A, 0, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 11) == 0, "1: owner's CMD_MEMWRITE passes the gate"); /* 2) extent: len > MAX -> ACK{ok=0}, NOT actuated (queued ack on the loop thread). */ - CHECK(send_write(A, 0x2000, VMSIG_MEMWRITE_MAX + 1, VMSIG_MW_SRC_INLINE, pat, 22) == 0, + CHECK(send_write(A, 0, 0x2000, VMSIG_MEMWRITE_MAX + 1, VMSIG_MW_SRC_INLINE, pat, 22) == 0, "2: over-extent write is accepted by the gate (denied inside the adapter)"); /* 2b) missing SRC flag -> ACK{ok=0}. */ - CHECK(send_write(A, 0x3000, 4, 0u, NULL, 33) == 0, + CHECK(send_write(A, 0, 0x3000, 4, 0u, NULL, 33) == 0, "2b: no-SRC-flag write is accepted by the gate (denied inside the adapter)"); /* expect 3 ACKs (corr 11/22/33) + the GRANTED reply. 
*/ @@ -160,6 +161,45 @@ static void test_path_and_deny(void) { vmsig_ctx_free(ctx); } +/* ---- 1b: cr3 default + explicit — the cr3 field threads the whole seam ------- + * Stub does not actuate, so this asserts CONTRACT/ROUTE only: both a kernel-default + * (cr3==0) and an arbitrary process cr3 (cr3!=0) pass cap -> grant -> lease-gate -> + * route -> adapter -> ACT_ACK{ok=1}. A non-zero cr3 does NOT bypass any gate (it is + * read by the adapter only after the core admitted the command). */ +static void test_cr3_default_and_explicit(void) { + printf("test_cr3_default_and_explicit\n"); + vmsig_ctx* ctx = vmsig_ctx_new(); + vmsig_core* core = vmsig_core_new(ctx); + cstate s; memset(&s, 0, sizeof s); s.core = core; + + void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10); + CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx"); + + CHECK(acquire_mw(A) == 0, "acquire submitted"); + + uint8_t pat[8] = { 0xAA, 0xBB, 0xCC, 0xDD, 5, 6, 7, 8 }; + /* (a) kernel default: cr3 == 0 -> resolves to the adapter's System DTB on the worker. */ + CHECK(send_write(A, 0, 0x4000, 8, VMSIG_MW_SRC_INLINE, pat, 71) == 0, + "1b-a: cr3==0 (kernel default) passes the gate"); + /* (b) explicit process AS: an arbitrary non-zero cr3 is carried through unchanged. 
*/ + CHECK(send_write(A, 0xDEADBEEF000ull, 0x5000, 8, VMSIG_MW_SRC_INLINE, pat, 72) == 0, + "1b-b: cr3!=0 (process AS) passes the gate"); + + run_until_acks(&s, 2); + + CHECK(s.granted == 1, "1b: lease GRANTED once"); + int saw71_ok = -1, saw72_ok = -1; + for (int i = 0; i < s.nack; i++) { + if (s.ack_corr[i] == 71) saw71_ok = s.ack_ok[i]; + if (s.ack_corr[i] == 72) saw72_ok = s.ack_ok[i]; + } + CHECK(saw71_ok == 1, "1b-a: cr3==0 write ACKs ok=1 (kernel default, stub)"); + CHECK(saw72_ok == 1, "1b-b: cr3!=0 write ACKs ok=1 (route proven, stub does not actuate)"); + + vmsig_core_free(core); + vmsig_ctx_free(ctx); +} + /* ---- 4: cap gate — no CAP_MEMWRITE cannot acquire the lease ----------------- */ static void test_cap_gate(void) { printf("test_cap_gate\n"); @@ -195,11 +235,11 @@ static void test_inflight_fence(void) { uint8_t pat[4] = { 1, 2, 3, 4 }; CHECK(acquire_mw(A) == 0, "A acquires"); /* A queues a write (corr=55): passes the gate (A owns), lands in the DOWN queue. */ - CHECK(send_write(A, 0x1000, 4, VMSIG_MW_SRC_INLINE, pat, 55) == 0, "A queues write 55"); + CHECK(send_write(A, 0, 0x1000, 4, VMSIG_MW_SRC_INLINE, pat, 55) == 0, "A queues write 55"); /* B preempts SYNCHRONOUSLY (acquire does not go through ctx). */ CHECK(acquire_mw(B) == 0, "B preempts"); /* B's own write (corr=66) — should actuate. */ - CHECK(send_write(B, 0x2000, 4, VMSIG_MW_SRC_INLINE, pat, 66) == 0, "B queues write 66"); + CHECK(send_write(B, 0, 0x2000, 4, VMSIG_MW_SRC_INLINE, pat, 66) == 0, "B queues write 66"); run_until_acks(&s, 1); /* B's 66 acks; A's 55 must be fenced (no ack) */ @@ -218,7 +258,11 @@ static void test_inflight_fence(void) { int main(void) { printf("test_memwrite\n"); + /* POD layout: cr3+gva (2*u64) + len+flags (2*u32) = 24, +24 inline tail = inln[48]. 
*/ + CHECK(sizeof(vmsig_memwrite) == 24, "vmsig_memwrite header is 24 bytes"); + CHECK(VMSIG_MEMWRITE_INLINE == 24u, "VMSIG_MEMWRITE_INLINE is 24"); test_path_and_deny(); + test_cr3_default_and_explicit(); test_cap_gate(); test_inflight_fence(); cref_free_all(); diff --git a/src/test/test_sock.c b/src/test/test_sock.c index d2bc412..ba7b2a1 100644 --- a/src/test/test_sock.c +++ b/src/test/test_sock.c @@ -5,6 +5,7 @@ #define _GNU_SOURCE #include "vmsig.h" #include "vmsig_socket.h" +#include "memctx.h" /* VMSIG_MEMWRITE_MAX: the adapter's extent bound (private) */ #include #include #include @@ -48,6 +49,12 @@ static vmsig_grant pol_deny(uint32_t uid, uint32_t pid, void* ud) { return g; } +static uint64_t now_ns(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec; +} + static int connect_abstract(const char* name) { int fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) return -1; @@ -88,8 +95,120 @@ static void test_wire(void) { CHECK(vmsig_wire_decode(&bad, &x) == -1, "bad magic rejected"); } +/* ===== variant B: socket CMD_MEMWRITE with a length-prefixed SRC tail (>INLINE) ===== + * Stub memctx adapter (no VM): proves the socket transport carries a frame + SRC tail + * (with partial recv), routes through cap -> grant -> lease-gate -> adapter, and gets + * ACT_ACK ok=1. Also: an over-cap len is a framing violation that closes the connection. */ + +/* Policy granting a MEMWRITE-capable poller (cap MEMWRITE|MEMCTX|OBSERVE). */ +static vmsig_grant pol_mw(uint32_t uid, uint32_t pid, void* ud) { + (void)pid; (void)ud; + vmsig_grant g; memset(&g, 0, sizeof g); + g.principal = uid; g.endpoint_mask = 1ull << 0; + g.source_mask = 0xFFFFFFFFu; + g.cap_mask = VMSIG_CAP_MEMWRITE | VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE; + g.arb_prio = 10; + return g; +} + +/* Encode + write a single fixed frame. 
*/ +static int send_frame(int fd, const vmsig_event* ev) { + vmsig_wire w; vmsig_wire_encode(&w, ev); + return (write(fd, &w, sizeof w) == (ssize_t)sizeof w) ? 0 : -1; +} + +/* Read fixed frames until an ACT_ACK with the wanted corr; return its ok flag (-1 on + * timeout/EOF). The ACK inln layout from mc_memwrite_ack: {int ok; uint32 corr; uint32 origin}. */ +static int wait_ack(int fd, uint32_t want_corr, int ms) { + struct timeval tv = { .tv_sec = 0, .tv_usec = 200 * 1000 }; + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv); + uint64_t deadline = now_ns() + (uint64_t)ms * 1000000ull; + vmsig_wire w; uint8_t* p = (uint8_t*)&w; size_t off = 0; + while (now_ns() < deadline) { + ssize_t n = read(fd, p + off, sizeof w - off); + if (n <= 0) continue; /* timeout/EOF retry within deadline */ + off += (size_t)n; + if (off < sizeof w) continue; + off = 0; + vmsig_event ev; + if (vmsig_wire_decode(&w, &ev) != 0) continue; + if (ev.kind == VMSIG_EV_ACT_ACK && ev.corr == want_corr) { + int ok; memcpy(&ok, ev.inln, sizeof ok); + return ok; + } + } + return -1; +} + +static void test_memwrite_tail(void) { + printf("test_memwrite_tail\n"); + vmsig_ctx* ctx = vmsig_ctx_new(); + vmsig_core* core = vmsig_core_new(ctx); + CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx stub"); + const char* MW = "@vmsig-sock-mw-test"; + CHECK(vmsig_socket_attach(core, MW, pol_mw, NULL) == 0, "attach mw listener"); + + pthread_t th; + pthread_create(&th, NULL, loop_main, core); + + int fd = connect_abstract(MW); + CHECK(fd >= 0, "client connected (mw)"); + if (fd >= 0) { + /* acquire the MEMWRITE lease */ + vmsig_event d; memset(&d, 0, sizeof d); + d.kind = VMSIG_EV_CMD_ACQUIRE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN; + d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH; + vmsig_lease_req lr = { VMSIG_LEASE_MEMWRITE, 0 }; + memcpy(d.inln, &lr, sizeof lr); + CHECK(send_frame(fd, &d) == 0, "send ACQUIRE"); + + /* happy path: CMD_MEMWRITE(PAYLOAD, len=64) + 
64-byte tail, written in two halves + * to exercise the TAIL-phase partial accumulation. */ + const uint32_t len = 64u; + uint8_t src[64]; + for (uint32_t i = 0; i < len; i++) src[i] = (uint8_t)(i + 1); + vmsig_event mwe; memset(&mwe, 0, sizeof mwe); + mwe.kind = VMSIG_EV_CMD_MEMWRITE; mwe.source = VMSIG_SRC_MEMCTX; mwe.dir = VMSIG_DIR_DOWN; + mwe.endpoint = 0; mwe.prio = VMSIG_PRIO_HIGH; mwe.corr = 0x101; + vmsig_memwrite mw = { 0, 0x1000, len, VMSIG_MW_SRC_PAYLOAD }; + memcpy(mwe.inln, &mw, sizeof mw); + CHECK(send_frame(fd, &mwe) == 0, "send CMD_MEMWRITE frame (PAYLOAD)"); + CHECK(write(fd, src, 32) == 32, "send SRC tail part 1"); + struct timespec ts = { .tv_sec = 0, .tv_nsec = 5 * 1000000 }; + nanosleep(&ts, NULL); /* let the loop accumulate a partial tail */ + CHECK(write(fd, src + 32, 32) == 32, "send SRC tail part 2"); + CHECK(wait_ack(fd, 0x101, 1000) == 1, "B: payload-tail write ACKs ok=1 (stub)"); + + /* negative: an over-cap PAYLOAD len is a framing-contract violation. The server closes + * the connection — it cannot safely skip the promised tail, and draining an arbitrary + * length would be a DoS. Verify no ACK arrives and the socket reaches EOF (conn shut). */ + memset(&mwe.inln, 0, sizeof mwe.inln); + mwe.corr = 0x102; + vmsig_memwrite mw2 = { 0, 0x2000, VMSIG_MEMWRITE_MAX + 1u, VMSIG_MW_SRC_PAYLOAD }; + memcpy(mwe.inln, &mw2, sizeof mw2); + CHECK(send_frame(fd, &mwe) == 0, "send CMD_MEMWRITE frame (over-cap)"); + /* No ACK arrives; the server shuts the conn, so the socket drains to EOF. A 1s recv + * timeout bounds the wait if the server wrongly kept the connection open. 
*/ + struct timeval rtv = { .tv_sec = 1, .tv_usec = 0 }; + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &rtv, sizeof rtv); + uint8_t junk[80]; ssize_t rr; + while ((rr = read(fd, junk, sizeof junk)) > 0) { /* drain any in-flight, then EOF */ } + CHECK(rr == 0, "B: over-cap closed the connection (EOF)"); + + close(fd); + } + + struct timespec t = { .tv_sec = 0, .tv_nsec = 50 * 1000000 }; + nanosleep(&t, NULL); + vmsig_core_stop(core); + pthread_join(th, NULL); + vmsig_core_free(core); + vmsig_ctx_free(ctx); +} + int main(void) { test_wire(); + test_memwrite_tail(); printf("test_socket\n"); vmsig_ctx* ctx = vmsig_ctx_new();