memwrite: per-process (cr3) target and full-extent socket SRC

- CMD_MEMWRITE now carries a target page-table root (cr3) as its first field;
  cr3 == 0 keeps the kernel address-space default (backward-compatible). A control
  that has discovered a process's cr3 through its own read-only perception can
  write that process's private memory under the same exclusive write lease.
  Freshness of the cr3 is the control's responsibility — signaling does not
  validate it (that is perception, not coherence), mirroring the read side.

- A socket control can now carry an SRC larger than the inline frame budget: when a
  CMD_MEMWRITE frame sets the SRC_PAYLOAD flag, exactly len SRC bytes follow the frame on
  the stream, where len is the memwrite header's own len field (there is no separate
  wire-level length prefix). A per-connection two-phase receiver
  accumulates the tail into a fixed conn-owned buffer up to the extent bound,
  matching the in-process payload path. A zero or over-bound length is a framing
  violation that closes the connection: leaving the promised tail unread would
  desync the stream, and draining an arbitrary length would be a denial of service.

The capability, exclusive lease, source and extent gates are unchanged and
reused; only the event header gained the cr3 field and the socket transport
gained the tail receiver. The adapter resolves cr3 == 0 to the kernel root on
its worker thread and writes atomically.
This commit is contained in:
2026-06-20 21:21:20 +03:00
parent e9aee057c7
commit e6c7aed8eb
7 changed files with 309 additions and 39 deletions
+2
View File
@@ -87,6 +87,8 @@ add_test(NAME sec COMMAND vmsig_sectest)
add_executable(vmsig_socktest src/test/test_sock.c) add_executable(vmsig_socktest src/test/test_sock.c)
target_link_libraries(vmsig_socktest PRIVATE vmsig Threads::Threads) target_link_libraries(vmsig_socktest PRIVATE vmsig Threads::Threads)
target_include_directories(vmsig_socktest PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/memctx/include)
target_compile_options(vmsig_socktest PRIVATE -Wall -Wextra) target_compile_options(vmsig_socktest PRIVATE -Wall -Wextra)
add_test(NAME sock COMMAND vmsig_socktest) add_test(NAME sock COMMAND vmsig_socktest)
+18 -6
View File
@@ -168,20 +168,32 @@ typedef struct {
} vmsig_input; /* fits in vmsig_event.inln[48] */ } vmsig_input; /* fits in vmsig_event.inln[48] */
/* ===== Memory write (DOWN VMSIG_EV_CMD_MEMWRITE) — NEUTRAL, write-signaled ===== /* ===== Memory write (DOWN VMSIG_EV_CMD_MEMWRITE) — NEUTRAL, write-signaled =====
* control describes an ATOMIC write into guest memory abstractly (guest VA + length), * control describes an ATOMIC write into guest memory abstractly: a TARGET address space
* WITHOUT knowing vmie/cr3: the memctx adapter resolves it under the held kcr3 and does * (cr3) + a guest VA + a length, WITHOUT knowing vmie. The memctx adapter does ONE
* ONE gva_write. Requires CAP_MEMWRITE + an exclusive MEMWRITE lease + an extent check. * gva_write under the requested cr3. Requires CAP_MEMWRITE + an exclusive MEMWRITE lease
* + an extent check.
*
* cr3: the target page-table root (DirectoryTableBase). cr3 == 0 selects the adapter's
* permanent System DTB (kernel AS) — the backward-compatible default. A non-zero cr3
* addresses a PROCESS private AS that the control discovered out-of-band (RO proc_list).
* FRESHNESS of a process cr3 is the CONTROL's responsibility: a process may exit and its
* cr3 (page-table root PFN) be reused, so a stale cr3 would write into an UNRELATED AS.
* signaling does NOT validate cr3 (that is perception, not coherence) — symmetric to the
* read side, where the control already owns cr3 freshness for gva_read. The kernel default
* (cr3 == 0) has no such hazard: the System DTB is permanent for the VM session (epoch).
*
* SRC bytes: inline (<= VMSIG_MEMWRITE_INLINE) ride in the inln tail right after this header * SRC bytes: inline (<= VMSIG_MEMWRITE_INLINE) ride in the inln tail right after this header
* (flags & INLINE); larger in-proc writes ride in the borrowed payload (flags & PAYLOAD). */ * (flags & INLINE); larger in-proc writes ride in the borrowed payload (flags & PAYLOAD). */
#define VMSIG_MEMWRITE_INLINE 32u /* inln tail capacity for SRC (48 - 16 header) */ #define VMSIG_MEMWRITE_INLINE 24u /* inln tail capacity for SRC (48 - 24 header) */
#define VMSIG_MW_SRC_INLINE 0x1u /* SRC bytes are in inln tail (len<=INLINE) */ #define VMSIG_MW_SRC_INLINE 0x1u /* SRC bytes are in inln tail (len<=INLINE) */
#define VMSIG_MW_SRC_PAYLOAD 0x2u /* SRC bytes are in ev->payload.data (in-proc) */ #define VMSIG_MW_SRC_PAYLOAD 0x2u /* SRC bytes are in ev->payload.data (in-proc) */
typedef struct { typedef struct {
uint64_t gva; /* guest virtual address to write (resolved under the adapter's kcr3) */ uint64_t cr3; /* target page-table root; 0 => the adapter's System DTB (kernel AS) */
uint64_t gva; /* guest virtual address to write (translated under cr3) */
uint32_t len; /* number of bytes to write (1..VMSIG_MEMWRITE_MAX) */ uint32_t len; /* number of bytes to write (1..VMSIG_MEMWRITE_MAX) */
uint32_t flags; /* VMSIG_MW_SRC_INLINE | VMSIG_MW_SRC_PAYLOAD */ uint32_t flags; /* VMSIG_MW_SRC_INLINE | VMSIG_MW_SRC_PAYLOAD */
/* inline SRC tail (when VMSIG_MW_SRC_INLINE): up to VMSIG_MEMWRITE_INLINE bytes follow */ /* inline SRC tail (when VMSIG_MW_SRC_INLINE): up to VMSIG_MEMWRITE_INLINE bytes follow */
} vmsig_memwrite; /* header = 8+4+4 = 16 bytes; +32 tail = 48 (exactly inln[48]) */ } vmsig_memwrite; /* header = 8+8+4+4 = 24 bytes; +24 tail = 48 (exactly inln[48]) */
/* ===== Cursor (UP VMSIG_EV_CURSOR_STATE, in inln) — NEUTRAL ===== /* ===== Cursor (UP VMSIG_EV_CURSOR_STATE, in inln) — NEUTRAL =====
* Cursor position from the SCREEN sensor (vgpu). NEUTRAL payload format only: emitted by the * Cursor position from the SCREEN sensor (vgpu). NEUTRAL payload format only: emitted by the
+8 -1
View File
@@ -10,7 +10,14 @@
/* Wire format: fixed-size, pointer-free — the same contract on the external /* Wire format: fixed-size, pointer-free — the same contract on the external
* poller. Single host (unix socket) => native byte order. Only the event's * poller. Single host (unix socket) => native byte order. Only the event's
* inline part is serialized (payload pointers do not go on the wire). */ * inline part is serialized (payload pointers do not go on the wire).
*
* Exception (DOWN only): a CMD_MEMWRITE frame with VMSIG_MW_SRC_PAYLOAD is followed
* on the stream by exactly vmsig_memwrite.len SRC bytes (length-prefixed by the
* contract's mw.len, no separate wire prefix). A client writes the 80-byte frame,
* then the len SRC bytes (1..VMSIG_MEMWRITE_MAX). For len <= VMSIG_MEMWRITE_INLINE the
* client uses VMSIG_MW_SRC_INLINE instead (SRC rides in the inln tail, no trailing
* bytes). All other DOWN frames and all UP deliveries are a single fixed frame. */
#define VMSIG_WIRE_MAGIC 0x47495356u /* 'VSIG' */ #define VMSIG_WIRE_MAGIC 0x47495356u /* 'VSIG' */
#define VMSIG_WIRE_VERSION 1u #define VMSIG_WIRE_VERSION 1u
typedef struct { typedef struct {
+9 -5
View File
@@ -59,11 +59,13 @@ enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold /* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
* bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap). * bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap).
* boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend * boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend
* on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src. */ * on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src plus the
* target cr3 (0 => System DTB; resolved on the worker against a->kcr3). */
typedef struct { typedef struct {
uint32_t op; /* MC_JOB_* */ uint32_t op; /* MC_JOB_* */
uint32_t boot_count; /* MC_JOB_BOOTSTRAP */ uint32_t boot_count; /* MC_JOB_BOOTSTRAP */
/* --- MC_JOB_WRITE --- */ /* --- MC_JOB_WRITE --- */
uint64_t cr3; /* target AS root; 0 => a->kcr3 (kernel AS), resolved on worker */
uint64_t gva; uint64_t gva;
uint32_t len; uint32_t len;
uint32_t corr; uint32_t corr;
@@ -168,8 +170,10 @@ static int mc_job(void* user, const void* req, void* res) {
if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */ if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */
#ifdef VMSIG_WITH_VMIE #ifdef VMSIG_WITH_VMIE
/* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it): /* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it):
* the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. */ * the guard turns that into an ok=0 ACK (observable to the initiator), not a crash.
rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)a->kcr3, (uintptr_t)rq->gva, * cr3 resolve is on the worker (sole owner of a->kcr3): 0 => kernel AS (System DTB). */
uint64_t target = rq->cr3 ? rq->cr3 : a->kcr3;
rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)target, (uintptr_t)rq->gva,
rq->src, rq->len) == 0); rq->src, rq->len) == 0);
return rs->ok ? 0 : -1; return rs->ok ? 0 : -1;
#else #else
@@ -363,13 +367,13 @@ static int mc_submit(vmsig_adapter* a, const vmsig_event* ev) {
return 0; return 0;
} }
mc_req rq; memset(&rq, 0, sizeof rq); mc_req rq; memset(&rq, 0, sizeof rq);
rq.op = MC_JOB_WRITE; rq.gva = mw->gva; rq.len = len; rq.op = MC_JOB_WRITE; rq.cr3 = mw->cr3; rq.gva = mw->gva; rq.len = len;
rq.corr = ev->corr; rq.origin = ev->origin; rq.corr = ev->corr; rq.origin = ev->origin;
/* copy SRC into the worker req (off-loop gva_write reads from rq.src). */ /* copy SRC into the worker req (off-loop gva_write reads from rq.src). */
if (mw->flags & VMSIG_MW_SRC_INLINE) { if (mw->flags & VMSIG_MW_SRC_INLINE) {
if (len > VMSIG_MEMWRITE_INLINE) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; } if (len > VMSIG_MEMWRITE_INLINE) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
memcpy(rq.src, ev->inln + sizeof *mw, len); /* inln tail after the 16-byte header */ memcpy(rq.src, ev->inln + sizeof *mw, len); /* inln tail after the 24-byte header */
} else if (mw->flags & VMSIG_MW_SRC_PAYLOAD) { } else if (mw->flags & VMSIG_MW_SRC_PAYLOAD) {
if (!ev->payload.data || ev->payload.len < len) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; } if (!ev->payload.data || ev->payload.len < len) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */ memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */
+93 -11
View File
@@ -7,12 +7,20 @@
* dispatched through emit_down (enforced by the grant), UP events are serialized into * dispatched through emit_down (enforced by the grant), UP events are serialized into
* a frame. On EOF — deferred reap. * a frame. On EOF — deferred reap.
* *
* DOWN framing: every kind is a single fixed vmsig_wire frame, EXCEPT a CMD_MEMWRITE
* carrying VMSIG_MW_SRC_PAYLOAD — then mw.len SRC bytes follow the frame (length-prefixed
* by the contract's mw.len, no separate wire prefix). The per-conn receiver is a 2-phase
* state machine (FRAME -> TAIL): it accumulates the frame, and for a PAYLOAD MEMWRITE it
* accumulates the SRC tail into a fixed conn-owned blob, then emits a BORROWED-payload
* event. The blob lives in the conn so it outlives the DOWN queue until pump_down copies it.
*
* DoS protection: per-uid limit of concurrent connections (against eviction of * DoS protection: per-uid limit of concurrent connections (against eviction of
* legitimate ones); a janitor timerfd detaches "stuck" partial frames (slowloris). * legitimate ones); a janitor timerfd detaches "stuck" partial frames / SRC tails
* The global ceiling and slot reuse live in the core. */ * (slowloris). The global ceiling and slot reuse live in the core. */
#define _GNU_SOURCE #define _GNU_SOURCE
#include "vmsig_socket.h" #include "vmsig_socket.h"
#include "core_internal.h" /* core_add_source, core_request_drop, add_control */ #include "core_internal.h" /* core_add_source, core_request_drop, add_control */
#include "memctx.h" /* VMSIG_MEMWRITE_MAX: SRC-tail bound (one source of truth) */
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/uio.h> #include <sys/uio.h>
#include <sys/un.h> #include <sys/un.h>
@@ -57,6 +65,10 @@ int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev) {
} }
/* ===== per-conn control ===== */ /* ===== per-conn control ===== */
/* Receive phase of a connection's DOWN stream. The phase is stored in the conn so that a
 * partial recv can resume where it stopped on the next EPOLLIN. */
typedef enum {
    CONN_RX_FRAME = 0,   /* reading the fixed-size vmsig_wire frame */
    CONN_RX_TAIL  = 1    /* reading the SRC tail that follows a PAYLOAD MEMWRITE frame */
} conn_rx_phase;
typedef struct sock_conn { typedef struct sock_conn {
int fd; int fd;
vmsig_core* core; vmsig_core* core;
@@ -69,6 +81,11 @@ typedef struct sock_conn {
void* token; void* token;
uint8_t buf[sizeof(vmsig_wire)]; uint8_t buf[sizeof(vmsig_wire)];
size_t buflen; size_t buflen;
conn_rx_phase phase; /* FRAME: read vmsig_wire; TAIL: read SRC tail */
vmsig_event pend; /* decoded frame awaiting its SRC tail */
uint32_t need; /* expected tail length (= mw.len) */
uint32_t got; /* tail bytes already accumulated */
uint8_t blob[VMSIG_MEMWRITE_MAX]; /* SRC tail (BORROWED payload; lives in conn) */
} sock_conn; } sock_conn;
static int conn_fd(void* ctl) { return ((sock_conn*)ctl)->fd; } static int conn_fd(void* ctl) { return ((sock_conn*)ctl)->fd; }
@@ -90,9 +107,21 @@ static void conn_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*)
sock_conn* c = ctl; c->emit_down = emit; c->token = token; sock_conn* c = ctl; c->emit_down = emit; c->token = token;
} }
/* Tell whether a decoded frame is followed on the stream by an SRC tail.
 *
 * Only a VMSIG_EV_CMD_MEMWRITE whose header flags contain VMSIG_MW_SRC_PAYLOAD has a tail;
 * its length is the header's own len field, which already travelled inside inln, so there
 * is no separate wire prefix. Every other kind, and a MEMWRITE without the PAYLOAD flag,
 * has no tail.
 *
 * ev:   decoded event; inln is interpreted as a vmsig_memwrite header for MEMWRITE frames.
 * need: out-parameter; written with the header's len ONLY when 1 is returned and left
 *       untouched otherwise. The value is not range-checked here.
 * Returns 1 when a tail follows, 0 when it does not. */
static int frame_pulls_tail(const vmsig_event* ev, uint32_t* need) {
    if (ev->kind == VMSIG_EV_CMD_MEMWRITE) {
        const vmsig_memwrite* hdr = (const vmsig_memwrite*)ev->inln;
        if ((hdr->flags & VMSIG_MW_SRC_PAYLOAD) != 0) {
            *need = hdr->len;
            return 1;
        }
    }
    return 0;
}
static int conn_on_readable(void* ctl) { static int conn_on_readable(void* ctl) {
sock_conn* c = ctl; sock_conn* c = ctl;
for (;;) { for (;;) {
if (c->phase == CONN_RX_FRAME) {
ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen); ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen);
if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */ if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */
if (n < 0) { if (n < 0) {
@@ -102,14 +131,62 @@ static int conn_on_readable(void* ctl) {
} }
c->last_ns = now_ns(); c->last_ns = now_ns();
c->buflen += (size_t)n; c->buflen += (size_t)n;
if (c->buflen == sizeof c->buf) { if (c->buflen != sizeof c->buf) continue; /* frame not whole yet */
vmsig_event ev;
if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) == 0) {
ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */
if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */
}
c->buflen = 0; c->buflen = 0;
vmsig_event ev;
if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) != 0)
continue; /* bad magic/ver — drop frame */
ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */
uint32_t need = 0;
if (!frame_pulls_tail(&ev, &need)) { /* variant A / other kinds */
if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */
continue;
} }
/* Cap BEFORE reading the tail. A PAYLOAD frame promises EXACTLY mw.len tail bytes
* with 1 <= mw.len <= MAX. A zero or over-cap length is a framing-contract
* violation: the promised tail cannot be safely consumed (draining an
* attacker-chosen length is a DoS) and leaving it unread would desync the stream
* (the SRC bytes would be misread as the next frame). Close the connection — a
* conformant poller never requests a tail outside [1, MAX]. */
if (need == 0 || need > VMSIG_MEMWRITE_MAX) {
core_request_drop(c->core, c->id);
return 0;
}
c->pend = ev; c->need = need; c->got = 0;
c->phase = CONN_RX_TAIL; /* fall through to read the tail */
continue;
}
/* CONN_RX_TAIL: accumulate exactly c->need SRC bytes into the conn-owned blob. */
ssize_t n = read(c->fd, c->blob + c->got, c->need - c->got);
if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */
if (n < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) break;
core_request_drop(c->core, c->id);
return 0;
}
c->last_ns = now_ns();
c->got += (uint32_t)n;
if (c->got != c->need) continue; /* tail not whole yet */
/* Tail complete: borrow it through the payload. The blob lives in the conn and thus
* outlives the DOWN queue until pump_down copies it (mc_submit). release=NULL — the
* body belongs to the conn; the adapter copies synchronously on the loop thread. */
c->pend.payload.data = c->blob;
c->pend.payload.len = c->need;
c->pend.payload.codec = VMSIG_CODEC_MEMCTX;
c->pend.payload.flags = VMSIG_PL_BORROWED;
c->pend.payload.release= NULL;
c->pend.payload.owner = NULL;
if (c->emit_down) c->emit_down(c->token, &c->pend);
c->phase = CONN_RX_FRAME; c->got = 0; c->need = 0;
/* Do NOT reuse c->blob until pump_down has copied it. conn-fd is LEVEL-triggered
* (EPOLLIN without EPOLLET), so any remaining bytes re-fire EPOLLIN on the next
* pass — break out and let pump_down run first. */
break;
} }
return 0; return 0;
} }
@@ -244,17 +321,22 @@ static void on_accept(void* user, uint32_t events) {
} }
} }
/* janitor: detach connections with a stuck partial frame (slowloris) */ /* janitor: detach connections with a stuck partial frame OR a stuck partial SRC tail
* (slowloris). The blob is a conn field, so teardown (free(c) in conn_close) needs no
* extra cleanup; a partial tail never emitted an event, so no dangling payload either. */
static void on_janitor(void* user, uint32_t events) { static void on_janitor(void* user, uint32_t events) {
(void)events; (void)events;
sock_listener* L = user; sock_listener* L = user;
uint64_t v; uint64_t v;
while (read(L->janitor_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain */ } while (read(L->janitor_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain */ }
uint64_t now = now_ns(); uint64_t now = now_ns();
for (sock_conn* c = L->conns; c; c = c->lnext) for (sock_conn* c = L->conns; c; c = c->lnext) {
if (c->buflen > 0 && now - c->last_ns > VMSIG_SOCK_IDLE_NS) int stuck_frame = (c->buflen > 0); /* partial vmsig_wire */
int stuck_tail = (c->phase == CONN_RX_TAIL && c->got < c->need);/* partial SRC tail */
if ((stuck_frame || stuck_tail) && now - c->last_ns > VMSIG_SOCK_IDLE_NS)
core_request_drop(c->core, c->id); core_request_drop(c->core, c->id);
} }
}
/* listener cleanup on core_free (owner = the core, via on_free of the first source) */ /* listener cleanup on core_free (owner = the core, via on_free of the first source) */
static void listener_free(void* user) { static void listener_free(void* user) {
+53 -9
View File
@@ -90,13 +90,14 @@ static int acquire_mw(void* ctl) {
return vmsig_inproc_send(ctl, &d); return vmsig_inproc_send(ctl, &d);
} }
/* CMD_MEMWRITE with inline SRC; corr for tracking. flags: VMSIG_MW_SRC_* (0 => no SRC). */ /* CMD_MEMWRITE with inline SRC; corr for tracking. flags: VMSIG_MW_SRC_* (0 => no SRC).
static int send_write(void* ctl, uint64_t gva, uint32_t len, uint32_t flags, * cr3 selects the target AS: 0 => kernel default (System DTB), non-zero => a process AS. */
static int send_write(void* ctl, uint64_t cr3, uint64_t gva, uint32_t len, uint32_t flags,
const void* src, uint32_t corr) { const void* src, uint32_t corr) {
vmsig_event d; memset(&d, 0, sizeof d); vmsig_event d; memset(&d, 0, sizeof d);
d.kind = VMSIG_EV_CMD_MEMWRITE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN; d.kind = VMSIG_EV_CMD_MEMWRITE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN;
d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH; d.corr = corr; d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH; d.corr = corr;
vmsig_memwrite mw = { gva, len, flags }; vmsig_memwrite mw = { cr3, gva, len, flags };
memcpy(d.inln, &mw, sizeof mw); memcpy(d.inln, &mw, sizeof mw);
if ((flags & VMSIG_MW_SRC_INLINE) && src && len <= VMSIG_MEMWRITE_INLINE) if ((flags & VMSIG_MW_SRC_INLINE) && src && len <= VMSIG_MEMWRITE_INLINE)
memcpy(d.inln + sizeof mw, src, len); memcpy(d.inln + sizeof mw, src, len);
@@ -123,21 +124,21 @@ static void test_path_and_deny(void) {
/* 3) lease gate: without ACQUIRE the write is dropped at the gate (-1, no actuation). */ /* 3) lease gate: without ACQUIRE the write is dropped at the gate (-1, no actuation). */
uint8_t pat[8] = { 0xDE, 0xAD, 0xBE, 0xEF, 1, 2, 3, 4 }; uint8_t pat[8] = { 0xDE, 0xAD, 0xBE, 0xEF, 1, 2, 3, 4 };
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 99) == -1, CHECK(send_write(A, 0, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 99) == -1,
"3: CMD_MEMWRITE without a lease is dropped by the gate"); "3: CMD_MEMWRITE without a lease is dropped by the gate");
/* acquire the MEMWRITE lease (synchronous intercept; UP reply paced by ctx). */ /* acquire the MEMWRITE lease (synchronous intercept; UP reply paced by ctx). */
CHECK(acquire_mw(A) == 0, "acquire submitted"); CHECK(acquire_mw(A) == 0, "acquire submitted");
/* 1) happy path: inline write -> queued -> ACT_ACK{ok=1}. Also drains the GRANTED reply. */ /* 1) happy path: inline write -> queued -> ACT_ACK{ok=1}. Also drains the GRANTED reply. */
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 11) == 0, CHECK(send_write(A, 0, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 11) == 0,
"1: owner's CMD_MEMWRITE passes the gate"); "1: owner's CMD_MEMWRITE passes the gate");
/* 2) extent: len > MAX -> ACK{ok=0}, NOT actuated (queued ack on the loop thread). */ /* 2) extent: len > MAX -> ACK{ok=0}, NOT actuated (queued ack on the loop thread). */
CHECK(send_write(A, 0x2000, VMSIG_MEMWRITE_MAX + 1, VMSIG_MW_SRC_INLINE, pat, 22) == 0, CHECK(send_write(A, 0, 0x2000, VMSIG_MEMWRITE_MAX + 1, VMSIG_MW_SRC_INLINE, pat, 22) == 0,
"2: over-extent write is accepted by the gate (denied inside the adapter)"); "2: over-extent write is accepted by the gate (denied inside the adapter)");
/* 2b) missing SRC flag -> ACK{ok=0}. */ /* 2b) missing SRC flag -> ACK{ok=0}. */
CHECK(send_write(A, 0x3000, 4, 0u, NULL, 33) == 0, CHECK(send_write(A, 0, 0x3000, 4, 0u, NULL, 33) == 0,
"2b: no-SRC-flag write is accepted by the gate (denied inside the adapter)"); "2b: no-SRC-flag write is accepted by the gate (denied inside the adapter)");
/* expect 3 ACKs (corr 11/22/33) + the GRANTED reply. */ /* expect 3 ACKs (corr 11/22/33) + the GRANTED reply. */
@@ -160,6 +161,45 @@ static void test_path_and_deny(void) {
vmsig_ctx_free(ctx); vmsig_ctx_free(ctx);
} }
/* ---- 1b: cr3 default + explicit ----------------------------------------------
 * Sends two inline CMD_MEMWRITE commands over the same lease: one with cr3 == 0 (the
 * kernel default) and one with an arbitrary non-zero cr3 (a process address space), then
 * requires an ACT_ACK with ok=1 for each of them and exactly one lease GRANTED.
 * Per the check labels the adapter runs as a stub that acknowledges without actuating,
 * so this proves the contract and the routing of the cr3 field, not an actual write. */
static void test_cr3_default_and_explicit(void) {
    printf("test_cr3_default_and_explicit\n");

    /* Fresh ctx/core with one MEMWRITE-capable control and the memctx adapter. */
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    cstate s;
    memset(&s, 0, sizeof s);
    s.core = core;
    void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10);
    CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0,
          "add memctx");
    CHECK(acquire_mw(A) == 0,
          "acquire submitted");

    uint8_t pat[8] = { 0xAA, 0xBB, 0xCC, 0xDD, 5, 6, 7, 8 };

    /* (a) cr3 == 0: the kernel-default address space, tracked as corr 71. */
    CHECK(send_write(A, 0, 0x4000, 8, VMSIG_MW_SRC_INLINE, pat, 71) == 0,
          "1b-a: cr3==0 (kernel default) passes the gate");
    /* (b) cr3 != 0: an arbitrary process root, tracked as corr 72. */
    CHECK(send_write(A, 0xDEADBEEF000ull, 0x5000, 8, VMSIG_MW_SRC_INLINE, pat, 72) == 0,
          "1b-b: cr3!=0 (process AS) passes the gate");

    run_until_acks(&s, 2);
    CHECK(s.granted == 1,
          "1b: lease GRANTED once");

    /* Look up the ok flag recorded for each corr; -1 means "no ACK seen". */
    int saw71_ok = -1;
    int saw72_ok = -1;
    for (int idx = 0; idx < s.nack; ++idx) {
        if (s.ack_corr[idx] == 71) {
            saw71_ok = s.ack_ok[idx];
        } else if (s.ack_corr[idx] == 72) {
            saw72_ok = s.ack_ok[idx];
        }
    }
    CHECK(saw71_ok == 1,
          "1b-a: cr3==0 write ACKs ok=1 (kernel default, stub)");
    CHECK(saw72_ok == 1,
          "1b-b: cr3!=0 write ACKs ok=1 (route proven, stub does not actuate)");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* ---- 4: cap gate — no CAP_MEMWRITE cannot acquire the lease ----------------- */ /* ---- 4: cap gate — no CAP_MEMWRITE cannot acquire the lease ----------------- */
static void test_cap_gate(void) { static void test_cap_gate(void) {
printf("test_cap_gate\n"); printf("test_cap_gate\n");
@@ -195,11 +235,11 @@ static void test_inflight_fence(void) {
uint8_t pat[4] = { 1, 2, 3, 4 }; uint8_t pat[4] = { 1, 2, 3, 4 };
CHECK(acquire_mw(A) == 0, "A acquires"); CHECK(acquire_mw(A) == 0, "A acquires");
/* A queues a write (corr=55): passes the gate (A owns), lands in the DOWN queue. */ /* A queues a write (corr=55): passes the gate (A owns), lands in the DOWN queue. */
CHECK(send_write(A, 0x1000, 4, VMSIG_MW_SRC_INLINE, pat, 55) == 0, "A queues write 55"); CHECK(send_write(A, 0, 0x1000, 4, VMSIG_MW_SRC_INLINE, pat, 55) == 0, "A queues write 55");
/* B preempts SYNCHRONOUSLY (acquire does not go through ctx). */ /* B preempts SYNCHRONOUSLY (acquire does not go through ctx). */
CHECK(acquire_mw(B) == 0, "B preempts"); CHECK(acquire_mw(B) == 0, "B preempts");
/* B's own write (corr=66) — should actuate. */ /* B's own write (corr=66) — should actuate. */
CHECK(send_write(B, 0x2000, 4, VMSIG_MW_SRC_INLINE, pat, 66) == 0, "B queues write 66"); CHECK(send_write(B, 0, 0x2000, 4, VMSIG_MW_SRC_INLINE, pat, 66) == 0, "B queues write 66");
run_until_acks(&s, 1); /* B's 66 acks; A's 55 must be fenced (no ack) */ run_until_acks(&s, 1); /* B's 66 acks; A's 55 must be fenced (no ack) */
@@ -218,7 +258,11 @@ static void test_inflight_fence(void) {
int main(void) { int main(void) {
printf("test_memwrite\n"); printf("test_memwrite\n");
/* POD layout: cr3+gva (2*u64) + len+flags (2*u32) = 24, +24 inline tail = inln[48]. */
CHECK(sizeof(vmsig_memwrite) == 24, "vmsig_memwrite header is 24 bytes");
CHECK(VMSIG_MEMWRITE_INLINE == 24u, "VMSIG_MEMWRITE_INLINE is 24");
test_path_and_deny(); test_path_and_deny();
test_cr3_default_and_explicit();
test_cap_gate(); test_cap_gate();
test_inflight_fence(); test_inflight_fence();
cref_free_all(); cref_free_all();
+119
View File
@@ -5,6 +5,7 @@
#define _GNU_SOURCE #define _GNU_SOURCE
#include "vmsig.h" #include "vmsig.h"
#include "vmsig_socket.h" #include "vmsig_socket.h"
#include "memctx.h" /* VMSIG_MEMWRITE_MAX: the adapter's extent bound (private) */
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
@@ -48,6 +49,12 @@ static vmsig_grant pol_deny(uint32_t uid, uint32_t pid, void* ud) {
return g; return g;
} }
/* Current CLOCK_MONOTONIC reading expressed as a single nanosecond count.
 * The clock_gettime() result is not checked. */
static uint64_t now_ns(void) {
    struct timespec mono;
    clock_gettime(CLOCK_MONOTONIC, &mono);
    const uint64_t whole_seconds_ns = (uint64_t)mono.tv_sec * 1000000000ull;
    return whole_seconds_ns + (uint64_t)mono.tv_nsec;
}
static int connect_abstract(const char* name) { static int connect_abstract(const char* name) {
int fd = socket(AF_UNIX, SOCK_STREAM, 0); int fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd < 0) return -1; if (fd < 0) return -1;
@@ -88,8 +95,120 @@ static void test_wire(void) {
CHECK(vmsig_wire_decode(&bad, &x) == -1, "bad magic rejected"); CHECK(vmsig_wire_decode(&bad, &x) == -1, "bad magic rejected");
} }
/* ===== variant B: socket CMD_MEMWRITE with a length-prefixed SRC tail (>INLINE) =====
* Stub memctx adapter (no VM): proves the socket transport carries a frame + SRC tail
* (with partial recv), routes through cap -> grant -> lease-gate -> adapter, and gets
* ACT_ACK ok=1. Also: an over-cap len is a framing violation that closes the connection. */
/* Grant policy for the memwrite socket test: every connecting peer receives a grant with
 * the MEMWRITE, MEMCTX and OBSERVE capabilities.
 *
 * uid: copied into the grant as its principal.
 * pid, ud: unused.
 * Returns a grant that is zeroed first and then filled with: endpoint_mask = bit 0 only,
 * source_mask = all 32 bits set, and arb_prio = 10. */
static vmsig_grant pol_mw(uint32_t uid, uint32_t pid, void* ud) {
    vmsig_grant grant;

    (void)pid;
    (void)ud;

    memset(&grant, 0, sizeof grant);
    grant.principal     = uid;
    grant.endpoint_mask = 1ull << 0;
    grant.source_mask   = 0xFFFFFFFFu;
    grant.cap_mask      = VMSIG_CAP_MEMWRITE | VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE;
    grant.arb_prio      = 10;
    return grant;
}
/* Encode an event into one fixed-size vmsig_wire frame and write all of it to fd.
 *
 * write() on a stream socket may accept fewer bytes than requested; a single call would
 * then report failure with part of the frame already on the stream. This loops until the
 * whole frame has been written.
 *
 * fd: connected stream socket.
 * ev: event to serialize (only what vmsig_wire_encode puts on the wire is sent).
 * Returns 0 once every byte of the frame was written, -1 if write() fails or makes no
 * progress (an interrupted write is reported as a failure, not retried). */
static int send_frame(int fd, const vmsig_event* ev) {
    vmsig_wire w;
    vmsig_wire_encode(&w, ev);

    const uint8_t* cursor = (const uint8_t*)&w;
    size_t remaining = sizeof w;
    while (remaining > 0) {
        ssize_t n = write(fd, cursor, remaining);
        if (n <= 0) return -1;          /* error, or no progress: do not spin */
        cursor    += (size_t)n;
        remaining -= (size_t)n;
    }
    return 0;
}
/* Read fixed frames from fd until an ACT_ACK carrying the wanted corr arrives.
 *
 * fd:        connected stream socket. Its SO_RCVTIMEO is set to 200 ms here and is left
 *            that way on return, so each read() wakes up regularly to re-check the deadline.
 * want_corr: correlation id of the ACK to wait for; other frames are skipped.
 * ms:        overall deadline in milliseconds, measured with now_ns().
 *
 * Returns the ACK's ok flag, read as the leading int of the event's inln (the ACK inln
 * layout from mc_memwrite_ack is {int ok; uint32 corr; uint32 origin}), or -1 on timeout
 * or EOF.
 *
 * EOF is reported immediately: once the peer has closed, read() returns 0 without
 * blocking, so retrying would only busy-spin until the deadline. A negative read()
 * result is still retried until the deadline, which covers the receive timeout. */
static int wait_ack(int fd, uint32_t want_corr, int ms) {
    struct timeval tv = { .tv_sec = 0, .tv_usec = 200 * 1000 };
    setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);

    uint64_t deadline = now_ns() + (uint64_t)ms * 1000000ull;
    vmsig_wire w;
    uint8_t* p = (uint8_t*)&w;
    size_t off = 0;                       /* bytes of the current frame received so far */

    while (now_ns() < deadline) {
        ssize_t n = read(fd, p + off, sizeof w - off);
        if (n == 0) return -1;            /* EOF: the peer closed, no ACK can follow */
        if (n < 0) continue;              /* receive timeout / error: retry within deadline */
        off += (size_t)n;
        if (off < sizeof w) continue;     /* frame not whole yet */
        off = 0;

        vmsig_event ev;
        if (vmsig_wire_decode(&w, &ev) != 0) continue;   /* undecodable frame: skip it */
        if (ev.kind == VMSIG_EV_ACT_ACK && ev.corr == want_corr) {
            int ok;
            memcpy(&ok, ev.inln, sizeof ok);
            return ok;
        }
    }
    return -1;
}
/* Socket end-to-end test for a CMD_MEMWRITE whose SRC bytes follow the frame as a tail.
 * Sets up a core with the memctx adapter and a socket listener using pol_mw, runs
 * loop_main on a helper thread, and then acts as the client:
 *   1. sends CMD_ACQUIRE for the MEMWRITE lease;
 *   2. sends a PAYLOAD MEMWRITE frame (len=64) and its 64-byte tail in two halves,
 *      expecting ACT_ACK ok=1 for corr 0x101;
 *   3. sends a PAYLOAD MEMWRITE frame announcing VMSIG_MEMWRITE_MAX + 1 bytes and expects
 *      the server to close the connection (read() reaching EOF).
 * Finally stops the loop, joins the thread and frees core and ctx. */
static void test_memwrite_tail(void) {
printf("test_memwrite_tail\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx stub");
const char* MW = "@vmsig-sock-mw-test";
CHECK(vmsig_socket_attach(core, MW, pol_mw, NULL) == 0, "attach mw listener");
/* Run the core loop on its own thread; this thread plays the socket client.
 * NOTE(review): the pthread_create result is not checked. */
pthread_t th;
pthread_create(&th, NULL, loop_main, core);
int fd = connect_abstract(MW);
CHECK(fd >= 0, "client connected (mw)");
/* Without a connection the client steps are skipped, but the loop is still stopped and
 * everything freed below. */
if (fd >= 0) {
/* acquire the MEMWRITE lease */
vmsig_event d; memset(&d, 0, sizeof d);
d.kind = VMSIG_EV_CMD_ACQUIRE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN;
d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH;
vmsig_lease_req lr = { VMSIG_LEASE_MEMWRITE, 0 };
memcpy(d.inln, &lr, sizeof lr);
CHECK(send_frame(fd, &d) == 0, "send ACQUIRE");
/* happy path: CMD_MEMWRITE(PAYLOAD, len=64) + 64-byte tail, written in two halves
* to exercise the TAIL-phase partial accumulation. */
const uint32_t len = 64u;
uint8_t src[64];
/* Recognizable pattern 1..64. */
for (uint32_t i = 0; i < len; i++) src[i] = (uint8_t)(i + 1);
vmsig_event mwe; memset(&mwe, 0, sizeof mwe);
mwe.kind = VMSIG_EV_CMD_MEMWRITE; mwe.source = VMSIG_SRC_MEMCTX; mwe.dir = VMSIG_DIR_DOWN;
mwe.endpoint = 0; mwe.prio = VMSIG_PRIO_HIGH; mwe.corr = 0x101;
/* Header fields in order: cr3 = 0, gva = 0x1000, len, flags. */
vmsig_memwrite mw = { 0, 0x1000, len, VMSIG_MW_SRC_PAYLOAD };
memcpy(mwe.inln, &mw, sizeof mw);
CHECK(send_frame(fd, &mwe) == 0, "send CMD_MEMWRITE frame (PAYLOAD)");
CHECK(write(fd, src, 32) == 32, "send SRC tail part 1");
/* 5 ms pause between the two halves. */
struct timespec ts = { .tv_sec = 0, .tv_nsec = 5 * 1000000 };
nanosleep(&ts, NULL); /* let the loop accumulate a partial tail */
CHECK(write(fd, src + 32, 32) == 32, "send SRC tail part 2");
/* Wait up to 1000 ms for the ACK of corr 0x101. */
CHECK(wait_ack(fd, 0x101, 1000) == 1, "B: payload-tail write ACKs ok=1 (stub)");
/* negative: an over-cap PAYLOAD len is a framing-contract violation. The server closes
* the connection — it cannot safely skip the promised tail, and draining an arbitrary
* length would be a DoS. Verify no ACK arrives and the socket reaches EOF (conn shut). */
/* Reuse the same event: clear the inline area, then write a new header and corr. */
memset(&mwe.inln, 0, sizeof mwe.inln);
mwe.corr = 0x102;
vmsig_memwrite mw2 = { 0, 0x2000, VMSIG_MEMWRITE_MAX + 1u, VMSIG_MW_SRC_PAYLOAD };
memcpy(mwe.inln, &mw2, sizeof mw2);
CHECK(send_frame(fd, &mwe) == 0, "send CMD_MEMWRITE frame (over-cap)");
/* No ACK arrives; the server shuts the conn, so the socket drains to EOF. A 1s recv
* timeout bounds the wait if the server wrongly kept the connection open. */
struct timeval rtv = { .tv_sec = 1, .tv_usec = 0 };
setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &rtv, sizeof rtv);
/* rr ends at 0 on EOF (the expected outcome) or negative on timeout/error. */
uint8_t junk[80]; ssize_t rr;
while ((rr = read(fd, junk, sizeof junk)) > 0) { /* drain any in-flight, then EOF */ }
CHECK(rr == 0, "B: over-cap closed the connection (EOF)");
close(fd);
}
/* 50 ms pause before stopping the loop.
 * NOTE(review): presumably this lets the loop process the client's close — confirm. */
struct timespec t = { .tv_sec = 0, .tv_nsec = 50 * 1000000 };
nanosleep(&t, NULL);
vmsig_core_stop(core);
pthread_join(th, NULL);
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
int main(void) { int main(void) {
test_wire(); test_wire();
test_memwrite_tail();
printf("test_socket\n"); printf("test_socket\n");
vmsig_ctx* ctx = vmsig_ctx_new(); vmsig_ctx* ctx = vmsig_ctx_new();