mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-26 04:36:37 +03:00
memwrite: per-process (cr3) target and full-extent socket SRC
- CMD_MEMWRITE now carries a target page-table root (cr3) as its first field; cr3 == 0 keeps the kernel address-space default (backward-compatible). A control that has discovered a process's cr3 through its own read-only perception can write that process's private memory under the same exclusive write lease. Freshness of the cr3 is the control's responsibility — signaling does not validate it (that is perception, not coherence), mirroring the read side.
- A socket control can now carry an SRC larger than the inline frame budget: a length-prefixed SRC tail follows the CMD_MEMWRITE frame (flag SRC_PAYLOAD, the length being the frame's own len). A per-connection two-phase receiver accumulates the tail into a fixed conn-owned buffer up to the extent bound, matching the in-process payload path. A zero or over-bound length is a framing violation that closes the connection: leaving the promised tail unread would desync the stream and draining an arbitrary length would be a denial of service.

The capability, exclusive lease, source and extent gates are unchanged and reused; only the event header gained the cr3 field and the socket transport gained the tail receiver. The adapter resolves cr3 == 0 to the kernel root on its worker thread and writes atomically.
This commit is contained in:
+98
-16
@@ -7,12 +7,20 @@
|
||||
* dispatched through emit_down (enforced by the grant), UP events are serialized into
|
||||
* a frame. On EOF — deferred reap.
|
||||
*
|
||||
* DOWN framing: every kind is a single fixed vmsig_wire frame, EXCEPT a CMD_MEMWRITE
|
||||
* carrying VMSIG_MW_SRC_PAYLOAD — then mw.len SRC bytes follow the frame (length-prefixed
|
||||
* by the contract's mw.len, no separate wire prefix). The per-conn receiver is a 2-phase
|
||||
* state machine (FRAME -> TAIL): it accumulates the frame, and for a PAYLOAD MEMWRITE it
|
||||
* accumulates the SRC tail into a fixed conn-owned blob, then emits a BORROWED-payload
|
||||
* event. The blob lives in the conn so it outlives the DOWN queue until pump_down copies it.
|
||||
*
|
||||
* DoS protection: per-uid limit of concurrent connections (against eviction of
|
||||
* legitimate ones); a janitor timerfd detaches "stuck" partial frames (slowloris).
|
||||
* The global ceiling and slot reuse live in the core. */
|
||||
* legitimate ones); a janitor timerfd detaches "stuck" partial frames / SRC tails
|
||||
* (slowloris). The global ceiling and slot reuse live in the core. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig_socket.h"
|
||||
#include "core_internal.h" /* core_add_source, core_request_drop, add_control */
|
||||
#include "memctx.h" /* VMSIG_MEMWRITE_MAX: SRC-tail bound (one source of truth) */
|
||||
#include <sys/socket.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/un.h>
|
||||
@@ -57,6 +65,10 @@ int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev) {
|
||||
}
|
||||
|
||||
/* ===== per-conn control ===== */
|
||||
/* DOWN receive phases: read the fixed frame, then (only for a PAYLOAD MEMWRITE) the
|
||||
* length-prefixed SRC tail. State persists in the conn across EPOLLIN (partial recv). */
|
||||
typedef enum {
    CONN_RX_FRAME = 0,  /* accumulating the fixed-size vmsig_wire frame */
    CONN_RX_TAIL  = 1   /* accumulating the SRC tail that follows a PAYLOAD MEMWRITE */
} conn_rx_phase;
|
||||
|
||||
typedef struct sock_conn {
|
||||
int fd;
|
||||
vmsig_core* core;
|
||||
@@ -69,6 +81,11 @@ typedef struct sock_conn {
|
||||
void* token;
|
||||
uint8_t buf[sizeof(vmsig_wire)];
|
||||
size_t buflen;
|
||||
conn_rx_phase phase; /* FRAME: read vmsig_wire; TAIL: read SRC tail */
|
||||
vmsig_event pend; /* decoded frame awaiting its SRC tail */
|
||||
uint32_t need; /* expected tail length (= mw.len) */
|
||||
uint32_t got; /* tail bytes already accumulated */
|
||||
uint8_t blob[VMSIG_MEMWRITE_MAX]; /* SRC tail (BORROWED payload; lives in conn) */
|
||||
} sock_conn;
|
||||
|
||||
static int conn_fd(void* ctl) { return ((sock_conn*)ctl)->fd; }
|
||||
@@ -90,26 +107,86 @@ static void conn_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*)
|
||||
sock_conn* c = ctl; c->emit_down = emit; c->token = token;
|
||||
}
|
||||
|
||||
/* Does this decoded frame pull a length-prefixed SRC tail? Only a CMD_MEMWRITE that
|
||||
* advertises VMSIG_MW_SRC_PAYLOAD. The tail length is mw.len from the contract (already on
|
||||
* the wire in inln) — no separate wire prefix. INLINE / other kinds carry no tail. */
|
||||
static int frame_pulls_tail(const vmsig_event* ev, uint32_t* need) {
|
||||
if (ev->kind != VMSIG_EV_CMD_MEMWRITE) return 0;
|
||||
const vmsig_memwrite* mw = (const vmsig_memwrite*)ev->inln;
|
||||
if (!(mw->flags & VMSIG_MW_SRC_PAYLOAD)) return 0;
|
||||
*need = mw->len;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int conn_on_readable(void* ctl) {
|
||||
sock_conn* c = ctl;
|
||||
for (;;) {
|
||||
ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen);
|
||||
if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */
|
||||
if (c->phase == CONN_RX_FRAME) {
|
||||
ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen);
|
||||
if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN || errno == EWOULDBLOCK) break;
|
||||
core_request_drop(c->core, c->id);
|
||||
return 0;
|
||||
}
|
||||
c->last_ns = now_ns();
|
||||
c->buflen += (size_t)n;
|
||||
if (c->buflen != sizeof c->buf) continue; /* frame not whole yet */
|
||||
c->buflen = 0;
|
||||
|
||||
vmsig_event ev;
|
||||
if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) != 0)
|
||||
continue; /* bad magic/ver — drop frame */
|
||||
ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */
|
||||
|
||||
uint32_t need = 0;
|
||||
if (!frame_pulls_tail(&ev, &need)) { /* variant A / other kinds */
|
||||
if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */
|
||||
continue;
|
||||
}
|
||||
/* Cap BEFORE reading the tail. A PAYLOAD frame promises EXACTLY mw.len tail bytes
|
||||
* with 1 <= mw.len <= MAX. A zero or over-cap length is a framing-contract
|
||||
* violation: the promised tail cannot be safely consumed (draining an
|
||||
* attacker-chosen length is a DoS) and leaving it unread would desync the stream
|
||||
* (the SRC bytes would be misread as the next frame). Close the connection — a
|
||||
* conformant poller never requests a tail outside [1, MAX]. */
|
||||
if (need == 0 || need > VMSIG_MEMWRITE_MAX) {
|
||||
core_request_drop(c->core, c->id);
|
||||
return 0;
|
||||
}
|
||||
c->pend = ev; c->need = need; c->got = 0;
|
||||
c->phase = CONN_RX_TAIL; /* fall through to read the tail */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* CONN_RX_TAIL: accumulate exactly c->need SRC bytes into the conn-owned blob. */
|
||||
ssize_t n = read(c->fd, c->blob + c->got, c->need - c->got);
|
||||
if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN || errno == EWOULDBLOCK) break;
|
||||
core_request_drop(c->core, c->id);
|
||||
return 0;
|
||||
}
|
||||
c->last_ns = now_ns();
|
||||
c->buflen += (size_t)n;
|
||||
if (c->buflen == sizeof c->buf) {
|
||||
vmsig_event ev;
|
||||
if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) == 0) {
|
||||
ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */
|
||||
if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */
|
||||
}
|
||||
c->buflen = 0;
|
||||
}
|
||||
c->got += (uint32_t)n;
|
||||
if (c->got != c->need) continue; /* tail not whole yet */
|
||||
|
||||
/* Tail complete: borrow it through the payload. The blob lives in the conn and thus
|
||||
* outlives the DOWN queue until pump_down copies it (mc_submit). release=NULL — the
|
||||
* body belongs to the conn; the adapter copies synchronously on the loop thread. */
|
||||
c->pend.payload.data = c->blob;
|
||||
c->pend.payload.len = c->need;
|
||||
c->pend.payload.codec = VMSIG_CODEC_MEMCTX;
|
||||
c->pend.payload.flags = VMSIG_PL_BORROWED;
|
||||
c->pend.payload.release= NULL;
|
||||
c->pend.payload.owner = NULL;
|
||||
if (c->emit_down) c->emit_down(c->token, &c->pend);
|
||||
c->phase = CONN_RX_FRAME; c->got = 0; c->need = 0;
|
||||
|
||||
/* Do NOT reuse c->blob until pump_down has copied it. conn-fd is LEVEL-triggered
|
||||
* (EPOLLIN without EPOLLET), so any remaining bytes re-fire EPOLLIN on the next
|
||||
* pass — break out and let pump_down run first. */
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -244,16 +321,21 @@ static void on_accept(void* user, uint32_t events) {
|
||||
}
|
||||
}
|
||||
|
||||
/* janitor: detach connections with a stuck partial frame (slowloris) */
|
||||
/* janitor: detach connections with a stuck partial frame OR a stuck partial SRC tail
|
||||
* (slowloris). The blob is a conn field, so teardown (free(c) in conn_close) needs no
|
||||
* extra cleanup; a partial tail never emitted an event, so no dangling payload either. */
|
||||
static void on_janitor(void* user, uint32_t events) {
|
||||
(void)events;
|
||||
sock_listener* L = user;
|
||||
uint64_t v;
|
||||
while (read(L->janitor_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain */ }
|
||||
uint64_t now = now_ns();
|
||||
for (sock_conn* c = L->conns; c; c = c->lnext)
|
||||
if (c->buflen > 0 && now - c->last_ns > VMSIG_SOCK_IDLE_NS)
|
||||
for (sock_conn* c = L->conns; c; c = c->lnext) {
|
||||
int stuck_frame = (c->buflen > 0); /* partial vmsig_wire */
|
||||
int stuck_tail = (c->phase == CONN_RX_TAIL && c->got < c->need);/* partial SRC tail */
|
||||
if ((stuck_frame || stuck_tail) && now - c->last_ns > VMSIG_SOCK_IDLE_NS)
|
||||
core_request_drop(c->core, c->id);
|
||||
}
|
||||
}
|
||||
|
||||
/* listener cleanup on core_free (owner = the core, via on_free of the first source) */
|
||||
|
||||
Reference in New Issue
Block a user