mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-20 19:06:37 +03:00
vmsig: a neutral signaling layer between sensors/input and controls
An epoll-driven, neutral transfer-event bus that connects sensors and input actuators to one or more controls, bidirectionally. It owns the transfer context and events — delivery order, priority, protocol-level timing, and an interrupt-driven event model over fd sources (eventfd/timerfd/sockets) — and stays agnostic to both the sensor/input drivers and the control. What lives here: - memctx: a coherent address-space context per endpoint — the guest address-space root paired with a pre-opened read-only RAM-region fd, with per-endpoint epoch invalidation and retained replay to late subscribers. Perception lives in out-of-tree sensor libraries that consume this datum read-only. - exclusive-ownership leases for destructive resource classes (input, power, memory-write). - write-signaled memory writes (MEMWRITE): an atomic write to guest memory routed through the seam under an exclusive lease, never a writable mapping. - a host-management seam for VM lifecycle/status and a neutral input-injection command path. - multi-VM endpoints; capability-gated, audited control authorization over an in-process or unix-socket transport. Builds against headers only by default (a stub mode that exercises the seam without a VM); armed builds link the real sensor/input libraries behind flags. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,44 @@
|
||||
#ifndef VMSIG_ADAPTER_UTIL_H
#define VMSIG_ADAPTER_UTIL_H
#include <stddef.h>

/* adapter_util.h — the shared "blocking API -> completion eventfd" primitive.
 *
 * It bridges a synchronous, CPU-bound or blocking neighbor call (vmie, vmctl)
 * into a readiness source for the epoll core:
 *   1. the loop thread posts a request;
 *   2. a separate worker thread runs the blocking work and signals a
 *      completion eventfd;
 *   3. the loop wakes on that eventfd and collects the result in on_readiness.
 * Reused by the memctx adapter (off-loop bootstrap) and the input adapter. */

typedef struct vmsig_worker vmsig_worker;

#define VMSIG_WORK_SLOT 256 /* bytes per req/res slot (POD, copied by value) */

/* Job callback, executed IN a worker thread: compute res from req. Both are POD
 * blobs of at most VMSIG_WORK_SLOT bytes. Returns 0 or -1; the code is stored
 * next to the result and handed back by vmsig_worker_poll. The callback must
 * not touch core structures. */
typedef int (*vmsig_work_fn)(void* user, const void* req, void* res);

/* Create a pool of nthreads worker threads (nthreads >= 1) sharing one request
 * queue. vmie tolerates parallel read-only readers; a serial channel (QMP)
 * should use 1. max_depth caps the request-queue depth (<= 0 selects the
 * default): a submit beyond the cap is rejected with -1, so an untrusted flood
 * cannot grow the queue into OOM. Returns NULL on error. */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth);

/* Stop and join the threads, then release the pool. NULL is accepted. */
void vmsig_worker_free(vmsig_worker* w);

/* The completion eventfd; the adapter registers it as a VMSIG_RDY_EVENTFD source. */
int vmsig_worker_evfd(const vmsig_worker* w);

/* Loop thread: post a request. The bytes are copied; len must not exceed
 * VMSIG_WORK_SLOT. Returns 0 on success, -1 on rejection. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len);

/* Loop thread (inside on_readiness): drain the completion eventfd. */
void vmsig_worker_ack(vmsig_worker* w);

/* Loop thread: fetch one finished result.
 *   1  — a result was copied into res and *rc holds the callback's code;
 *   0  — nothing is ready;
 *  -1  — error.
 * Call repeatedly until it returns 0. */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc);

#endif /* VMSIG_ADAPTER_UTIL_H */
|
||||
@@ -0,0 +1,18 @@
|
||||
#ifndef VMSIG_INPUT_H
#define VMSIG_INPUT_H

/* Private configuration of the input adapter (vmctl).
 *
 * Passing cfg == NULL selects stub mode. An armed build (VMSIG_WITH_VMCTL)
 * calls vmctl_open() and actuates for real. `driver` is a plain int so that
 * this header does not have to include vmctl.h; its values mirror
 * VMCTL_DRIVER_*. */
typedef struct {
    int         stub;
    int         driver;    /* 0 = QMP, 1 = UINPUT (see VMCTL_DRIVER_*) */
    const char* qmp_path;
    const char* input_bus;
    int         ptr_mode;
} vmsig_input_cfg;

/* The input event codes and contract are PUBLIC: see vmsig_input and
 * vmsig_input_kind in include/vmsig_event.h (an external control encodes them
 * into inln). There is deliberately no private duplicate here. */

#endif /* VMSIG_INPUT_H */
|
||||
@@ -0,0 +1,230 @@
|
||||
/* input.c — input/actuator adapter for vmctl (input + power/lifecycle).
|
||||
*
|
||||
* Mechanism (recommended): vmctl is a blocking QMP round-trip; we run it on a
|
||||
* worker thread, completion ack via a completion-eventfd. The uinput path is a
|
||||
* local instantaneous write; when armed it would be done inline (see comment in submit).
|
||||
* Real actuation is under VMSIG_WITH_VMCTL; otherwise the stub acks (spine without a VM). */
|
||||
#include "vmsig_adapter.h"
|
||||
#include "adapter_util.h"
|
||||
#include "input.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
#ifdef VMSIG_WITH_VMCTL
|
||||
#include "vmctl.h"
|
||||
#endif
|
||||
|
||||
/* Worker request of the input adapter: a POD blob copied into the worker slot. */
typedef struct {
    int      cmd;      /* 0 = input event, 1 = lifecycle */
    uint32_t corr;
    uint32_t origin;   /* initiator, so the ACK can be addressed back */
    int      kind;     /* vmsig_input_kind (only meaningful when cmd == 0) */
    int      code;     /* axis / button / evdev code */
    int      value;    /* abs / rel / down */
    double   scroll;
    int      life_op;  /* VMSIG_LIFE_* (powerdown/reset/wakeup/pause/resume) */
} input_req;
|
||||
/* Worker result of the input adapter: outcome plus the echoed corr/origin. */
typedef struct {
    int      ok;
    uint32_t corr;
    uint32_t origin;
} input_res;
|
||||
|
||||
/* signaling does NOT track held state: the record of what is pressed lives in the
|
||||
* ACTUATOR (vmctl); we hand it to control on request (CMD_QUERY_INPUT), release is control's decision. */
|
||||
struct vmsig_adapter {
|
||||
uint32_t endpoint;
|
||||
int stub;
|
||||
vmsig_emit emit;
|
||||
vmsig_worker* worker;
|
||||
int driver; /* 0=QMP, 1=UINPUT (VMCTL_DRIVER_*); carried open->attach */
|
||||
const char* qmp_path; /* borrowed from cfg (valid through attach) */
|
||||
const char* input_bus;
|
||||
int ptr_mode;
|
||||
#ifdef VMSIG_WITH_VMCTL
|
||||
vmctl_t* vmctl;
|
||||
#endif
|
||||
};
|
||||
|
||||
static int input_job(void* user, const void* reqp, void* resp) {
|
||||
struct vmsig_adapter* a = user;
|
||||
const input_req* rq = reqp;
|
||||
input_res* rs = resp;
|
||||
memset(rs, 0, sizeof *rs);
|
||||
rs->corr = rq->corr;
|
||||
rs->origin = rq->origin;
|
||||
#ifdef VMSIG_WITH_VMCTL
|
||||
if (a->vmctl) {
|
||||
int r = -1;
|
||||
if (rq->cmd == 0) {
|
||||
vmctl_batch b; vmctl_batch_init(&b);
|
||||
switch (rq->kind) {
|
||||
case VMSIG_INPUT_ABS: vmctl_batch_abs(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_REL: vmctl_batch_rel(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_BTN: vmctl_batch_btn(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_KEY: vmctl_batch_key(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_SCROLL: vmctl_batch_scroll(&b, rq->code, rq->scroll); break;
|
||||
default: break;
|
||||
}
|
||||
r = vmctl_batch_send(a->vmctl, &b);
|
||||
} else {
|
||||
switch (rq->life_op) {
|
||||
case 0: r = vmctl_powerdown(a->vmctl); break;
|
||||
case 1: r = vmctl_reset(a->vmctl); break;
|
||||
case 2: r = vmctl_wakeup(a->vmctl); break;
|
||||
case 3: r = vmctl_pause(a->vmctl); break;
|
||||
case 4: r = vmctl_resume(a->vmctl); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
rs->ok = (r == 0);
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
(void)a;
|
||||
rs->ok = 1; /* stub: ack without actuation */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static vmsig_adapter* in_open(const void* cfg, uint32_t endpoint) {
|
||||
const vmsig_input_cfg* c = cfg;
|
||||
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||
if (!a) return NULL;
|
||||
a->endpoint = endpoint;
|
||||
a->stub = c ? c->stub : 1;
|
||||
if (c) { /* carry the driver selection to attach (cfg not passed there) */
|
||||
a->driver = c->driver;
|
||||
a->qmp_path = c->qmp_path;
|
||||
a->input_bus = c->input_bus;
|
||||
a->ptr_mode = c->ptr_mode;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Attach the input adapter to the core.
 *
 * Stores the emit hooks, starts the single-threaded worker (queue cap 64; QMP
 * is a serial channel), opens the vmctl actuator in an armed, non-stub build,
 * registers the worker's completion eventfd in reg[0] and emits SEAM_UP.
 *
 * Returns the number of fd registrations written (1), or -1 when cap < 1, the
 * worker cannot be created, or vmctl_open fails. */
static int in_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1) {
        return -1;
    }
    a->emit = *emit;

    a->worker = vmsig_worker_new(input_job, a, 1, 64);
    if (a->worker == NULL) {
        return -1;
    }

#ifdef VMSIG_WITH_VMCTL
    if (!a->stub) {
        /* Armed: rebuild vmctl_config from the values carried over from open and
         * open the actuator. UINPUT (host uinput + optional
         * virtio-input-host-pci passthrough via QMP) is the primary input
         * driver; QMP input-send-event is the fallback. */
        vmctl_config vcfg;
        memset(&vcfg, 0, sizeof vcfg);
        vcfg.driver    = (a->driver == 1) ? VMCTL_DRIVER_UINPUT : VMCTL_DRIVER_QMP;
        vcfg.qmp_path  = a->qmp_path;
        vcfg.input_bus = a->input_bus;
        vcfg.ptr_mode  = a->ptr_mode;
        vcfg.uinput_id = NULL; /* built-in HID identity defaults */

        a->vmctl = vmctl_open(&vcfg);
        if (a->vmctl == NULL) {
            vmsig_worker_free(a->worker);
            a->worker = NULL;
            return -1;
        }
    }
#endif

    /* The worker completion eventfd is the only readiness source (cookie 0). */
    vmsig_fd_reg* slot = &reg[0];
    slot->fd           = vmsig_worker_evfd(a->worker);
    slot->epoll_events = EPOLLIN;
    slot->shape        = VMSIG_RDY_EVENTFD;
    slot->cookie       = 0;

    vmsig_event seam_up;
    memset(&seam_up, 0, sizeof seam_up);
    seam_up.kind     = VMSIG_EV_SEAM_UP;
    seam_up.source   = VMSIG_SRC_INPUT;
    seam_up.dir      = VMSIG_DIR_UP;
    seam_up.prio     = VMSIG_PRIO_NORMAL;
    seam_up.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &seam_up);
    return 1;
}
|
||||
|
||||
/* Readiness handler for the worker completion eventfd: drain the eventfd, then
 * turn every finished worker result into an ACT_ACK event carrying the
 * request's corr/origin and the input_res bytes inline. Always returns 0. */
static int in_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie;
    (void)events;

    vmsig_worker_ack(a->worker);

    for (;;) {
        input_res done;
        int code;
        if (vmsig_worker_poll(a->worker, &done, sizeof done, &code) != 1) {
            break;
        }

        vmsig_event ack;
        memset(&ack, 0, sizeof ack);
        ack.kind          = VMSIG_EV_ACT_ACK;
        ack.source        = VMSIG_SRC_INPUT;
        ack.dir           = VMSIG_DIR_UP;
        ack.prio          = VMSIG_PRIO_NORMAL;
        ack.endpoint      = a->endpoint;
        ack.corr          = done.corr;
        ack.origin        = done.origin;
        ack.payload.flags = VMSIG_PL_INLINE;

        /* Copy no more than the inline area can hold. */
        size_t nbytes = sizeof done;
        if (sizeof ack.inln < nbytes) {
            nbytes = sizeof ack.inln;
        }
        memcpy(ack.inln, &done, nbytes);

        a->emit.emit(a->emit.token, &ack);
    }
    return 0;
}
|
||||
|
||||
/* Answer VMSIG_EV_CMD_QUERY_INPUT: report what is currently PRESSED, taken from
 * the vmctl ACTUATOR's record (signaling does not track held state itself).
 * The snapshot calls are made directly on the calling (loop) thread. In stub
 * mode, or without a vmctl handle, the reply is an empty set. The reply is
 * addressed to the initiator (origin) and echoes the query's corr so that the
 * initiator can match it to its request, the same way ACT_ACK does. */
static void in_reply_held(vmsig_adapter* a, const vmsig_event* ev) {
    vmsig_input_held h;
    memset(&h, 0, sizeof h);

#ifdef VMSIG_WITH_VMCTL
    if (a->vmctl) {
        const uint32_t capn = (uint32_t)(sizeof h.ent / sizeof h.ent[0]);
        unsigned char bits[VMCTL_KEYS_SNAPSHOT_BYTES];
        int n = vmctl_keys_snapshot(a->vmctl, bits, sizeof bits);

        /* The bitmap is only inspected when the snapshot reported n > 0. */
        for (int code = 0; n > 0 && code <= VMCTL_KEY_CODE_MAX; code++) {
            if (bits[code >> 3] & (1u << (code & 7))) {
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_KEY;
                    h.ent[h.count].code = (uint16_t)code;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC; /* more held than fits */
                }
            }
        }

        unsigned bm = vmctl_btns_snapshot(a->vmctl);
        for (int b = 0; b < 8; b++) {
            if (bm & (1u << b)) {
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_BTN;
                    h.ent[h.count].code = (uint16_t)b;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC;
                }
            }
        }
    }
#else
    (void)a;
#endif

    vmsig_event up;
    memset(&up, 0, sizeof up);
    up.kind = VMSIG_EV_INPUT_HELD; up.source = VMSIG_SRC_INPUT; up.dir = VMSIG_DIR_UP;
    up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
    up.corr = ev->corr; up.origin = ev->origin;
    up.payload.flags = VMSIG_PL_INLINE;
    memcpy(up.inln, &h, sizeof up.inln < sizeof h ? sizeof up.inln : sizeof h);
    a->emit.emit(a->emit.token, &up);
}

/* Downward command entry point of the input seam.
 *
 *   VMSIG_EV_CMD_QUERY_INPUT — answered immediately with INPUT_HELD; returns 0.
 *   VMSIG_EV_CMD_INPUT       — decodes the public vmsig_input contract from
 *                              inln and queues it for the worker.
 *   VMSIG_EV_CMD_LIFECYCLE   — takes the op from inln[0] and queues it.
 *   anything else            — returns 1 (not for this seam).
 *
 * Returns 0 when the request was queued, -1 when the worker rejected it. */
static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CMD_QUERY_INPUT) {
        in_reply_held(a, ev);
        return 0;
    }

    input_req rq;
    memset(&rq, 0, sizeof rq);
    rq.corr = ev->corr; rq.origin = ev->origin;

    if (ev->kind == VMSIG_EV_CMD_INPUT) {
        rq.cmd = 0;
        /* Decode the NEUTRAL public input contract (vmsig_input) from inln. The
         * struct is zeroed first: when inln is smaller than vmsig_input only a
         * prefix is copied, and the remaining fields must not be read
         * uninitialized. Held state is not tracked here (see CMD_QUERY_INPUT). */
        vmsig_input in;
        memset(&in, 0, sizeof in);
        memcpy(&in, ev->inln, sizeof in <= sizeof ev->inln ? sizeof in : sizeof ev->inln);
        rq.kind   = (int)in.kind;
        rq.code   = (int)in.code;
        rq.value  = (int)in.value;
        rq.scroll = in.scroll;
    } else if (ev->kind == VMSIG_EV_CMD_LIFECYCLE) {
        rq.cmd = 1;
        rq.life_op = (int)(unsigned char)ev->inln[0];
    } else {
        return 1; /* not for this seam */
    }

    return vmsig_worker_submit(a->worker, &rq, sizeof rq) == 0 ? 0 : -1;
}
|
||||
|
||||
/* Tear the input adapter down. The worker is stopped first and only then is
 * the vmctl handle closed (armed builds), after which the adapter itself is
 * released. A NULL adapter is ignored. */
static void in_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }

    vmsig_worker_free(a->worker);
#ifdef VMSIG_WITH_VMCTL
    if (a->vmctl != NULL) {
        vmctl_close(a->vmctl);
    }
#endif
    free(a);
}
|
||||
|
||||
static const vmsig_adapter_ops IN_OPS = {
|
||||
.name = "input", .source = VMSIG_SRC_INPUT, .codec = VMSIG_CODEC_INPUT,
|
||||
.open = in_open, .attach = in_attach, .on_readiness = in_on_ready,
|
||||
.submit = in_submit, .close = in_close
|
||||
};
|
||||
|
||||
/* Public accessor: the input adapter's operations table. */
const vmsig_adapter_ops* vmsig_input_ops(void) {
    return &IN_OPS;
}
|
||||
@@ -0,0 +1,162 @@
|
||||
/* worker.c — bridge "blocking API -> completion eventfd" (pool of N threads).
|
||||
* MPSC request/result queues under a mutex + condvar; result readiness is
|
||||
* signaled via eventfd, on which the core's epoll loop wakes. N threads share one
|
||||
* request queue (for vmie — parallel read-only readers; for QMP — N=1). */
|
||||
#include "adapter_util.h"
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
typedef struct work_node {
|
||||
struct work_node* next;
|
||||
int rc; /* fn return code (for results) */
|
||||
size_t len;
|
||||
unsigned char buf[VMSIG_WORK_SLOT];
|
||||
} work_node;
|
||||
|
||||
/* Singly linked FIFO: pop at head, push at tail. */
typedef struct {
    work_node* head;
    work_node* tail;
} work_q;
|
||||
|
||||
struct vmsig_worker {
|
||||
pthread_t* threads;
|
||||
int nthreads;
|
||||
pthread_mutex_t lock;
|
||||
pthread_cond_t cv;
|
||||
work_q req; /* loop -> workers */
|
||||
work_q res; /* workers -> loop */
|
||||
int evfd;
|
||||
int stop;
|
||||
int max_depth; /* cap on req-queue depth */
|
||||
int req_count; /* current req-queue depth */
|
||||
vmsig_work_fn fn;
|
||||
void* user;
|
||||
};
|
||||
|
||||
/* Append `node` at the tail of `queue`. The caller provides any locking. */
static void q_push(work_q* queue, work_node* node) {
    node->next = NULL;
    if (queue->tail != NULL) {
        queue->tail->next = node;
    } else {
        queue->head = node;   /* the queue was empty */
    }
    queue->tail = node;
}
|
||||
/* Detach and return the head of `queue`, or NULL when it is empty. The
 * returned node's `next` link is left as it was. */
static work_node* q_pop(work_q* queue) {
    work_node* first = queue->head;
    if (first == NULL) {
        return NULL;
    }
    queue->head = first->next;
    if (queue->head == NULL) {
        queue->tail = NULL;   /* that was the last element */
    }
    return first;
}
|
||||
/* Free every node still linked into `queue` and leave it empty. */
static void q_drain(work_q* queue) {
    for (work_node* cur = queue->head; cur != NULL; ) {
        work_node* following = cur->next;
        free(cur);
        cur = following;
    }
    queue->head = NULL;
    queue->tail = NULL;
}
|
||||
|
||||
static void* worker_main(void* arg) {
|
||||
vmsig_worker* w = arg;
|
||||
for (;;) {
|
||||
pthread_mutex_lock(&w->lock);
|
||||
while (!w->stop && !w->req.head) pthread_cond_wait(&w->cv, &w->lock);
|
||||
/* On stop we DRAIN the queue: run the remaining requests so that submitted
|
||||
* work is not silently lost (matters for jobs carrying resource ownership).
|
||||
* We exit only when stop AND the queue is empty. */
|
||||
if (w->stop && !w->req.head) { pthread_mutex_unlock(&w->lock); break; }
|
||||
work_node* rq = q_pop(&w->req);
|
||||
if (rq) w->req_count--;
|
||||
pthread_mutex_unlock(&w->lock);
|
||||
if (!rq) continue;
|
||||
|
||||
work_node* rs = calloc(1, sizeof *rs);
|
||||
if (rs) {
|
||||
rs->rc = w->fn ? w->fn(w->user, rq->buf, rs->buf) : -1;
|
||||
rs->len = VMSIG_WORK_SLOT;
|
||||
pthread_mutex_lock(&w->lock);
|
||||
q_push(&w->res, rs);
|
||||
pthread_mutex_unlock(&w->lock);
|
||||
uint64_t one = 1;
|
||||
ssize_t r = write(w->evfd, &one, sizeof one);
|
||||
(void)r;
|
||||
}
|
||||
free(rq);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Build a worker pool.
 *
 * fn/user    — job callback and its opaque argument.
 * nthreads   — requested thread count; values below 1 are treated as 1.
 * max_depth  — request-queue cap; values <= 0 select the default of 512.
 *
 * The pool is returned as long as at least one thread could be started (it may
 * then run with fewer threads than requested). Returns NULL when any resource
 * cannot be set up or no thread starts; everything acquired so far is released. */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth) {
    const int want = nthreads < 1 ? 1 : nthreads;

    vmsig_worker* pool = calloc(1, sizeof *pool);
    if (pool == NULL) {
        return NULL;
    }
    pool->fn        = fn;
    pool->user      = user;
    pool->evfd      = -1;
    pool->max_depth = max_depth > 0 ? max_depth : 512;

    pool->threads = calloc((size_t)want, sizeof *pool->threads);
    if (pool->threads == NULL) {
        goto fail_pool;
    }
    pool->evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (pool->evfd < 0) {
        goto fail_threads;
    }
    if (pthread_mutex_init(&pool->lock, NULL) != 0) {
        goto fail_evfd;
    }
    if (pthread_cond_init(&pool->cv, NULL) != 0) {
        goto fail_mutex;
    }

    /* Start threads until one fails to start or the requested count is reached. */
    while (pool->nthreads < want &&
           pthread_create(&pool->threads[pool->nthreads], NULL, worker_main, pool) == 0) {
        pool->nthreads++;
    }
    if (pool->nthreads > 0) {
        return pool;
    }

    pthread_cond_destroy(&pool->cv);
fail_mutex:
    pthread_mutex_destroy(&pool->lock);
fail_evfd:
    close(pool->evfd);
fail_threads:
    free(pool->threads);
fail_pool:
    free(pool);
    return NULL;
}
|
||||
|
||||
/* Shut the pool down: raise the stop flag, wake every worker, join them all,
 * discard whatever is still queued, then release the synchronization objects,
 * the eventfd and the memory. NULL is accepted. */
void vmsig_worker_free(vmsig_worker* w) {
    if (w == NULL) {
        return;
    }

    pthread_mutex_lock(&w->lock);
    w->stop = 1;
    pthread_cond_broadcast(&w->cv);
    pthread_mutex_unlock(&w->lock);

    int idx = 0;
    while (idx < w->nthreads) {
        pthread_join(w->threads[idx], NULL);
        idx++;
    }

    q_drain(&w->req);
    q_drain(&w->res);

    pthread_cond_destroy(&w->cv);
    pthread_mutex_destroy(&w->lock);
    if (w->evfd >= 0) {
        close(w->evfd);
    }
    free(w->threads);
    free(w);
}
|
||||
|
||||
/* Completion eventfd of the pool, or -1 when w is NULL. */
int vmsig_worker_evfd(const vmsig_worker* w) {
    if (w == NULL) {
        return -1;
    }
    return w->evfd;
}
|
||||
|
||||
/* Queue one request for the workers.
 * The first `len` bytes of `req` are copied into a fresh node (nothing is
 * copied when req is NULL or len is 0; the slot is then all zeroes).
 * Returns 0 on success; -1 when w is NULL, len exceeds VMSIG_WORK_SLOT, the
 * queue is already at its depth cap, or the node cannot be allocated. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len) {
    if (w == NULL || len > VMSIG_WORK_SLOT) {
        return -1;
    }

    int status = -1;
    pthread_mutex_lock(&w->lock);
    if (w->req_count < w->max_depth) {   /* depth cap: refuse flooding */
        work_node* item = calloc(1, sizeof *item);
        if (item != NULL) {
            if (req != NULL && len != 0) {
                memcpy(item->buf, req, len);
            }
            item->len = len;
            q_push(&w->req, item);
            w->req_count++;
            pthread_cond_signal(&w->cv);
            status = 0;
        }
    }
    pthread_mutex_unlock(&w->lock);
    return status;
}
|
||||
|
||||
/* Drain the completion eventfd: keep reading until a read no longer yields a
 * full 8-byte counter. NULL is accepted. */
void vmsig_worker_ack(vmsig_worker* w) {
    if (w == NULL) {
        return;
    }
    for (;;) {
        uint64_t counter;
        if (read(w->evfd, &counter, sizeof counter) != (ssize_t)sizeof counter) {
            break;
        }
    }
}
|
||||
|
||||
/* Take one finished result off the result queue.
 * Up to `cap` bytes of the result are copied into `res` (skipped when res is
 * NULL or cap is 0) and the callback's code is stored in *rc when rc is given.
 * Returns 1 when a result was consumed, 0 when none is ready, -1 when w is NULL. */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc) {
    if (w == NULL) {
        return -1;
    }

    pthread_mutex_lock(&w->lock);
    work_node* done = q_pop(&w->res);
    pthread_mutex_unlock(&w->lock);

    if (done == NULL) {
        return 0;
    }

    if (res != NULL && cap != 0) {
        size_t ncopy = done->len;
        if (cap < ncopy) {
            ncopy = cap;
        }
        memcpy(res, done->buf, ncopy);
    }
    if (rc != NULL) {
        *rc = done->rc;
    }
    free(done);
    return 1;
}
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef VMSIG_MEMCTX_CFG_H
#define VMSIG_MEMCTX_CFG_H
#include <stdint.h>

/* Private configuration of the memctx adapter (vmie). It is handed to open()
 * as an opaque pointer and is NOT public (layout per reference:
 * src/<module>/include/). cfg == NULL selects stub mode. */
typedef struct {
    int         stub;      /* 1 => synthetic kcr3 / RO-fd (spine without a VM) */
    const char* ram_path;  /* armed: path to the guest RAM backing (NOT published outward) */
    uint64_t    low;       /* below-4G split (vmie_win32_open / locator.low) */
    int         ro_fd;     /* >= 0 => infra supplied a pre-sealed RO-fd (policy);
                            * <  0 => default: open(ram_path, O_RDONLY) / stub memfd */
} vmsig_memctx_cfg;

/* Upper bound on SRC bytes per atomic gva_write. It bounds the worker POD slot:
 * the mc_req header plus src must stay <= VMSIG_WORK_SLOT. This is an executor
 * bound private to the adapter, NOT part of the neutral control contract;
 * control only needs VMSIG_MEMWRITE_INLINE for inline SRC. */
#define VMSIG_MEMWRITE_MAX 192u

#endif /* VMSIG_MEMCTX_CFG_H */
|
||||
@@ -0,0 +1,407 @@
|
||||
/* memctx.c — vmie sensor adapter: vends ONE coherent guest address-space context —
|
||||
* the permanent System DirectoryTableBase (`kcr3`) PAIRED with a RAM-region locator
|
||||
* and a pre-opened O_RDONLY fd. This is NOT perception and NOT semantics: signaling
|
||||
* multicasts the datum + RO-fd, while the holder (an S-lib / any control) opens ITS OWN
|
||||
* read-only vmie_mem from the fd and does gva_read/scan/pmap itself.
|
||||
*
|
||||
* Cold bring-up (host_bootstrap) is CPU-bound and blocking, so it runs on an off-loop
|
||||
* worker; the loop thread only assembles the locator on the completion-eventfd and emits
|
||||
* the MEMCTX trigger. The epoch is stamped by the CORE (retained-context); on an epoch
|
||||
* change the core calls reg.invalidate, the adapter re-bootstraps and re-emits MEMCTX.
|
||||
*
|
||||
* RO outward is physical: O_RDONLY fd => mmap(PROT_WRITE) -> EACCES, so a write into the
|
||||
* guest on the holder side is structurally impossible. stub mode (without VMSIG_WITH_VMIE
|
||||
* or ram_path==NULL) synthesizes a kcr3 and a genuinely RO-mappable fd (memfd + seal) —
|
||||
* the seam is provable without a VM. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig_adapter.h"
|
||||
#include "memctx.h"
|
||||
#include "adapter_util.h" /* vmsig_worker (off-loop bootstrap) */
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
|
||||
#endif
|
||||
|
||||
/* Compatibility shims for old glibc / kernel headers (backing of the stub
 * RO-fd): memfd_create() and the file-seal constants are supplied here only
 * when the system headers do not already provide them. */
#ifndef MFD_CLOEXEC
#include <sys/syscall.h>
#include <linux/memfd.h>
/* Direct syscall wrapper standing in for the missing libc function. */
static int memfd_create(const char* name, unsigned int flags) {
    long rv = syscall(SYS_memfd_create, name, flags);
    return (int)rv;
}
#endif

#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif

#ifndef F_ADD_SEALS
#define F_ADD_SEALS (1024 + 9)
#define F_SEAL_SHRINK 0x0002
#define F_SEAL_GROW 0x0004
#endif

#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE 0x0010 /* kernel 5.1+: forbid future writable mappings */
#endif

#define MC_STUB_SIZE 0x10000u /* 64 KB synthetic RAM image (stub) */
#define MC_MAX_SEG 8
#define MC_WORKER_DEPTH 16 /* one off-loop thread: rare bootstrap + writes */

/* Job types multiplexed over the single memctx worker. */
enum {
    MC_JOB_BOOTSTRAP = 0,
    MC_JOB_WRITE = 1
};
|
||||
|
||||
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
|
||||
* bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap).
|
||||
* boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend
|
||||
* on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src. */
|
||||
typedef struct {
|
||||
uint32_t op; /* MC_JOB_* */
|
||||
uint32_t boot_count; /* MC_JOB_BOOTSTRAP */
|
||||
/* --- MC_JOB_WRITE --- */
|
||||
uint64_t gva;
|
||||
uint32_t len;
|
||||
uint32_t corr;
|
||||
uint32_t origin;
|
||||
uint8_t src[VMSIG_MEMWRITE_MAX]; /* SRC bytes copied off-loop (gva_write reads this) */
|
||||
} mc_req;
|
||||
typedef struct {
|
||||
uint32_t op; /* echoes the job type so on_ready demuxes */
|
||||
int ok; /* MC_JOB_WRITE result */
|
||||
uint32_t corr;
|
||||
uint32_t origin;
|
||||
uint64_t kcr3; /* MC_JOB_BOOTSTRAP result */
|
||||
} mc_res;
|
||||
|
||||
struct vmsig_adapter {
|
||||
uint32_t endpoint;
|
||||
int stub;
|
||||
const char* ram_path; /* armed: RAM-backing path (NOT published outward) */
|
||||
uint64_t low;
|
||||
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (policy); <0 => default */
|
||||
vmsig_emit emit;
|
||||
int registered; /* register_memctx already called */
|
||||
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
|
||||
uint32_t boot_count; /* incremented on each (re-)bootstrap */
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
vmie_win32* win; /* held RW handle across the epoch (kcr3 source + gva_write target) */
|
||||
vmie_mem* mem; /* vmie_win32_mem(win); borrowed, valid until vmie_win32_close */
|
||||
#endif
|
||||
uint64_t kcr3; /* current System DTB (also published in cur_pod.kcr3) */
|
||||
|
||||
/* persistent locator: owned by the loop thread; worker only yields kcr3 into scratch. */
|
||||
int have_ctx;
|
||||
vmsig_memctx cur_pod; /* kcr3/low/nseg/flags (epoch stamped by the core) */
|
||||
vmsig_memseg cur_segs[MC_MAX_SEG];
|
||||
uint32_t cur_nseg;
|
||||
|
||||
int stub_fd; /* stub: memfd of synth RAM (+seal); share_fd reopens it */
|
||||
};
|
||||
|
||||
/* fwd: MEMWRITE completion ACK (defined below mc_submit; used in mc_on_ready demux). */
|
||||
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin);
|
||||
|
||||
/* ---- stub RO-fd: memfd + deterministic contents + seal of future writes ---- */
|
||||
static int mc_make_stub_fd(uint32_t size) {
|
||||
int fd = memfd_create("vmsig_memctx", MFD_CLOEXEC | MFD_ALLOW_SEALING);
|
||||
if (fd < 0) fd = memfd_create("vmsig_memctx", MFD_CLOEXEC);
|
||||
if (fd < 0) return -1;
|
||||
if (ftruncate(fd, (off_t)size) != 0) { close(fd); return -1; }
|
||||
/* deterministic contents via a temporary RW mapping BEFORE the seal */
|
||||
uint8_t* p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if (p != MAP_FAILED) {
|
||||
for (uint32_t i = 0; i < size; i++) p[i] = (uint8_t)(i & 0xFFu);
|
||||
munmap(p, size);
|
||||
}
|
||||
/* FUTURE_WRITE: even if the holder reopens the fd as O_RDWR, it gets no writable mapping.
|
||||
* best-effort (kernel 5.1+); on older kernels only the O_RDONLY fd protects. */
|
||||
if (fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_FUTURE_WRITE) != 0)
|
||||
(void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
|
||||
return fd;
|
||||
}
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
/* Armed bring-up, run on the off-loop worker.
 *
 * Opens the RAM backing (RW is vmie's internal concern), runs host_bootstrap
 * and takes the cr3 of the process named "System" as kcr3, the root of the
 * guest address space. The RW handle is HELD across the epoch (kcr3 source and
 * gva_write target); only the RO-fd (share_fd) leaves outward — writes go
 * through the command plane, never through a writable mmap. A stale handle
 * from the previous epoch is dropped first (serialized FIFO with in-flight
 * writes).
 *
 * Returns 0 and stores the value in *out_kcr3 on success; -1 when the backing
 * cannot be opened, bootstrap fails, or no System process is found. */
static int mc_bootstrap_armed(struct vmsig_adapter* a, uint64_t* out_kcr3) {
    if (a->win != NULL) {
        /* Drop the handle of the previous epoch. */
        vmie_win32_close(a->win);
        a->win = NULL;
        a->mem = NULL;
    }

    vmie_win32* handle = vmie_win32_open(a->ram_path, a->low);
    if (handle == NULL) {
        return -1;
    }
    if (host_bootstrap(handle) != 0) {
        vmie_win32_close(handle);
        return -1;
    }

    process procs[16];
    int found = proc_list(handle, 0, procs, 16);
    int limit = found < 16 ? found : 16;

    uint64_t system_cr3 = 0;
    for (int idx = 0; idx < limit; idx++) {
        if (strcmp(procs[idx].name, "System") == 0) {
            system_cr3 = procs[idx].cr3;
            break;
        }
    }
    if (system_cr3 == 0) {
        vmie_win32_close(handle);
        return -1;
    }

    a->win  = handle;                  /* HOLD: the RW handle lives across the epoch */
    a->mem  = vmie_win32_mem(handle);  /* borrowed; valid until vmie_win32_close(handle) */
    a->kcr3 = system_cr3;
    *out_kcr3 = system_cr3;
    return 0;
}
#endif
|
||||
|
||||
/* ---- worker job: cold bring-up OR atomic write, off-loop ----------------- *
|
||||
* Demultiplexed by rq->op. BOTH run on the SAME single worker thread, so a write on the
|
||||
* held handle never races the close-on-rebootstrap (FIFO). The job MUST NOT touch core
|
||||
* structures — it only reads a->mem/a->kcr3 (stable between re-bootstraps on this thread). */
|
||||
static int mc_job(void* user, const void* req, void* res) {
|
||||
struct vmsig_adapter* a = user;
|
||||
const mc_req* rq = req;
|
||||
mc_res* rs = res;
|
||||
memset(rs, 0, sizeof *rs);
|
||||
rs->op = rq->op;
|
||||
|
||||
if (rq->op == MC_JOB_WRITE) {
|
||||
rs->corr = rq->corr; rs->origin = rq->origin;
|
||||
if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
/* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it):
|
||||
* the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. */
|
||||
rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)a->kcr3, (uintptr_t)rq->gva,
|
||||
rq->src, rq->len) == 0);
|
||||
return rs->ok ? 0 : -1;
|
||||
#else
|
||||
rs->ok = 0;
|
||||
return -1; /* armed without the build flag: write impossible */
|
||||
#endif
|
||||
}
|
||||
|
||||
/* MC_JOB_BOOTSTRAP */
|
||||
if (a->stub) {
|
||||
rs->kcr3 = 0xC0DE0000ull + (uint64_t)rq->boot_count * 0x1000ull; /* changes per epoch */
|
||||
return 0;
|
||||
}
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
uint64_t kcr3 = 0;
|
||||
if (mc_bootstrap_armed(a, &kcr3) != 0) return -1;
|
||||
rs->kcr3 = kcr3;
|
||||
return 0;
|
||||
#else
|
||||
return -1; /* armed without the build flag: bootstrap impossible -> ERROR */
|
||||
#endif
|
||||
}
|
||||
|
||||
static void mc_kick_bootstrap(struct vmsig_adapter* a) {
|
||||
a->boot_count++;
|
||||
mc_req rq;
|
||||
memset(&rq, 0, sizeof rq);
|
||||
rq.op = MC_JOB_BOOTSTRAP; rq.boot_count = a->boot_count;
|
||||
(void)vmsig_worker_submit(a->worker, &rq, sizeof rq); /* full => drop (rare) */
|
||||
}
|
||||
|
||||
/* ---- reg hooks (vmsig_memctx_reg.ctx = a; called by the core on the loop thread) ---- */
|
||||
static void mc_reg_describe(void* ctx, vmsig_memctx* out_pod,
|
||||
const vmsig_memseg** out_segs, uint32_t* out_nseg) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
*out_pod = a->cur_pod; /* kcr3/low/nseg/flags; the core overwrites the epoch */
|
||||
*out_segs = a->cur_segs;
|
||||
*out_nseg = a->cur_nseg;
|
||||
}
|
||||
|
||||
static int mc_reg_share_fd(void* ctx) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
if (a->cfg_ro_fd >= 0)
|
||||
return fcntl(a->cfg_ro_fd, F_DUPFD_CLOEXEC, 0); /* infra-sealed RO-fd: dup */
|
||||
if (a->stub) {
|
||||
if (a->stub_fd < 0) return -1;
|
||||
char path[64];
|
||||
snprintf(path, sizeof path, "/proc/self/fd/%d", a->stub_fd);
|
||||
return open(path, O_RDONLY | O_CLOEXEC); /* fresh O_RDONLY on the backing */
|
||||
}
|
||||
if (!a->ram_path) return -1;
|
||||
return open(a->ram_path, O_RDONLY | O_CLOEXEC); /* armed default */
|
||||
}
|
||||
|
||||
static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
(void)epoch; /* the core owns the epoch; the adapter must re-bootstrap */
|
||||
a->have_ctx = 0; /* the previous context is invalid */
|
||||
mc_kick_bootstrap(a); /* off-loop; on_ready re-emits MEMCTX (new epoch) */
|
||||
}
|
||||
|
||||
/* ---- vtable ---- */
|
||||
static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
|
||||
const vmsig_memctx_cfg* c = cfg;
|
||||
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||
if (!a) return NULL;
|
||||
a->endpoint = endpoint;
|
||||
a->stub = c ? c->stub : 1;
|
||||
a->ram_path = c ? c->ram_path : NULL;
|
||||
a->low = c ? c->low : 0;
|
||||
a->cfg_ro_fd = (c && c->ro_fd >= 0) ? c->ro_fd : -1;
|
||||
if (!a->ram_path && a->cfg_ro_fd < 0) a->stub = 1; /* no path/fd => stub */
|
||||
a->stub_fd = -1;
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Attach the memctx adapter to the core.
 * In order: copy the emit hooks, create the worker, create the stub backing fd (stub mode
 * without an infra fd only), publish the worker's completion eventfd as reg[0], register
 * the describe/share_fd/invalidate hooks when the core offers register_memctx, emit
 * SEAM_UP, and queue the first bootstrap.
 * Returns 1 (one entry written to reg), or -1 when cap < 1 or the worker / stub fd could
 * not be created. */
static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1) {
        return -1;
    }
    a->emit = *emit;

    a->worker = vmsig_worker_new(mc_job, a, 1, MC_WORKER_DEPTH);
    if (a->worker == NULL) {
        return -1;
    }

    if (a->stub && a->cfg_ro_fd < 0) {
        a->stub_fd = mc_make_stub_fd(MC_STUB_SIZE);
        if (a->stub_fd < 0) {
            /* undo the worker so a failed attach leaves nothing behind */
            vmsig_worker_free(a->worker);
            a->worker = NULL;
            return -1;
        }
    }

    /* The only readiness source is the worker's completion eventfd (cookie 0). */
    vmsig_fd_reg* slot = &reg[0];
    slot->fd           = vmsig_worker_evfd(a->worker);
    slot->epoll_events = EPOLLIN;
    slot->shape        = VMSIG_RDY_EVENTFD;
    slot->cookie       = 0;

    /* Hooks are registered before the first bootstrap is queued. A failed registration is
     * tolerated: `registered` simply stays 0 and mc_close skips the unregister. */
    if (a->emit.register_memctx) {
        vmsig_memctx_reg hooks;
        memset(&hooks, 0, sizeof hooks);
        hooks.endpoint   = a->endpoint;
        hooks.source     = VMSIG_SRC_MEMCTX;
        hooks.ctx        = a;
        hooks.describe   = mc_reg_describe;
        hooks.share_fd   = mc_reg_share_fd;
        hooks.invalidate = mc_reg_invalidate;
        if (a->emit.register_memctx(a->emit.token, &hooks) == 0) {
            a->registered = 1;
        }
    }

    vmsig_event seam;
    memset(&seam, 0, sizeof seam);
    seam.kind     = VMSIG_EV_SEAM_UP;
    seam.source   = VMSIG_SRC_MEMCTX;
    seam.dir      = VMSIG_DIR_UP;
    seam.prio     = VMSIG_PRIO_NORMAL;
    seam.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &seam);

    /* First bootstrap runs on the worker; the locator is assembled in mc_on_ready. */
    mc_kick_bootstrap(a);
    return 1;
}
|
||||
|
||||
/* Readiness handler for the worker's completion eventfd.
 * Acknowledges the eventfd, then drains every finished job:
 *   - MC_JOB_WRITE       -> addressed ACT_ACK, ok only if the job reported ok and rc == 0;
 *   - failed bootstrap   -> URGENT ERROR event, nothing is published;
 *   - successful bootstrap -> rebuild cur_pod / cur_segs and emit a MEMCTX event.
 * cookie and events are unused. Always returns 0. */
static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    mc_res done;
    int    job_rc;

    (void)cookie;
    (void)events;

    vmsig_worker_ack(a->worker);

    while (vmsig_worker_poll(a->worker, &done, sizeof done, &job_rc) == 1) {
        if (done.op == MC_JOB_WRITE) {
            /* completion of a queued MEMWRITE: reply to its initiator */
            mc_memwrite_ack(a, done.ok && job_rc == 0, done.corr, done.origin);
            continue;
        }

        if (job_rc != 0) {
            /* bootstrap failure: report it and do not publish a kcr3 */
            vmsig_event err;
            memset(&err, 0, sizeof err);
            err.kind     = VMSIG_EV_ERROR;
            err.source   = VMSIG_SRC_MEMCTX;
            err.dir      = VMSIG_DIR_UP;
            err.prio     = VMSIG_PRIO_URGENT;
            err.endpoint = a->endpoint;
            a->emit.emit(a->emit.token, &err);
            continue;
        }

        /* Build the locator here from done.kcr3. Only cur_pod.kcr3 is written on this
         * path; a->kcr3 (the write target used by the worker's write job) is deliberately
         * left alone so this thread never races an in-flight write. */
        memset(&a->cur_pod, 0, sizeof a->cur_pod);
        a->cur_pod.kcr3  = done.kcr3;
        a->cur_pod.low   = a->low ? a->low : MC_STUB_SIZE;
        a->cur_pod.flags = VMSIG_MEMCTX_RDONLY;

        /* one identity segment covering gpa 0 .. low at file offset 0 */
        a->cur_nseg             = 1;
        a->cur_segs[0].gpa      = 0;
        a->cur_segs[0].len      = a->cur_pod.low;
        a->cur_segs[0].file_off = 0;
        a->cur_pod.nseg         = a->cur_nseg;
        a->have_ctx             = 1;

        /* MEMCTX trigger carrying a copy of cur_pod in the inline area. */
        vmsig_event ctxev;
        memset(&ctxev, 0, sizeof ctxev);
        ctxev.kind     = VMSIG_EV_MEMCTX;
        ctxev.source   = VMSIG_SRC_MEMCTX;
        ctxev.dir      = VMSIG_DIR_UP;
        ctxev.prio     = VMSIG_PRIO_NORMAL;
        ctxev.endpoint = a->endpoint;
        memcpy(ctxev.inln, &a->cur_pod, sizeof a->cur_pod);
        a->emit.emit(a->emit.token, &ctxev);
    }
    return 0;
}
|
||||
|
||||
/* Emit an addressed ACT_ACK for a MEMWRITE (source MEMCTX, to the initiator). inln carries
|
||||
* {ok,corr,origin} (same shape as the input adapter's ACK), so control reads ok at offset 0.
|
||||
* ok=0 covers extent-deny / no-SRC / queue-full / write failure (default-deny, observable). */
|
||||
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin) {
|
||||
struct { int ok; uint32_t corr; uint32_t origin; } body = { ok, corr, origin };
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_ACT_ACK; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
|
||||
up.corr = corr; up.origin = origin;
|
||||
up.payload.flags = VMSIG_PL_INLINE;
|
||||
memcpy(up.inln, &body, sizeof body);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
}
|
||||
|
||||
/* DOWN MEMWRITE handler: validate extent, copy SRC off-loop, submit the atomic gva_write to
|
||||
* the worker. Default-deny: any invalid path (no SRC flag, len out of bounds, short payload,
|
||||
* queue full) ACKs ok=0 and does NOT actuate. The completion ACK for a queued write arrives
|
||||
* via mc_on_ready. Returns 0 when the event is consumed by this seam, 1 when it is not ours. */
|
||||
static int mc_submit(vmsig_adapter* a, const vmsig_event* ev) {
|
||||
if (ev->kind != VMSIG_EV_CMD_MEMWRITE) return 1; /* not for this seam */
|
||||
|
||||
const vmsig_memwrite* mw = (const vmsig_memwrite*)ev->inln;
|
||||
uint32_t len = mw->len;
|
||||
if (len == 0 || len > VMSIG_MEMWRITE_MAX) { /* extent: bounded */
|
||||
mc_memwrite_ack(a, 0, ev->corr, ev->origin);
|
||||
return 0;
|
||||
}
|
||||
mc_req rq; memset(&rq, 0, sizeof rq);
|
||||
rq.op = MC_JOB_WRITE; rq.gva = mw->gva; rq.len = len;
|
||||
rq.corr = ev->corr; rq.origin = ev->origin;
|
||||
|
||||
/* copy SRC into the worker req (off-loop gva_write reads from rq.src). */
|
||||
if (mw->flags & VMSIG_MW_SRC_INLINE) {
|
||||
if (len > VMSIG_MEMWRITE_INLINE) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
|
||||
memcpy(rq.src, ev->inln + sizeof *mw, len); /* inln tail after the 16-byte header */
|
||||
} else if (mw->flags & VMSIG_MW_SRC_PAYLOAD) {
|
||||
if (!ev->payload.data || ev->payload.len < len) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
|
||||
memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */
|
||||
} else {
|
||||
mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* no SRC flag */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vmsig_worker_submit(a->worker, &rq, sizeof rq) != 0) {
|
||||
mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* queue full -> ACK err */
|
||||
return -1;
|
||||
}
|
||||
return 0; /* completion ACK arrives via mc_on_ready */
|
||||
}
|
||||
|
||||
/* Tear the adapter down. Accepts NULL.
 * Order: unregister the memctx hooks (only if registration succeeded), free the worker,
 * close the vmie handle (VMSIG_WITH_VMIE builds, after the worker is gone so no write is
 * in flight), close the stub backing fd, free the adapter.
 * cfg_ro_fd is never closed here — it belongs to whoever supplied it to mc_open. */
static void mc_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }

    if (a->registered && a->emit.unregister_memctx) {
        a->emit.unregister_memctx(a->emit.token, a->endpoint);
    }

    if (a->worker) {
        vmsig_worker_free(a->worker);
    }
#ifdef VMSIG_WITH_VMIE
    if (a->win) {
        vmie_win32_close(a->win);
    }
#endif

    if (a->stub_fd >= 0) {
        close(a->stub_fd);
    }
    free(a);
}
|
||||
|
||||
static const vmsig_adapter_ops MC_OPS = {
|
||||
.name = "memctx", .source = VMSIG_SRC_MEMCTX, .codec = VMSIG_CODEC_MEMCTX,
|
||||
.open = mc_open, .attach = mc_attach, .on_readiness = mc_on_ready,
|
||||
.submit = mc_submit, .close = mc_close
|
||||
};
|
||||
|
||||
/* Public accessor: returns the memctx adapter vtable (static storage, never NULL). */
const vmsig_adapter_ops* vmsig_memctx_ops(void) {
    return &MC_OPS;
}
|
||||
@@ -0,0 +1,13 @@
|
||||
#ifndef VMSIG_VMHOST_H
#define VMSIG_VMHOST_H

/* Private configuration of the vmhost adapter (signaling's own QMP client).
 *
 * Mode selection:
 *   - cfg == NULL, or qmp_path NULL/empty -> stub mode (synthetic events, no QEMU);
 *   - qmp_path set                        -> armed: connect to QEMU's QMP unix socket
 *                                            (a leading '@' selects an abstract socket).
 * The client needs only POSIX and its own code, so no build flag is involved. */
typedef struct {
    int         stub;      /* stub request; the adapter's open derives the mode from qmp_path */
    const char* qmp_path;  /* QMP socket path, or NULL/"" for stub mode */
} vmsig_vmhost_cfg;

#endif /* VMSIG_VMHOST_H */
|
||||
@@ -0,0 +1,313 @@
|
||||
/* vmhost.c — QEMU/QMP host-plane: signaling's OWN layer for observing the VM
|
||||
* and its basic control. Not a wrapper over a neighbor repo — an own QMP client;
|
||||
* depends only on POSIX, so it is always functional (no build flag).
|
||||
*
|
||||
* This is the first truly epoll-native source: the QMP socket (VMSIG_RDY_FD) lives
|
||||
* directly in the loop, non-blocking, async events. Up: QMP events -> VM_LIFECYCLE
|
||||
* (broadcast), EOF -> SEAM_DOWN. Down: CMD_VM -> QMP command with id correlation,
|
||||
* reply addressed to the initiator. stub mode (no QEMU) synthesizes events/replies. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig_adapter.h"
|
||||
#include "vmhost.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <sys/timerfd.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
/* Size in bytes of the adapter's QMP receive buffer. */
#define VMHOST_BUF 4096
/* Period, in milliseconds, of the stub-mode timer. */
#define VMHOST_STUB_MS 200
/* Number of slots in the table of commands awaiting a QMP reply. */
#define VMHOST_MAX_PENDING 64

/* Adapter state machine. */
enum {
    ST_STUB = 0,      /* stub mode (timer-driven synthetic events) */
    ST_CONNECTING,    /* socket created, waiting for the QMP greeting */
    ST_NEGOTIATING,   /* qmp_capabilities sent, waiting for its reply */
    ST_READY,         /* events and command replies are processed */
    ST_DEAD           /* transport closed */
};

/* One command awaiting its QMP reply. */
typedef struct {
    uint32_t id;      /* id sent in the QMP command, used to match the reply */
    uint32_t origin;  /* initiator the reply is addressed to */
    uint32_t corr;    /* initiator's correlation id */
    uint8_t  op;      /* requested operation (narrowed to 8 bits) */
    int      used;    /* non-zero while the slot is occupied */
} pend_ent;
|
||||
|
||||
struct vmsig_adapter {
|
||||
uint32_t endpoint;
|
||||
int stub;
|
||||
const char* qmp_path;
|
||||
vmsig_emit emit;
|
||||
int fd; /* QMP socket (armed) or timerfd (stub) */
|
||||
int st;
|
||||
uint32_t cur; /* current synthetic state (stub) */
|
||||
char buf[VMHOST_BUF];
|
||||
size_t buflen;
|
||||
uint32_t next_id;
|
||||
pend_ent pend[VMHOST_MAX_PENDING];
|
||||
};
|
||||
|
||||
/* ---- minimal QMP line parse (top-level keys only; full JSON — deferred) ---- */
|
||||
/* Extract the string value that follows the first occurrence of `key` in `line`.
 *
 *   line - NUL-terminated text to search
 *   key  - text to locate, quotes included (e.g. "\"event\"")
 *   out  - receives the value, always NUL-terminated on success
 *   cap  - capacity of `out` in bytes; longer values are truncated to cap-1 characters
 *
 * After the key, any run of spaces, tabs and ':' is skipped; the next character must be
 * an opening '"'. Characters are copied up to the next '"' or the end of the line (escape
 * sequences are not interpreted).
 * Returns 1 when a value was stored, 0 when the key is absent, the value is not a string,
 * or cap is 0 (then `out` is not touched — there is no room even for the terminator). */
static int jstr(const char* line, const char* key, char* out, size_t cap) {
    if (cap == 0) return 0;

    const char* p = strstr(line, key);
    if (!p) return 0;

    p += strlen(key);
    while (*p == ' ' || *p == '\t' || *p == ':') p++;
    if (*p != '"') return 0;
    p++;

    size_t i = 0;
    while (*p && *p != '"' && i + 1 < cap) out[i++] = *p++;
    out[i] = 0;
    return 1;
}
|
||||
/* Read the non-negative decimal number that follows the first occurrence of `key`.
 *
 * After the key, any run of spaces, tabs and ':' is skipped; the next character must be a
 * digit. Returns the parsed value, or -1 when the key is absent, the value does not start
 * with a digit, or the number does not fit in a long (strtol reports ERANGE) — an
 * overflowed value must not be mistaken for a valid id. errno is left as it was on entry. */
static long jnum(const char* line, const char* key) {
    const char* p = strstr(line, key);
    if (!p) return -1;

    p += strlen(key);
    while (*p == ' ' || *p == '\t' || *p == ':') p++;
    if (*p < '0' || *p > '9') return -1;

    const int saved_errno = errno;
    errno = 0;
    long v = strtol(p, NULL, 10);
    const int overflowed = (errno == ERANGE);
    errno = saved_errno;

    return overflowed ? -1 : v;
}
|
||||
static uint32_t ev_state(const char* n) {
|
||||
if (!strcmp(n, "RESUME")) return VMSIG_VM_RUNNING;
|
||||
if (!strcmp(n, "STOP")) return VMSIG_VM_PAUSED;
|
||||
if (!strcmp(n, "SHUTDOWN")) return VMSIG_VM_SHUTDOWN;
|
||||
if (!strcmp(n, "RESET")) return VMSIG_VM_RESET;
|
||||
if (!strcmp(n, "POWERDOWN")) return VMSIG_VM_POWERDOWN;
|
||||
if (!strcmp(n, "GUEST_PANICKED")) return VMSIG_VM_CRASHED;
|
||||
return VMSIG_VM_UNKNOWN;
|
||||
}
|
||||
static uint32_t status_state(const char* s) {
|
||||
if (!strcmp(s, "running")) return VMSIG_VM_RUNNING;
|
||||
if (!strcmp(s, "paused")) return VMSIG_VM_PAUSED;
|
||||
if (!strcmp(s, "shutdown")) return VMSIG_VM_SHUTDOWN;
|
||||
return VMSIG_VM_UNKNOWN;
|
||||
}
|
||||
static const char* op_qmp(uint32_t op) {
|
||||
switch (op) {
|
||||
case VMSIG_VMOP_QUERY: return "query-status";
|
||||
case VMSIG_VMOP_CONT: return "cont";
|
||||
case VMSIG_VMOP_STOP: return "stop";
|
||||
case VMSIG_VMOP_RESET: return "system_reset";
|
||||
case VMSIG_VMOP_POWERDOWN: return "system_powerdown";
|
||||
case VMSIG_VMOP_QUIT: return "quit";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static pend_ent* pend_alloc(struct vmsig_adapter* a) {
|
||||
for (int i = 0; i < VMHOST_MAX_PENDING; i++) if (!a->pend[i].used) return &a->pend[i];
|
||||
return NULL;
|
||||
}
|
||||
static pend_ent* pend_find(struct vmsig_adapter* a, uint32_t id) {
|
||||
for (int i = 0; i < VMHOST_MAX_PENDING; i++)
|
||||
if (a->pend[i].used && a->pend[i].id == id) return &a->pend[i];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ---- emission of neutral UP events ---- */
|
||||
static void emit_vm(struct vmsig_adapter* a, uint32_t state, uint32_t origin, uint32_t corr) {
|
||||
vmsig_vm_state vs = { state, 0 };
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_VM_LIFECYCLE; up.source = VMSIG_SRC_VMHOST; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = (state == VMSIG_VM_RUNNING || state == VMSIG_VM_PAUSED)
|
||||
? VMSIG_PRIO_NORMAL : VMSIG_PRIO_URGENT;
|
||||
up.endpoint = a->endpoint; up.origin = origin; up.corr = corr;
|
||||
up.payload.flags = VMSIG_PL_INLINE;
|
||||
memcpy(up.inln, &vs, sizeof vs);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
}
|
||||
/* Emit a payload-less seam event of kind `k` (callers pass SEAM_UP or SEAM_DOWN) for this
 * adapter's endpoint, at URGENT priority. */
static void emit_seam(struct vmsig_adapter* a, vmsig_kind k) {
    vmsig_event ev;

    memset(&ev, 0, sizeof ev);
    ev.kind     = k;
    ev.source   = VMSIG_SRC_VMHOST;
    ev.dir      = VMSIG_DIR_UP;
    ev.prio     = VMSIG_PRIO_URGENT;
    ev.endpoint = a->endpoint;

    a->emit.emit(a->emit.token, &ev);
}
|
||||
static void emit_ack(struct vmsig_adapter* a, uint32_t origin, uint32_t corr, int ok) {
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_ACT_ACK; up.source = VMSIG_SRC_VMHOST; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint; up.origin = origin; up.corr = corr;
|
||||
up.payload.flags = VMSIG_PL_INLINE;
|
||||
up.inln[0] = (uint8_t)(ok ? 1 : 0);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
}
|
||||
|
||||
/* ---- armed: handle one QMP line ---- */
|
||||
static void handle_line(struct vmsig_adapter* a, const char* line) {
|
||||
switch (a->st) {
|
||||
case ST_CONNECTING:
|
||||
if (strstr(line, "\"QMP\"")) { /* greeting -> negotiate capabilities */
|
||||
static const char cap[] = "{\"execute\":\"qmp_capabilities\"}\r\n";
|
||||
ssize_t r = write(a->fd, cap, sizeof cap - 1); (void)r;
|
||||
a->st = ST_NEGOTIATING;
|
||||
}
|
||||
break;
|
||||
case ST_NEGOTIATING:
|
||||
if (strstr(line, "\"return\"")) { a->st = ST_READY; emit_seam(a, VMSIG_EV_SEAM_UP); }
|
||||
break;
|
||||
case ST_READY:
|
||||
if (strstr(line, "\"event\"")) {
|
||||
char name[64];
|
||||
if (jstr(line, "\"event\"", name, sizeof name)) {
|
||||
uint32_t s = ev_state(name);
|
||||
if (s != VMSIG_VM_UNKNOWN) emit_vm(a, s, 0, 0); /* broadcast */
|
||||
}
|
||||
} else if (strstr(line, "\"return\"") || strstr(line, "\"error\"")) {
|
||||
long id = jnum(line, "\"id\"");
|
||||
pend_ent* p = id >= 0 ? pend_find(a, (uint32_t)id) : NULL;
|
||||
if (p) {
|
||||
if (p->op == VMSIG_VMOP_QUERY && strstr(line, "\"return\"")) {
|
||||
char stbuf[32]; uint32_t s = VMSIG_VM_UNKNOWN;
|
||||
if (jstr(line, "\"status\"", stbuf, sizeof stbuf)) s = status_state(stbuf);
|
||||
emit_vm(a, s, p->origin, p->corr); /* addressed reply */
|
||||
} else {
|
||||
emit_ack(a, p->origin, p->corr, strstr(line, "\"return\"") != NULL);
|
||||
}
|
||||
p->used = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
static void armed_dead(struct vmsig_adapter* a) {
|
||||
emit_seam(a, VMSIG_EV_SEAM_DOWN); /* VM transport died */
|
||||
if (a->fd >= 0) { close(a->fd); a->fd = -1; } /* close removes the fd from epoll */
|
||||
a->st = ST_DEAD;
|
||||
}
|
||||
|
||||
/* ---- vtable ---- */
|
||||
static vmsig_adapter* vh_open(const void* cfg, uint32_t endpoint) {
|
||||
const vmsig_vmhost_cfg* c = cfg;
|
||||
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||
if (!a) return NULL;
|
||||
a->endpoint = endpoint;
|
||||
a->qmp_path = (c && c->qmp_path && c->qmp_path[0]) ? c->qmp_path : NULL;
|
||||
a->stub = (a->qmp_path == NULL); /* path given => armed, otherwise stub */
|
||||
a->fd = -1;
|
||||
a->cur = VMSIG_VM_RUNNING;
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Attach the vmhost adapter to the core and publish its single fd in reg[0].
 *   stub  - a periodic non-blocking timerfd (VMHOST_STUB_MS) is created, the state becomes
 *           ST_STUB and SEAM_UP is emitted immediately;
 *   armed - a non-blocking unix stream socket is connected to qmp_path ('@' prefix =
 *           abstract namespace), the state becomes ST_CONNECTING, and SEAM_UP is deferred
 *           until the capability negotiation completes.
 * Returns 1 on success, -1 when cap < 1, the path does not fit in sun_path, or a system
 * call fails (a connect reporting EINPROGRESS is not a failure). */
static int vh_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1) {
        return -1;
    }
    a->emit = *emit;

    if (a->stub) {
        a->fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
        if (a->fd < 0) {
            return -1;
        }

        /* first expiry after one period, then periodic */
        struct itimerspec period;
        memset(&period, 0, sizeof period);
        period.it_interval.tv_sec  = VMHOST_STUB_MS / 1000u;
        period.it_interval.tv_nsec = (long)(VMHOST_STUB_MS % 1000u) * 1000000L;
        period.it_value            = period.it_interval;
        if (timerfd_settime(a->fd, 0, &period, NULL) < 0) {
            close(a->fd);
            a->fd = -1;
            return -1;
        }

        a->st               = ST_STUB;
        reg[0].fd           = a->fd;
        reg[0].epoll_events = EPOLLIN;
        reg[0].shape        = VMSIG_RDY_TIMERFD;
        reg[0].cookie       = 0;
        emit_seam(a, VMSIG_EV_SEAM_UP);
        return 1;
    }

    /* armed: connect to QEMU's QMP socket */
    int sock = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
    if (sock < 0) {
        return -1;
    }

    struct sockaddr_un addr;
    socklen_t          addr_len;
    memset(&addr, 0, sizeof addr);
    addr.sun_family = AF_UNIX;

    const size_t path_len = strlen(a->qmp_path);
    if (a->qmp_path[0] == '@') {
        /* abstract socket: the '@' is replaced by a leading NUL, so the name occupies
         * exactly path_len bytes of sun_path and the address length counts them all */
        if (path_len > sizeof addr.sun_path) {
            close(sock);
            return -1;
        }
        addr.sun_path[0] = 0;
        memcpy(addr.sun_path + 1, a->qmp_path + 1, path_len - 1);
        addr_len = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + path_len);
    } else {
        /* filesystem socket: room for the terminating NUL is required */
        if (path_len >= sizeof addr.sun_path) {
            close(sock);
            return -1;
        }
        memcpy(addr.sun_path, a->qmp_path, path_len);
        addr_len = (socklen_t)sizeof addr;
    }

    if (connect(sock, (struct sockaddr*)&addr, addr_len) < 0 && errno != EINPROGRESS) {
        close(sock);
        return -1;
    }

    a->fd               = sock;
    a->st               = ST_CONNECTING;
    reg[0].fd           = sock;
    reg[0].epoll_events = EPOLLIN;
    reg[0].shape        = VMSIG_RDY_FD;
    reg[0].cookie       = 0;
    return 1;
}
|
||||
|
||||
/* Readiness handler for the adapter's fd. cookie and events are unused. Always returns 0.
 *   stub  - drain the timerfd, flip the synthetic state between RUNNING and PAUSED and
 *           broadcast it;
 *   armed - read until the socket would block, split the buffered bytes on '\n' and feed
 *           each complete line to handle_line; an incomplete tail stays buffered. A full
 *           buffer with no newline is discarded. EOF or a read error other than
 *           EAGAIN/EWOULDBLOCK kills the transport (armed_dead). */
static int vh_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie;
    (void)events;

    if (a->stub) {
        uint64_t expirations;
        while (read(a->fd, &expirations, sizeof expirations) == (ssize_t)sizeof expirations) {
            /* drain */
        }
        if (a->cur == VMSIG_VM_RUNNING) {
            a->cur = VMSIG_VM_PAUSED;
        } else {
            a->cur = VMSIG_VM_RUNNING;
        }
        emit_vm(a, a->cur, 0, 0);
        return 0;
    }

    if (a->st == ST_DEAD) {
        return 0;
    }

    for (;;) {
        if (a->buflen >= sizeof a->buf) {
            a->buflen = 0;                       /* overlong line: drop what was buffered */
        }

        ssize_t got = read(a->fd, a->buf + a->buflen, sizeof a->buf - a->buflen);
        if (got == 0) {
            armed_dead(a);
            return 0;
        }
        if (got < 0) {
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                break;
            }
            armed_dead(a);
            return 0;
        }
        a->buflen += (size_t)got;

        /* hand every complete line to the parser */
        size_t consumed = 0;
        char*  nl;
        while ((nl = memchr(a->buf + consumed, '\n', a->buflen - consumed)) != NULL) {
            *nl = 0;
            handle_line(a, a->buf + consumed);
            consumed = (size_t)(nl - a->buf) + 1;
        }

        /* keep the unterminated tail at the front of the buffer */
        if (consumed > 0) {
            memmove(a->buf, a->buf + consumed, a->buflen - consumed);
            a->buflen -= consumed;
        }
    }
    return 0;
}
|
||||
|
||||
/* DOWN handler for CMD_VM.
 * Returns 1 when the event is not for this seam (other kind, or an unknown operation),
 * 0 when the command was handled, and -1 when it cannot be sent: adapter not in ST_READY,
 * pending table full, or the QMP line was not written in full.
 *   stub  - the reply is synthesized at once as an addressed VM_LIFECYCLE; CONT and STOP
 *           also update the synthetic state;
 *   armed - a pending slot records (id, origin, corr, op) and the command goes out as
 *           {"execute":...,"id":N}; the reply is routed by handle_line. */
static int vh_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind != VMSIG_EV_CMD_VM) {
        return 1;
    }

    vmsig_vm_cmd cmd;
    memcpy(&cmd, ev->inln, sizeof cmd);

    if (a->stub) {
        uint32_t reply;
        switch (cmd.op) {
        case VMSIG_VMOP_QUERY:
            reply = a->cur;
            break;
        case VMSIG_VMOP_CONT:
            a->cur = VMSIG_VM_RUNNING;
            reply  = a->cur;
            break;
        case VMSIG_VMOP_STOP:
            a->cur = VMSIG_VM_PAUSED;
            reply  = a->cur;
            break;
        case VMSIG_VMOP_RESET:
            reply = VMSIG_VM_RESET;
            break;
        case VMSIG_VMOP_POWERDOWN:
            reply = VMSIG_VM_POWERDOWN;
            break;
        case VMSIG_VMOP_QUIT:
            reply = VMSIG_VM_SHUTDOWN;
            break;
        default:
            return 1;
        }
        emit_vm(a, reply, ev->origin, ev->corr);
        return 0;
    }

    if (a->st != ST_READY) {
        return -1;
    }

    const char* verb = op_qmp(cmd.op);
    if (verb == NULL) {
        return 1;
    }

    pend_ent* slot = pend_alloc(a);
    if (slot == NULL) {
        return -1;                               /* backpressure: pending table is full */
    }

    uint32_t id  = ++a->next_id;
    slot->used   = 1;
    slot->id     = id;
    slot->origin = ev->origin;
    slot->corr   = ev->corr;
    slot->op     = (uint8_t)cmd.op;

    char line[160];
    int  len = snprintf(line, sizeof line, "{\"execute\":\"%s\",\"id\":%u}\r\n", verb, id);

    ssize_t sent = write(a->fd, line, (size_t)len);
    if (sent != (ssize_t)len) {
        slot->used = 0;                          /* nothing will answer this id */
        return -1;
    }
    return 0;
}
|
||||
|
||||
/* Release the adapter: close its fd if one is open, then free it. Accepts NULL. */
static void vh_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }
    if (a->fd >= 0) {
        close(a->fd);
    }
    free(a);
}
|
||||
|
||||
static const vmsig_adapter_ops VH_OPS = {
|
||||
.name = "vmhost", .source = VMSIG_SRC_VMHOST, .codec = VMSIG_CODEC_VMHOST,
|
||||
.open = vh_open, .attach = vh_attach, .on_readiness = vh_on_ready,
|
||||
.submit = vh_submit, .close = vh_close
|
||||
};
|
||||
|
||||
/* Public accessor: returns the vmhost adapter vtable (static storage, never NULL). */
const vmsig_adapter_ops* vmsig_vmhost_ops(void) {
    return &VH_OPS;
}
|
||||
@@ -0,0 +1,182 @@
|
||||
/* cli.c — vmsig spine demonstrator (no real VM).
|
||||
*
|
||||
* Brings up the context + epoll core, attaches an in-proc control and a set of stub
|
||||
* adapters (input/vmhost/memctx) on a single endpoint (VM 0). Proves the bidirectional seam:
|
||||
* UP: SEAM_UP, VM_LIFECYCLE (vmhost stub tick), MEMCTX (kcr3+locator + RO-fd);
|
||||
* DOWN: CMD_ACQUIRE+CMD_INPUT -> input adapter -> ACT_ACK (correlation);
|
||||
* CMD_VM QUERY -> vmhost -> VM_LIFECYCLE (addressed reply).
|
||||
* The address-space context arrives via MULTICAST: control receives kcr3 and a
|
||||
* pre-opened O_RDONLY fd of the RAM region (control does NOT see ram_path; it mmaps
|
||||
* the fd itself, write -> EACCES). (vgpu frame perception now lives in an out-of-repo
|
||||
* S-lib that consumes this MEMCTX seam — not in signaling.)
|
||||
* Shutdown: on SIGINT or automatically, once all paths are proven. */
|
||||
#include "vmsig.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
/* Core reachable from the signal handler; NULL until main has created it. */
static vmsig_core* g_core;

/* SIGINT handler: ask the running core (if any) to stop. The signal number is unused. */
static void on_sigint(int s) {
    (void)s;
    if (g_core) {
        vmsig_core_stop(g_core);
    }
}
|
||||
|
||||
typedef struct {
|
||||
vmsig_core* core;
|
||||
void* ctl;
|
||||
int total, lifecycles, acks, seams, memctx;
|
||||
uint64_t last_kcr3;
|
||||
uint32_t last_epoch;
|
||||
int sent_first; /* sent acquire+input+vm on the first lifecycle tick */
|
||||
} demo;
|
||||
|
||||
/* Printable name of an event kind; "?" for kinds this demo does not label. */
static const char* kind_name(vmsig_kind k) {
    const char* label = "?";

    switch (k) {
    case VMSIG_EV_SEAM_UP:      label = "SEAM_UP";      break;
    case VMSIG_EV_SEAM_DOWN:    label = "SEAM_DOWN";    break;
    case VMSIG_EV_VM_LIFECYCLE: label = "VM_LIFECYCLE"; break;
    case VMSIG_EV_ACT_ACK:      label = "ACT_ACK";      break;
    case VMSIG_EV_MEMCTX:       label = "MEMCTX";       break;
    default:                                            break;
    }
    return label;
}
|
||||
|
||||
/* Core -> control: address-space context + pre-opened O_RDONLY fd of the RAM region.
|
||||
* Demonstrate RO: mmap(PROT_READ) ok, mmap(PROT_WRITE) -> EACCES. The fd is borrowed
|
||||
* (closed by the core after the call) — here we mmap and immediately unmap. */
|
||||
static int on_memctx(void* user, const vmsig_event* ev, int fd) {
|
||||
demo* d = user;
|
||||
const vmsig_memctx* m = (const vmsig_memctx*)ev->inln;
|
||||
d->memctx++;
|
||||
d->last_kcr3 = m->kcr3; d->last_epoch = m->epoch;
|
||||
uint32_t nseg = 0;
|
||||
const vmsig_memseg* segs = vmsig_memctx_segs(ev, &nseg);
|
||||
printf(" UP MEMCTX ep=%u kcr3=%#llx low=%#llx epoch=%u nseg=%u rdonly=%d\n",
|
||||
(unsigned)ev->endpoint, (unsigned long long)m->kcr3,
|
||||
(unsigned long long)m->low, (unsigned)m->epoch, (unsigned)nseg,
|
||||
(m->flags & VMSIG_MEMCTX_RDONLY) ? 1 : 0);
|
||||
if (fd >= 0 && m->low) {
|
||||
void* ro = mmap(NULL, (size_t)m->low, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (ro != MAP_FAILED) {
|
||||
void* rw = mmap(NULL, (size_t)m->low, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
printf(" RO-fd: mmap(PROT_READ) ok, mmap(PROT_WRITE) %s\n",
|
||||
rw == MAP_FAILED ? "EACCES (RO enforced)" : "UNEXPECTEDLY succeeded!");
|
||||
if (rw != MAP_FAILED) munmap(rw, (size_t)m->low);
|
||||
munmap(ro, (size_t)m->low);
|
||||
}
|
||||
}
|
||||
(void)segs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int on_event(void* user, const vmsig_event* ev) {
|
||||
demo* d = user;
|
||||
d->total++;
|
||||
switch (ev->kind) {
|
||||
case VMSIG_EV_SEAM_UP: d->seams++; break;
|
||||
case VMSIG_EV_ACT_ACK: d->acks++; break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
|
||||
d->lifecycles++;
|
||||
vmsig_vm_state vs; memcpy(&vs, ev->inln, sizeof vs);
|
||||
printf(" UP VM_LIFECYCLE ep=%u state=%u%s\n",
|
||||
(unsigned)ev->endpoint, (unsigned)vs.state, ev->origin ? " (reply)" : "");
|
||||
} else if (ev->kind != VMSIG_EV_MEMCTX) { /* MEMCTX is printed in on_memctx */
|
||||
printf(" UP %-12s src=%u ep=%u seq=%u prio=%u\n",
|
||||
kind_name(ev->kind), (unsigned)ev->source, (unsigned)ev->endpoint,
|
||||
(unsigned)ev->seq, (unsigned)ev->prio);
|
||||
}
|
||||
|
||||
/* On the first lifecycle tick: acquire the INPUT lease, send input, and query VM status. */
|
||||
if (ev->kind == VMSIG_EV_VM_LIFECYCLE && !ev->origin && !d->sent_first) {
|
||||
d->sent_first = 1;
|
||||
/* Input is a destructive class: first acquire the exclusive INPUT lease. */
|
||||
vmsig_event acq;
|
||||
memset(&acq, 0, sizeof acq);
|
||||
acq.kind = VMSIG_EV_CMD_ACQUIRE; acq.source = VMSIG_SRC_INPUT; acq.dir = VMSIG_DIR_DOWN;
|
||||
acq.prio = VMSIG_PRIO_HIGH; acq.endpoint = 0;
|
||||
((vmsig_lease_req*)acq.inln)->cls = VMSIG_LEASE_INPUT;
|
||||
printf(" DOWN CMD_ACQUIRE INPUT@ep0\n");
|
||||
vmsig_inproc_send(d->ctl, &acq);
|
||||
|
||||
vmsig_event in;
|
||||
memset(&in, 0, sizeof in);
|
||||
in.kind = VMSIG_EV_CMD_INPUT; in.source = VMSIG_SRC_INPUT; in.dir = VMSIG_DIR_DOWN;
|
||||
in.prio = VMSIG_PRIO_HIGH; in.endpoint = 0; in.corr = 0xC0FFEEu;
|
||||
in.payload.flags = VMSIG_PL_INLINE;
|
||||
vmsig_input act; memset(&act, 0, sizeof act); /* neutral public input contract */
|
||||
act.kind = VMSIG_INPUT_ABS; act.code = 0; act.value = 100; /* demo: abs axis X = 100 */
|
||||
memcpy(in.inln, &act, sizeof act);
|
||||
printf(" DOWN CMD_INPUT ABS axis=0 val=100 corr=0x%X\n", (unsigned)in.corr);
|
||||
vmsig_inproc_send(d->ctl, &in);
|
||||
|
||||
vmsig_event vm;
|
||||
memset(&vm, 0, sizeof vm);
|
||||
vm.kind = VMSIG_EV_CMD_VM; vm.source = VMSIG_SRC_VMHOST; vm.dir = VMSIG_DIR_DOWN;
|
||||
vm.prio = VMSIG_PRIO_NORMAL; vm.endpoint = 0; vm.corr = 0x5Au;
|
||||
vmsig_vm_cmd vc = { VMSIG_VMOP_QUERY };
|
||||
memcpy(vm.inln, &vc, sizeof vc);
|
||||
printf(" DOWN CMD_VM QUERY\n");
|
||||
vmsig_inproc_send(d->ctl, &vm);
|
||||
}
|
||||
|
||||
/* All paths proven — stop (for automated verification). */
|
||||
if (d->memctx >= 1 && d->acks >= 1 && d->lifecycles >= 2) vmsig_core_stop(d->core);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
if (!ctx) { fprintf(stderr, "ctx_new failed\n"); return 1; }
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
if (!core) { fprintf(stderr, "core_new failed\n"); vmsig_ctx_free(ctx); return 1; }
|
||||
g_core = core;
|
||||
signal(SIGINT, on_sigint);
|
||||
|
||||
demo d;
|
||||
memset(&d, 0, sizeof d);
|
||||
d.core = core;
|
||||
|
||||
vmsig_inproc_cfg ccfg;
|
||||
memset(&ccfg, 0, sizeof ccfg);
|
||||
ccfg.on_event = on_event;
|
||||
ccfg.on_memctx = on_memctx;
|
||||
ccfg.user = &d;
|
||||
ccfg.sub.source_mask = 0; /* all sources */
|
||||
ccfg.sub.prio_min = VMSIG_PRIO_BULK;
|
||||
ccfg.sub.endpoint_mask = 0; /* all VMs */
|
||||
|
||||
void* ctl = vmsig_inproc_control_new(&ccfg);
|
||||
if (!ctl) { fprintf(stderr, "control_new failed\n"); vmsig_core_free(core); vmsig_ctx_free(ctx); return 1; }
|
||||
d.ctl = ctl;
|
||||
|
||||
/* Trusted in-proc control: full grant on VM 0 (the policy is set by the embedding
|
||||
* program; for an out-of-process poller the grant would be issued upon authentication). */
|
||||
vmsig_grant grant;
|
||||
memset(&grant, 0, sizeof grant);
|
||||
grant.principal = 1;
|
||||
grant.endpoint_mask = 1u << 0;
|
||||
grant.source_mask = 0xFFFFFFFFu;
|
||||
grant.cap_mask = VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT | VMSIG_CAP_LIFECYCLE |
|
||||
VMSIG_CAP_MEMCTX | VMSIG_CAP_POWER | VMSIG_CAP_VM;
|
||||
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
|
||||
|
||||
/* Single endpoint (VM 0), stub adapters (cfg = NULL). */
|
||||
if (vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0) < 0 ||
|
||||
vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) < 0 || /* stub QEMU plane */
|
||||
vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) < 0) { /* stub AS context */
|
||||
fprintf(stderr, "add_adapter failed\n");
|
||||
vmsig_core_free(core); vmsig_ctx_free(ctx); return 1;
|
||||
}
|
||||
|
||||
printf("vmsig_cli: loop started (Ctrl-C to stop)\n");
|
||||
int rc = vmsig_core_run(core);
|
||||
printf("vmsig_cli: loop finished rc=%d (events=%d seams=%d lifecycles=%d acks=%d memctx=%d kcr3=%#llx epoch=%u)\n",
|
||||
rc, d.total, d.seams, d.lifecycles, d.acks, d.memctx,
|
||||
(unsigned long long)d.last_kcr3, (unsigned)d.last_epoch);
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
return rc;
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/* inproc.c — reference in-process control: a thin shim turning a C callback into
|
||||
* the neutral control vtable. fd = -1 (no epoll registration); UP arrives via a
|
||||
* direct on_event call, DOWN leaves through the emit hook installed by the core. */
|
||||
#include "vmsig_control.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef struct {
|
||||
vmsig_inproc_cfg cfg;
|
||||
int (*emit_down)(void* token, vmsig_event*);
|
||||
void* token;
|
||||
} inproc_ctl;
|
||||
|
||||
/* The in-proc control has no descriptor: always reports -1. */
static int ip_fd(void* ctl) {
    (void)ctl;
    return -1;
}
|
||||
/* Report the subscription filter stored in the control's configuration. Returns 0. */
static int ip_subscribe(void* ctl, vmsig_sub* out) {
    inproc_ctl* self = ctl;
    *out = self->cfg.sub;
    return 0;
}
|
||||
static int ip_deliver(void* ctl, const vmsig_event* ev) {
|
||||
inproc_ctl* c = ctl;
|
||||
return c->cfg.on_event ? c->cfg.on_event(c->cfg.user, ev) : 0;
|
||||
}
|
||||
/* Store the DOWN hook and its token; vmsig_inproc_send forwards through them. */
static void ip_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*), void* token) {
    inproc_ctl* self = ctl;

    self->emit_down = emit;
    self->token     = token;
}
|
||||
/* Release the control object allocated by vmsig_inproc_control_new. */
static void ip_close(void* ctl) {
    free(ctl);
}
|
||||
|
||||
/* Core -> in-proc algorithm: address-space context (MEMCTX) + RO-fd as a direct int.
|
||||
* The fd is borrowed (dup/mmap to retain it); the core closes it after the call. */
|
||||
static int ip_attach_memctx(void* ctl, const vmsig_event* ev, int fd) {
|
||||
inproc_ctl* c = ctl;
|
||||
if (!c->cfg.on_memctx) return -1;
|
||||
return c->cfg.on_memctx(c->cfg.user, ev, fd);
|
||||
}
|
||||
|
||||
static const vmsig_control_ops IP_OPS = {
|
||||
.name = "inproc",
|
||||
.fd = ip_fd,
|
||||
.subscribe = ip_subscribe,
|
||||
.deliver = ip_deliver,
|
||||
.on_readable = NULL, /* no fd — nothing to read */
|
||||
.set_emit_down = ip_set_emit_down,
|
||||
.close = ip_close,
|
||||
.attach_memctx = ip_attach_memctx
|
||||
};
|
||||
|
||||
/* Public accessor: returns the in-proc control vtable (static storage, never NULL). */
const vmsig_control_ops* vmsig_inproc_control_ops(void) {
    return &IP_OPS;
}
|
||||
|
||||
void* vmsig_inproc_control_new(const vmsig_inproc_cfg* cfg) {
|
||||
inproc_ctl* c = calloc(1, sizeof *c);
|
||||
if (!c) return NULL;
|
||||
if (cfg) c->cfg = *cfg;
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Submit a DOWN event through the installed emit hook.
 * Returns -1 when ctl is NULL or no hook has been installed yet; otherwise
 * whatever the hook returns. */
int vmsig_inproc_send(void* ctl, vmsig_event* down) {
    inproc_ctl* self = ctl;
    if (!self) return -1;
    if (!self->emit_down) return -1;
    return self->emit_down(self->token, down);
}
|
||||
@@ -0,0 +1,318 @@
|
||||
/* socket.c — out-of-process control over a unix socket.
|
||||
*
|
||||
* The listener registers in the core as a SLOT_SOURCE (listen-fd). On accept the
|
||||
* peer is authenticated via SO_PEERCRED, the policy issues a neutral grant; an empty
|
||||
* grant => the connection is closed (not a valid poller). Otherwise a per-conn
|
||||
* control is created: its fd is driven by the epoll core, DOWN frames are parsed and
|
||||
* dispatched through emit_down (enforced by the grant), UP events are serialized into
|
||||
* a frame. On EOF — deferred reap.
|
||||
*
|
||||
* DoS protection: per-uid limit of concurrent connections (against eviction of
|
||||
* legitimate ones); a janitor timerfd detaches "stuck" partial frames (slowloris).
|
||||
* The global ceiling and slot reuse live in the core. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig_socket.h"
|
||||
#include "core_internal.h" /* core_add_source, core_request_drop, add_control */
|
||||
#include <sys/socket.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/un.h>
|
||||
#include <sys/timerfd.h>
|
||||
#include <sys/stat.h> /* umask */
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
|
||||
#define VMSIG_SOCK_PER_UID_MAX 8 /* concurrent connections per uid */
|
||||
#define VMSIG_SOCK_IDLE_NS (10ull * 1000000000ull) /* timeout for a stuck partial frame */
|
||||
#define VMSIG_SOCK_JANITOR_S 5 /* sweep period */
|
||||
|
||||
typedef struct sock_listener sock_listener;
|
||||
|
||||
static uint64_t now_ns(void) {
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
|
||||
}
|
||||
|
||||
/* ===== wire codec (public — also for external clients) ===== */
|
||||
/* Fill *w from *ev: the frame is zeroed first, then stamped with magic/version,
 * the routing header fields and the inline bytes. Only ev->inln is carried. */
void vmsig_wire_encode(vmsig_wire* w, const vmsig_event* ev) {
    memset(w, 0, sizeof *w);

    w->magic   = VMSIG_WIRE_MAGIC;
    w->version = VMSIG_WIRE_VERSION;

    w->kind     = ev->kind;
    w->source   = ev->source;
    w->dir      = ev->dir;
    w->prio     = ev->prio;
    w->endpoint = ev->endpoint;
    w->corr     = ev->corr;

    memcpy(w->inln, ev->inln, sizeof w->inln);
}
|
||||
/* Rebuild an event from a wire frame.
 * Returns -1 (leaving *ev untouched) when magic or version do not match; otherwise
 * zeroes *ev, copies the header fields and inline bytes, marks the payload as
 * inline and returns 0. */
int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev) {
    if (w->magic != VMSIG_WIRE_MAGIC) return -1;
    if (w->version != VMSIG_WIRE_VERSION) return -1;

    memset(ev, 0, sizeof *ev);

    ev->kind     = w->kind;
    ev->source   = w->source;
    ev->dir      = w->dir;
    ev->prio     = w->prio;
    ev->endpoint = w->endpoint;
    ev->corr     = w->corr;

    ev->payload.flags = VMSIG_PL_INLINE;
    memcpy(ev->inln, w->inln, sizeof ev->inln);
    return 0;
}
|
||||
|
||||
/* ===== per-conn control ===== */
|
||||
typedef struct sock_conn {
|
||||
int fd;
|
||||
vmsig_core* core;
|
||||
int id;
|
||||
uint32_t uid;
|
||||
uint64_t last_ns; /* activity for the janitor */
|
||||
sock_listener* L;
|
||||
struct sock_conn* lnext; /* listener's connection list */
|
||||
int (*emit_down)(void* token, vmsig_event*);
|
||||
void* token;
|
||||
uint8_t buf[sizeof(vmsig_wire)];
|
||||
size_t buflen;
|
||||
} sock_conn;
|
||||
|
||||
static int conn_fd(void* ctl) { return ((sock_conn*)ctl)->fd; }
|
||||
|
||||
/* A socket control reports an all-zero subscription (everything; the grant gates it). */
static int conn_subscribe(void* ctl, vmsig_sub* out) {
    (void)ctl;
    memset(out, 0, sizeof *out);
    return 0;
}
|
||||
|
||||
static int conn_deliver(void* ctl, const vmsig_event* ev) {
|
||||
sock_conn* c = ctl;
|
||||
vmsig_wire w;
|
||||
vmsig_wire_encode(&w, ev);
|
||||
ssize_t r = write(c->fd, &w, sizeof w); /* best-effort; EAGAIN => frame dropped */
|
||||
(void)r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Store the DOWN emit hook and its token; conn_on_readable calls it per decoded frame. */
static void conn_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*), void* token) {
    sock_conn* conn = ctl;
    conn->token = token;
    conn->emit_down = emit;
}
|
||||
|
||||
static int conn_on_readable(void* ctl) {
|
||||
sock_conn* c = ctl;
|
||||
for (;;) {
|
||||
ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen);
|
||||
if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN || errno == EWOULDBLOCK) break;
|
||||
core_request_drop(c->core, c->id);
|
||||
return 0;
|
||||
}
|
||||
c->last_ns = now_ns();
|
||||
c->buflen += (size_t)n;
|
||||
if (c->buflen == sizeof c->buf) {
|
||||
vmsig_event ev;
|
||||
if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) == 0) {
|
||||
ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */
|
||||
if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */
|
||||
}
|
||||
c->buflen = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ===== listener ===== */
|
||||
struct sock_listener {
|
||||
int listen_fd;
|
||||
int janitor_fd;
|
||||
vmsig_core* core;
|
||||
vmsig_socket_policy policy;
|
||||
void* ud;
|
||||
sock_conn* conns; /* singly-linked list of active connections */
|
||||
};
|
||||
|
||||
/* Remove c from the listener's connection list; no-op when c is not linked. */
static void listener_unlink(sock_listener* L, sock_conn* c) {
    for (sock_conn** link = &L->conns; *link; link = &(*link)->lnext) {
        if (*link != c) continue;
        *link = c->lnext;
        return;
    }
}
|
||||
|
||||
/* Number of currently linked connections whose peer uid equals uid. */
static int listener_uid_count(sock_listener* L, uint32_t uid) {
    int matches = 0;
    sock_conn* it = L->conns;
    while (it) {
        if (it->uid == uid) matches++;
        it = it->lnext;
    }
    return matches;
}
|
||||
|
||||
static void conn_close(void* ctl) {
|
||||
sock_conn* c = ctl;
|
||||
if (c->L) listener_unlink(c->L, c);
|
||||
if (c->fd >= 0) close(c->fd);
|
||||
free(c);
|
||||
}
|
||||
|
||||
/* Send a SINGLE 80-byte vmsig_wire frame + ONE RO-fd in a cmsg (SCM_RIGHTS). This keeps
|
||||
* the control-socket stream fixed-framed at sizeof(vmsig_wire): the client reads one
|
||||
* frame via recvmsg and extracts the fd only on an fd-carrying frame. Partial cmsg
|
||||
* transfer is not allowed (the fd is all-or-nothing): a short sendmsg -> -1. Shared
|
||||
* primitive for the memctx handoff (one SCM_RIGHTS mechanism). */
|
||||
static int conn_send_fd_frame(sock_conn* c, const vmsig_wire* w, int fd) {
|
||||
struct iovec iov;
|
||||
iov.iov_base = (void*)w;
|
||||
iov.iov_len = sizeof *w;
|
||||
|
||||
union {
|
||||
char buf[CMSG_SPACE(sizeof(int))];
|
||||
struct cmsghdr align;
|
||||
} cm;
|
||||
memset(&cm, 0, sizeof cm);
|
||||
|
||||
struct msghdr mh;
|
||||
memset(&mh, 0, sizeof mh);
|
||||
mh.msg_iov = &iov;
|
||||
mh.msg_iovlen = 1;
|
||||
mh.msg_control = cm.buf;
|
||||
mh.msg_controllen = sizeof cm.buf;
|
||||
|
||||
struct cmsghdr* cmsg = CMSG_FIRSTHDR(&mh);
|
||||
cmsg->cmsg_level = SOL_SOCKET;
|
||||
cmsg->cmsg_type = SCM_RIGHTS;
|
||||
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
|
||||
memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
|
||||
|
||||
for (;;) {
|
||||
ssize_t n = sendmsg(c->fd, &mh, MSG_NOSIGNAL);
|
||||
if (n < 0) {
|
||||
if (errno == EINTR) continue;
|
||||
return -1;
|
||||
}
|
||||
return ((size_t)n == sizeof *w) ? 0 : -1; /* partial frame -> failure */
|
||||
}
|
||||
}
|
||||
|
||||
/* Core -> socket-control: handoff of an address-space context (kind=MEMCTX, inln=vmsig_memctx
|
||||
* POD) + RO-fd of the RAM region in a cmsg. The segs payload does NOT go on the wire (the
|
||||
* fixed-framed vmsig_wire carries only inln); the holder opens it at `low`. */
|
||||
static int conn_attach_memctx(void* ctl, const vmsig_event* ev, int fd) {
|
||||
sock_conn* c = ctl;
|
||||
if (fd < 0 || !ev) return -1;
|
||||
vmsig_wire w;
|
||||
vmsig_wire_encode(&w, ev); /* kind=MEMCTX, inln=vmsig_memctx; payload is not serialized */
|
||||
return conn_send_fd_frame(c, &w, fd);
|
||||
}
|
||||
|
||||
static const vmsig_control_ops CONN_OPS = {
|
||||
.name = "socket",
|
||||
.fd = conn_fd, .subscribe = conn_subscribe, .deliver = conn_deliver,
|
||||
.on_readable = conn_on_readable, .set_emit_down = conn_set_emit_down, .close = conn_close,
|
||||
.attach_memctx = conn_attach_memctx
|
||||
};
|
||||
|
||||
static void on_accept(void* user, uint32_t events) {
|
||||
(void)events;
|
||||
sock_listener* L = user;
|
||||
for (;;) {
|
||||
int fd = accept4(L->listen_fd, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
|
||||
if (fd < 0) break; /* EAGAIN / other — done */
|
||||
|
||||
uint32_t uid = (uint32_t)-1, pid = 0;
|
||||
struct ucred uc; socklen_t ul = sizeof uc;
|
||||
if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &uc, &ul) == 0) {
|
||||
uid = (uint32_t)uc.uid; pid = (uint32_t)uc.pid;
|
||||
}
|
||||
vmsig_grant g;
|
||||
if (L->policy) g = L->policy(uid, pid, L->ud);
|
||||
else memset(&g, 0, sizeof g);
|
||||
|
||||
if (g.cap_mask == 0 || g.endpoint_mask == 0) { /* not a valid poller */
|
||||
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
|
||||
core_audit(L->core, &a);
|
||||
close(fd);
|
||||
continue;
|
||||
}
|
||||
if (listener_uid_count(L, uid) >= VMSIG_SOCK_PER_UID_MAX) { /* anti-eviction */
|
||||
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
|
||||
core_audit(L->core, &a);
|
||||
close(fd);
|
||||
continue;
|
||||
}
|
||||
sock_conn* conn = calloc(1, sizeof *conn);
|
||||
if (!conn) { close(fd); continue; }
|
||||
conn->fd = fd; conn->core = L->core; conn->id = -1;
|
||||
conn->uid = uid; conn->last_ns = now_ns(); conn->L = L;
|
||||
conn->lnext = L->conns; L->conns = conn;
|
||||
int id = vmsig_core_add_control(L->core, &CONN_OPS, conn, &g);
|
||||
if (id < 0) { /* no slot — reject */
|
||||
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
|
||||
core_audit(L->core, &a);
|
||||
listener_unlink(L, conn); close(fd); free(conn); continue;
|
||||
}
|
||||
conn->id = id;
|
||||
vmsig_audit a = { VMSIG_AUDIT_ADMIT, g.principal, 0, 0, pid };
|
||||
core_audit(L->core, &a);
|
||||
}
|
||||
}
|
||||
|
||||
/* janitor: detach connections with a stuck partial frame (slowloris) */
|
||||
static void on_janitor(void* user, uint32_t events) {
|
||||
(void)events;
|
||||
sock_listener* L = user;
|
||||
uint64_t v;
|
||||
while (read(L->janitor_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain */ }
|
||||
uint64_t now = now_ns();
|
||||
for (sock_conn* c = L->conns; c; c = c->lnext)
|
||||
if (c->buflen > 0 && now - c->last_ns > VMSIG_SOCK_IDLE_NS)
|
||||
core_request_drop(c->core, c->id);
|
||||
}
|
||||
|
||||
/* listener cleanup on core_free (owner = the core, via on_free of the first source) */
|
||||
static void listener_free(void* user) {
|
||||
sock_listener* L = user;
|
||||
if (L->janitor_fd >= 0) close(L->janitor_fd);
|
||||
if (L->listen_fd >= 0) close(L->listen_fd);
|
||||
free(L);
|
||||
}
|
||||
|
||||
int vmsig_socket_attach(vmsig_core* core, const char* path,
|
||||
vmsig_socket_policy policy, void* ud) {
|
||||
if (!core || !path || !*path) return -1;
|
||||
int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
|
||||
if (fd < 0) return -1;
|
||||
|
||||
struct sockaddr_un addr;
|
||||
memset(&addr, 0, sizeof addr);
|
||||
addr.sun_family = AF_UNIX;
|
||||
socklen_t alen;
|
||||
size_t n = strlen(path);
|
||||
if (path[0] == '@') { /* abstract namespace */
|
||||
if (n > sizeof addr.sun_path) { close(fd); return -1; }
|
||||
addr.sun_path[0] = 0;
|
||||
memcpy(addr.sun_path + 1, path + 1, n - 1);
|
||||
alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
|
||||
} else { /* filesystem path */
|
||||
if (n >= sizeof addr.sun_path) { close(fd); return -1; }
|
||||
unlink(path);
|
||||
memcpy(addr.sun_path, path, n);
|
||||
alen = (socklen_t)sizeof addr;
|
||||
}
|
||||
/* Create the filesystem socket with restrictive perms (0600): the path must not be
|
||||
* the only gate — connect requires write, so we open it to the owner only.
|
||||
* (An abstract socket has no FS perms; its access is bounded by the net namespace.) */
|
||||
mode_t old_um = 0;
|
||||
int restrict_perm = (path[0] != '@');
|
||||
if (restrict_perm) old_um = umask(0177);
|
||||
int br = bind(fd, (struct sockaddr*)&addr, alen);
|
||||
if (restrict_perm) umask(old_um);
|
||||
if (br < 0) { close(fd); return -1; }
|
||||
if (listen(fd, 64) < 0) { close(fd); return -1; }
|
||||
|
||||
int jfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
|
||||
if (jfd < 0) { close(fd); return -1; }
|
||||
struct itimerspec its;
|
||||
memset(&its, 0, sizeof its);
|
||||
its.it_interval.tv_sec = VMSIG_SOCK_JANITOR_S;
|
||||
its.it_value = its.it_interval;
|
||||
if (timerfd_settime(jfd, 0, &its, NULL) < 0) { close(jfd); close(fd); return -1; }
|
||||
|
||||
sock_listener* L = calloc(1, sizeof *L);
|
||||
if (!L) { close(jfd); close(fd); return -1; }
|
||||
L->listen_fd = fd; L->janitor_fd = jfd; L->core = core; L->policy = policy; L->ud = ud;
|
||||
/* the listen source owns the listener (on_free=listener_free closes both fds + free) */
|
||||
if (core_add_source(core, fd, on_accept, L, listener_free) < 0) {
|
||||
close(jfd); close(fd); free(L); return -1;
|
||||
}
|
||||
/* janitor without on_free (L already belongs to the core); on error core_free releases it */
|
||||
if (core_add_source(core, jfd, on_janitor, L, NULL) < 0) return -1;
|
||||
return 0;
|
||||
}
|
||||
+224
@@ -0,0 +1,224 @@
|
||||
/* core.c — core lifecycle and registration of adapters/controls.
|
||||
* The loop and pumps live in loop.c. */
|
||||
#include "core_internal.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
/* Bind fd to a slot and add it to the epoll set with the slot pointer as event data.
 * A detached (SLOT_DEAD) slot is reused when one exists so c->slots[] does not grow
 * on every connection; otherwise a new slot is appended (the table doubles, starting
 * at 16). Returns the slot, or NULL on allocation failure or when epoll_ctl fails
 * (in which case the slot is left marked SLOT_DEAD). */
core_slot* core_register_fd(vmsig_core* c, int fd, uint32_t epoll_events, slot_role role) {
    core_slot* slot = NULL;
    for (int i = 0; i < c->nslots && !slot; i++) {
        if (c->slots[i]->role == SLOT_DEAD) slot = c->slots[i];
    }

    if (!slot) {
        if (c->nslots == c->cap_slots) {
            int ncap = c->cap_slots ? c->cap_slots * 2 : 16;
            core_slot** grown = realloc(c->slots, (size_t)ncap * sizeof *grown);
            if (!grown) return NULL;
            c->slots     = grown;
            c->cap_slots = ncap;
        }
        slot = calloc(1, sizeof *slot);
        if (!slot) return NULL;
        c->slots[c->nslots] = slot;
        c->nslots++;
    }

    /* a reused slot may still carry its previous owner's fields */
    memset(slot, 0, sizeof *slot);
    slot->fd   = fd;
    slot->role = role;

    struct epoll_event ee;
    memset(&ee, 0, sizeof ee);
    ee.data.ptr = slot;
    ee.events   = epoll_events;
    if (epoll_ctl(c->epfd, EPOLL_CTL_ADD, fd, &ee) >= 0) return slot;

    slot->role = SLOT_DEAD;
    return NULL;
}
|
||||
|
||||
/* Create a core bound to ctx: an epoll instance, a wake eventfd registered as
 * SLOT_WAKEUP, and the context's pacing timerfds (one per direction, when the
 * context reports one) registered as SLOT_CTX_TIMING.
 *
 * Returns NULL when ctx is NULL or any mandatory step fails. The failure path
 * releases everything acquired so far, including the slot table that
 * core_register_fd may already have allocated before its epoll_ctl failed
 * (previously leaked). A failed registration of a pacing timerfd is not fatal,
 * as before. The core does not own ctx. */
vmsig_core* vmsig_core_new(vmsig_ctx* ctx) {
    if (!ctx) return NULL;
    vmsig_core* c = calloc(1, sizeof *c);
    if (!c) return NULL;
    c->ctx = ctx;
    c->epfd = -1;
    c->wake_fd = -1;

    c->epfd = epoll_create1(EPOLL_CLOEXEC);
    if (c->epfd < 0) goto fail;

    c->wake_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (c->wake_fd < 0) goto fail;
    if (!core_register_fd(c, c->wake_fd, EPOLLIN, SLOT_WAKEUP)) goto fail;

    /* context pacing timerfds (created in ctx_new) as loop sources */
    for (int d = VMSIG_DIR_UP; d <= VMSIG_DIR_DOWN; d++) {
        int tfd = vmsig_ctx_timing_fd(ctx, (vmsig_dir)d);
        if (tfd >= 0) core_register_fd(c, tfd, EPOLLIN, SLOT_CTX_TIMING);
    }
    return c;

fail:
    for (int i = 0; i < c->nslots; i++) free(c->slots[i]);
    free(c->slots);
    if (c->wake_fd >= 0) close(c->wake_fd);
    if (c->epfd >= 0) close(c->epfd);
    free(c);
    return NULL;
}
|
||||
|
||||
/* Open an adapter for `endpoint`, let it attach (it reports up to
 * VMSIG_ADAPTER_FDS descriptors to watch) and register those descriptors in the
 * epoll core. Returns the adapter id (its index in c->adapters), or -1.
 *
 * Failure handling:
 *  - a descriptor count outside 0..VMSIG_ADAPTER_FDS is rejected, so reg[] is never
 *    indexed out of bounds;
 *  - if registering one descriptor fails, the descriptors registered before it are
 *    removed from epoll and their slots marked SLOT_DEAD BEFORE the adapter is
 *    closed, so no live slot keeps a pointer to the closed adapter;
 *  - ops->close is treated as optional, as in vmsig_core_free. */
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
                           const void* cfg, uint32_t endpoint) {
    if (!c || !ops || c->nadapters >= VMSIG_MAX_ADAPTERS) return -1;

    vmsig_adapter* a = ops->open(cfg, endpoint);
    if (!a) return -1;

    vmsig_emit emit = { core_emit_up, core_register_memctx, core_unregister_memctx, c };
    vmsig_fd_reg reg[VMSIG_ADAPTER_FDS];
    memset(reg, 0, sizeof reg);

    core_slot* added[VMSIG_ADAPTER_FDS];   /* slots registered so far (for rollback) */
    int nadded = 0;

    int n = ops->attach(a, &emit, reg, VMSIG_ADAPTER_FDS);
    if (n < 0 || n > VMSIG_ADAPTER_FDS) goto fail;

    for (int i = 0; i < n; i++) {
        uint32_t events = reg[i].epoll_events ? reg[i].epoll_events : (uint32_t)EPOLLIN;
        core_slot* s = core_register_fd(c, reg[i].fd, events, SLOT_ADAPTER);
        if (!s) goto fail;
        s->ops = ops;
        s->adapter = a;
        s->cookie = reg[i].cookie;
        added[nadded++] = s;
    }

    int id = c->nadapters;
    c->adapters[id].ops = ops;
    c->adapters[id].a = a;
    c->adapters[id].endpoint = endpoint;
    c->nadapters++;
    return id;

fail:
    for (int i = 0; i < nadded; i++) {
        epoll_ctl(c->epfd, EPOLL_CTL_DEL, added[i]->fd, NULL);
        added[i]->role = SLOT_DEAD;        /* reusable; no longer references `a` */
    }
    if (ops->close) ops->close(a);
    return -1;
}
|
||||
|
||||
int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ctl,
|
||||
const vmsig_grant* grant) {
|
||||
if (!c || !ops) return -1;
|
||||
|
||||
/* reuse a freed (reaped) slot; otherwise grow up to the ceiling */
|
||||
int id = -1;
|
||||
for (int i = 0; i < c->ncontrols; i++)
|
||||
if (!c->controls[i].active) { id = i; break; }
|
||||
if (id < 0) {
|
||||
if (c->ncontrols >= VMSIG_MAX_CONTROLS) return -1;
|
||||
id = c->ncontrols++;
|
||||
}
|
||||
core_control_ent* e = &c->controls[id];
|
||||
uint16_t gen = e->gen; /* generation survives the slot memset */
|
||||
memset(e, 0, sizeof *e);
|
||||
e->gen = (uint16_t)(gen + 1); /* new generation for this (re)use */
|
||||
e->ops = ops;
|
||||
e->ctl = ctl;
|
||||
e->active = 1;
|
||||
if (grant) e->grant = *grant; /* otherwise stays zero => default-deny */
|
||||
e->dctx.core = c;
|
||||
e->dctx.ctl_id = id;
|
||||
|
||||
if (ops->subscribe) ops->subscribe(ctl, &e->sub);
|
||||
/* emit_down token is our down_ctx, so emit_down can find this control's grant */
|
||||
if (ops->set_emit_down) ops->set_emit_down(ctl, core_emit_down, &e->dctx);
|
||||
|
||||
int fd = ops->fd ? ops->fd(ctl) : -1;
|
||||
if (fd >= 0) {
|
||||
core_slot* s = core_register_fd(c, fd, EPOLLIN, SLOT_CONTROL);
|
||||
if (!s) return -1;
|
||||
s->cops = ops;
|
||||
s->ctl = ctl;
|
||||
e->slot = s;
|
||||
}
|
||||
|
||||
/* Late subscriber: replay retained MEMCTX (if a context is already published and
|
||||
* this control is qualified). For a control added BEFORE the first publication,
|
||||
* the cell is not yet valid — it receives MEMCTX via the normal multicast in pump_up. */
|
||||
core_memctx_replay(c, id);
|
||||
|
||||
return id; /* ncontrols already bumped when picking id (on growth); reuse does not grow it */
|
||||
}
|
||||
|
||||
|
||||
/* ===== MEMCTX registration: per-endpoint retain cell (called by the adapter on the loop thread) =====
|
||||
* Registers the address-space context adapter's reg hooks. The core holds THIS and does
|
||||
* NOT store a copy of the locator: on delivery/replay it calls reg.describe/share_fd.
|
||||
* valid/epoch are maintained in route/epoch_bump (not here): register only records that
|
||||
* "the adapter is connected". */
|
||||
int core_register_memctx(void* token, const vmsig_memctx_reg* reg) {
|
||||
vmsig_core* c = token;
|
||||
if (!c || !reg || reg->endpoint >= 64) return -1;
|
||||
core_memctx_cell* cell = &c->memctx[reg->endpoint];
|
||||
cell->reg = *reg;
|
||||
cell->registered = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void core_unregister_memctx(void* token, uint32_t endpoint) {
|
||||
vmsig_core* c = token;
|
||||
if (!c || endpoint >= 64) return;
|
||||
core_memctx_cell* cell = &c->memctx[endpoint];
|
||||
cell->registered = 0;
|
||||
cell->valid = 0;
|
||||
memset(&cell->reg, 0, sizeof cell->reg);
|
||||
}
|
||||
|
||||
/* Install (or, with cb == NULL, remove) the audit callback and its user pointer.
 * No-op when c is NULL. */
void vmsig_core_set_audit(vmsig_core* c, void (*cb)(void* ud, const vmsig_audit* a), void* ud) {
    if (c) {
        c->audit_ud = ud;
        c->audit_cb = cb;
    }
}
|
||||
|
||||
/* Pass an audit record to the installed callback; does nothing when the core is
 * NULL or no callback is set. */
void core_audit(vmsig_core* c, const vmsig_audit* a) {
    if (!c || !c->audit_cb) return;
    c->audit_cb(c->audit_ud, a);
}
|
||||
|
||||
/* Install the lease arbitration policy callback and its user pointer.
 * No-op when c is NULL. */
void vmsig_core_set_arb_policy(vmsig_core* c, vmsig_arb_policy cb, void* ud) {
    if (c) {
        c->arb_ud = ud;
        c->arb_cb = cb;
        /* lease[][] is zeroed in vmsig_core_new (calloc) => all cells free. */
    }
}
|
||||
|
||||
/* Register an arbitrary readable fd (EPOLLIN) with a callback as a SLOT_SOURCE.
 * on_free may be NULL; when set, vmsig_core_free calls it with `user`.
 * Returns 0, or -1 for bad arguments or when the fd cannot be registered. */
int core_add_source(vmsig_core* c, int fd, void (*cb)(void* user, uint32_t events),
                    void* user, void (*on_free)(void* user)) {
    if (!c || !cb || fd < 0) return -1;

    core_slot* slot = core_register_fd(c, fd, EPOLLIN, SLOT_SOURCE);
    if (!slot) return -1;

    slot->source_user = user;
    slot->on_source   = cb;
    slot->on_free     = on_free;
    return 0;
}
|
||||
|
||||
/* Flag a control for reaping and wake the loop. Ids outside [0, ncontrols) and a
 * NULL core are ignored. */
void core_request_drop(vmsig_core* c, int ctl_id) {
    if (!c) return;
    if (ctl_id < 0 || ctl_id >= c->ncontrols) return;

    core_control_ent* ent = &c->controls[ctl_id];
    ent->reap = 1;
    core_wake(c);   /* wake the loop for a reap pass (without stop) */
}
|
||||
|
||||
/* Tear the core down. Order:
 *   1. adapters are closed FIRST: their close stops off-loop workers and
 *      unregisters their seams (e.g. memctx) BEFORE destruction;
 *   2. every still-active control is closed;
 *   3. on_free of fd sources runs (e.g. unix listener: close listen/janitor fd + free);
 *   4. slots, the slot table, the wake eventfd and the epoll fd are released.
 * ctx is not ours: its owner frees it. No-op for NULL. */
void vmsig_core_free(vmsig_core* c) {
    if (!c) return;

    for (int i = 0; i < c->nadapters; i++) {
        core_adapter_ent* ent = &c->adapters[i];
        if (ent->ops->close) ent->ops->close(ent->a);
    }

    for (int i = 0; i < c->ncontrols; i++) {
        core_control_ent* ent = &c->controls[i];
        if (!ent->active) continue;
        if (ent->ops->close) ent->ops->close(ent->ctl);
    }

    for (int i = 0; i < c->nslots; i++) {
        core_slot* slot = c->slots[i];
        if (slot->role == SLOT_SOURCE && slot->on_free) {
            slot->on_free(slot->source_user);
        }
    }

    for (int i = 0; i < c->nslots; i++) {
        free(c->slots[i]);
    }
    free(c->slots);

    if (c->wake_fd >= 0) close(c->wake_fd);
    if (c->epfd >= 0) close(c->epfd);
    free(c);
}
|
||||
@@ -0,0 +1,170 @@
|
||||
#ifndef VMSIG_CORE_INTERNAL_H
|
||||
#define VMSIG_CORE_INTERNAL_H
|
||||
#include "vmsig_core.h"
|
||||
#include <signal.h>
|
||||
|
||||
/* Private internals of the epoll core. Each registered fd carries a
|
||||
* core_slot* in epoll_event.data.ptr; the slot's role decides how to handle it. */
|
||||
|
||||
#define VMSIG_MAX_EVENTS 64
|
||||
#define VMSIG_MAX_ADAPTERS 256 /* up to ~64 VMs * 3 adapters + slack (mode A) */
|
||||
#define VMSIG_MAX_CONTROLS 64 /* concurrent pollers; more => processes (C) */
|
||||
#define VMSIG_ADAPTER_FDS 8 /* max fds per adapter */
|
||||
#define VMSIG_DOWN_PENDING_MAX 256 /* ceiling of DOWN commands per poller in ctx (fairness) */
|
||||
|
||||
/* Role of a registered fd; decides how its readiness is handled. */
typedef enum {
    SLOT_WAKEUP,        /* wake/stop eventfd */
    SLOT_ADAPTER,       /* adapter fd (timerfd/eventfd/socket) */
    SLOT_CTX_TIMING,    /* context pacing timerfd */
    SLOT_CONTROL,       /* out-of-process control socket */
    SLOT_SOURCE,        /* arbitrary fd + callback (e.g. listen-fd) */
    SLOT_DEAD           /* detached (reaped); loop ignores it */
} slot_role;
|
||||
|
||||
typedef struct core_slot {
|
||||
slot_role role;
|
||||
int fd;
|
||||
/* for SLOT_ADAPTER */
|
||||
const vmsig_adapter_ops* ops;
|
||||
vmsig_adapter* adapter;
|
||||
uint32_t cookie;
|
||||
/* for SLOT_CONTROL */
|
||||
const vmsig_control_ops* cops;
|
||||
void* ctl;
|
||||
/* for SLOT_SOURCE */
|
||||
void (*on_source)(void* user, uint32_t events);
|
||||
void (*on_free)(void* user); /* invoked at core_free (source cleanup) */
|
||||
void* source_user;
|
||||
} core_slot;
|
||||
|
||||
typedef struct {
|
||||
const vmsig_adapter_ops* ops;
|
||||
vmsig_adapter* a;
|
||||
uint32_t endpoint;
|
||||
} core_adapter_ent;
|
||||
|
||||
|
||||
/* ===== Retained address-space context (MEMCTX seam) =====
|
||||
* The core retains per-endpoint "a current context exists in the current epoch" + the
|
||||
* adapter's reg pointer (describe/share_fd/invalidate). Replays to a late qualified
|
||||
* subscriber (CAP_MEMCTX + source_mask + endpoint) re-sharing the RO-fd. Does NOT store a
|
||||
* copy of the locator: on delivery/replay it calls reg.describe (adapter snapshot) +
|
||||
* reg.share_fd (fresh RO-fd). Invalidated on epoch change; cleared on unregister/free. */
|
||||
typedef struct {
|
||||
int registered; /* adapter called register_memctx (reg valid) */
|
||||
int valid; /* a published context exists in the current epoch */
|
||||
uint32_t epoch; /* snapshot epoch (== core epoch[ep] when valid) */
|
||||
vmsig_memctx_reg reg; /* valid when registered */
|
||||
} core_memctx_cell;
|
||||
|
||||
/* ===== Lease layer (arbitration of exclusive ownership of destructive resources) =====
|
||||
* One cell per (endpoint, lease-class): who owns it (origin) + a snapshot of arb_prio at
|
||||
* acquisition time. owner=0 => free. The snapshot (rather than the live grant) makes the
|
||||
* policy resilient to the owner's grant changing after acquisition. */
|
||||
#define VMSIG_LEASE_CLASSES 3 /* INPUT, POWER, MEMWRITE (== VMSIG_LEASE_CLASS_MAX) */
|
||||
/* One lease cell, i.e. one (endpoint, lease-class) pair. */
typedef struct {
    uint32_t owner;         /* origin (gen<<16)|(id+1) of the owner; 0 = free */
    uint32_t owner_prio;    /* owner's arb_prio at acquisition time (snapshot) */
} core_lease_cell;
|
||||
|
||||
struct vmsig_core; /* fwd for core_down_ctx */
|
||||
|
||||
/* DOWN emission context: handed to a control in set_emit_down so emit_down knows WHICH
|
||||
* control issued the command (for grant lookup and enforcement). Stable: lives in the
|
||||
* fixed controls[] array. */
|
||||
/* Token given to a control's emit_down hook: identifies the core and the issuing
 * control. */
typedef struct {
    struct vmsig_core* core;    /* owning core */
    int ctl_id;                 /* index of the control in controls[] */
} core_down_ctx;
|
||||
|
||||
typedef struct {
|
||||
const vmsig_control_ops* ops;
|
||||
void* ctl;
|
||||
vmsig_sub sub;
|
||||
vmsig_grant grant; /* poller's rights ceiling (default-deny) */
|
||||
core_down_ctx dctx; /* token for emit_down */
|
||||
int active; /* 0 = detached/reaped (slot free) */
|
||||
int reap; /* reap requested (deferred) */
|
||||
core_slot* slot; /* SLOT_CONTROL fd slot (or NULL) */
|
||||
uint32_t pending; /* DOWN commands of this poller in ctx (fairness cap) */
|
||||
uint16_t gen; /* slot generation: +1 on each (re)use */
|
||||
} core_control_ent;
|
||||
|
||||
struct vmsig_core {
|
||||
int epfd;
|
||||
int wake_fd; /* eventfd: nudge + stop */
|
||||
vmsig_ctx* ctx;
|
||||
volatile sig_atomic_t stopping;
|
||||
|
||||
core_adapter_ent adapters[VMSIG_MAX_ADAPTERS];
|
||||
int nadapters;
|
||||
core_control_ent controls[VMSIG_MAX_CONTROLS];
|
||||
int ncontrols;
|
||||
|
||||
core_slot** slots; /* all allocated slots (for free) */
|
||||
int nslots;
|
||||
int cap_slots;
|
||||
|
||||
|
||||
uint32_t epoch[64]; /* per-endpoint VM session epoch */
|
||||
core_memctx_cell memctx[64]; /* per-endpoint retained context */
|
||||
|
||||
core_lease_cell lease[64][VMSIG_LEASE_CLASSES]; /* lease per (endpoint, class) */
|
||||
vmsig_arb_policy arb_cb; /* preemption policy (NULL=default) */
|
||||
void* arb_ud;
|
||||
|
||||
void (*audit_cb)(void* ud, const vmsig_audit* a);
|
||||
void* audit_ud;
|
||||
};
|
||||
|
||||
/* Emit an audit record (no-op if no callback is set). Defined in core.c. */
|
||||
void core_audit(vmsig_core* c, const vmsig_audit* a);
|
||||
|
||||
/* Register an fd in epoll + create a slot (see core.c). */
|
||||
core_slot* core_register_fd(vmsig_core* c, int fd, uint32_t epoll_events, slot_role role);
|
||||
|
||||
/* Register an arbitrary fd source with a callback (e.g. a socket listen-fd).
|
||||
* The callback is called on the loop thread when the fd is ready. on_free (may be NULL)
|
||||
* is called at vmsig_core_free to clean up the source's resource. 0/-1. */
|
||||
int core_add_source(vmsig_core* c, int fd, void (*cb)(void* user, uint32_t events),
|
||||
void* user, void (*on_free)(void* user));
|
||||
|
||||
/* Request detaching a control by id (deferred reap after the batch: epoll DEL,
|
||||
* close fd, ops->close). Safe to call from the control's own on_readable. */
|
||||
void core_request_drop(vmsig_core* c, int ctl_id);
|
||||
|
||||
/* emit hooks handed to adapters (UP) and controls (DOWN). Defined in loop.c. */
|
||||
int core_emit_up (void* token, vmsig_event* ev);
|
||||
int core_emit_down(void* token, vmsig_event* ev);
|
||||
|
||||
/* ===== Address-space context (MEMCTX seam; retained context) ===== */
|
||||
/* Context registration hooks (handed to the adapter in vmsig_emit; defined in core.c). */
|
||||
int core_register_memctx (void* token, const vmsig_memctx_reg* reg);
|
||||
void core_unregister_memctx(void* token, uint32_t endpoint);
|
||||
|
||||
/* Multicast MEMCTX to qualified subscribers + mark the retain cell valid
|
||||
* (from pump_up on the VMSIG_EV_MEMCTX trigger; defined in loop.c). */
|
||||
void core_memctx_route(vmsig_core* c, const vmsig_event* trigger);
|
||||
|
||||
/* Replay retained MEMCTX to a single (late) subscriber (from vmsig_core_add_control;
|
||||
* defined in loop.c). */
|
||||
void core_memctx_replay(vmsig_core* c, int ctl_id);
|
||||
|
||||
/* Bump the endpoint's epoch on a destructive lifecycle transition: epoch++, invalidate
|
||||
* the retain cell, emit MEMCTX_INVALIDATED, request re-bootstrap from the adapter.
|
||||
* Observed by the core in pump_up on UP VM_LIFECYCLE (defined in loop.c). */
|
||||
void core_epoch_bump(vmsig_core* c, uint32_t endpoint);
|
||||
|
||||
/* ===== Lease layer (defined in loop.c) ===== */
|
||||
/* Intercept CMD_ACQUIRE/RELEASE/LEASE_STATUS (synchronously from core_emit_down, not in ctx). */
|
||||
void core_lease_acquire(vmsig_core* c, int ctl_id, const vmsig_event* ev);
|
||||
void core_lease_release(vmsig_core* c, int ctl_id, const vmsig_event* ev);
|
||||
void core_lease_status (vmsig_core* c, int ctl_id, const vmsig_event* ev);
|
||||
|
||||
/* Reclaim the lease of a dead control (from core_reap, BEFORE e->active=0). */
|
||||
void core_lease_reap_control(vmsig_core* c, int ctl_id);
|
||||
|
||||
/* Wake the loop (eventfd nudge). Defined in loop.c. */
|
||||
void core_wake(vmsig_core* c);
|
||||
|
||||
#endif /* VMSIG_CORE_INTERNAL_H */
|
||||
@@ -0,0 +1,620 @@
|
||||
/* loop.c — non-blocking epoll loop, dispatch, pump up/down, emit hooks,
|
||||
* graceful shutdown. No sleep/polling/busy-wait: every wakeup is an fd. */
|
||||
#include "core_internal.h"
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
/* Read 8-byte counter values from fd until a read no longer returns a full one. */
static void drain_counter_fd(int fd) {
    uint64_t value;
    for (;;) {
        ssize_t got = read(fd, &value, sizeof value);
        if (got != (ssize_t)sizeof value) break;
    }
}
|
||||
|
||||
/* Nudge the loop by adding 1 to the wake eventfd; the write result is ignored. */
void core_wake(vmsig_core* c) {
    const uint64_t tick = 1;
    ssize_t ignored = write(c->wake_fd, &tick, sizeof tick);
    (void)ignored;
}
|
||||
|
||||
/* UP emit hook handed to adapters: submit the event to the context in the UP
 * direction, then wake the loop (nudge in case of emission off the loop thread).
 * Returns the submit result. */
int core_emit_up(void* token, vmsig_event* ev) {
    vmsig_core* core = token;
    const int rc = vmsig_ctx_submit(core->ctx, VMSIG_DIR_UP, ev);
    core_wake(core);
    return rc;
}
|
||||
|
||||
/* origin = (gen<<16)|(id+1): the low 16 bits hold the control's id+1, the high 16
 * bits the slot generation. Lets a reply be addressed to the initiator and stale
 * reuse filtered out. */
static uint32_t origin_pack(int id, uint16_t gen) {
    const uint32_t low  = (uint32_t)(id + 1) & 0xFFFFu;
    const uint32_t high = (uint32_t)gen << 16;
    return high | low;
}
|
||||
/* Live control by origin with generation check; NULL if gone/slot reused. */
|
||||
static core_control_ent* origin_ctl(vmsig_core* c, uint32_t origin) {
|
||||
if (!origin) return NULL;
|
||||
int id = (int)(origin & 0xFFFFu) - 1;
|
||||
uint16_t gen = (uint16_t)(origin >> 16);
|
||||
if (id < 0 || id >= c->ncontrols) return NULL;
|
||||
core_control_ent* e = &c->controls[id];
|
||||
if (!e->active || e->gen != gen) return NULL;
|
||||
return e;
|
||||
}
|
||||
|
||||
/* Capability for a DOWN command (unknown => deny). Destructive CMD_LIFECYCLE
|
||||
* (powerdown/reset, code in inln[0]) requires CAP_POWER, safe ones CAP_LIFECYCLE. */
|
||||
static uint32_t cap_for_down(const vmsig_event* ev) {
|
||||
switch (ev->kind) {
|
||||
case VMSIG_EV_CMD_INPUT:
|
||||
case VMSIG_EV_CMD_QUERY_INPUT: return VMSIG_CAP_INPUT; /* injection / held-key query */
|
||||
case VMSIG_EV_CMD_LIFECYCLE:
|
||||
return (ev->inln[0] == VMSIG_LIFE_POWERDOWN || ev->inln[0] == VMSIG_LIFE_RESET)
|
||||
? VMSIG_CAP_POWER : VMSIG_CAP_LIFECYCLE;
|
||||
case VMSIG_EV_CMD_VM: /* op in inln[0] (vmsig_vm_cmd, op<256) */
|
||||
return (ev->inln[0] == VMSIG_VMOP_RESET || ev->inln[0] == VMSIG_VMOP_POWERDOWN ||
|
||||
ev->inln[0] == VMSIG_VMOP_QUIT) ? VMSIG_CAP_POWER : VMSIG_CAP_VM;
|
||||
case VMSIG_EV_CMD_MEMWRITE: return VMSIG_CAP_MEMWRITE; /* atomic guest-memory write */
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
/* ===== Lease layer: classification and helpers ===== */
|
||||
|
||||
/* Lease class for a DOWN command. MIRRORS cap_for_down by destructiveness:
|
||||
* - CMD_INPUT -> INPUT;
|
||||
* - CMD_LIFECYCLE powerdown/reset -> POWER;
|
||||
* - CMD_VM reset/powerdown/quit -> POWER;
|
||||
* - everything else (safe/read-only/stream/query) -> -1 (not lease-gated).
|
||||
* CMD_LIFECYCLE and CMD_VM route to DIFFERENT adapters (INPUT/VMHOST) but share ONE
|
||||
* POWER class per endpoint: a single owner of VM destruction (intentional). */
|
||||
static int lease_class_for_down(const vmsig_event* ev) {
|
||||
switch (ev->kind) {
|
||||
case VMSIG_EV_CMD_INPUT:
|
||||
return VMSIG_LEASE_INPUT;
|
||||
case VMSIG_EV_CMD_LIFECYCLE:
|
||||
return (ev->inln[0] == VMSIG_LIFE_POWERDOWN || ev->inln[0] == VMSIG_LIFE_RESET)
|
||||
? VMSIG_LEASE_POWER : -1;
|
||||
case VMSIG_EV_CMD_VM:
|
||||
return (ev->inln[0] == VMSIG_VMOP_RESET || ev->inln[0] == VMSIG_VMOP_POWERDOWN ||
|
||||
ev->inln[0] == VMSIG_VMOP_QUIT) ? VMSIG_LEASE_POWER : -1;
|
||||
case VMSIG_EV_CMD_MEMWRITE:
|
||||
return VMSIG_LEASE_MEMWRITE; /* always destructive (write to shared guest memory) */
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
/* Cap required to lease a class (probing/holding a class without the cap is forbidden). */
|
||||
static uint32_t cap_for_lease_class(int cls) {
|
||||
return cls == VMSIG_LEASE_INPUT ? VMSIG_CAP_INPUT
|
||||
: cls == VMSIG_LEASE_POWER ? VMSIG_CAP_POWER
|
||||
: cls == VMSIG_LEASE_MEMWRITE ? VMSIG_CAP_MEMWRITE : 0u;
|
||||
}
|
||||
/* Source bitmask permitted to hold a lease class: mirrors the grant's source ceiling
|
||||
* (which grant_allows_down enforces on the command itself). Leasing is intercepted
|
||||
* BEFORE grant_allows_down, so source is checked HERE — otherwise a principal without
|
||||
* the required seam could hold someone else's cell (DoS), bypassing source_mask.
|
||||
* INPUT -> SRC_INPUT; POWER -> SRC_INPUT (lifecycle) OR SRC_VMHOST (vm) — one
|
||||
* destructive path suffices; MEMWRITE -> SRC_MEMCTX (lives on the MEMCTX seam). */
|
||||
static uint32_t source_mask_for_lease_class(int cls) {
|
||||
return cls == VMSIG_LEASE_INPUT ? (1u << VMSIG_SRC_INPUT)
|
||||
: cls == VMSIG_LEASE_POWER ? ((1u << VMSIG_SRC_INPUT) | (1u << VMSIG_SRC_VMHOST))
|
||||
: cls == VMSIG_LEASE_MEMWRITE ? (1u << VMSIG_SRC_MEMCTX) : 0u;
|
||||
}
|
||||
|
||||
/* Capability to receive an UP event: address-space context (MEMCTX/MEMCTX_INVALIDATED)
|
||||
* -> CAP_MEMCTX; cursor is screen data, available to a GUI observer (OBSERVE) OR an
|
||||
* input actor (INPUT); otherwise CAP_OBSERVE (frames/SEAM/generic). The grant_allows_up
|
||||
* gate checks intersection, so OBSERVE|INPUT means "either of the two". */
|
||||
static uint32_t cap_for_up(const vmsig_event* ev) {
|
||||
if (ev->kind == VMSIG_EV_CURSOR_STATE) return VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT;
|
||||
return (ev->source == VMSIG_SRC_MEMCTX) ? VMSIG_CAP_MEMCTX : VMSIG_CAP_OBSERVE;
|
||||
}
|
||||
static int grant_allows_down(const vmsig_grant* g, const vmsig_event* ev) {
|
||||
if (ev->endpoint >= 64) return 0; /* 64-bit mask: <=64 VMs/cores */
|
||||
if (!(g->endpoint_mask & (1ull << ev->endpoint))) return 0;
|
||||
if (!(g->source_mask & (1u << ev->source))) return 0; /* source ceiling on DOWN too */
|
||||
uint32_t need = cap_for_down(ev);
|
||||
return need && (g->cap_mask & need);
|
||||
}
|
||||
static int grant_allows_up(const vmsig_grant* g, const vmsig_event* ev) {
|
||||
if (ev->endpoint >= 64) return 0;
|
||||
if (!(g->cap_mask & cap_for_up(ev))) return 0;
|
||||
if (!(g->endpoint_mask & (1ull << ev->endpoint))) return 0;
|
||||
if (!(g->source_mask & (1u << ev->source))) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Find an adapter by (endpoint, source). NULL if none. Used by pump_down to route a
|
||||
* DOWN command to its adapter. */
|
||||
static core_adapter_ent* core_find_adapter(vmsig_core* c, uint32_t endpoint,
|
||||
vmsig_source source) {
|
||||
for (int i = 0; i < c->nadapters; i++) {
|
||||
core_adapter_ent* e = &c->adapters[i];
|
||||
if (e->ops->source == source && e->endpoint == endpoint) return e;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ===== Lease layer: grant/release/status/finalization/reclaim =====
|
||||
* Intercepted in core_emit_down BEFORE grant_allows_down (synchronous, not in ctx, does
|
||||
* not touch pending). Addressed UP replies to the initiator via core_emit_up
|
||||
* (origin+generation). */
|
||||
|
||||
/* Addressed UP reply to the initiator of a lease request. */
|
||||
static void lease_reply(vmsig_core* c, const vmsig_event* req, vmsig_kind kind,
|
||||
uint32_t cls, uint32_t reason) {
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = kind; up.source = VMSIG_SRC_CORE; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = VMSIG_PRIO_URGENT; up.endpoint = req->endpoint; up.origin = req->origin;
|
||||
vmsig_lease_req lr = { cls, reason };
|
||||
memcpy(up.inln, &lr, sizeof lr);
|
||||
core_emit_up(c, &up);
|
||||
}
|
||||
|
||||
/* Lease denial: audit (visibility of authorization/contention denials — capability/
|
||||
* endpoint enumeration via ACQUIRE is observable) + addressed LEASE_DENIED to initiator. */
|
||||
static void lease_deny(vmsig_core* c, const vmsig_event* req, uint32_t principal,
|
||||
uint32_t cls, uint32_t reason) {
|
||||
vmsig_audit a = { VMSIG_AUDIT_LEASE_DENIED, principal, req->endpoint, cls, reason };
|
||||
core_audit(c, &a);
|
||||
lease_reply(c, req, VMSIG_EV_LEASE_DENIED, cls, reason);
|
||||
}
|
||||
|
||||
/* Principal of the cell owner (for STATUS); 0 if owner is dead/absent. */
|
||||
static uint32_t lease_owner_principal(vmsig_core* c, uint32_t owner) {
|
||||
core_control_ent* e = origin_ctl(c, owner);
|
||||
return e ? e->grant.principal : 0u;
|
||||
}
|
||||
|
||||
/* IMPORTANT (layer isolation): signaling does NOT release held keys on lease loss and
|
||||
* does NOT track held state at all. held is the ACTUATOR's record (vmctl); release is the
|
||||
* control's decision. On owner change/reset the cell is simply freed; stuck keys remain
|
||||
* the control's concern (it can issue CMD_QUERY_INPUT and release its own while owner). */
|
||||
|
||||
void core_lease_acquire(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
|
||||
core_control_ent* e = &c->controls[ctl_id];
|
||||
uint32_t cls = ((const vmsig_lease_req*)ev->inln)->cls;
|
||||
uint32_t ep = ev->endpoint;
|
||||
|
||||
/* 1. validate class/endpoint/grant (default-deny; every denial is audited). */
|
||||
if (cls >= VMSIG_LEASE_CLASS_MAX) {
|
||||
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_BADCLASS);
|
||||
return;
|
||||
}
|
||||
if (ep >= 64 || !(e->grant.endpoint_mask & (1ull << ep))) {
|
||||
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOGRANT);
|
||||
return;
|
||||
}
|
||||
if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) {
|
||||
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOCAP);
|
||||
return;
|
||||
}
|
||||
/* source ceiling: holding a class without rights to its seam is forbidden (else a
|
||||
* DoS hold of someone else's cell bypassing source_mask, since interception is
|
||||
* BEFORE grant_allows_down). */
|
||||
if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) {
|
||||
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOGRANT);
|
||||
return;
|
||||
}
|
||||
|
||||
core_lease_cell* cell = &c->lease[ep][cls];
|
||||
uint32_t me = ev->origin;
|
||||
|
||||
/* 2a. free OR dead owner (origin_ctl==NULL) => take as if free. */
|
||||
core_control_ent* owner_e = cell->owner ? origin_ctl(c, cell->owner) : NULL;
|
||||
if (cell->owner == 0 || !owner_e) {
|
||||
cell->owner = me; cell->owner_prio = e->grant.arb_prio;
|
||||
vmsig_audit a = { VMSIG_AUDIT_LEASE_GRANTED, e->grant.principal, ep, cls, 0 };
|
||||
core_audit(c, &a);
|
||||
lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/* 2b. owner is the caller itself => idempotent GRANTED. */
|
||||
if (cell->owner == me) {
|
||||
lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/* 2c. held by a LIVE other owner => policy. incumbent is the live grant. */
|
||||
vmsig_arb_decision dec;
|
||||
if (c->arb_cb) {
|
||||
dec = c->arb_cb(c->arb_ud, ep, cls, &owner_e->grant, &e->grant);
|
||||
} else {
|
||||
dec = (e->grant.arb_prio > cell->owner_prio) ? VMSIG_ARB_PREEMPT : VMSIG_ARB_DENY;
|
||||
}
|
||||
if (dec != VMSIG_ARB_PREEMPT) {
|
||||
/* equal priority => owner keeps it (HELD); strictly lower => LOWER_PRIO. */
|
||||
uint32_t reason = (e->grant.arb_prio < cell->owner_prio)
|
||||
? VMSIG_LEASE_DENY_LOWER_PRIO : VMSIG_LEASE_DENY_HELD;
|
||||
lease_deny(c, ev, e->grant.principal, cls, reason);
|
||||
return;
|
||||
}
|
||||
|
||||
/* PREEMPT: notify the old owner (REVOKED), switch owner, grant to the new one.
|
||||
* signaling does NOT release held keys (that is the control's decision): the
|
||||
* ex-owner is responsible for its stuck keys; the new owner can query held
|
||||
* (CMD_QUERY_INPUT) and release them. */
|
||||
uint32_t old_owner = cell->owner;
|
||||
{
|
||||
vmsig_event rv;
|
||||
memset(&rv, 0, sizeof rv);
|
||||
rv.endpoint = ep; rv.origin = old_owner;
|
||||
lease_reply(c, &rv, VMSIG_EV_LEASE_REVOKED, cls, 0);
|
||||
}
|
||||
{
|
||||
vmsig_audit a = { VMSIG_AUDIT_LEASE_REVOKED, owner_e->grant.principal, ep, cls, 0 };
|
||||
core_audit(c, &a);
|
||||
}
|
||||
cell->owner = me; cell->owner_prio = e->grant.arb_prio;
|
||||
{
|
||||
vmsig_audit a = { VMSIG_AUDIT_LEASE_GRANTED, e->grant.principal, ep, cls, 0 };
|
||||
core_audit(c, &a);
|
||||
}
|
||||
lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
|
||||
}
|
||||
|
||||
/* Handle CMD_RELEASE from control slot `ctl_id`. Silently does nothing unless
 * the class and endpoint are valid, the grant covers the endpoint, capability
 * and source of the class, and the caller (ev->origin) is the current owner.
 * On success the cell is freed and an addressed LEASE_RELEASED is sent. Held
 * keys are not touched here — releasing them is the control's decision. */
void core_lease_release(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
    core_control_ent* e = &c->controls[ctl_id];
    const uint32_t ep = ev->endpoint;

    /* Copy the request out of the inline area rather than dereferencing a cast
     * pointer (no alignment/effective-type guarantee on the inline bytes). */
    vmsig_lease_req rq;
    memcpy(&rq, ev->inln, sizeof rq);
    const uint32_t cls = rq.cls;

    /* same gates as acquire, applied before any state is touched */
    if (cls >= VMSIG_LEASE_CLASS_MAX || ep >= 64) return;
    if (!(e->grant.endpoint_mask & (1ull << ep))) return;
    if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) return;
    if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) return;

    core_lease_cell* cell = &c->lease[ep][cls];
    if (cell->owner != ev->origin) return;            /* not the owner => no-op */

    cell->owner = 0;
    cell->owner_prio = 0;
    lease_reply(c, ev, VMSIG_EV_LEASE_RELEASED, cls, 0);
}
|
||||
|
||||
/* Handle CMD_LEASE_STATUS from control slot `ctl_id`. Answers only when the
 * caller's grant covers the endpoint, capability and source of the class
 * (otherwise busy-state / the owner's principal would leak); a request failing
 * a gate gets no reply. The reply is an addressed URGENT LEASE_STATUS carrying
 * {cls, busy, owner principal}; busy is 1 only while the recorded owner still
 * resolves to a live control. */
void core_lease_status(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
    core_control_ent* e = &c->controls[ctl_id];
    const uint32_t ep = ev->endpoint;

    /* Copy the request out of the inline area rather than dereferencing a cast
     * pointer (no alignment/effective-type guarantee on the inline bytes). */
    vmsig_lease_req rq;
    memcpy(&rq, ev->inln, sizeof rq);
    const uint32_t cls = rq.cls;

    if (cls >= VMSIG_LEASE_CLASS_MAX || ep >= 64) return;
    if (!(e->grant.endpoint_mask & (1ull << ep))) return;
    if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) return;
    if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) return;

    core_lease_cell* cell = &c->lease[ep][cls];
    /* one lookup serves both the busy flag and the principal */
    core_control_ent* holder = cell->owner ? origin_ctl(c, cell->owner) : NULL;
    const uint32_t busy = holder ? 1u : 0u;

    vmsig_event up;
    memset(&up, 0, sizeof up);
    up.kind = VMSIG_EV_LEASE_STATUS;
    up.source = VMSIG_SRC_CORE;
    up.dir = VMSIG_DIR_UP;
    up.prio = VMSIG_PRIO_URGENT;
    up.endpoint = ep;
    up.origin = ev->origin;

    vmsig_lease_status st = { cls, busy, holder ? holder->grant.principal : 0u };
    memcpy(up.inln, &st, sizeof st);
    core_emit_up(c, &up);
}
|
||||
|
||||
/* Free every lease cell owned by control slot `ctl_id` and audit each one as
 * LEASE_RECLAIMED. The owner key is built from the slot's current generation,
 * so this is meant to run while the slot is still live (core_reap calls it
 * before clearing `active`). Only the cells are freed; held keys are not
 * handled here. */
void core_lease_reap_control(vmsig_core* c, int ctl_id) {
    core_control_ent* leaving = &c->controls[ctl_id];
    const uint32_t key = origin_pack(ctl_id, leaving->gen);

    for (uint32_t ep = 0; ep < 64; ep++) {
        for (int cls = 0; cls < VMSIG_LEASE_CLASSES; cls++) {
            core_lease_cell* cell = &c->lease[ep][cls];
            if (cell->owner == key) {
                cell->owner = 0;
                cell->owner_prio = 0;

                vmsig_audit rec = { VMSIG_AUDIT_LEASE_RECLAIMED,
                                    leaving->grant.principal, ep, (uint32_t)cls, 0 };
                core_audit(c, &rec);
            }
        }
    }
}
|
||||
|
||||
/* DOWN emit from a control: enforcement against THIS control's grant. */
|
||||
int core_emit_down(void* token, vmsig_event* ev) {
|
||||
core_down_ctx* d = token;
|
||||
vmsig_core* c = d->core;
|
||||
core_control_ent* e = &c->controls[d->ctl_id];
|
||||
if (!e->active) { vmsig_payload_release(ev); return -1; }
|
||||
|
||||
/* Lease arbitration is intercepted HERE (synchronous, not in ctx, does not touch
|
||||
* pending). origin is needed for the addressed reply and as the owner key. */
|
||||
if (ev->kind == VMSIG_EV_CMD_ACQUIRE || ev->kind == VMSIG_EV_CMD_RELEASE ||
|
||||
ev->kind == VMSIG_EV_CMD_LEASE_STATUS) {
|
||||
ev->origin = origin_pack(d->ctl_id, e->gen);
|
||||
if (ev->kind == VMSIG_EV_CMD_ACQUIRE) core_lease_acquire(c, d->ctl_id, ev);
|
||||
else if (ev->kind == VMSIG_EV_CMD_RELEASE) core_lease_release(c, d->ctl_id, ev);
|
||||
else core_lease_status(c, d->ctl_id, ev);
|
||||
vmsig_payload_release(ev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!grant_allows_down(&e->grant, ev)) {
|
||||
vmsig_audit a = { VMSIG_AUDIT_DOWN_DENIED, e->grant.principal,
|
||||
ev->endpoint, (uint32_t)ev->kind, 0 };
|
||||
core_audit(c, &a); /* rejected by policy (endpoint/source/class) */
|
||||
vmsig_payload_release(ev);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Lease GATE: destruction is passed ONLY by the class's current owner.
|
||||
* A non-owner (or an owner whose slot is dead) => drop + audit LEASE_DENIED
|
||||
* (distinguishable from grant-deny). A free cell => also drop: destruction cannot be
|
||||
* used without an explicit lease. Safe/read-only commands (cls<0) are not gated. */
|
||||
{
|
||||
int cls = lease_class_for_down(ev);
|
||||
if (cls >= 0 && ev->endpoint < 64) {
|
||||
uint32_t me = origin_pack(d->ctl_id, e->gen);
|
||||
uint32_t owner = c->lease[ev->endpoint][cls].owner;
|
||||
if (owner != me || !origin_ctl(c, owner)) {
|
||||
vmsig_audit a = { VMSIG_AUDIT_LEASE_DENIED, e->grant.principal,
|
||||
ev->endpoint, (uint32_t)ev->kind, 0 };
|
||||
core_audit(c, &a);
|
||||
vmsig_payload_release(ev);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (e->pending >= VMSIG_DOWN_PENDING_MAX) { /* fairness/DoS: DOWN cap per poller */
|
||||
vmsig_audit a = { VMSIG_AUDIT_DOWN_DENIED, e->grant.principal,
|
||||
ev->endpoint, (uint32_t)ev->kind, 0 };
|
||||
core_audit(c, &a);
|
||||
vmsig_payload_release(ev);
|
||||
return -1;
|
||||
}
|
||||
ev->origin = origin_pack(d->ctl_id, e->gen); /* addressed reply + pending accounting */
|
||||
e->pending++;
|
||||
int r = vmsig_ctx_submit(c->ctx, VMSIG_DIR_DOWN, ev);
|
||||
if (r != 0) e->pending--; /* not enqueued (drop/err) */
|
||||
core_wake(c);
|
||||
return r;
|
||||
}
|
||||
|
||||
static int sub_match(const vmsig_sub* sub, const vmsig_event* ev) {
|
||||
if (sub->source_mask && !(sub->source_mask & (1u << ev->source))) return 0;
|
||||
if (ev->prio < sub->prio_min) return 0;
|
||||
if (sub->endpoint_mask) {
|
||||
if (ev->endpoint >= 64 || !(sub->endpoint_mask & (1ull << ev->endpoint))) return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ===== Address-space context (MEMCTX seam): multicast / retain-replay / epoch =====
|
||||
* The core vends ONE coherent datum per-endpoint: kcr3+locator paired with an RO-fd. A
|
||||
* MEMCTX trigger from the adapter => the core builds the AUTHORITATIVE locator from the
|
||||
* adapter snapshot (reg.describe) + stamps the epoch (single source of truth) and
|
||||
* distributes to qualified subscribers with re-sharing of the RO-fd. The same path serves
|
||||
* replay to a late subscriber. */
|
||||
|
||||
/* Build a MEMCTX delivery event for endpoint ep. segs are borrowed from the adapter's
|
||||
* buffer (delivery is synchronous on the loop thread; ownership is not transferred).
|
||||
* 1 — built. */
|
||||
static int core_memctx_build(vmsig_core* c, uint32_t ep, vmsig_event* ev) {
|
||||
if (ep >= 64) return 0;
|
||||
core_memctx_cell* cell = &c->memctx[ep];
|
||||
if (!cell->registered || !cell->reg.describe) return 0;
|
||||
|
||||
vmsig_memctx pod;
|
||||
memset(&pod, 0, sizeof pod);
|
||||
const vmsig_memseg* segs = NULL;
|
||||
uint32_t nseg = 0;
|
||||
cell->reg.describe(cell->reg.ctx, &pod, &segs, &nseg);
|
||||
pod.epoch = c->epoch[ep]; /* core stamps the epoch */
|
||||
pod.nseg = nseg;
|
||||
pod.flags |= VMSIG_MEMCTX_RDONLY; /* outward — always read-only */
|
||||
|
||||
memset(ev, 0, sizeof *ev);
|
||||
ev->kind = VMSIG_EV_MEMCTX; ev->source = VMSIG_SRC_MEMCTX; ev->dir = VMSIG_DIR_UP;
|
||||
ev->prio = VMSIG_PRIO_NORMAL; ev->endpoint = ep;
|
||||
memcpy(ev->inln, &pod, sizeof pod);
|
||||
ev->payload.data = (void*)segs; /* borrowed: owner is the adapter */
|
||||
ev->payload.len = (size_t)nseg * sizeof(vmsig_memseg);
|
||||
ev->payload.codec = VMSIG_CODEC_MEMCTX;
|
||||
ev->payload.flags = VMSIG_PL_BORROWED;
|
||||
ev->payload.release = NULL;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Deliver MEMCTX to one qualified control: fresh RO-fd from reg.share_fd
|
||||
* (socket -> cmsg, in-proc -> direct int), attach_memctx, close fd (the core does not own
|
||||
* the fd). On success — audit MEMCTX_GRANTED. */
|
||||
static void core_memctx_deliver_one(vmsig_core* c, core_memctx_cell* cell,
|
||||
core_control_ent* e, const vmsig_event* ev) {
|
||||
if (!e->ops->attach_memctx) return; /* control does not accept MEMCTX */
|
||||
int fd = cell->reg.share_fd ? cell->reg.share_fd(cell->reg.ctx) : -1;
|
||||
int r = e->ops->attach_memctx(e->ctl, ev, fd);
|
||||
if (fd >= 0) close(fd); /* the core does not own the ro-fd */
|
||||
if (r == 0) {
|
||||
vmsig_audit a = { VMSIG_AUDIT_MEMCTX_GRANTED, e->grant.principal,
|
||||
ev->endpoint, 0, 0 };
|
||||
core_audit(c, &a);
|
||||
}
|
||||
}
|
||||
|
||||
/* React to a MEMCTX trigger: build the delivery event for the trigger's
 * endpoint, mark the endpoint's context as published for the current epoch,
 * and deliver it to every active control whose grant and subscription both
 * admit the event. Does nothing for an out-of-range or unregistered endpoint,
 * or when the event cannot be built. */
void core_memctx_route(vmsig_core* c, const vmsig_event* trigger) {
    const uint32_t ep = trigger->endpoint;
    if (ep >= 64) return;

    core_memctx_cell* cell = &c->memctx[ep];
    if (!cell->registered) return;

    vmsig_event built;
    if (!core_memctx_build(c, ep, &built)) return;

    cell->valid = 1;                         /* context published for this epoch */
    cell->epoch = c->epoch[ep];

    for (int idx = 0; idx < c->ncontrols; idx++) {
        core_control_ent* ctl = &c->controls[idx];
        if (!ctl->active) continue;
        if (!grant_allows_up(&ctl->grant, &built)) continue;
        if (!sub_match(&ctl->sub, &built)) continue;
        core_memctx_deliver_one(c, cell, ctl, &built);
    }
}
|
||||
|
||||
/* Replay to one control (slot `ctl_id`) the MEMCTX of every endpoint that is
 * registered and currently marked valid, subject to the control's grant and
 * subscription. An out-of-range or inactive slot is ignored. */
void core_memctx_replay(vmsig_core* c, int ctl_id) {
    if (ctl_id < 0 || ctl_id >= c->ncontrols) return;

    core_control_ent* ctl = &c->controls[ctl_id];
    if (!ctl->active) return;

    for (uint32_t ep = 0; ep < 64; ep++) {
        core_memctx_cell* cell = &c->memctx[ep];
        if (!(cell->registered && cell->valid)) continue;

        vmsig_event built;
        if (!core_memctx_build(c, ep, &built)) continue;

        if (!grant_allows_up(&ctl->grant, &built)) continue;
        if (!sub_match(&ctl->sub, &built)) continue;
        core_memctx_deliver_one(c, cell, ctl, &built);
    }
}
|
||||
|
||||
/* Advance the epoch of `endpoint` and invalidate its address-space context:
 *   1. increment c->epoch[endpoint] and clear the cell's valid flag, so the old
 *      context is no longer replayed;
 *   2. emit an unaddressed URGENT MEMCTX_INVALIDATED {endpoint, new epoch};
 *   3. when the endpoint is registered with an invalidate callback, call it
 *      with the new epoch.
 * Out-of-range endpoints are ignored. */
void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
    if (endpoint >= 64) return;

    c->epoch[endpoint]++;
    core_memctx_cell* cell = &c->memctx[endpoint];
    cell->valid = 0;

    const vmsig_memctx_inv note = { endpoint, c->epoch[endpoint] };
    vmsig_event out;
    memset(&out, 0, sizeof out);
    out.kind     = VMSIG_EV_MEMCTX_INVALIDATED;
    out.source   = VMSIG_SRC_MEMCTX;
    out.dir      = VMSIG_DIR_UP;
    out.prio     = VMSIG_PRIO_URGENT;
    out.endpoint = endpoint;
    memcpy(out.inln, &note, sizeof note);
    core_emit_up(c, &out);                   /* origin 0 => broadcast through the UP gates */

    if (!cell->registered) return;
    if (cell->reg.invalidate)
        cell->reg.invalidate(cell->reg.ctx, c->epoch[endpoint]);
}
|
||||
|
||||
/* UP: drain the context queue and dispatch to subscribed controls */
|
||||
static void pump_up(vmsig_core* c) {
|
||||
vmsig_event ev;
|
||||
while (vmsig_ctx_next(c->ctx, VMSIG_DIR_UP, &ev) == 1) {
|
||||
if (ev.kind == VMSIG_EV_MEMCTX) {
|
||||
/* Context trigger: the core builds the authoritative locator (adapter snapshot
|
||||
* + epoch stamp) and distributes to qualified holders with re-sharing of the
|
||||
* RO-fd. The trigger itself is NOT delivered as an ordinary event. */
|
||||
core_memctx_route(c, &ev);
|
||||
vmsig_payload_release(&ev); /* inline trigger (release=NULL) — harmless */
|
||||
continue;
|
||||
}
|
||||
if (ev.kind == VMSIG_EV_VM_LIFECYCLE && ev.origin == 0) {
|
||||
/* Epoch-transition observation: a destructive async transition (VMHOST
|
||||
* broadcast) invalidates the address-space context. NOT continue — VM_LIFECYCLE
|
||||
* still goes to subscribers below via the normal broadcast. */
|
||||
const vmsig_vm_state* vs = (const vmsig_vm_state*)ev.inln;
|
||||
if (vs->state == VMSIG_VM_RESET || vs->state == VMSIG_VM_POWERDOWN ||
|
||||
vs->state == VMSIG_VM_SHUTDOWN)
|
||||
core_epoch_bump(c, ev.endpoint);
|
||||
}
|
||||
if (ev.origin) {
|
||||
/* addressed reply ONLY to the initiator (origin+generation). The command was
|
||||
* already authorized by the grant => we deliver the reply without re-check; if
|
||||
* the initiator is gone/slot reused — we drop (private data, not broadcast). */
|
||||
core_control_ent* e = origin_ctl(c, ev.origin);
|
||||
if (e && e->ops->deliver) e->ops->deliver(e->ctl, &ev);
|
||||
} else {
|
||||
/* unaddressed event — broadcast; effective = grant ∩ sub */
|
||||
for (int i = 0; i < c->ncontrols; i++) {
|
||||
core_control_ent* e = &c->controls[i];
|
||||
if (!e->active) continue;
|
||||
if (grant_allows_up(&e->grant, &ev) && sub_match(&e->sub, &ev) && e->ops->deliver)
|
||||
e->ops->deliver(e->ctl, &ev);
|
||||
}
|
||||
}
|
||||
vmsig_payload_release(&ev);
|
||||
}
|
||||
}
|
||||
|
||||
/* DOWN: drain the queue and route the command to the adapter (source+endpoint) */
|
||||
static void pump_down(vmsig_core* c) {
|
||||
vmsig_event ev;
|
||||
while (vmsig_ctx_next(c->ctx, VMSIG_DIR_DOWN, &ev) == 1) {
|
||||
core_control_ent* oe = origin_ctl(c, ev.origin); /* command has left ctx */
|
||||
if (oe && oe->pending) oe->pending--; /* THE ONLY decrement */
|
||||
|
||||
/* In-flight fencing: destruction whose origin is NO LONGER the class owner (lease
|
||||
* lost between the emit_down gate and dequeue) is dropped BEFORE actuation. Does
|
||||
* NOT finalize (finalization is done by acquire/reap) — else a double key-up.
|
||||
* pending is NOT touched here (already decremented above). */
|
||||
int cls = lease_class_for_down(&ev);
|
||||
if (cls >= 0 && ev.endpoint < 64 && c->lease[ev.endpoint][cls].owner != ev.origin) {
|
||||
/* dropping destruction that lost the lease is observable (origin owner's principal). */
|
||||
vmsig_audit a = { VMSIG_AUDIT_LEASE_DENIED, lease_owner_principal(c, ev.origin),
|
||||
ev.endpoint, (uint32_t)ev.kind, (uint32_t)cls };
|
||||
core_audit(c, &a);
|
||||
vmsig_payload_release(&ev);
|
||||
continue;
|
||||
}
|
||||
|
||||
core_adapter_ent* e = core_find_adapter(c, ev.endpoint, ev.source);
|
||||
if (e && e->ops->submit) e->ops->submit(e->a, &ev);
|
||||
vmsig_payload_release(&ev);
|
||||
}
|
||||
}
|
||||
|
||||
/* Deferred reap of detached controls: after the batch (safe — not inside their own
|
||||
* on_readable). epoll DEL + mark slot dead + ops->close. */
|
||||
static void core_reap(vmsig_core* c) {
|
||||
for (int i = 0; i < c->ncontrols; i++) {
|
||||
core_control_ent* e = &c->controls[i];
|
||||
if (!e->reap || !e->active) continue;
|
||||
if (e->slot) {
|
||||
epoll_ctl(c->epfd, EPOLL_CTL_DEL, e->slot->fd, NULL);
|
||||
e->slot->role = SLOT_DEAD;
|
||||
}
|
||||
core_lease_reap_control(c, i); /* return leases + finalize held BEFORE active=0 */
|
||||
if (e->ops->close) e->ops->close(e->ctl);
|
||||
e->active = 0;
|
||||
e->reap = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Run the event loop until the stop flag is observed. Each iteration blocks in
 * epoll_wait with no timeout, dispatches the ready slots by role, then pumps
 * UP, pumps DOWN and reaps detached controls.
 * Returns 0 after a stop request, -1 when c is NULL or epoll_wait fails with
 * anything other than EINTR. */
int vmsig_core_run(vmsig_core* c) {
    if (c == NULL) return -1;

    struct epoll_event ready[VMSIG_MAX_EVENTS];
    for (;;) {
        if (__atomic_load_n(&c->stopping, __ATOMIC_ACQUIRE)) break;

        const int nready = epoll_wait(c->epfd, ready, VMSIG_MAX_EVENTS, -1);
        if (nready < 0) {
            if (errno != EINTR) return -1;
            continue;                            /* interrupted: re-check the stop flag */
        }

        for (int k = 0; k < nready; k++) {
            core_slot* slot = (core_slot*)ready[k].data.ptr;
            switch (slot->role) {
            case SLOT_WAKEUP:                    /* stop flag is checked at loop top */
            case SLOT_CTX_TIMING:
                drain_counter_fd(slot->fd);
                break;
            case SLOT_ADAPTER:
                if (slot->ops->on_readiness)
                    slot->ops->on_readiness(slot->adapter, slot->cookie, ready[k].events);
                break;
            case SLOT_CONTROL:
                if (slot->cops->on_readable)
                    slot->cops->on_readable(slot->ctl);
                break;
            case SLOT_SOURCE:
                if (slot->on_source)
                    slot->on_source(slot->source_user, ready[k].events);
                break;
            case SLOT_DEAD:
                break;                           /* detached — ignore */
            }
        }

        pump_up(c);
        pump_down(c);
        core_reap(c);
    }
    return 0;
}
|
||||
|
||||
/* Request loop shutdown: set the stop flag with release ordering (it is read
 * with acquire ordering by vmsig_core_run) and wake the loop so it notices.
 * A NULL core is ignored. */
void vmsig_core_stop(vmsig_core* c) {
    if (c != NULL) {
        __atomic_store_n(&c->stopping, 1, __ATOMIC_RELEASE);
        core_wake(c);
    }
}
|
||||
+203
@@ -0,0 +1,203 @@
|
||||
/* ctx.c — transfer context: priority, ordering, protocol timing.
|
||||
* This is the SISC-critical seam. No behavioral timing here: commands arrive
|
||||
* already decided by control; the context only orders and paces them. */
|
||||
#include "ctx_internal.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/timerfd.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* Default depth ceiling for a single band (per source,dir) when no policy is set. */
|
||||
#define VMSIG_CTX_DEFAULT_INFLIGHT 4096
|
||||
|
||||
static uint64_t now_ns(void) {
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
|
||||
}
|
||||
|
||||
/* ---- node recycling (free-list under the shared mutex) ------------------- */
|
||||
/* Obtain a queue node: pop the head of the context's free-list when it has one,
 * otherwise malloc a fresh (uninitialised) node. Returns NULL only when malloc
 * fails. */
static ev_node* node_get(vmsig_ctx* c) {
    ev_node* recycled = c->freelist;
    if (recycled == NULL) return malloc(sizeof *recycled);
    c->freelist = recycled->next;
    return recycled;
}
|
||||
/* Return a node to the context's free-list (pushed at the head). The node is
 * not freed and its event contents are left as they are. */
static void node_put(vmsig_ctx* c, ev_node* n) {
    ev_node* old_head = c->freelist;
    c->freelist = n;
    n->next = old_head;
}
|
||||
|
||||
vmsig_ctx* vmsig_ctx_new(void) {
|
||||
vmsig_ctx* c = calloc(1, sizeof *c);
|
||||
if (!c) return NULL;
|
||||
if (pthread_mutex_init(&c->lock, NULL) != 0) { free(c); return NULL; }
|
||||
for (int d = 0; d < 2; d++) {
|
||||
c->dir[d].timing_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
|
||||
if (c->dir[d].timing_fd < 0) {
|
||||
for (int k = 0; k < d; k++) close(c->dir[k].timing_fd);
|
||||
pthread_mutex_destroy(&c->lock);
|
||||
free(c);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/* Destroy a transfer context. For each direction: release the payload of and
 * free every queued node in every priority band, then close the timing fd.
 * Afterwards free the recycled nodes on the free-list (they carry no payload),
 * destroy the mutex and free the context. NULL is accepted and ignored. */
void vmsig_ctx_free(vmsig_ctx* c) {
    if (c == NULL) return;

    for (int d = 0; d < 2; d++) {
        for (int p = 0; p < VMSIG_PRIO_MAX; p++) {
            ev_node* cur = c->dir[d].band[p].head;
            while (cur != NULL) {
                ev_node* following = cur->next;
                vmsig_payload_release(&cur->ev);
                free(cur);
                cur = following;
            }
        }
        if (c->dir[d].timing_fd >= 0) close(c->dir[d].timing_fd);
    }

    ev_node* spare = c->freelist;
    while (spare != NULL) {
        ev_node* following = spare->next;
        free(spare);
        spare = following;
    }

    pthread_mutex_destroy(&c->lock);
    free(c);
}
|
||||
|
||||
int vmsig_ctx_set_policy(vmsig_ctx* c, vmsig_source src, vmsig_dir dir,
|
||||
vmsig_prio default_prio, const vmsig_timing* t) {
|
||||
if (!c || src >= VMSIG_SRC_MAX || dir > VMSIG_DIR_DOWN) return -1;
|
||||
pthread_mutex_lock(&c->lock);
|
||||
ctx_policy* pol = &c->policy[src][dir];
|
||||
pol->default_prio = default_prio;
|
||||
if (t) pol->timing = *t; else memset(&pol->timing, 0, sizeof pol->timing);
|
||||
pol->policy_set = 1;
|
||||
pthread_mutex_unlock(&c->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Append node `n` at the tail of band `b` and bump the band's count. */
static void band_push_tail(ev_band* b, ev_node* n) {
    n->next = NULL;
    if (b->tail == NULL) {
        b->head = n;                 /* band was empty */
    } else {
        b->tail->next = n;
    }
    b->tail = n;
    b->count += 1;
}
|
||||
|
||||
/* Queue an event on one direction of the transfer context.
 *
 * Under the context lock: the effective priority is the larger of the emitter's
 * request and the source policy's default (clamped to the top band); the event
 * gets the next sequence number and, if it has none, a timestamp.
 *
 * Returns:
 *    0  a new queue entry was created; the queue now owns the payload
 *    1  no new entry: the event replaced a queued one (coalescing), or it was
 *       dropped by backpressure and its payload released
 *   -1  invalid arguments (nothing touched), or node allocation failed (the
 *       payload is released, as on every other non-enqueue path)
 */
int vmsig_ctx_submit(vmsig_ctx* c, vmsig_dir dir, vmsig_event* ev) {
    if (!c || !ev || dir > VMSIG_DIR_DOWN) return -1;
    vmsig_source src = ev->source < VMSIG_SRC_MAX ? ev->source : VMSIG_SRC_NONE;

    pthread_mutex_lock(&c->lock);
    ctx_policy* pol = &c->policy[src][dir];

    /* effective priority = max(policy default, emitter request) */
    vmsig_prio eff = ev->prio > pol->default_prio ? ev->prio : pol->default_prio;
    if (eff >= VMSIG_PRIO_MAX) eff = VMSIG_PRIO_MAX - 1;

    ev->seq = ++c->seq;
    if (ev->ts_ns == 0) ev->ts_ns = now_ns();
    ev->prio = eff;

    ev_band* band = &c->dir[dir].band[eff];

    /* Coalescing (newest wins, queue position kept): collapse onto a queued
     * event of the same kind and endpoint AND the same origin. Matching the
     * origin keeps an event addressed to one initiator from overwriting one
     * addressed to another, which would lose the first initiator's event and
     * leave its per-control pending count without a matching dequeue. */
    if (pol->timing.coalesce_ns) {
        for (ev_node* n = band->head; n; n = n->next) {
            if (n->ev.kind == ev->kind && n->ev.endpoint == ev->endpoint &&
                n->ev.origin == ev->origin) {
                vmsig_payload_release(&n->ev);
                uint32_t keep_seq = n->ev.seq;   /* keep position in the order */
                n->ev = *ev;
                n->ev.seq = keep_seq;
                pthread_mutex_unlock(&c->lock);
                return 1;
            }
        }
    }

    /* Backpressure: band depth is bounded. With no max_inflight configured a
     * built-in ceiling with drop-newest applies, so a flood cannot grow the
     * queue without bound. */
    uint32_t cap = pol->timing.max_inflight ? pol->timing.max_inflight
                                            : VMSIG_CTX_DEFAULT_INFLIGHT;
    uint8_t dp = pol->timing.max_inflight ? pol->timing.drop_policy
                                          : VMSIG_DROP_NEWEST;
    if (band->count >= (int)cap) {
        if (dp == VMSIG_DROP_OLDEST) {
            ev_node* old = band->head;           /* drop the oldest */
            if (old) {
                band->head = old->next;
                if (!band->head) band->tail = NULL;
                band->count--;
                vmsig_payload_release(&old->ev);
                node_put(c, old);
            }
        } else {
            /* NEWEST / BLOCK (the loop must not block) — drop the incoming event */
            vmsig_payload_release(ev);
            pthread_mutex_unlock(&c->lock);
            return 1;
        }
    }

    ev_node* node = node_get(c);
    if (!node) {
        /* Not enqueued: release the payload here so it is not leaked. The
         * callers visible in this codebase (core_emit_up / core_emit_down)
         * pass the result straight through and never release after submit. */
        vmsig_payload_release(ev);
        pthread_mutex_unlock(&c->lock);
        return -1;
    }
    node->ev = *ev;                              /* take ownership of the payload */
    band_push_tail(band, node);
    pthread_mutex_unlock(&c->lock);
    return 0;
}
|
||||
|
||||
/* Pop the next deliverable event of direction `dir` into `*out`.
 *
 * Bands are scanned from the highest priority index down to 0, each band from
 * head to tail. The first node whose (source, dir) policy is outside its
 * min_gap window is unlinked, copied to `*out` and handed back via node_put().
 * A node still inside its gap is skipped, so it does not hold back due events
 * queued behind it.
 *
 * Returns 1 when an event was written (payload ownership moves to the caller),
 * 0 when nothing is due (if paced nodes are waiting, the direction's timerfd is
 * armed for the shortest remaining gap), -1 on invalid arguments. */
int vmsig_ctx_next(vmsig_ctx* c, vmsig_dir dir, vmsig_event* out) {
    if (!c || !out || dir > VMSIG_DIR_DOWN) return -1;

    pthread_mutex_lock(&c->lock);

    ctx_dir* side = &c->dir[dir];
    const uint64_t now = now_ns();
    uint64_t nearest = 0;   /* shortest remaining gap among the paced nodes seen */
    int paced_seen = 0;     /* non-zero once `nearest` holds a real value */

    for (int prio = VMSIG_PRIO_MAX - 1; prio >= 0; prio--) {
        ev_band* band = &side->band[prio];
        ev_node* before = NULL;

        for (ev_node* cur = band->head; cur; before = cur, cur = cur->next) {
            /* out-of-range sources share the VMSIG_SRC_NONE policy slot */
            vmsig_source src = VMSIG_SRC_NONE;
            if (cur->ev.source < VMSIG_SRC_MAX) src = cur->ev.source;
            ctx_policy* pol = &c->policy[src][dir];

            /* wait == 0 means "due"; otherwise it is the time left in the gap */
            uint64_t wait = 0;
            if (pol->timing.min_gap_ns) {
                uint64_t ready_at = pol->last_emit_ns + pol->timing.min_gap_ns;
                if (now < ready_at) wait = ready_at - now;
            }

            if (wait != 0) {
                if (!paced_seen || wait < nearest) {
                    nearest = wait;
                    paced_seen = 1;
                }
                continue;
            }

            /* unlink `cur`, fixing head/tail when it sat at either end */
            if (before) {
                before->next = cur->next;
            } else {
                band->head = cur->next;
            }
            if (band->tail == cur) band->tail = before;
            band->count--;

            pol->last_emit_ns = now;
            *out = cur->ev;            /* payload ownership -> caller */
            node_put(c, cur);
            pthread_mutex_unlock(&c->lock);
            return 1;
        }
    }

    /* nothing due: arm the timing fd for the nearest due time, if anyone waits */
    if (paced_seen) {
        struct itimerspec spec;
        memset(&spec, 0, sizeof spec);
        spec.it_value.tv_sec  = (time_t)(nearest / 1000000000ull);
        spec.it_value.tv_nsec = (long)(nearest % 1000000000ull);
        /* an all-zero it_value would disarm the timer instead of firing it */
        if (spec.it_value.tv_sec == 0 && spec.it_value.tv_nsec == 0) spec.it_value.tv_nsec = 1;
        timerfd_settime(side->timing_fd, 0, &spec, NULL);
    }

    pthread_mutex_unlock(&c->lock);
    return 0;
}
|
||||
|
||||
/* Return the pacing timerfd stored for direction `dir`, or -1 when `c` is
 * NULL or `dir` is above VMSIG_DIR_DOWN. */
int vmsig_ctx_timing_fd(vmsig_ctx* c, vmsig_dir dir) {
    if (c == NULL) return -1;
    if (dir > VMSIG_DIR_DOWN) return -1;

    const ctx_dir* side = &c->dir[dir];
    return side->timing_fd;
}
|
||||
@@ -0,0 +1,41 @@
|
||||
#ifndef VMSIG_CTX_INTERNAL_H
|
||||
#define VMSIG_CTX_INTERNAL_H
|
||||
#include "vmsig_ctx.h"
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Private internals of the transfer context. Priority bands are simple
 * linked FIFOs (one node per event; released nodes are recycled through the
 * context's freelist). */
|
||||
|
||||
typedef struct ev_node {
|
||||
vmsig_event ev;
|
||||
struct ev_node* next;
|
||||
} ev_node;
|
||||
|
||||
typedef struct {
|
||||
ev_node* head;
|
||||
ev_node* tail;
|
||||
int count;
|
||||
} ev_band;
|
||||
|
||||
typedef struct {
|
||||
vmsig_prio default_prio;
|
||||
vmsig_timing timing;
|
||||
uint64_t last_emit_ns; /* for protocol min_gap (per source,dir) */
|
||||
int policy_set;
|
||||
} ctx_policy;
|
||||
|
||||
typedef struct {
|
||||
ev_band band[VMSIG_PRIO_MAX]; /* 4 priority bands */
|
||||
int timing_fd; /* pacing timerfd (created in ctx_new) */
|
||||
} ctx_dir;
|
||||
|
||||
struct vmsig_ctx {
|
||||
pthread_mutex_t lock;
|
||||
uint32_t seq;
|
||||
ev_node* freelist; /* ev_node recycling */
|
||||
ctx_dir dir[2]; /* indexed by VMSIG_DIR_UP/DOWN */
|
||||
ctx_policy policy[VMSIG_SRC_MAX][2]; /* [source][dir] */
|
||||
};
|
||||
|
||||
#endif /* VMSIG_CTX_INTERNAL_H */
|
||||
@@ -0,0 +1,95 @@
|
||||
/* test_authz.c — authorization to receive the coherent address-space context (MEMCTX).
|
||||
* Several co-resident controls on one endpoint; MEMCTX is multicast upward and reaches
|
||||
* ONLY the qualified holder. We check the least-privilege matrix:
|
||||
* GOOD (CAP_MEMCTX + source_mask MEMCTX + endpoint) -> receives MEMCTX;
|
||||
* NOCAP (CAP_OBSERVE, no MEMCTX) -> does NOT receive (cap);
|
||||
* NOSRC (CAP_MEMCTX, source_mask without MEMCTX bit) -> does NOT receive (source_mask);
|
||||
* NOEP (CAP_MEMCTX, but endpoint outside the mask) -> does NOT receive (endpoint).
|
||||
* A vmhost stub provides watchdog ticks (VM_LIFECYCLE) — a termination guarantee if
|
||||
* MEMCTX somehow never arrives (then the asserts catch its absence). In-proc, under ASAN. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Sticky failure flag; main() turns it into the process exit status. */
static int g_fail = 0;

/* Soft assertion: when `cond` is false, print `msg` and latch g_fail.
 * Execution continues so one run reports every failed expectation. */
#define CHECK(cond, msg)                      \
    do {                                      \
        if (cond) {                           \
            break;                            \
        }                                     \
        printf(" FAIL: %s\n", (msg));         \
        g_fail = 1;                           \
    } while (0)

/* Endpoint index used by every adapter and grant in this test. */
#define EP 0u
|
||||
|
||||
typedef struct {
|
||||
vmsig_core* core;
|
||||
const char* tag;
|
||||
int memctx; /* how many MEMCTX this control received */
|
||||
int ticks; /* watchdog: VM_LIFECYCLE (only on GOOD) */
|
||||
int is_good; /* GOOD stops the loop on the first MEMCTX */
|
||||
} holder;
|
||||
|
||||
static int on_ev(void* user, const vmsig_event* ev) {
|
||||
holder* h = user;
|
||||
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
|
||||
h->ticks++;
|
||||
if (h->is_good && h->ticks > 20) vmsig_core_stop(h->core); /* failsafe */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int on_memctx(void* user, const vmsig_event* ev, int fd) {
|
||||
holder* h = user;
|
||||
(void)ev; (void)fd;
|
||||
h->memctx++;
|
||||
if (h->is_good) vmsig_core_stop(h->core);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Create an in-proc control bound to `h` and register it on `core` under a
 * grant with principal 7 and the given capability, source and endpoint masks. */
static void add_holder(vmsig_core* core, holder* h, uint32_t cap,
                       uint32_t source_mask, uint64_t endpoint_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user      = h;
    cfg.on_event  = on_ev;
    cfg.on_memctx = on_memctx;
    void* handle = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.principal     = 7;
    grant.cap_mask      = cap;
    grant.source_mask   = source_mask;
    grant.endpoint_mask = endpoint_mask;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), handle, &grant);
}
|
||||
|
||||
int main(void) {
|
||||
printf("test_authz (memctx least-privilege)\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder good = { core, "GOOD", 0, 0, 1 };
|
||||
holder nocap = { core, "NOCAP", 0, 0, 0 };
|
||||
holder nosrc = { core, "NOSRC", 0, 0, 0 };
|
||||
holder noep = { core, "NOEP", 0, 0, 0 };
|
||||
|
||||
/* GOOD: CAP_MEMCTX (+OBSERVE for watchdog lifecycle ticks), source MEMCTX, ep0 -> receives. */
|
||||
add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << EP);
|
||||
/* NOCAP: OBSERVE only (no CAP_MEMCTX) -> deny by cap. */
|
||||
add_holder(core, &nocap, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << EP);
|
||||
/* NOSRC: CAP_MEMCTX, but source_mask without the MEMCTX bit -> deny by source_mask. */
|
||||
add_holder(core, &nosrc, VMSIG_CAP_MEMCTX, ~(1u << VMSIG_SRC_MEMCTX), 1ull << EP);
|
||||
/* NOEP: CAP_MEMCTX, source ok, but endpoint outside the mask (ep1) -> deny by endpoint. */
|
||||
add_holder(core, &noep, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 1);
|
||||
|
||||
/* watchdog lifecycle ticks + address-space context on one endpoint (stub). */
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, EP) >= 0, "add vmhost adapter");
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, EP) >= 0, "add memctx adapter");
|
||||
|
||||
int rc = vmsig_core_run(core);
|
||||
printf(" rc=%d GOOD.memctx=%d NOCAP=%d NOSRC=%d NOEP=%d\n",
|
||||
rc, good.memctx, nocap.memctx, nosrc.memctx, noep.memctx);
|
||||
|
||||
CHECK(good.memctx >= 1, "GOOD receives MEMCTX (cap+source+endpoint)");
|
||||
CHECK(nocap.memctx == 0, "NOCAP does not receive (no CAP_MEMCTX)");
|
||||
CHECK(nosrc.memctx == 0, "NOSRC does not receive (source_mask without MEMCTX)");
|
||||
CHECK(noep.memctx == 0, "NOEP does not receive (endpoint outside mask)");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
printf("authz tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
/* test_ctx.c — unit tests for the transfer context (public vmsig_ctx_* API):
|
||||
* priority->seq, coalescing, backpressure (drop oldest/newest), protocol
|
||||
* pacing via timing-fd. Links against libvmsig; run through ctest. */
|
||||
#include "vmsig.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <poll.h>
|
||||
|
||||
/* Sticky failure flag; main() turns it into the process exit status. */
static int g_fail = 0;

/* Soft assertion: when `cond` is false, print `msg` and latch g_fail.
 * Execution continues so one run reports every failed expectation. */
#define CHECK(cond, msg)                      \
    do {                                      \
        if (cond) {                           \
            break;                            \
        }                                     \
        printf(" FAIL: %s\n", (msg));         \
        g_fail = 1;                           \
    } while (0)
|
||||
|
||||
/* Submit a DOWN event with the given prio/kind/endpoint/corr. */
|
||||
/* Build a zeroed event with source VMSIG_SRC_NONE, direction DOWN and the
 * given priority, kind, endpoint and correlation id, then submit it.
 * Returns whatever vmsig_ctx_submit() returns. */
static int put(vmsig_ctx* c, vmsig_prio p, vmsig_kind k, uint32_t ep, uint32_t corr) {
    vmsig_event ev;
    memset(&ev, 0, sizeof ev);

    ev.dir      = VMSIG_DIR_DOWN;
    ev.source   = VMSIG_SRC_NONE;
    ev.kind     = k;
    ev.prio     = p;
    ev.endpoint = ep;
    ev.corr     = corr;

    return vmsig_ctx_submit(c, VMSIG_DIR_DOWN, &ev);
}
|
||||
|
||||
/* ---- 1. priority first, then FIFO by seq within a band ------------------- */
|
||||
static void test_priority_seq(void) {
|
||||
printf("test_priority_seq\n");
|
||||
vmsig_ctx* c = vmsig_ctx_new();
|
||||
put(c, VMSIG_PRIO_BULK, VMSIG_EV_CMD_INPUT, 0, 0xA);
|
||||
put(c, VMSIG_PRIO_URGENT, VMSIG_EV_CMD_INPUT, 0, 0xB);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 0xC);
|
||||
put(c, VMSIG_PRIO_BULK, VMSIG_EV_CMD_INPUT, 0, 0xD);
|
||||
put(c, VMSIG_PRIO_URGENT, VMSIG_EV_CMD_INPUT, 0, 0xE);
|
||||
uint32_t want[5] = { 0xB, 0xE, 0xC, 0xA, 0xD }; /* URGENT(seq) -> NORMAL -> BULK(seq) */
|
||||
vmsig_event o;
|
||||
for (int i = 0; i < 5; i++) {
|
||||
int r = vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o);
|
||||
CHECK(r == 1, "next must return an event");
|
||||
CHECK(o.corr == want[i], "priority->seq order");
|
||||
}
|
||||
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "queue drained");
|
||||
vmsig_ctx_free(c);
|
||||
}
|
||||
|
||||
/* ---- 2. coalescing: a burst of one kind+endpoint collapses (newest wins) - */
|
||||
static void test_coalesce(void) {
|
||||
printf("test_coalesce\n");
|
||||
vmsig_ctx* c = vmsig_ctx_new();
|
||||
vmsig_timing t; memset(&t, 0, sizeof t); t.coalesce_ns = 1;
|
||||
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_VM, 0, 0x11);
|
||||
int r2 = put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_VM, 0, 0x22); /* should coalesce */
|
||||
CHECK(r2 == 1, "second submit coalesced (=1)");
|
||||
vmsig_event o;
|
||||
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "one event after coalescing");
|
||||
CHECK(o.corr == 0x22, "newest data after coalescing");
|
||||
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "nothing more");
|
||||
vmsig_ctx_free(c);
|
||||
}
|
||||
|
||||
/* ---- 3. backpressure drop_oldest --------------------------------------- */
|
||||
static void test_backpressure_oldest(void) {
|
||||
printf("test_backpressure_oldest\n");
|
||||
vmsig_ctx* c = vmsig_ctx_new();
|
||||
vmsig_timing t; memset(&t, 0, sizeof t);
|
||||
t.max_inflight = 2; t.drop_policy = VMSIG_DROP_OLDEST;
|
||||
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 3); /* evicts 1 */
|
||||
vmsig_event o;
|
||||
int got[8], n = 0;
|
||||
while (vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1) got[n++] = (int)o.corr;
|
||||
CHECK(n == 2, "2 remain after drop_oldest");
|
||||
CHECK(n == 2 && got[0] == 2 && got[1] == 3, "oldest evicted (1)");
|
||||
vmsig_ctx_free(c);
|
||||
}
|
||||
|
||||
/* ---- 4. backpressure drop_newest --------------------------------------- */
|
||||
static void test_backpressure_newest(void) {
|
||||
printf("test_backpressure_newest\n");
|
||||
vmsig_ctx* c = vmsig_ctx_new();
|
||||
vmsig_timing t; memset(&t, 0, sizeof t);
|
||||
t.max_inflight = 2; t.drop_policy = VMSIG_DROP_NEWEST;
|
||||
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
|
||||
int r3 = put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 3); /* dropped */
|
||||
CHECK(r3 == 1, "third submit dropped (=1)");
|
||||
vmsig_event o;
|
||||
int got[8], n = 0;
|
||||
while (vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1) got[n++] = (int)o.corr;
|
||||
CHECK(n == 2 && got[0] == 1 && got[1] == 2, "newest dropped (3)");
|
||||
vmsig_ctx_free(c);
|
||||
}
|
||||
|
||||
/* ---- 5. protocol pacing via timing-fd ---------------------------------- */
|
||||
static void test_pacing(void) {
|
||||
printf("test_pacing\n");
|
||||
vmsig_ctx* c = vmsig_ctx_new();
|
||||
vmsig_timing t; memset(&t, 0, sizeof t);
|
||||
t.min_gap_ns = 20u * 1000000u; /* 20 ms gap */
|
||||
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
|
||||
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
|
||||
vmsig_event o;
|
||||
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "first delivered immediately");
|
||||
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "second paced (0 for now)");
|
||||
int tfd = vmsig_ctx_timing_fd(c, VMSIG_DIR_DOWN);
|
||||
CHECK(tfd >= 0, "timing-fd valid");
|
||||
struct pollfd pfd = { .fd = tfd, .events = POLLIN };
|
||||
int pr = poll(&pfd, 1, 1000); /* wait for it to fire, no longer than 1s */
|
||||
CHECK(pr == 1, "timing-fd became ready within the gap");
|
||||
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "after the gap the second matured");
|
||||
CHECK(o.corr == 2, "this is exactly the second event");
|
||||
vmsig_ctx_free(c);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
test_priority_seq();
|
||||
test_coalesce();
|
||||
test_backpressure_oldest();
|
||||
test_backpressure_newest();
|
||||
test_pacing();
|
||||
printf("ctx tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,93 @@
|
||||
/* test_inputobs.c — input observation:
|
||||
* held-query: a control with CAP_INPUT, on CMD_QUERY_INPUT, receives INPUT_HELD from the
|
||||
* vmctl record (stub without vmctl => count=0); without CAP_INPUT — DOWN_DENIED.
|
||||
* (The cursor sensor moved out of signaling with the FRAME adapter: CURSOR_STATE is now
|
||||
* emitted by the out-of-repo vgpu-perception shell-as-control, not by a signaling adapter.)
|
||||
* In-proc, under ASAN. Links against libvmsig. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Sticky failure flag; main() turns it into the process exit status. */
static int g_fail = 0;

/* Soft assertion: when `c` is false, print `m` and latch g_fail. */
#define CHECK(c, m)                           \
    do {                                      \
        if (c) {                              \
            break;                            \
        }                                     \
        printf(" FAIL: %s\n", (m));           \
        g_fail = 1;                           \
    } while (0)

/* Endpoint index used by every adapter, grant and command in this test. */
#define EP 0u
|
||||
|
||||
typedef struct {
|
||||
vmsig_core* core;
|
||||
void* ctl;
|
||||
int held; /* INPUT_HELD count */
|
||||
int last_held_count;
|
||||
int stop_held; /* stop after N held replies (0=no) */
|
||||
} obs;
|
||||
|
||||
static int on_ev(void* u, const vmsig_event* ev) {
|
||||
obs* o = u;
|
||||
if (ev->kind == VMSIG_EV_INPUT_HELD) {
|
||||
const vmsig_input_held* h = (const vmsig_input_held*)ev->inln;
|
||||
o->held++; o->last_held_count = (int)h->count;
|
||||
if (o->stop_held && o->held >= o->stop_held) vmsig_core_stop(o->core);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Create an in-proc control that reports to `o` (subscription: `src_mask`,
 * priorities from BULK up), store its handle in o->ctl and register it on
 * `core` with a grant for endpoint EP, `src_mask` and capability mask `cap`. */
static void add_ctl(vmsig_core* core, obs* o, uint32_t cap, uint32_t src_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user            = o;
    cfg.on_event        = on_ev;
    cfg.sub.prio_min    = VMSIG_PRIO_BULK;
    cfg.sub.source_mask = src_mask;

    o->ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.cap_mask      = cap;
    grant.source_mask   = src_mask;
    grant.endpoint_mask = 1ull << EP;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), o->ctl, &grant);
}
|
||||
|
||||
static void send_query_input(void* ctl) {
|
||||
vmsig_event d; memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_QUERY_INPUT; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = EP; d.prio = VMSIG_PRIO_HIGH;
|
||||
vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
|
||||
static int g_down_denied = 0;
|
||||
static void audit_cb(void* ud, const vmsig_audit* a) {
|
||||
(void)ud; if (a->kind == VMSIG_AUDIT_DOWN_DENIED) g_down_denied++;
|
||||
}
|
||||
|
||||
/* ---- held-query: CAP_INPUT -> INPUT_HELD (stub count=0); without cap -> DOWN_DENIED ---- */
|
||||
static void test_held_query(void) {
|
||||
printf("test_held_query\n");
|
||||
g_down_denied = 0;
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, EP); /* stub input (no vmctl) */
|
||||
|
||||
obs a; memset(&a, 0, sizeof a); a.core = core; a.stop_held = 1;
|
||||
add_ctl(core, &a, VMSIG_CAP_INPUT, 0xFFFFFFFFu);
|
||||
send_query_input(a.ctl);
|
||||
vmsig_core_run(core); /* pump_down -> INPUT_HELD -> pump_up */
|
||||
CHECK(a.held == 1, "held: CAP_INPUT receives INPUT_HELD");
|
||||
CHECK(a.last_held_count == 0, "held: stub without vmctl -> count=0");
|
||||
|
||||
/* without CAP_INPUT (OBSERVE only): CMD_QUERY_INPUT rejected BEFORE ctx (synchronously). */
|
||||
obs b; memset(&b, 0, sizeof b); b.core = core;
|
||||
add_ctl(core, &b, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu);
|
||||
int before = g_down_denied;
|
||||
send_query_input(b.ctl);
|
||||
CHECK(b.held == 0, "held: without CAP_INPUT -> no INPUT_HELD");
|
||||
CHECK(g_down_denied == before + 1, "held: without CAP_INPUT -> DOWN_DENIED");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
printf("test_inputobs\n");
|
||||
test_held_query();
|
||||
printf("inputobs tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,525 @@
|
||||
/* test_lease.c — arbitration layer (exclusive lease of destructive resources).
|
||||
*
|
||||
* 13 cases from the contract (docs/plans/lease-arbitration.md §Tests). In-proc,
|
||||
* runs without a live loop where synchronous interception suffices
|
||||
* (ACQUIRE/RELEASE/STATUS and the lease gate are synchronous in core_emit_down);
|
||||
* a live vmsig_core_run() — to check finalization/fencing/reclaim, where the
|
||||
* input-adapter worker is involved (actuation on the worker thread, ACK via the loop).
|
||||
*
|
||||
* Observability of finalization ordering: the input-worker FIFO => ACT_ACK order ==
|
||||
* submit order. A synthesized key-up has origin=0 (broadcast), the owner's CMD_INPUT
|
||||
* is an addressed ACK. A shared monotonic log records the relative ordering. */
|
||||
#include "vmsig.h"
|
||||
#include "core_internal.h" /* core_request_drop: deterministic reclaim of an in-proc control */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Sticky failure flag latched by CHECK(). */
static int g_fail = 0;

/* Soft assertion: when `cond` is false, print `msg` and latch g_fail.
 * Execution continues so one run reports every failed expectation. */
#define CHECK(cond, msg)                      \
    do {                                      \
        if (cond) {                           \
            break;                            \
        }                                     \
        printf(" FAIL: %s\n", (msg));         \
        g_fail = 1;                           \
    } while (0)
|
||||
|
||||
/* ---------- shared control infrastructure ---------- */
|
||||
|
||||
/* Lease replies received by one control slot. */
typedef struct {
    int      granted;            /* LEASE_GRANTED events */
    int      denied;             /* LEASE_DENIED events */
    int      released;           /* LEASE_RELEASED events */
    int      revoked;            /* LEASE_REVOKED events */
    int      status;             /* LEASE_STATUS events */
    int      last_deny_reason;   /* `reason` of the latest LEASE_DENIED */
    int      last_status_busy;   /* `busy` of the latest LEASE_STATUS */
    uint32_t last_status_owner;  /* `owner_principal` of the latest LEASE_STATUS */
} lease_counts;
|
||||
|
||||
typedef struct {
|
||||
void* core;
|
||||
lease_counts cnt[4]; /* indexed by control */
|
||||
/* log of ACT_ACK (actuations) in arrival order: tag = corr (0 = synthesized up) */
|
||||
int ack_log[64];
|
||||
int nack;
|
||||
int stop_after_acks; /* stop the loop after N actuations (0=not auto) */
|
||||
int total_replies; /* count of all lease UP events (GRANTED/DENIED/RELEASED/STATUS/REVOKED) */
|
||||
int stop_replies; /* stop the loop when total_replies>=this (0=not auto) */
|
||||
/* phase orchestration for finalization/fencing (on the loop thread via on_ev) */
|
||||
void* ctl_a;
|
||||
void* ctl_b;
|
||||
int phase; /* count of scenario phases passed */
|
||||
int scenario; /* 0=none, 1=preempt-finalize, 2=in-flight-fence */
|
||||
} lease_state;
|
||||
|
||||
/* User data of one control: the shared test state plus the control's slot. */
typedef struct {
    lease_state* s;    /* shared state of the running test */
    int          idx;  /* index into s->cnt[] */
} cref;
|
||||
|
||||
/* Registry of allocated crefs (control user-data): inproc close() frees the
|
||||
* ctl itself but not the user-data => we free them centrally at the end (ASAN cleanliness). */
|
||||
static cref* g_crefs[64];
|
||||
static int g_ncrefs = 0;
|
||||
static cref* cref_new(lease_state* s, int idx) {
|
||||
cref* r = calloc(1, sizeof *r);
|
||||
r->s = s; r->idx = idx;
|
||||
if (g_ncrefs < 64) g_crefs[g_ncrefs++] = r;
|
||||
return r;
|
||||
}
|
||||
static void cref_free_all(void) {
|
||||
for (int i = 0; i < g_ncrefs; i++) free(g_crefs[i]);
|
||||
g_ncrefs = 0;
|
||||
}
|
||||
|
||||
/* Prototypes of the send helpers that on_ev() calls before their definitions. */
static int acquire(void* ctl, uint32_t cls, uint32_t ep);
static int send_key(void* ctl, int code, int value, uint32_t corr, uint32_t ep);
|
||||
|
||||
/* Account one event in slot `idx` of `s`: bump the counter of the matching
 * lease reply kind (and total_replies), record the deny reason or the status
 * fields, and stop the loop once stop_replies is reached. Other event kinds
 * change no counter.
 *
 * Inline payloads are copied out with memcpy rather than read through a cast
 * of the byte buffer: the buffer carries no alignment or effective-type
 * guarantee for these structs, and the senders pack it with memcpy as well.
 * NOTE(review): assumes both payload structs fit into ev->inln — confirm. */
static void on_lease_ev(lease_state* s, int idx, const vmsig_event* ev) {
    lease_counts* c = &s->cnt[idx];
    switch (ev->kind) {
    case VMSIG_EV_LEASE_GRANTED: c->granted++; s->total_replies++; break;
    case VMSIG_EV_LEASE_DENIED: {
        vmsig_lease_req req;
        memcpy(&req, ev->inln, sizeof req);
        c->denied++;
        c->last_deny_reason = (int)req.reason;
        s->total_replies++;
        break;
    }
    case VMSIG_EV_LEASE_RELEASED: c->released++; s->total_replies++; break;
    case VMSIG_EV_LEASE_REVOKED: c->revoked++; s->total_replies++; break;
    case VMSIG_EV_LEASE_STATUS: {
        vmsig_lease_status st;
        memcpy(&st, ev->inln, sizeof st);
        c->status++;
        c->last_status_busy = (int)st.busy;
        c->last_status_owner = st.owner_principal;
        s->total_replies++;
        break;
    }
    default: break;
    }
    if (s->stop_replies && s->total_replies >= s->stop_replies)
        vmsig_core_stop(s->core);
}
|
||||
|
||||
static int on_ev(void* user, const vmsig_event* ev) {
|
||||
cref* r = user; lease_state* s = r->s;
|
||||
on_lease_ev(s, r->idx, ev);
|
||||
if (ev->kind == VMSIG_EV_ACT_ACK) {
|
||||
if (s->nack < 64) s->ack_log[s->nack++] = (int)ev->corr;
|
||||
|
||||
/* Phase orchestration (loop thread): wait for the REAL actuation of A's key-down
|
||||
* (the held-set is filled in pump_down=in_submit), and only THEN let B preempt —
|
||||
* otherwise finalization on a synchronous acquire would run over an empty held-set. */
|
||||
if (s->scenario == 1 && ev->corr == 11 && s->phase == 0) {
|
||||
s->phase = 1;
|
||||
acquire(s->ctl_b, VMSIG_LEASE_INPUT, 0); /* B preempts AFTER A's actuation */
|
||||
send_key(s->ctl_b, 31, 1, 22, 0);
|
||||
send_key(s->ctl_b, 31, 0, 23, 0);
|
||||
}
|
||||
|
||||
if (s->stop_after_acks && s->nack >= s->stop_after_acks)
|
||||
vmsig_core_stop(s->core);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---------- DOWN send helpers ---------- */
|
||||
|
||||
/* Send a lease command of `kind` for class `cls` on endpoint `ep` through the
 * in-proc control `ctl` (source INPUT, direction DOWN, priority HIGH). The
 * request struct is packed into the inline payload. Returns the result of
 * vmsig_inproc_send(). */
static int send_lease(void* ctl, vmsig_kind kind, uint32_t cls, uint32_t ep) {
    const vmsig_lease_req req = { cls, 0 };

    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir      = VMSIG_DIR_DOWN;
    cmd.source   = VMSIG_SRC_INPUT;
    cmd.kind     = kind;
    cmd.prio     = VMSIG_PRIO_HIGH;
    cmd.endpoint = ep;
    memcpy(cmd.inln, &req, sizeof req);

    return vmsig_inproc_send(ctl, &cmd);
}
|
||||
static int acquire(void* ctl, uint32_t cls, uint32_t ep) {
|
||||
return send_lease(ctl, VMSIG_EV_CMD_ACQUIRE, cls, ep);
|
||||
}
|
||||
static int release_(void* ctl, uint32_t cls, uint32_t ep) {
|
||||
return send_lease(ctl, VMSIG_EV_CMD_RELEASE, cls, ep);
|
||||
}
|
||||
static int status(void* ctl, uint32_t cls, uint32_t ep) {
|
||||
return send_lease(ctl, VMSIG_EV_CMD_LEASE_STATUS, cls, ep);
|
||||
}
|
||||
|
||||
/* CMD_INPUT: KEY/BTN with value, corr for tracking. */
|
||||
static int send_key(void* ctl, int code, int value, uint32_t corr, uint32_t ep) {
|
||||
vmsig_event d;
|
||||
memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_INPUT; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = ep; d.prio = VMSIG_PRIO_HIGH; d.corr = corr;
|
||||
vmsig_input in; memset(&in, 0, sizeof in);
|
||||
in.kind = VMSIG_INPUT_KEY; in.code = code; in.value = value;
|
||||
memcpy(d.inln, &in, sizeof in);
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
|
||||
static int send_life(void* ctl, int op, uint32_t corr, uint32_t ep) {
|
||||
vmsig_event d;
|
||||
memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_LIFECYCLE; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = ep; d.prio = VMSIG_PRIO_URGENT; d.corr = corr; d.inln[0] = (uint8_t)op;
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
|
||||
/* Pump the loop until `n` more lease replies arrive (UP delivery via ctx
|
||||
* requires pump_up). Lease DOWN intercepts are synchronous, but their UP replies are
|
||||
* paced by the context => a live loop is needed. */
|
||||
/* Run the loop until `n` further lease replies were counted: the stop
 * threshold is set relative to the replies seen so far, the core's `stopping`
 * flag is cleared (white-box) so the loop can be re-entered, and the
 * threshold is switched off again afterwards. */
static void pump_n(lease_state* s, int n) {
    vmsig_core* loop = (vmsig_core*)s->core;

    s->stop_replies = s->total_replies + n;
    loop->stopping = 0;
    vmsig_core_run(loop);
    s->stop_replies = 0;
}
|
||||
|
||||
/* Create an in-proc control for slot `idx` of `s` (subscribed to all sources,
 * priorities from BULK up, endpoint mask 0) and register it on `core` with a
 * grant for `epmask`, all sources, `cap` plus OBSERVE, and arbitration
 * priority `arb_prio`. Returns the control handle. */
static void* add_ctl(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
                     uint32_t arb_prio, uint64_t epmask) {
    cref* ref = cref_new(s, idx);

    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user              = ref;
    cfg.on_event          = on_ev;
    cfg.sub.endpoint_mask = 0; /* all VMs */
    cfg.sub.prio_min      = VMSIG_PRIO_BULK;
    cfg.sub.source_mask   = 0xFFFFFFFFu;
    void* handle = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.arb_prio      = arb_prio;
    grant.cap_mask      = cap | VMSIG_CAP_OBSERVE;
    grant.source_mask   = 0xFFFFFFFFu;
    grant.endpoint_mask = epmask;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), handle, &grant);

    return handle;
}
|
||||
|
||||
/* ===== Synchronous test group (no loop): ACQUIRE/RELEASE/STATUS interception ===== */
|
||||
|
||||
static void test_sync_group(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
|
||||
void* Lo= add_ctl(core, &s, 2, VMSIG_CAP_INPUT, 5, 1ull << 0); /* low priority */
|
||||
void* X = add_ctl(core, &s, 3, VMSIG_CAP_INPUT, 10, 1ull << 1); /* grant on ep1, not ep0 */
|
||||
|
||||
/* Lease DOWN intercepts are synchronous, but the UP reply is delivered via ctx => after
|
||||
* each lease request we pump the loop until the corresponding UP arrives. */
|
||||
|
||||
/* 1) acquire -> GRANTED; the owner's CMD_INPUT passes the gate (==0). */
|
||||
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[0].granted == 1, "1: A gets GRANTED");
|
||||
CHECK(send_key(A, 30, 1, 1, 0) == 0, "1: owner's CMD_INPUT passes the gate");
|
||||
send_key(A, 30, 0, 2, 0); /* release, so as not to leave held for the next tests */
|
||||
|
||||
/* 2) gate: non-owner B -> CMD_INPUT dropped (-1). */
|
||||
CHECK(send_key(B, 30, 1, 3, 0) == -1, "2: non-owner: CMD_INPUT dropped by the gate");
|
||||
|
||||
/* 3) equal priorities: a contender of the same prio -> DENIED{HELD}. */
|
||||
{
|
||||
void* C = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 10, 1ull << 0); (void)C;
|
||||
int before = s.cnt[1].denied;
|
||||
acquire(C, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[1].denied == before + 1, "3: equal prio -> DENIED");
|
||||
CHECK(s.cnt[1].last_deny_reason == VMSIG_LEASE_DENY_HELD, "3: reason=HELD");
|
||||
}
|
||||
|
||||
/* 7) without cap -> NOCAP; foreign endpoint -> NOGRANT. */
|
||||
{
|
||||
void* NC = add_ctl(core, &s, 2, 0u /* without INPUT */, 10, 1ull << 0);
|
||||
int before = s.cnt[2].denied;
|
||||
acquire(NC, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[2].denied == before + 1, "7: without cap -> DENIED");
|
||||
CHECK(s.cnt[2].last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "7: reason=NOCAP");
|
||||
}
|
||||
{
|
||||
int before = s.cnt[3].denied;
|
||||
acquire(X, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* X has a grant on ep1, requests ep0 */
|
||||
CHECK(s.cnt[3].denied == before + 1, "7: foreign endpoint -> DENIED");
|
||||
CHECK(s.cnt[3].last_deny_reason == VMSIG_LEASE_DENY_NOGRANT, "7: reason=NOGRANT");
|
||||
}
|
||||
|
||||
/* 8) per-endpoint / per-class independence. */
|
||||
{
|
||||
void* P0 = add_ctl(core, &s, 1, VMSIG_CAP_POWER, 50, 1ull << 0);
|
||||
int gb = s.cnt[1].granted, gx = s.cnt[3].granted;
|
||||
acquire(X, VMSIG_LEASE_INPUT, 1); pump_n(&s, 1); /* X on its own ep1 — free */
|
||||
CHECK(s.cnt[3].granted == gx + 1, "8: X owns INPUT@ep1 independently");
|
||||
acquire(P0, VMSIG_LEASE_POWER, 0); pump_n(&s, 1); /* POWER@ep0 is free, even though INPUT@ep0 is held by A */
|
||||
CHECK(s.cnt[1].granted == gb + 1, "8: POWER@ep0 independent of INPUT@ep0");
|
||||
}
|
||||
|
||||
/* 11) STATUS: busy=1 while A holds INPUT@ep0. */
|
||||
{
|
||||
int before = s.cnt[1].status;
|
||||
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[1].status == before + 1, "11: STATUS replies");
|
||||
CHECK(s.cnt[1].last_status_busy == 1, "11: busy=1 while A owns");
|
||||
}
|
||||
|
||||
/* 6) release -> reacquire: A releases, Lo (low prio) now gets it. */
|
||||
{
|
||||
int rb = s.cnt[0].released;
|
||||
release_(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[0].released == rb + 1, "6: A gets RELEASED");
|
||||
int gb = s.cnt[2].granted;
|
||||
acquire(Lo, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* free -> even low prio takes it */
|
||||
CHECK(s.cnt[2].granted == gb + 1, "6: reacquire after release succeeds");
|
||||
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[1].last_status_busy == 1, "11: busy=1 after reacquire");
|
||||
release_(Lo, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[1].last_status_busy == 0, "11: busy=0 after releasing all");
|
||||
}
|
||||
|
||||
/* 12) safe commands are NOT gated by the lease (nobody holds INPUT@ep0). */
|
||||
{
|
||||
void* SAFE = add_ctl(core, &s, 1, VMSIG_CAP_LIFECYCLE | VMSIG_CAP_INPUT, 1, 1ull << 0);
|
||||
/* PAUSE = safe lifecycle: lease_class_for_down -> -1 => not gated. */
|
||||
CHECK(send_life(SAFE, VMSIG_LIFE_PAUSE, 90, 0) == 0,
|
||||
"12: safe lifecycle (PAUSE) is not gated by the lease");
|
||||
}
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ===== 13) policy seam: custom "always DENY" ===== */
|
||||
static vmsig_arb_decision policy_always_deny(void* ud, uint32_t ep, uint32_t cls,
|
||||
const vmsig_grant* inc, const vmsig_grant* con) {
|
||||
(void)ud; (void)ep; (void)cls; (void)inc; (void)con;
|
||||
return VMSIG_ARB_DENY;
|
||||
}
|
||||
static void test_policy_group(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
vmsig_core_set_arb_policy(core, policy_always_deny, NULL);
|
||||
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 999, 1ull << 0); /* highest prio */
|
||||
|
||||
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[0].granted == 1, "13: A owns");
|
||||
acquire(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* high prio, but policy=DENY */
|
||||
CHECK(s.cnt[1].granted == 0, "13: custom DENY => high prio does NOT preempt");
|
||||
CHECK(s.cnt[1].denied == 1, "13: B got DENIED");
|
||||
CHECK(s.cnt[0].revoked == 0, "13: A not revoked");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ===== 4) preemption: high prio takes the lease away (REVOKED to the old, GRANTED to the new).
|
||||
* signaling does NOT release what is held (rolling back finalization is the control's decision). ===== */
|
||||
static void test_preempt(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
|
||||
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0); /* stub input */
|
||||
s.ctl_a = A; s.ctl_b = B; s.scenario = 1;
|
||||
|
||||
acquire(A, VMSIG_LEASE_INPUT, 0);
|
||||
send_key(A, 30, 1, 11, 0); /* A injects a KEY (corr=11) */
|
||||
/* B preempts from on_ev AFTER ack corr=11. There is NO finalization => wait for 3 actuations:
|
||||
* A-down(11), B-down(22), B-up(23). */
|
||||
s.stop_after_acks = 3;
|
||||
vmsig_core_run(core);
|
||||
|
||||
CHECK(s.cnt[1].granted == 1, "4: B gets GRANTED on preemption");
|
||||
CHECK(s.cnt[0].revoked == 1, "4: A gets LEASE_REVOKED");
|
||||
int saw22 = 0;
|
||||
for (int i = 0; i < s.nack; i++) if (s.ack_log[i] == 22) saw22 = 1;
|
||||
CHECK(saw22, "4: the new owner's (B) input is actuated after preemption");
|
||||
/* in-flight fencing of the ex-owner — covered separately in test_inflight_fence. */
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ===== reacquire by the owner with a key held down does NOT synthesize an up (self-preemption) ===== */
|
||||
static void test_self_reacquire(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0);
|
||||
|
||||
acquire(A, VMSIG_LEASE_INPUT, 0);
|
||||
send_key(A, 30, 1, 11, 0); /* hold down */
|
||||
acquire(A, VMSIG_LEASE_INPUT, 0); /* reacquire by the same owner -> idempotent */
|
||||
send_key(A, 31, 1, 22, 0); /* another key */
|
||||
send_key(A, 30, 0, 33, 0);
|
||||
send_key(A, 31, 0, 44, 0);
|
||||
|
||||
s.stop_after_acks = 4; /* there must be NO synthesized up (corr=0) */
|
||||
vmsig_core_run(core);
|
||||
|
||||
CHECK(s.cnt[0].granted == 2, "self: repeated ACQUIRE -> another GRANTED");
|
||||
int saw_zero = 0;
|
||||
for (int i = 0; i < s.nack; i++) if (s.ack_log[i] == 0) saw_zero = 1;
|
||||
CHECK(!saw_zero, "self: reacquire by the owner does NOT synthesize a key-up");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ===== 9) reclaim-on-death: the slot is freed, RECLAIMED, B GRANTED =====
|
||||
* We model death via core_request_drop(id): core_reap -> core_lease_reap_control
|
||||
* frees the owner's slot (held is NOT finalized — that's a vmctl write / control's decision). */
|
||||
static int g_reclaimed = 0;
|
||||
static int g_lease_denied = 0;
|
||||
static void audit_cb(void* ud, const vmsig_audit* a) {
|
||||
(void)ud;
|
||||
if (a->kind == VMSIG_AUDIT_LEASE_RECLAIMED) g_reclaimed++;
|
||||
if (a->kind == VMSIG_AUDIT_LEASE_DENIED) g_lease_denied++;
|
||||
}
|
||||
/* On RECLAIMED we stop the loop (to end the reap run deterministically): ud=core. */
|
||||
static void reclaim_audit_cb(void* ud, const vmsig_audit* a) {
|
||||
if (a->kind == VMSIG_AUDIT_LEASE_RECLAIMED) {
|
||||
g_reclaimed++;
|
||||
if (ud) vmsig_core_stop((vmsig_core*)ud);
|
||||
}
|
||||
}
|
||||
|
||||
/* Variant of add_ctl that returns the control's id (via out). */
|
||||
static void* add_ctl_id(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
|
||||
uint32_t arb_prio, uint64_t epmask, int* out_id) {
|
||||
cref* r = cref_new(s, idx);
|
||||
vmsig_inproc_cfg cfg; memset(&cfg, 0, sizeof cfg);
|
||||
cfg.on_event = on_ev; cfg.user = r;
|
||||
cfg.sub.source_mask = 0xFFFFFFFFu; cfg.sub.prio_min = VMSIG_PRIO_BULK;
|
||||
void* ctl = vmsig_inproc_control_new(&cfg);
|
||||
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||
g.endpoint_mask = epmask; g.source_mask = 0xFFFFFFFFu;
|
||||
g.cap_mask = cap | VMSIG_CAP_OBSERVE; g.arb_prio = arb_prio;
|
||||
int id = vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &g);
|
||||
if (out_id) *out_id = id;
|
||||
return ctl;
|
||||
}
|
||||
|
||||
static void test_reclaim(void) {
|
||||
g_reclaimed = 0;
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
vmsig_core_set_audit(core, reclaim_audit_cb, core); /* RECLAIMED -> stop the loop */
|
||||
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
int a_id = -1;
|
||||
void* A = add_ctl_id(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0, &a_id);
|
||||
void* B = add_ctl_id(core, &s, 1, VMSIG_CAP_INPUT, 5, 1ull << 0, NULL); /* LOW prio */
|
||||
|
||||
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[0].granted == 1, "9: A owns");
|
||||
|
||||
/* A's death: reap frees its slot (RECLAIMED); the audit-cb stops the loop. */
|
||||
core_request_drop(core, a_id);
|
||||
core->stopping = 0; /* white-box: reuse the loop (like pump_n) */
|
||||
vmsig_core_run(core);
|
||||
CHECK(g_reclaimed == 1, "9: audit RECLAIMED on owner death");
|
||||
|
||||
/* the slot is free: B (low prio) takes it without preemption */
|
||||
acquire(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[1].granted == 1, "9: B GRANTED after reclaim (slot is free)");
|
||||
|
||||
(void)A; (void)B;
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ===== 10) in-flight fencing: losing the lease BEFORE pump_down -> drop ===== */
|
||||
/* A owns it, queues CMD_INPUT into ctx (via emit_down -> ctx), then B preempts
|
||||
* SYNCHRONOUSLY (acquire does not go through ctx). By the time pump_down reaches A's CMD_INPUT,
|
||||
* the owner is already B => the fence drops A's command (there must be NO actuation of A). */
|
||||
static void test_inflight_fence(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
|
||||
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0);
|
||||
|
||||
acquire(A, VMSIG_LEASE_INPUT, 0);
|
||||
/* A queues a command into ctx (corr=55) — it passes the gate (A owns it), lands in the DOWN queue */
|
||||
send_key(A, 30, 1, 55, 0);
|
||||
/* B preempts SYNCHRONOUSLY (before the loop has called pump_down) */
|
||||
acquire(B, VMSIG_LEASE_INPUT, 0);
|
||||
/* B sends its own command (corr=66) */
|
||||
send_key(B, 31, 1, 66, 0);
|
||||
send_key(B, 31, 0, 67, 0);
|
||||
|
||||
/* Expected actuations: finalization on preemption (corr=0, but A held nothing by the
|
||||
* moment of preemption — A's down is still in ctx, the held-set is empty => finalize=0 ups),
|
||||
* then B's 66 and 67. A's 55 MUST be dropped by the fence (no corr=55). */
|
||||
s.stop_after_acks = 2; /* B's down + up */
|
||||
vmsig_core_run(core);
|
||||
|
||||
int saw55 = 0;
|
||||
for (int i = 0; i < s.nack; i++) if (s.ack_log[i] == 55) saw55 = 1;
|
||||
CHECK(!saw55, "10: in-flight ex-owner's command dropped by the fence");
|
||||
CHECK(s.cnt[1].granted == 1, "10: B owns after preemption");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ===== lease source gate + audit on acquire-deny =====
|
||||
* Regression: ACQUIRE is intercepted BEFORE grant_allows_down, so source_mask and
|
||||
* audit of the deny paths must be checked/emitted IN THE lease layer ITSELF (otherwise: holding
|
||||
* someone else's slot bypassing source_mask = DoS; audit-invisible probing of caps/endpoints). */
|
||||
/* Registers an in-proc control whose grant carries a caller-chosen source_mask
 * (the other add_ctl helpers always grant all sources). The subscription still
 * listens to every source; arb_prio is fixed at 10 and VMSIG_CAP_OBSERVE is
 * always added to `cap`. Returns the control handle. */
static void* add_ctl_src(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
                         uint64_t epmask, uint32_t source_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.on_event = on_ev;
    cfg.user = cref_new(s, idx);
    cfg.sub.source_mask = 0xFFFFFFFFu;
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = epmask;
    grant.source_mask = source_mask;
    grant.cap_mask = cap | VMSIG_CAP_OBSERVE;
    grant.arb_prio = 10;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
    return ctl;
}
|
||||
static void test_audit_and_source(void) {
|
||||
g_lease_denied = 0;
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
/* cap=INPUT, but source_mask WITHOUT SRC_INPUT: the INPUT lease cannot be acquired (DoS bypass). */
|
||||
void* NS = add_ctl_src(core, &s, 0, VMSIG_CAP_INPUT, 1ull << 0, ~(1u << VMSIG_SRC_INPUT));
|
||||
int before = g_lease_denied;
|
||||
acquire(NS, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[0].denied == 1, "src: acquire without the SRC_INPUT bit -> DENIED");
|
||||
CHECK(s.cnt[0].last_deny_reason == VMSIG_LEASE_DENY_NOGRANT, "src: reason=NOGRANT");
|
||||
CHECK(g_lease_denied == before + 1, "audit: source-deny emits LEASE_DENIED");
|
||||
|
||||
/* without cap: NOCAP + audit (previously acquire-deny was invisible to the audit). */
|
||||
void* NC = add_ctl_src(core, &s, 1, 0u, 1ull << 0, 0xFFFFFFFFu);
|
||||
before = g_lease_denied;
|
||||
acquire(NC, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[1].last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "src: without cap -> NOCAP");
|
||||
CHECK(g_lease_denied == before + 1, "audit: NOCAP-deny emits LEASE_DENIED");
|
||||
|
||||
/* control case: cap+source -> GRANTED, without a spurious deny audit. */
|
||||
void* OK = add_ctl_src(core, &s, 2, VMSIG_CAP_INPUT, 1ull << 0, 0xFFFFFFFFu);
|
||||
before = g_lease_denied;
|
||||
acquire(OK, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||
CHECK(s.cnt[2].granted == 1, "src: cap+source -> GRANTED");
|
||||
CHECK(g_lease_denied == before, "audit: successful acquire does not emit a deny");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
printf("test_lease\n");
|
||||
test_sync_group();
|
||||
test_policy_group();
|
||||
test_preempt();
|
||||
test_self_reacquire();
|
||||
test_inflight_fence();
|
||||
test_reclaim();
|
||||
test_audit_and_source();
|
||||
cref_free_all();
|
||||
|
||||
printf("lease tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,339 @@
|
||||
/* test_memctx.c — seam for the coherent address-space context (MEMCTX).
|
||||
*
|
||||
* 1) multicast + RO-fd + decode: a holder with CAP_MEMCTX receives MEMCTX, kcr3,
|
||||
* epoch=0, nseg=1; the received fd mmaps PROT_READ, while PROT_WRITE -> EACCES
|
||||
* (RO physically enforced); the vmsig_memctx_segs helper reconstructs segs[];
|
||||
* a co-resident holder without CAP_MEMCTX does NOT receive it (deny);
|
||||
* 2) epoch: a synthetic destructive VM_LIFECYCLE -> MEMCTX_INVALIDATED ->
|
||||
* re-multicast at epoch+1 with a NEW kcr3;
|
||||
* 3) retain/replay: a LATE subscriber (attached AFTER publication) receives
|
||||
* the retained MEMCTX with a valid re-shared RO-fd (synchronously on add_control);
|
||||
* 4) multi-VM: two endpoints, isolation (a VM holder does not see another's context);
|
||||
* 5) socket E2E: MEMCTX travels as a vmsig_wire frame + RO-fd in cmsg (SCM_RIGHTS), the
|
||||
* client mmaps RO via the received fd.
|
||||
* In-proc (except 5) and under ASAN. SISC: not a single control name in the adapter. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig.h"
|
||||
#include "vmsig_socket.h" /* vmsig_wire, vmsig_socket_attach */
|
||||
#include "core_internal.h" /* core_emit_up (synthetic lifecycle injection) */
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <pthread.h>
|
||||
#include <time.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* Sticky failure flag for the whole test binary: set by CHECK, read by main. */
static int g_fail = 0;

/* CHECK(cond, msg): when cond is false, print msg as a FAIL line and latch
 * g_fail. Execution continues either way, so later checks still run. */
#define CHECK(cond, msg)                  \
    do {                                  \
        if (cond) {                       \
            break;                        \
        }                                 \
        printf(" FAIL: %s\n", (msg));     \
        g_fail = 1;                       \
    } while (0)
|
||||
|
||||
/* ===== in-proc holder ===== */
|
||||
typedef struct holder holder;
|
||||
struct holder {
|
||||
vmsig_core* core;
|
||||
holder* peer; /* multi-VM: stop when both are ready (or NULL) */
|
||||
int is_driver; /* stops the loop on a condition */
|
||||
uint32_t expect_ep;
|
||||
int memctx, invalidated, ticks, bad_ep;
|
||||
uint64_t last_kcr3, kcr3_e0;
|
||||
uint32_t last_epoch, last_nseg;
|
||||
int ro_ok, rw_eacces, seg0_ok;
|
||||
int inject_reset, injected;
|
||||
int stop_epoch; /* stop when last_epoch>=stop_epoch (-1 = else) */
|
||||
};
|
||||
|
||||
/* Stops the core loop once this holder's completion condition is met.
 * Non-drivers never stop the loop. For a driver, the first rule that applies
 * decides:
 *   - more than 30 lifecycle ticks: stop unconditionally (failsafe);
 *   - stop_epoch >= 0: at least one MEMCTX and last_epoch >= stop_epoch;
 *   - a peer is set:   both this holder and the peer have a MEMCTX;
 *   - otherwise:       this holder has a MEMCTX. */
static void maybe_stop(holder* h) {
    if (!h->is_driver) {
        return;
    }

    const int have_ctx = (h->memctx >= 1);
    int done;

    if (h->ticks > 30) {
        done = 1; /* failsafe (vmhost ticks) */
    } else if (h->stop_epoch >= 0) {
        done = have_ctx && ((int)h->last_epoch >= h->stop_epoch);
    } else if (h->peer) {
        done = have_ctx && (h->peer->memctx >= 1);
    } else {
        done = have_ctx;
    }

    if (done) {
        vmsig_core_stop(h->core);
    }
}
|
||||
|
||||
static int h_on_ev(void* u, const vmsig_event* ev) {
|
||||
holder* h = u;
|
||||
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) h->ticks++;
|
||||
else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) h->invalidated++;
|
||||
maybe_stop(h);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int h_on_memctx(void* u, const vmsig_event* ev, int fd) {
|
||||
holder* h = u;
|
||||
const vmsig_memctx* m = (const vmsig_memctx*)ev->inln;
|
||||
h->memctx++;
|
||||
if (ev->endpoint != h->expect_ep) h->bad_ep++;
|
||||
h->last_kcr3 = m->kcr3; h->last_epoch = m->epoch;
|
||||
if (m->epoch == 0) h->kcr3_e0 = m->kcr3;
|
||||
|
||||
uint32_t n = 0;
|
||||
const vmsig_memseg* segs = vmsig_memctx_segs(ev, &n);
|
||||
h->last_nseg = n;
|
||||
if (segs && n >= 1 && segs[0].gpa == 0 && segs[0].len == m->low) h->seg0_ok = 1;
|
||||
|
||||
if (fd >= 0 && m->low) {
|
||||
void* ro = mmap(NULL, (size_t)m->low, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (ro != MAP_FAILED) { h->ro_ok = 1; munmap(ro, (size_t)m->low); }
|
||||
void* rw = mmap(NULL, (size_t)m->low, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if (rw == MAP_FAILED) h->rw_eacces = 1; else munmap(rw, (size_t)m->low);
|
||||
}
|
||||
|
||||
/* epoch test: on the first context (epoch0) inject a destructive transition. */
|
||||
if (h->inject_reset && !h->injected && m->epoch == 0) {
|
||||
h->injected = 1;
|
||||
vmsig_event lc; memset(&lc, 0, sizeof lc);
|
||||
lc.kind = VMSIG_EV_VM_LIFECYCLE; lc.source = VMSIG_SRC_VMHOST; lc.dir = VMSIG_DIR_UP;
|
||||
lc.prio = VMSIG_PRIO_URGENT; lc.endpoint = h->expect_ep; lc.origin = 0;
|
||||
vmsig_vm_state vs = { VMSIG_VM_RESET, 0 };
|
||||
memcpy(lc.inln, &vs, sizeof vs);
|
||||
core_emit_up(h->core, &lc); /* core: epoch++ + invalidate + re-multicast */
|
||||
}
|
||||
maybe_stop(h);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Attaches holder `h` to `core` as an in-proc control. Its event and MEMCTX
 * callbacks are h_on_ev / h_on_memctx with `h` as user data; the grant uses
 * principal 5 and the capability, source and endpoint masks passed in. */
static void add_holder(vmsig_core* core, holder* h, uint32_t cap,
                       uint32_t source_mask, uint64_t endpoint_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.on_event = h_on_ev;
    cfg.on_memctx = h_on_memctx;
    cfg.user = h;
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.principal = 5;
    grant.endpoint_mask = endpoint_mask;
    grant.source_mask = source_mask;
    grant.cap_mask = cap;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
}
|
||||
|
||||
/* ---- 1. multicast + RO-fd + decode + deny ---------------------------------- */
|
||||
static void test_multicast(void) {
|
||||
printf("test_multicast\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder good; memset(&good, 0, sizeof good);
|
||||
good.core = core; good.is_driver = 1; good.expect_ep = 0; good.stop_epoch = -1;
|
||||
holder deny; memset(&deny, 0, sizeof deny);
|
||||
deny.core = core; deny.expect_ep = 0; deny.stop_epoch = -1;
|
||||
|
||||
add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
add_holder(core, &deny, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); /* no MEMCTX */
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||
|
||||
vmsig_core_run(core);
|
||||
|
||||
CHECK(good.memctx >= 1, "GOOD received MEMCTX");
|
||||
CHECK(good.last_kcr3 != 0, "kcr3 nonzero");
|
||||
CHECK(good.last_epoch == 0, "first publication is epoch 0");
|
||||
CHECK(good.last_nseg == 1, "nseg=1 (single-low)");
|
||||
CHECK(good.seg0_ok, "segs[] decoded by the helper (gpa=0,len=low)");
|
||||
CHECK(good.ro_ok, "RO-fd: mmap(PROT_READ) ok");
|
||||
CHECK(good.rw_eacces, "RO-fd: mmap(PROT_WRITE) -> EACCES (RO enforced)");
|
||||
CHECK(good.bad_ep == 0, "delivery endpoint is correct");
|
||||
CHECK(deny.memctx == 0, "deny without CAP_MEMCTX does NOT receive MEMCTX");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ---- 2. epoch: invalidation + re-multicast epoch+1 ------------------------- */
|
||||
static void test_epoch(void) {
|
||||
printf("test_epoch\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder h; memset(&h, 0, sizeof h);
|
||||
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.inject_reset = 1; h.stop_epoch = 1;
|
||||
|
||||
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||
|
||||
vmsig_core_run(core);
|
||||
|
||||
CHECK(h.memctx >= 2, "contexts for epochs 0 and 1 received");
|
||||
CHECK(h.invalidated >= 1, "MEMCTX_INVALIDATED delivered on epoch change");
|
||||
CHECK(h.last_epoch == 1, "re-multicast at epoch+1");
|
||||
CHECK(h.kcr3_e0 != 0 && h.last_kcr3 != 0 && h.last_kcr3 != h.kcr3_e0,
|
||||
"new kcr3 after re-bootstrap (epoch 1 kcr3 != epoch 0)");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ---- 3. retain/replay to a late subscriber --------------------------------- */
|
||||
static void test_retain(void) {
|
||||
printf("test_retain\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder a; memset(&a, 0, sizeof a);
|
||||
a.core = core; a.is_driver = 1; a.expect_ep = 0; a.stop_epoch = -1;
|
||||
|
||||
add_holder(core, &a, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||
|
||||
vmsig_core_run(core); /* A receives MEMCTX, loop stopped */
|
||||
CHECK(a.memctx >= 1, "early subscriber A received MEMCTX");
|
||||
|
||||
/* LATE subscriber B: attaches AFTER publication. Replay of the retained context
|
||||
* happens SYNCHRONOUSLY in add_control (cell valid) — without a second loop run. */
|
||||
holder b; memset(&b, 0, sizeof b);
|
||||
b.core = core; b.expect_ep = 0; b.stop_epoch = -1;
|
||||
add_holder(core, &b, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0);
|
||||
|
||||
CHECK(b.memctx >= 1, "late subscriber B received the retained MEMCTX (replay)");
|
||||
CHECK(b.last_kcr3 != 0, "B: kcr3 nonzero in the replay");
|
||||
CHECK(b.ro_ok, "B: re-shared RO-fd mmaps PROT_READ");
|
||||
CHECK(b.rw_eacces, "B: re-shared fd is RO (PROT_WRITE -> EACCES)");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ---- 4. multi-VM: endpoint isolation --------------------------------------- */
|
||||
static void test_multivm(void) {
|
||||
printf("test_multivm\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder h0; memset(&h0, 0, sizeof h0);
|
||||
holder h1; memset(&h1, 0, sizeof h1);
|
||||
h0.core = core; h0.is_driver = 1; h0.expect_ep = 0; h0.stop_epoch = -1; h0.peer = &h1;
|
||||
h1.core = core; h1.is_driver = 1; h1.expect_ep = 1; h1.stop_epoch = -1; h1.peer = &h0;
|
||||
|
||||
/* each holder is scoped to its OWN endpoint (+OBSERVE for watchdog lifecycle ticks on ep0). */
|
||||
add_holder(core, &h0, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
add_holder(core, &h1, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 1);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost ep0 (watchdog)");
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx ep0");
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 1) >= 0, "add memctx ep1");
|
||||
|
||||
vmsig_core_run(core);
|
||||
|
||||
CHECK(h0.memctx >= 1 && h0.bad_ep == 0, "VM0 receives ONLY its own context (ep0)");
|
||||
CHECK(h1.memctx >= 1 && h1.bad_ep == 0, "VM1 receives ONLY its own context (ep1)");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ---- 5. socket end-to-end: MEMCTX frame + fd in cmsg ----------------------- */
|
||||
#define SOCK_EP 3u
|
||||
static vmsig_grant sock_policy(uint32_t uid, uint32_t pid, void* ud) {
|
||||
(void)pid; (void)ud;
|
||||
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||
g.principal = uid; g.endpoint_mask = 1ull << SOCK_EP;
|
||||
g.source_mask = 0xFFFFFFFFu; g.cap_mask = VMSIG_CAP_MEMCTX;
|
||||
return g;
|
||||
}
|
||||
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
|
||||
|
||||
/* Connects a stream socket to a unix socket in the abstract namespace.
 *
 * name: the address as written in the test, i.e. a one-character marker
 *       (here '@') followed by the abstract name. The marker is replaced by
 *       the leading NUL byte that selects the abstract namespace, so the
 *       address occupies strlen(name) bytes of sun_path.
 *
 * Returns the connected fd, or -1 on any failure. A NULL or empty name, or
 * one that does not fit into sun_path, is rejected before a socket is
 * created. Previously an empty name made `n - 1` wrap around in memcpy and
 * an over-long name overflowed the address structure. */
static int connect_abstract(const char* name) {
    struct sockaddr_un a;

    if (name == NULL) {
        return -1;
    }
    const size_t n = strlen(name);
    if (n == 0 || n > sizeof a.sun_path) {
        return -1;
    }

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) {
        return -1;
    }

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0; /* abstract namespace marker */
    memcpy(a.sun_path + 1, name + 1, n - 1);

    /* abstract addresses are length-delimited, not NUL-terminated */
    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    if (connect(fd, (struct sockaddr*)&a, alen) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
|
||||
|
||||
/* Read ONE 80-byte vmsig_wire frame; the adjacent fd (cmsg) -> into *out_fd. */
|
||||
static int recv_wire(int fd, vmsig_wire* w, int* out_fd) {
|
||||
*out_fd = -1;
|
||||
struct iovec iov = { .iov_base = w, .iov_len = sizeof *w };
|
||||
union { char buf[CMSG_SPACE(sizeof(int))]; struct cmsghdr a; } cm;
|
||||
memset(&cm, 0, sizeof cm);
|
||||
struct msghdr mh; memset(&mh, 0, sizeof mh);
|
||||
mh.msg_iov = &iov; mh.msg_iovlen = 1;
|
||||
mh.msg_control = cm.buf; mh.msg_controllen = sizeof cm.buf;
|
||||
size_t got = 0;
|
||||
while (got < sizeof *w) {
|
||||
iov.iov_base = (char*)w + got; iov.iov_len = sizeof *w - got;
|
||||
ssize_t n = recvmsg(fd, &mh, MSG_CMSG_CLOEXEC);
|
||||
if (n <= 0) return (got == 0) ? 0 : -1;
|
||||
for (struct cmsghdr* c = CMSG_FIRSTHDR(&mh); c; c = CMSG_NXTHDR(&mh, c))
|
||||
if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS)
|
||||
memcpy(out_fd, CMSG_DATA(c), sizeof(int));
|
||||
got += (size_t)n;
|
||||
mh.msg_control = NULL; mh.msg_controllen = 0; /* fd only on the first recvmsg */
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void test_socket(void) {
|
||||
printf("test_socket\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, SOCK_EP) >= 0, "add memctx");
|
||||
const char* SOCK = "@vmsig-memctx-e2e";
|
||||
CHECK(vmsig_socket_attach(core, SOCK, sock_policy, NULL) == 0, "socket attach");
|
||||
|
||||
pthread_t th; pthread_create(&th, NULL, loop_main, core);
|
||||
|
||||
int c = connect_abstract(SOCK);
|
||||
CHECK(c >= 0, "client connected");
|
||||
if (c < 0) { vmsig_core_stop(core); pthread_join(th, NULL); vmsig_core_free(core); vmsig_ctx_free(ctx); return; }
|
||||
|
||||
struct timeval tv = { .tv_sec = 3, .tv_usec = 0 };
|
||||
setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
|
||||
|
||||
int got_ctx = 0, ro_fd = -1, ro_ok = 0, rw_eacces = 0;
|
||||
vmsig_memctx pod; memset(&pod, 0, sizeof pod);
|
||||
for (int iter = 0; iter < 20 && !got_ctx; iter++) {
|
||||
vmsig_wire w; int wfd = -1;
|
||||
int r = recv_wire(c, &w, &wfd);
|
||||
if (r != 1) break;
|
||||
if (w.kind == VMSIG_EV_MEMCTX) {
|
||||
got_ctx = 1; ro_fd = wfd;
|
||||
memcpy(&pod, w.inln, sizeof pod);
|
||||
CHECK(ro_fd >= 0, "MEMCTX frame carries an RO-fd in cmsg");
|
||||
CHECK(pod.flags & VMSIG_MEMCTX_RDONLY, "RDONLY flag is set");
|
||||
if (ro_fd >= 0 && pod.low) {
|
||||
void* ro = mmap(NULL, (size_t)pod.low, PROT_READ, MAP_SHARED, ro_fd, 0);
|
||||
if (ro != MAP_FAILED) { ro_ok = 1; munmap(ro, (size_t)pod.low); }
|
||||
void* rw = mmap(NULL, (size_t)pod.low, PROT_READ | PROT_WRITE, MAP_SHARED, ro_fd, 0);
|
||||
if (rw == MAP_FAILED) rw_eacces = 1; else munmap(rw, (size_t)pod.low);
|
||||
}
|
||||
}
|
||||
}
|
||||
CHECK(got_ctx == 1, "MEMCTX frame arrived over the socket (wire framing)");
|
||||
CHECK(ro_ok, "mmap RO via the received fd");
|
||||
CHECK(rw_eacces, "write-mmap via the received fd fails (RO)");
|
||||
|
||||
if (ro_fd >= 0) close(ro_fd);
|
||||
close(c);
|
||||
vmsig_core_stop(core);
|
||||
pthread_join(th, NULL);
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
test_multicast();
|
||||
test_epoch();
|
||||
test_retain();
|
||||
test_multivm();
|
||||
test_socket();
|
||||
printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,227 @@
|
||||
/* test_memwrite.c — write-signaled seam (MEMWRITE): atomic guest-memory write under an
|
||||
* exclusive lease. Stub mode (no VM): proves the full path cap -> grant -> lease-gate ->
|
||||
* route -> adapter -> ACT_ACK without actuation, plus the default-deny and fencing
|
||||
* invariants. The adapter never sees a control name (SISC).
|
||||
*
|
||||
* 1) happy path: CAP_MEMWRITE + a MEMWRITE lease -> CMD_MEMWRITE -> ACT_ACK{ok=1};
|
||||
* 2) extent default-deny: len > VMSIG_MEMWRITE_MAX and a missing SRC flag -> ACK{ok=0};
|
||||
* 3) lease gate: CMD_MEMWRITE WITHOUT an acquired lease -> dropped at the gate (no ACK);
|
||||
* 4) cap gate: a control WITHOUT CAP_MEMWRITE cannot acquire the lease (DENIED{NOCAP});
|
||||
* 5) in-flight fence: A holds the lease, queues a write, B preempts SYNCHRONOUSLY -> A's
|
||||
* queued write is dropped by the fence (no ACK for A's corr), B's write actuates.
|
||||
* In-proc, under ASAN. */
|
||||
#include "vmsig.h"
|
||||
#include "memctx.h" /* VMSIG_MEMWRITE_MAX: the adapter's extent bound (private) */
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Sticky failure flag for the whole test binary: set by CHECK. */
static int g_fail = 0;

/* CHECK(cond, msg): when cond is false, print msg as a FAIL line and latch
 * g_fail. Execution continues either way, so later checks still run. */
#define CHECK(cond, msg)                  \
    do {                                  \
        if (cond) {                       \
            break;                        \
        }                                 \
        printf(" FAIL: %s\n", (msg));     \
        g_fail = 1;                       \
    } while (0)
|
||||
|
||||
/* ---- in-proc control: records lease replies + MEMWRITE ACKs ---- */
|
||||
/* Observations recorded by the in-proc control callback, plus the loop stop
 * thresholds the tests arm before running the core. */
typedef struct {
    void*    core;             /* the vmsig_core whose loop the callback stops */
    int      granted;          /* LEASE_GRANTED replies seen */
    int      denied;           /* LEASE_DENIED replies seen */
    int      last_deny_reason; /* reason carried by the most recent LEASE_DENIED */
    int      ack_ok[64];       /* ok flag per ACK in arrival order */
    uint32_t ack_corr[64];     /* corr per ACK */
    int      nack;             /* number of ACKs recorded (capped at 64) */
    int      stop_replies;     /* stop the loop after N lease replies (0=off) */
    int      replies;          /* lease replies seen so far */
    int      stop_acks;        /* stop the loop after N acks (0=off) */
} cstate;

/* Heap-allocated user-data cell handed to the control; points at the state. */
typedef struct { cstate* s; } cref;

/* Registry of every cref handed out, so cref_free_all can release them. */
static cref* g_refs[16];
static int g_nrefs = 0;

/* Allocates a cref bound to `s` and records it in the registry.
 * Allocation failure aborts the test binary with a diagnostic rather than
 * dereferencing NULL. If the registry is already full the cref is still
 * returned but is not tracked, so cref_free_all will not release it. */
static cref* cref_new(cstate* s) {
    const int capacity = (int)(sizeof g_refs / sizeof g_refs[0]);

    cref* r = calloc(1, sizeof *r);
    if (r == NULL) {
        perror("calloc");
        abort();
    }
    r->s = s;

    if (g_nrefs < capacity) {
        g_refs[g_nrefs++] = r;
    }
    return r;
}

/* Frees every tracked cref and empties the registry. */
static void cref_free_all(void) {
    for (int i = 0; i < g_nrefs; i++) {
        free(g_refs[i]);
        g_refs[i] = NULL;
    }
    g_nrefs = 0;
}
|
||||
|
||||
static int on_ev(void* user, const vmsig_event* ev) {
|
||||
cref* r = user; cstate* s = r->s;
|
||||
switch (ev->kind) {
|
||||
case VMSIG_EV_LEASE_GRANTED: s->granted++; s->replies++; break;
|
||||
case VMSIG_EV_LEASE_DENIED:
|
||||
s->denied++;
|
||||
s->last_deny_reason = (int)((const vmsig_lease_req*)ev->inln)->reason;
|
||||
s->replies++;
|
||||
break;
|
||||
case VMSIG_EV_ACT_ACK:
|
||||
if (s->nack < 64) {
|
||||
/* inln layout from mc_memwrite_ack: {int ok; uint32_t corr; uint32_t origin}. */
|
||||
int ok; memcpy(&ok, ev->inln, sizeof ok);
|
||||
s->ack_ok[s->nack] = ok;
|
||||
s->ack_corr[s->nack] = ev->corr;
|
||||
s->nack++;
|
||||
}
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
if (s->stop_replies && s->replies >= s->stop_replies) vmsig_core_stop(s->core);
|
||||
if (s->stop_acks && s->nack >= s->stop_acks) vmsig_core_stop(s->core);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Registers an in-proc control on endpoint 0 whose events are recorded into
 * `s` through on_ev. The subscription covers every source from
 * VMSIG_PRIO_BULK up; the grant allows all sources, the given capabilities
 * plus VMSIG_CAP_OBSERVE, and the given arbitration priority.
 * Returns the control handle. */
static void* add_ctl(vmsig_core* core, cstate* s, uint32_t cap, uint32_t arb_prio) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.on_event = on_ev;
    cfg.user = cref_new(s);
    cfg.sub.source_mask = 0xFFFFFFFFu;
    cfg.sub.prio_min = VMSIG_PRIO_BULK;
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = 1ull << 0;
    grant.source_mask = 0xFFFFFFFFu;
    grant.cap_mask = cap | VMSIG_CAP_OBSERVE;
    grant.arb_prio = arb_prio;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
    return ctl;
}
|
||||
|
||||
/* ---- DOWN send helpers ---- */
|
||||
static int acquire_mw(void* ctl) {
|
||||
vmsig_event d; memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_ACQUIRE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH;
|
||||
vmsig_lease_req lr = { VMSIG_LEASE_MEMWRITE, 0 };
|
||||
memcpy(d.inln, &lr, sizeof lr);
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
|
||||
/* CMD_MEMWRITE with inline SRC; corr for tracking. flags: VMSIG_MW_SRC_* (0 => no SRC). */
|
||||
static int send_write(void* ctl, uint64_t gva, uint32_t len, uint32_t flags,
|
||||
const void* src, uint32_t corr) {
|
||||
vmsig_event d; memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_MEMWRITE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH; d.corr = corr;
|
||||
vmsig_memwrite mw = { gva, len, flags };
|
||||
memcpy(d.inln, &mw, sizeof mw);
|
||||
if ((flags & VMSIG_MW_SRC_INLINE) && src && len <= VMSIG_MEMWRITE_INLINE)
|
||||
memcpy(d.inln + sizeof mw, src, len);
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
|
||||
/* Run the loop until N acks (used after queuing actuated writes). */
|
||||
static void run_until_acks(cstate* s, int n) {
|
||||
vmsig_core* c = (vmsig_core*)s->core;
|
||||
s->stop_acks = n; s->stop_replies = 0;
|
||||
vmsig_core_run(c);
|
||||
s->stop_acks = 0;
|
||||
}
|
||||
|
||||
/* ---- 1+2+3: happy path, extent default-deny, lease gate -------------------- */
|
||||
static void test_path_and_deny(void) {
|
||||
printf("test_path_and_deny\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
cstate s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10);
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||
|
||||
/* 3) lease gate: without ACQUIRE the write is dropped at the gate (-1, no actuation). */
|
||||
uint8_t pat[8] = { 0xDE, 0xAD, 0xBE, 0xEF, 1, 2, 3, 4 };
|
||||
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 99) == -1,
|
||||
"3: CMD_MEMWRITE without a lease is dropped by the gate");
|
||||
|
||||
/* acquire the MEMWRITE lease (synchronous intercept; UP reply paced by ctx). */
|
||||
CHECK(acquire_mw(A) == 0, "acquire submitted");
|
||||
|
||||
/* 1) happy path: inline write -> queued -> ACT_ACK{ok=1}. Also drains the GRANTED reply. */
|
||||
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 11) == 0,
|
||||
"1: owner's CMD_MEMWRITE passes the gate");
|
||||
|
||||
/* 2) extent: len > MAX -> ACK{ok=0}, NOT actuated (queued ack on the loop thread). */
|
||||
CHECK(send_write(A, 0x2000, VMSIG_MEMWRITE_MAX + 1, VMSIG_MW_SRC_INLINE, pat, 22) == 0,
|
||||
"2: over-extent write is accepted by the gate (denied inside the adapter)");
|
||||
/* 2b) missing SRC flag -> ACK{ok=0}. */
|
||||
CHECK(send_write(A, 0x3000, 4, 0u, NULL, 33) == 0,
|
||||
"2b: no-SRC-flag write is accepted by the gate (denied inside the adapter)");
|
||||
|
||||
/* expect 3 ACKs (corr 11/22/33) + the GRANTED reply. */
|
||||
run_until_acks(&s, 3);
|
||||
|
||||
CHECK(s.granted == 1, "lease GRANTED once");
|
||||
int saw11_ok = -1, saw22_ok = -1, saw33_ok = -1, saw99 = 0;
|
||||
for (int i = 0; i < s.nack; i++) {
|
||||
if (s.ack_corr[i] == 11) saw11_ok = s.ack_ok[i];
|
||||
if (s.ack_corr[i] == 22) saw22_ok = s.ack_ok[i];
|
||||
if (s.ack_corr[i] == 33) saw33_ok = s.ack_ok[i];
|
||||
if (s.ack_corr[i] == 99) saw99 = 1;
|
||||
}
|
||||
CHECK(saw11_ok == 1, "1: happy-path write ACKs ok=1 (stub)");
|
||||
CHECK(saw22_ok == 0, "2: over-extent write ACKs ok=0 (default-deny)");
|
||||
CHECK(saw33_ok == 0, "2b: no-SRC-flag write ACKs ok=0 (default-deny)");
|
||||
CHECK(!saw99, "3: the gate-dropped write produced no ACK");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ---- 4: cap gate — no CAP_MEMWRITE cannot acquire the lease ----------------- */
|
||||
static void test_cap_gate(void) {
|
||||
printf("test_cap_gate\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
cstate s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* NC = add_ctl(core, &s, 0u /* no MEMWRITE */, 10);
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||
|
||||
CHECK(acquire_mw(NC) == 0, "acquire submitted");
|
||||
s.stop_replies = 1; vmsig_core_run(core); s.stop_replies = 0;
|
||||
|
||||
CHECK(s.denied == 1, "4: acquire without CAP_MEMWRITE -> DENIED");
|
||||
CHECK(s.last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "4: reason=NOCAP");
|
||||
CHECK(s.granted == 0, "4: not granted");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
/* ---- 5: in-flight fence — losing the lease before pump_down drops the write -- */
|
||||
static void test_inflight_fence(void) {
|
||||
printf("test_inflight_fence\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
cstate s; memset(&s, 0, sizeof s); s.core = core;
|
||||
|
||||
void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10);
|
||||
void* B = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 100); /* higher prio: preempts */
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||
|
||||
uint8_t pat[4] = { 1, 2, 3, 4 };
|
||||
CHECK(acquire_mw(A) == 0, "A acquires");
|
||||
/* A queues a write (corr=55): passes the gate (A owns), lands in the DOWN queue. */
|
||||
CHECK(send_write(A, 0x1000, 4, VMSIG_MW_SRC_INLINE, pat, 55) == 0, "A queues write 55");
|
||||
/* B preempts SYNCHRONOUSLY (acquire does not go through ctx). */
|
||||
CHECK(acquire_mw(B) == 0, "B preempts");
|
||||
/* B's own write (corr=66) — should actuate. */
|
||||
CHECK(send_write(B, 0x2000, 4, VMSIG_MW_SRC_INLINE, pat, 66) == 0, "B queues write 66");
|
||||
|
||||
run_until_acks(&s, 1); /* B's 66 acks; A's 55 must be fenced (no ack) */
|
||||
|
||||
int saw55 = 0, saw66 = 0;
|
||||
for (int i = 0; i < s.nack; i++) {
|
||||
if (s.ack_corr[i] == 55) saw55 = 1;
|
||||
if (s.ack_corr[i] == 66) saw66 = 1;
|
||||
}
|
||||
CHECK(!saw55, "5: ex-owner A's in-flight write is dropped by the fence");
|
||||
CHECK(saw66, "5: new owner B's write actuates after preemption");
|
||||
CHECK(s.granted == 2, "5: A and B each got GRANTED");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
printf("test_memwrite\n");
|
||||
test_path_and_deny();
|
||||
test_cap_gate();
|
||||
test_inflight_fence();
|
||||
cref_free_all();
|
||||
printf("memwrite tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/* test_mvm.c — mode A (single core, multiple VMs): per-endpoint multiplexing and
|
||||
* per-VM grant scoping on UP delivery. Two vmhost endpoints in one core (each stub
|
||||
* ticks VM_LIFECYCLE per endpoint); a poller granted only VM0 must see only ep0
|
||||
* lifecycle events, the VM1 poller — only ep1. */
|
||||
#include "vmsig.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Sticky failure flag: latched by CHECK, turned into the exit code by main. */
static int g_fail = 0;

/* CHECK(cond, msg): when `cond` is false, print a FAIL line carrying `msg` and
 * latch g_fail. It never aborts, so the remaining checks still run. */
#define CHECK(cond, msg)                        \
    do {                                        \
        if (!(cond)) {                          \
            printf(" FAIL: %s\n", (msg));       \
            g_fail = 1;                         \
        }                                       \
    } while (0)
|
||||
|
||||
typedef struct { vmsig_core* core; int seen[2][2]; } mvm; /* seen[control][endpoint] */
|
||||
typedef struct { mvm* d; int which; } ctlref;
|
||||
|
||||
static int on_ev(void* user, const vmsig_event* ev) {
|
||||
ctlref* r = user; mvm* d = r->d;
|
||||
if (ev->kind == VMSIG_EV_VM_LIFECYCLE && ev->endpoint < 2)
|
||||
d->seen[r->which][ev->endpoint]++;
|
||||
if (d->seen[0][0] >= 2 && d->seen[1][1] >= 2) vmsig_core_stop(d->core);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
mvm d; memset(&d, 0, sizeof d); d.core = core;
|
||||
ctlref r0 = { &d, 0 }, r1 = { &d, 1 };
|
||||
|
||||
vmsig_inproc_cfg c0; memset(&c0, 0, sizeof c0); c0.on_event = on_ev; c0.user = &r0;
|
||||
vmsig_inproc_cfg c1; memset(&c1, 0, sizeof c1); c1.on_event = on_ev; c1.user = &r1;
|
||||
void* ctl0 = vmsig_inproc_control_new(&c0);
|
||||
void* ctl1 = vmsig_inproc_control_new(&c1);
|
||||
|
||||
/* grants segregate the pollers per VM */
|
||||
vmsig_grant g0; memset(&g0, 0, sizeof g0);
|
||||
g0.endpoint_mask = 1ull << 0; g0.source_mask = 0xFFFFFFFFu; g0.cap_mask = VMSIG_CAP_OBSERVE;
|
||||
vmsig_grant g1; memset(&g1, 0, sizeof g1);
|
||||
g1.endpoint_mask = 1ull << 1; g1.source_mask = 0xFFFFFFFFu; g1.cap_mask = VMSIG_CAP_OBSERVE;
|
||||
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl0, &g0);
|
||||
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl1, &g1);
|
||||
|
||||
/* two VMs in one core: a vmhost adapter per endpoint (stub ticks VM_LIFECYCLE) */
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "VM0 adapter");
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 1) >= 0, "VM1 adapter");
|
||||
|
||||
int rc = vmsig_core_run(core);
|
||||
printf("test_mvm rc=%d c0[ep0=%d ep1=%d] c1[ep0=%d ep1=%d]\n",
|
||||
rc, d.seen[0][0], d.seen[0][1], d.seen[1][0], d.seen[1][1]);
|
||||
|
||||
CHECK(d.seen[0][0] >= 2, "control0 sees lifecycle of its own VM0");
|
||||
CHECK(d.seen[0][1] == 0, "control0 does NOT see VM1 (grant scoping)");
|
||||
CHECK(d.seen[1][1] >= 2, "control1 sees lifecycle of its own VM1");
|
||||
CHECK(d.seen[1][0] == 0, "control1 does NOT see VM0");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
printf("multi-vm tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
/* test_sec.c — security layer: grant enforcement on DOWN commands.
|
||||
* Checks capability split (OBSERVE != INPUT != POWER != VM), source_mask
|
||||
* on DOWN, destructive vs safe lifecycle/VM, foreign endpoint, default-deny.
|
||||
* (Memory is no longer a DOWN command: the address-space context is multicast
|
||||
* upward and gated by CAP_MEMCTX — see test_authz/test_memctx; here only DOWN
|
||||
* actuation.) vmsig_inproc_send returns the result of core_emit_down (the grant of
|
||||
* THIS specific control) — no need to run the loop. */
|
||||
#include "vmsig.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Sticky failure flag: latched by CHECK, turned into the exit code by main. */
static int g_fail = 0;

/* CHECK(cond, msg): when `cond` is false, print a FAIL line carrying `msg` and
 * latch g_fail. It never aborts, so the remaining checks still run. */
#define CHECK(cond, msg)                        \
    do {                                        \
        if (!(cond)) {                          \
            printf(" FAIL: %s\n", (msg));       \
            g_fail = 1;                         \
        }                                       \
    } while (0)
|
||||
|
||||
static int g_denied = 0;
|
||||
static void audit_cb(void* ud, const vmsig_audit* a) {
|
||||
(void)ud;
|
||||
if (a->kind == VMSIG_AUDIT_DOWN_DENIED) g_denied++;
|
||||
}
|
||||
|
||||
/* DOWN command of kind on endpoint ep; source derived from kind */
|
||||
static int send(void* ctl, vmsig_kind kind, uint32_t ep) {
|
||||
vmsig_event d;
|
||||
memset(&d, 0, sizeof d);
|
||||
d.kind = kind; d.dir = VMSIG_DIR_DOWN; d.endpoint = ep; d.prio = VMSIG_PRIO_NORMAL;
|
||||
d.source = (kind == VMSIG_EV_CMD_INPUT || kind == VMSIG_EV_CMD_LIFECYCLE) ? VMSIG_SRC_INPUT
|
||||
: VMSIG_SRC_VMHOST;
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
/* CMD_LIFECYCLE with a specific operation (code in inln[0]) */
|
||||
static int send_life(void* ctl, int op, uint32_t ep) {
|
||||
vmsig_event d;
|
||||
memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_LIFECYCLE; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = ep; d.prio = VMSIG_PRIO_NORMAL; d.inln[0] = (uint8_t)op;
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
/* Acquire a lease of class cls on ep (destructive/input now requires a lease). */
|
||||
static int acq(void* ctl, uint32_t cls, uint32_t ep) {
|
||||
vmsig_event d;
|
||||
memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_ACQUIRE; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = ep; d.prio = VMSIG_PRIO_HIGH;
|
||||
vmsig_lease_req lr = { cls, 0 };
|
||||
memcpy(d.inln, &lr, sizeof lr);
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
|
||||
/* CMD_VM with an operation (vmsig_vm_cmd in inln) */
|
||||
static int send_vm(void* ctl, int op, uint32_t ep) {
|
||||
vmsig_event d;
|
||||
memset(&d, 0, sizeof d);
|
||||
d.kind = VMSIG_EV_CMD_VM; d.source = VMSIG_SRC_VMHOST; d.dir = VMSIG_DIR_DOWN;
|
||||
d.endpoint = ep; d.prio = VMSIG_PRIO_NORMAL;
|
||||
vmsig_vm_cmd c = { (uint32_t)op };
|
||||
memcpy(d.inln, &c, sizeof c);
|
||||
return vmsig_inproc_send(ctl, &d);
|
||||
}
|
||||
|
||||
/* Create an in-process control with an all-zero config (on_event=NULL, sub=0:
 * no UP needed) and register it with `core` under a grant limited to endpoint 0
 * with the given capability and source masks. Returns the control handle. */
static void* add_ctl(vmsig_core* core, uint32_t cap, uint32_t source_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = 1ull << 0;
    grant.source_mask   = source_mask;
    grant.cap_mask      = cap;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
    return ctl;
}
|
||||
|
||||
int main(void) {
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||
|
||||
void* A = add_ctl(core, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu); /* screen observer */
|
||||
void* B = add_ctl(core, VMSIG_CAP_INPUT | VMSIG_CAP_LIFECYCLE, 0xFFFFFFFFu);/* input + safe lifecycle */
|
||||
void* P = add_ctl(core, VMSIG_CAP_POWER, 0xFFFFFFFFu); /* destructive power */
|
||||
void* S = add_ctl(core, VMSIG_CAP_INPUT, 1u << VMSIG_SRC_FRAME); /* INPUT, but source=FRAME */
|
||||
void* V = add_ctl(core, VMSIG_CAP_VM, 0xFFFFFFFFu); /* VM control (safe) */
|
||||
void* C = vmsig_inproc_control_new(&(vmsig_inproc_cfg){0}); /* default-deny */
|
||||
vmsig_core_add_control(core, vmsig_inproc_control_ops(), C, NULL);
|
||||
|
||||
printf("test_security\n");
|
||||
/* A — screen observer: does NOT actuate input/lifecycle (split CAP) */
|
||||
CHECK(send(A, VMSIG_EV_CMD_INPUT, 0) == -1, "OBSERVE != input"); /* deny 1 */
|
||||
CHECK(send_life(A, VMSIG_LIFE_PAUSE, 0) == -1, "OBSERVE != lifecycle"); /* deny 2 */
|
||||
|
||||
/* B — input + SAFE lifecycle, but NOT destructive power. Destructive/input
|
||||
* now passes ONLY while holding a class lease => ACQUIRE first. */
|
||||
acq(B, VMSIG_LEASE_INPUT, 0);
|
||||
CHECK(send(B, VMSIG_EV_CMD_INPUT, 0) == 0, "INPUT => input allowed");
|
||||
CHECK(send_life(B, VMSIG_LIFE_PAUSE, 0) == 0, "LIFECYCLE => pause allowed");
|
||||
CHECK(send_life(B, VMSIG_LIFE_POWERDOWN, 0) == -1,"powerdown requires CAP_POWER"); /* deny 3 */
|
||||
|
||||
/* P — destructive power (with a POWER class lease) */
|
||||
acq(P, VMSIG_LEASE_POWER, 0);
|
||||
CHECK(send_life(P, VMSIG_LIFE_POWERDOWN, 0) == 0, "POWER => powerdown allowed");
|
||||
|
||||
/* S — has INPUT, but source_mask lacks SRC_INPUT: DOWN input denied */
|
||||
CHECK(send(S, VMSIG_EV_CMD_INPUT, 0) == -1, "source_mask on DOWN: SRC_INPUT denied"); /* deny 4 */
|
||||
|
||||
/* V — VM control: safe ops yes, destructive ones require CAP_POWER */
|
||||
CHECK(send_vm(V, VMSIG_VMOP_CONT, 0) == 0, "CAP_VM => cont allowed");
|
||||
CHECK(send_vm(V, VMSIG_VMOP_POWERDOWN, 0) == -1, "VM powerdown requires CAP_POWER"); /* deny 5 */
|
||||
CHECK(send_vm(P, VMSIG_VMOP_POWERDOWN, 0) == 0, "CAP_POWER => VM powerdown allowed");
|
||||
|
||||
/* C — default-deny */
|
||||
CHECK(send_vm(C, VMSIG_VMOP_QUERY, 0) == -1, "default-deny is deaf"); /* deny 6 */
|
||||
|
||||
/* audit recorded all 6 DOWN denials */
|
||||
CHECK(g_denied == 6, "audit: all DOWN denials recorded");
|
||||
|
||||
vmsig_core_free(core); /* closes/frees all controls */
|
||||
vmsig_ctx_free(ctx);
|
||||
|
||||
printf("security tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
/* test_sock.c — out-of-process control: wire codec + authentication/admission.
|
||||
* Bring up two listeners (one admitting, one rejecting) on abstract sockets, run
|
||||
* the core in a separate thread, connect clients and check: policy invoked,
|
||||
* valid poller admitted, unauthorized rejected (EOF), reap without a crash. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig.h"
|
||||
#include "vmsig_socket.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <unistd.h>
|
||||
#include <stddef.h>
|
||||
#include <time.h>
|
||||
|
||||
/* Sticky failure flag: latched by CHECK, turned into the exit code by main. */
static int g_fail = 0;

/* CHECK(cond, msg): when `cond` is false, print a FAIL line carrying `msg` and
 * latch g_fail. It never aborts, so the remaining checks still run. */
#define CHECK(cond, msg)                        \
    do {                                        \
        if (!(cond)) {                          \
            printf(" FAIL: %s\n", (msg));       \
            g_fail = 1;                         \
        }                                       \
    } while (0)
|
||||
|
||||
static atomic_int g_auth = 0;
|
||||
static atomic_int g_deny = 0;
|
||||
static atomic_int g_admit = 0;
|
||||
static atomic_int g_reject = 0;
|
||||
|
||||
static void audit_cb(void* ud, const vmsig_audit* a) {
|
||||
(void)ud;
|
||||
if (a->kind == VMSIG_AUDIT_ADMIT) atomic_fetch_add(&g_admit, 1);
|
||||
else if (a->kind == VMSIG_AUDIT_REJECT) atomic_fetch_add(&g_reject, 1);
|
||||
}
|
||||
|
||||
static vmsig_grant pol_ok(uint32_t uid, uint32_t pid, void* ud) {
|
||||
(void)pid; (void)ud;
|
||||
atomic_fetch_add(&g_auth, 1);
|
||||
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||
g.principal = uid; g.endpoint_mask = 1u << 0;
|
||||
g.source_mask = 0xFFFFFFFFu; g.cap_mask = VMSIG_CAP_OBSERVE;
|
||||
return g;
|
||||
}
|
||||
static vmsig_grant pol_deny(uint32_t uid, uint32_t pid, void* ud) {
|
||||
(void)uid; (void)pid; (void)ud;
|
||||
atomic_fetch_add(&g_deny, 1);
|
||||
vmsig_grant g; memset(&g, 0, sizeof g); /* empty => reject */
|
||||
return g;
|
||||
}
|
||||
|
||||
/* Connect a stream socket to a unix address in the abstract namespace.
 *
 * `name` follows the "@label" convention used by this test: its first character is
 * only a placeholder and is replaced by the leading NUL byte of an abstract
 * address; the remaining characters form the label. The address length covers
 * exactly those strlen(name) bytes (no trailing NUL).
 *
 * Returns the connected fd, or -1 when `name` is NULL or empty, when it does not
 * fit into sun_path, or when socket()/connect() fails. */
static int connect_abstract(const char* name) {
    struct sockaddr_un a;

    /* Validate before touching sun_path: an empty name would make n - 1 wrap
     * around, and an oversized one would overrun the address buffer. */
    size_t n = name ? strlen(name) : 0;
    if (n == 0 || n > sizeof a.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0;                          /* abstract-namespace marker */
    memcpy(a.sun_path + 1, name + 1, n - 1);    /* label without the placeholder */

    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    if (connect(fd, (struct sockaddr*)&a, alen) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
|
||||
|
||||
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
|
||||
|
||||
/* Poll `*a` until it reaches at least `want`, sleeping 1 ms between polls, for at
 * most `ms` polls. Returns silently on timeout; the caller re-checks the value. */
static void wait_atomic(atomic_int* a, int want, int ms) {
    const struct timespec tick = { .tv_sec = 0, .tv_nsec = 1000000 };

    for (int left = ms; left > 0; left--) {
        if (atomic_load(a) >= want) {
            return;
        }
        nanosleep(&tick, NULL);
    }
}
|
||||
|
||||
static void test_wire(void) {
|
||||
printf("test_wire\n");
|
||||
vmsig_event ev; memset(&ev, 0, sizeof ev);
|
||||
ev.kind = VMSIG_EV_CMD_VM; ev.source = VMSIG_SRC_VMHOST; ev.dir = VMSIG_DIR_DOWN;
|
||||
ev.prio = VMSIG_PRIO_HIGH; ev.endpoint = 0; ev.corr = 0xABCD;
|
||||
for (int i = 0; i < 48; i++) ev.inln[i] = (uint8_t)i;
|
||||
|
||||
vmsig_wire w; vmsig_wire_encode(&w, &ev);
|
||||
vmsig_event d;
|
||||
CHECK(vmsig_wire_decode(&w, &d) == 0, "decode ok");
|
||||
CHECK(d.kind == ev.kind && d.source == ev.source &&
|
||||
d.endpoint == ev.endpoint && d.corr == ev.corr, "frame fields match");
|
||||
CHECK(memcmp(d.inln, ev.inln, 48) == 0, "inln matches");
|
||||
|
||||
vmsig_wire bad = w; bad.magic = 0; vmsig_event x;
|
||||
CHECK(vmsig_wire_decode(&bad, &x) == -1, "bad magic rejected");
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
test_wire();
|
||||
|
||||
printf("test_socket\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||
const char* OK = "@vmsig-sock-ok-test";
|
||||
const char* DENY = "@vmsig-sock-deny-test";
|
||||
CHECK(vmsig_socket_attach(core, OK, pol_ok, NULL) == 0, "attach ok listener");
|
||||
CHECK(vmsig_socket_attach(core, DENY, pol_deny, NULL) == 0, "attach deny listener");
|
||||
|
||||
pthread_t th;
|
||||
pthread_create(&th, NULL, loop_main, core);
|
||||
|
||||
/* valid poller: connect -> policy -> admission */
|
||||
int c1 = connect_abstract(OK);
|
||||
CHECK(c1 >= 0, "client connected (ok)");
|
||||
wait_atomic(&g_auth, 1, 1000);
|
||||
CHECK(atomic_load(&g_auth) >= 1, "policy invoked — poller authenticated/admitted");
|
||||
if (c1 >= 0) close(c1); /* disconnect -> deferred reap (no crash) */
|
||||
|
||||
/* unauthorized: connect -> server closes -> EOF on the client */
|
||||
int c2 = connect_abstract(DENY);
|
||||
CHECK(c2 >= 0, "client connected (deny)");
|
||||
wait_atomic(&g_deny, 1, 1000);
|
||||
CHECK(atomic_load(&g_deny) >= 1, "deny policy invoked");
|
||||
if (c2 >= 0) {
|
||||
struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
|
||||
setsockopt(c2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
|
||||
char b; ssize_t r = read(c2, &b, 1);
|
||||
CHECK(r == 0, "connection rejected by server (EOF)");
|
||||
close(c2);
|
||||
}
|
||||
|
||||
/* slot reuse: churn > MAX_CONTROLS(64). Without returning slots the listener
|
||||
* would die after 64 cycles. Each cycle: connect(ok) -> wait auth++ -> close. */
|
||||
int base = atomic_load(&g_auth);
|
||||
const int churn = 70;
|
||||
for (int k = 0; k < churn; k++) {
|
||||
int fc = connect_abstract(OK);
|
||||
if (fc < 0) { CHECK(0, "churn connect"); break; }
|
||||
wait_atomic(&g_auth, base + k + 1, 1000);
|
||||
close(fc);
|
||||
struct timespec ts = { .tv_sec = 0, .tv_nsec = 2 * 1000000 };
|
||||
nanosleep(&ts, NULL); /* let the loop reap before the next connection */
|
||||
}
|
||||
CHECK(atomic_load(&g_auth) >= base + churn,
|
||||
"slots reused: churn > MAX_CONTROLS admitted");
|
||||
|
||||
/* audit recorded admissions and rejections */
|
||||
CHECK(atomic_load(&g_admit) >= 1, "audit: poller admission");
|
||||
CHECK(atomic_load(&g_reject) >= 1, "audit: rejection (deny listener)");
|
||||
|
||||
struct timespec t = { .tv_sec = 0, .tv_nsec = 50 * 1000000 };
|
||||
nanosleep(&t, NULL); /* let the loop process the reaps */
|
||||
vmsig_core_stop(core);
|
||||
pthread_join(th, NULL);
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
|
||||
printf("socket tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
@@ -0,0 +1,146 @@
|
||||
/* test_vmhost.c — QEMU/QMP host-plane, armed path: fake QMP server (this test)
|
||||
* <-> real QMP client vmhost. We verify: handshake (greeting -> qmp_capabilities
|
||||
* -> return -> SEAM_UP), async events -> VM_LIFECYCLE (broadcast), CMD_VM{QUERY}
|
||||
* -> command to server -> return -> addressed VM_LIFECYCLE to the initiator, EOF -> SEAM_DOWN. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig.h"
|
||||
#include "vmhost.h" /* private cfg (CMake provides the include path) */
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <unistd.h>
|
||||
#include <stddef.h>
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
|
||||
/* Sticky failure flag: latched by CHECK, turned into the exit code by main. */
static int g_fail = 0;

/* CHECK(cond, msg): when `cond` is false, print a FAIL line carrying `msg` and
 * latch g_fail. It never aborts, so the remaining checks still run. */
#define CHECK(cond, msg)                        \
    do {                                        \
        if (!(cond)) {                          \
            printf(" FAIL: %s\n", (msg));       \
            g_fail = 1;                         \
        }                                       \
    } while (0)
|
||||
|
||||
static atomic_int g_seamup = 0, g_seamdown = 0;
|
||||
static atomic_int g_paused = 0, g_running_bcast = 0, g_query_reply = 0;
|
||||
static void* g_ctl = NULL;
|
||||
|
||||
static int on_ev(void* user, const vmsig_event* ev) {
|
||||
(void)user;
|
||||
if (ev->kind == VMSIG_EV_SEAM_UP && ev->source == VMSIG_SRC_VMHOST) {
|
||||
atomic_store(&g_seamup, 1);
|
||||
vmsig_event d; memset(&d, 0, sizeof d); /* once ready — query status */
|
||||
d.kind = VMSIG_EV_CMD_VM; d.source = VMSIG_SRC_VMHOST; d.dir = VMSIG_DIR_DOWN;
|
||||
d.prio = VMSIG_PRIO_NORMAL; d.endpoint = 0; d.corr = 0x55;
|
||||
vmsig_vm_cmd c = { VMSIG_VMOP_QUERY }; memcpy(d.inln, &c, sizeof c);
|
||||
vmsig_inproc_send(g_ctl, &d);
|
||||
} else if (ev->kind == VMSIG_EV_SEAM_DOWN && ev->source == VMSIG_SRC_VMHOST) {
|
||||
atomic_store(&g_seamdown, 1);
|
||||
} else if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
|
||||
vmsig_vm_state vs; memcpy(&vs, ev->inln, sizeof vs);
|
||||
if (ev->origin) { /* addressed reply to our QUERY */
|
||||
if (vs.state == VMSIG_VM_RUNNING) atomic_store(&g_query_reply, 1);
|
||||
} else { /* broadcast async event */
|
||||
if (vs.state == VMSIG_VM_PAUSED) atomic_store(&g_paused, 1);
|
||||
if (vs.state == VMSIG_VM_RUNNING) atomic_store(&g_running_bcast, 1);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
|
||||
|
||||
/* Create a listening stream socket bound to a unix address in the abstract
 * namespace (backlog 4).
 *
 * `name` follows the "@label" convention used by this test: its first character is
 * only a placeholder and is replaced by the leading NUL byte of an abstract
 * address; the remaining characters form the label. The address length covers
 * exactly those strlen(name) bytes (no trailing NUL).
 *
 * Returns the listening fd, or -1 when `name` is NULL or empty, when it does not
 * fit into sun_path, or when socket()/bind()/listen() fails. */
static int srv_listen(const char* name) {
    struct sockaddr_un a;

    /* Validate before touching sun_path: an empty name would make n - 1 wrap
     * around, and an oversized one would overrun the address buffer. */
    size_t n = name ? strlen(name) : 0;
    if (n == 0 || n > sizeof a.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0;                          /* abstract-namespace marker */
    memcpy(a.sun_path + 1, name + 1, n - 1);    /* label without the placeholder */

    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    if (bind(fd, (struct sockaddr*)&a, alen) < 0 || listen(fd, 4) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
|
||||
/* Write the NUL-terminated string `s` (without its terminator) to `fd` with a
 * single write(); the result is deliberately ignored (best-effort test helper). */
static void srv_send(int fd, const char* s) {
    const size_t len = strlen(s);
    ssize_t written = write(fd, s, len);
    (void)written;
}
|
||||
/* Read from `fd` until the accumulated text contains `needle`.
 *
 * Makes at most 200 read attempts; a failed read (e.g. a receive timeout on the
 * socket) is followed by a 10 ms sleep before the next attempt.
 * When the 1 KiB buffer fills up, only the last strlen(needle) - 1 bytes are kept,
 * so a needle that straddles the buffer boundary is still found.
 *
 * Returns 1 when the needle was seen, 0 on EOF or when the attempts run out. */
static int srv_expect(int fd, const char* needle) {
    char buf[1024];
    const size_t cap = sizeof buf - 1;          /* one byte reserved for the NUL */
    const size_t nlen = strlen(needle);
    size_t len = 0;

    for (int i = 0; i < 200; i++) {             /* up to ~2s */
        ssize_t r = read(fd, buf + len, cap - len);
        if (r > 0) {
            len += (size_t)r;
            buf[len] = 0;
            if (strstr(buf, needle)) return 1;
        } else if (r == 0) {
            return 0;                           /* peer closed */
        } else {
            struct timespec t = { 0, 10 * 1000000 };
            nanosleep(&t, NULL);
        }

        if (len >= cap) {
            /* Buffer full without a match: keep just enough of the tail to
             * complete a needle that started in this window. A needle that can
             * never fit keeps nothing, so the next read always has room. */
            size_t keep = nlen > 0 ? nlen - 1 : 0;
            if (keep >= cap) keep = 0;
            memmove(buf, buf + len - keep, keep);
            len = keep;
        }
    }
    return 0;
}
|
||||
/* Poll `*a` until it becomes non-zero, sleeping 1 ms between polls, for at most
 * `ms` polls. Returns silently on timeout; the caller re-checks the flag. */
static void wait_atomic(atomic_int* a, int ms) {
    const struct timespec tick = { 0, 1000000 };

    for (int left = ms; left > 0; left--) {
        if (atomic_load(a) != 0) {
            return;
        }
        nanosleep(&tick, NULL);
    }
}
|
||||
|
||||
int main(void) {
|
||||
const char* QMP = "@vmsig-qmp-fake-test";
|
||||
int srv = srv_listen(QMP);
|
||||
if (srv < 0) { printf("srv_listen failed\n"); return 1; }
|
||||
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
vmsig_inproc_cfg cc; memset(&cc, 0, sizeof cc); cc.on_event = on_ev;
|
||||
void* ctl = vmsig_inproc_control_new(&cc);
|
||||
g_ctl = ctl;
|
||||
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||
g.endpoint_mask = 1ull << 0; g.source_mask = 0xFFFFFFFFu;
|
||||
g.cap_mask = VMSIG_CAP_OBSERVE | VMSIG_CAP_VM;
|
||||
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &g);
|
||||
|
||||
/* armed vmhost: it will connect to our fake QMP */
|
||||
vmsig_vmhost_cfg vcfg; memset(&vcfg, 0, sizeof vcfg); vcfg.qmp_path = QMP;
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), &vcfg, 0) >= 0, "vmhost armed attach");
|
||||
|
||||
pthread_t th; pthread_create(&th, NULL, loop_main, core);
|
||||
|
||||
/* === QMP server role === */
|
||||
int c = accept(srv, NULL, NULL);
|
||||
CHECK(c >= 0, "server accepted vmhost connection");
|
||||
if (c >= 0) {
|
||||
struct timeval tv = { 0, 50 * 1000 };
|
||||
setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
|
||||
|
||||
srv_send(c, "{\"QMP\": {\"version\": {}, \"capabilities\": []}}\r\n");
|
||||
CHECK(srv_expect(c, "qmp_capabilities"), "client sent qmp_capabilities");
|
||||
srv_send(c, "{\"return\": {}}\r\n"); /* -> READY -> SEAM_UP */
|
||||
|
||||
srv_send(c, "{\"event\": \"STOP\"}\r\n"); /* -> broadcast PAUSED */
|
||||
CHECK(srv_expect(c, "query-status"), "client sent query-status (from CMD_VM)");
|
||||
srv_send(c, "{\"return\": {\"status\": \"running\"}, \"id\": 1}\r\n"); /* -> addressed reply */
|
||||
srv_send(c, "{\"event\": \"RESUME\"}\r\n"); /* -> broadcast RUNNING */
|
||||
|
||||
wait_atomic(&g_seamup, 1000);
|
||||
wait_atomic(&g_paused, 1000);
|
||||
wait_atomic(&g_query_reply, 1000);
|
||||
wait_atomic(&g_running_bcast, 1000);
|
||||
|
||||
close(c); /* EOF -> SEAM_DOWN */
|
||||
wait_atomic(&g_seamdown, 1000);
|
||||
}
|
||||
|
||||
CHECK(atomic_load(&g_seamup), "handshake complete (SEAM_UP)");
|
||||
CHECK(atomic_load(&g_paused), "async STOP -> VM_LIFECYCLE PAUSED (broadcast)");
|
||||
CHECK(atomic_load(&g_query_reply), "CMD_VM QUERY -> addressed VM_LIFECYCLE RUNNING");
|
||||
CHECK(atomic_load(&g_running_bcast),"async RESUME -> VM_LIFECYCLE RUNNING (broadcast)");
|
||||
CHECK(atomic_load(&g_seamdown), "EOF QMP -> SEAM_DOWN");
|
||||
|
||||
vmsig_core_stop(core);
|
||||
pthread_join(th, NULL);
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
close(srv);
|
||||
|
||||
printf("vmhost tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
Reference in New Issue
Block a user