mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-26 04:36:37 +03:00
vmsig: a neutral signaling layer between sensors/input and controls
An epoll-driven, neutral transfer-event bus that connects sensors and input actuators to one or more controls, bidirectionally. It owns the transfer context and events — delivery order, priority, protocol-level timing, and an interrupt-driven event model over fd sources (eventfd/timerfd/sockets) — and stays agnostic to both the sensor/input drivers and the control.

What lives here:
- memctx: a coherent address-space context per endpoint — the guest address-space root paired with a pre-opened read-only RAM-region fd, with per-endpoint epoch invalidation and retained replay to late subscribers. Perception lives in out-of-tree sensor libraries that consume this datum read-only.
- exclusive-ownership leases for destructive resource classes (input, power, memory-write).
- write-signaled memory writes (MEMWRITE): an atomic write to guest memory routed through the seam under an exclusive lease, never a writable mapping.
- a host-management seam for VM lifecycle/status and a neutral input-injection command path.
- multi-VM endpoints; capability-gated, audited control authorization over an in-process or unix-socket transport.

Builds against headers only by default (a stub mode that exercises the seam without a VM); armed builds link the real sensor/input libraries behind flags.
This commit is contained in:
@@ -0,0 +1,44 @@
|
||||
#ifndef VMSIG_ADAPTER_UTIL_H
#define VMSIG_ADAPTER_UTIL_H

#include <stddef.h>

/*
 * adapter_util.h — shared "blocking API -> completion eventfd" primitive.
 *
 * Bridges a synchronous, CPU-bound or blocking neighbor call (vmie, vmctl)
 * into a readiness source for the epoll core:
 *   1. the loop thread posts a request;
 *   2. a separate worker thread runs the blocking work and signals a
 *      completion eventfd;
 *   3. the loop wakes on that eventfd and collects the result in its
 *      on_readiness handler.
 * Reused by the memctx (off-loop bootstrap) and input adapters.
 */

/* Opaque worker-pool handle. */
typedef struct vmsig_worker vmsig_worker;

/* Size in bytes of one request/result slot. Payloads are POD and copied. */
#define VMSIG_WORK_SLOT 256

/*
 * Job callback, executed ON the worker thread: computes res from req (both
 * POD, at most VMSIG_WORK_SLOT bytes). Returns 0 / -1; the code is stored
 * alongside the result and handed back by vmsig_worker_poll(). Must not touch
 * core structures — only compute res from req.
 */
typedef int (*vmsig_work_fn)(void* user, const void* req, void* res);

/*
 * Create a pool of nthreads worker threads over one shared request queue
 * (nthreads >= 1). vmie allows parallel read-only readers; use 1 for a serial
 * channel such as QMP.
 * max_depth is the ceiling on queued requests (<= 0 selects the default):
 * a submit beyond it is rejected with -1 so that an untrusted flood cannot
 * grow into OOM. Returns NULL on error.
 */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth);

/* Stop and join the threads, then release the pool. NULL is accepted. */
void vmsig_worker_free(vmsig_worker* w);

/* Completion eventfd; the adapter registers it as a VMSIG_RDY_EVENTFD source. */
int vmsig_worker_evfd(const vmsig_worker* w);

/* Loop thread: post a request (copied; len <= VMSIG_WORK_SLOT). Returns 0 / -1. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len);

/* Loop thread (inside on_readiness): drain the completion eventfd. */
void vmsig_worker_ack(vmsig_worker* w);

/*
 * Loop thread: collect one ready result.
 *   1  — a result was written to res (and *rc receives the callback's code);
 *   0  — nothing pending;
 *  -1  — error.
 * Call in a loop until it returns 0.
 */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc);

#endif /* VMSIG_ADAPTER_UTIL_H */
|
||||
@@ -0,0 +1,18 @@
|
||||
#ifndef VMSIG_INPUT_H
#define VMSIG_INPUT_H

/*
 * Private configuration of the input adapter (vmctl).
 * A NULL cfg selects stub mode. Armed mode (VMSIG_WITH_VMCTL) opens the
 * actuator with vmctl_open() and actuates for real. `driver` is a plain int so
 * that vmctl.h does not leak into this header (values match VMCTL_DRIVER_*).
 */
typedef struct {
    int         stub;      /* non-zero => stub mode */
    int         driver;    /* 0 = QMP, 1 = UINPUT (see VMCTL_DRIVER_*) */
    const char* qmp_path;  /* forwarded to the vmctl configuration */
    const char* input_bus; /* forwarded to the vmctl configuration */
    int         ptr_mode;  /* forwarded to the vmctl configuration */
} vmsig_input_cfg;

/*
 * The input event codes/contract are PUBLIC: vmsig_input / vmsig_input_kind in
 * include/vmsig_event.h (an external control encodes them into inln). There is
 * deliberately no private duplicate here.
 */

#endif /* VMSIG_INPUT_H */
|
||||
@@ -0,0 +1,230 @@
|
||||
/* input.c — input/actuator adapter for vmctl (input + power/lifecycle).
|
||||
*
|
||||
* Mechanism (recommended): vmctl is a blocking QMP round-trip; we run it on a
|
||||
* worker thread, completion ack via a completion-eventfd. The uinput path is a
|
||||
* local instantaneous write; when armed it would be done inline (see comment in submit).
|
||||
* Real actuation is under VMSIG_WITH_VMCTL; otherwise the stub acks (spine without a VM). */
|
||||
#include "vmsig_adapter.h"
|
||||
#include "adapter_util.h"
|
||||
#include "input.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
#ifdef VMSIG_WITH_VMCTL
|
||||
#include "vmctl.h"
|
||||
#endif
|
||||
|
||||
/* Worker request: a plain-old-data record copied into the worker slot. */
typedef struct {
    int      cmd;     /* 0 = input event, 1 = lifecycle */
    uint32_t corr;    /* correlation id, echoed in the result */
    uint32_t origin;  /* initiator (the ACK is addressed to it) */
    int      kind;    /* vmsig_input_kind (meaningful when cmd == 0) */
    int      code;    /* axis / button / evdev code */
    int      value;   /* abs / rel / down */
    double   scroll;  /* scroll amount (VMSIG_INPUT_SCROLL) */
    int      life_op; /* VMSIG_LIFE_* (powerdown/reset/wakeup/pause/resume) */
} input_req;

/* Worker result: outcome plus the addressing echoed from the request. */
typedef struct {
    int      ok;
    uint32_t corr;
    uint32_t origin;
} input_res;
|
||||
|
||||
/* signaling does NOT track held state: the record of what is pressed lives in the
|
||||
* ACTUATOR (vmctl); we hand it to control on request (CMD_QUERY_INPUT), release is control's decision. */
|
||||
struct vmsig_adapter {
|
||||
uint32_t endpoint;
|
||||
int stub;
|
||||
vmsig_emit emit;
|
||||
vmsig_worker* worker;
|
||||
int driver; /* 0=QMP, 1=UINPUT (VMCTL_DRIVER_*); carried open->attach */
|
||||
const char* qmp_path; /* borrowed from cfg (valid through attach) */
|
||||
const char* input_bus;
|
||||
int ptr_mode;
|
||||
#ifdef VMSIG_WITH_VMCTL
|
||||
vmctl_t* vmctl;
|
||||
#endif
|
||||
};
|
||||
|
||||
static int input_job(void* user, const void* reqp, void* resp) {
|
||||
struct vmsig_adapter* a = user;
|
||||
const input_req* rq = reqp;
|
||||
input_res* rs = resp;
|
||||
memset(rs, 0, sizeof *rs);
|
||||
rs->corr = rq->corr;
|
||||
rs->origin = rq->origin;
|
||||
#ifdef VMSIG_WITH_VMCTL
|
||||
if (a->vmctl) {
|
||||
int r = -1;
|
||||
if (rq->cmd == 0) {
|
||||
vmctl_batch b; vmctl_batch_init(&b);
|
||||
switch (rq->kind) {
|
||||
case VMSIG_INPUT_ABS: vmctl_batch_abs(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_REL: vmctl_batch_rel(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_BTN: vmctl_batch_btn(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_KEY: vmctl_batch_key(&b, rq->code, rq->value); break;
|
||||
case VMSIG_INPUT_SCROLL: vmctl_batch_scroll(&b, rq->code, rq->scroll); break;
|
||||
default: break;
|
||||
}
|
||||
r = vmctl_batch_send(a->vmctl, &b);
|
||||
} else {
|
||||
switch (rq->life_op) {
|
||||
case 0: r = vmctl_powerdown(a->vmctl); break;
|
||||
case 1: r = vmctl_reset(a->vmctl); break;
|
||||
case 2: r = vmctl_wakeup(a->vmctl); break;
|
||||
case 3: r = vmctl_pause(a->vmctl); break;
|
||||
case 4: r = vmctl_resume(a->vmctl); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
rs->ok = (r == 0);
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
(void)a;
|
||||
rs->ok = 1; /* stub: ack without actuation */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static vmsig_adapter* in_open(const void* cfg, uint32_t endpoint) {
|
||||
const vmsig_input_cfg* c = cfg;
|
||||
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||
if (!a) return NULL;
|
||||
a->endpoint = endpoint;
|
||||
a->stub = c ? c->stub : 1;
|
||||
if (c) { /* carry the driver selection to attach (cfg not passed there) */
|
||||
a->driver = c->driver;
|
||||
a->qmp_path = c->qmp_path;
|
||||
a->input_bus = c->input_bus;
|
||||
a->ptr_mode = c->ptr_mode;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
/*
 * in_attach — wire the adapter into the core.
 *
 * Copies the emit hooks, starts the single-threaded worker, opens the vmctl
 * actuator when armed and not in stub mode, fills reg[0] with the worker's
 * completion eventfd and emits SEAM_UP.
 *
 * Returns 1 after filling reg[0] (presumably the number of registered fd
 * sources — confirm against the core), or -1 when cap < 1, the worker cannot
 * be created, or the actuator cannot be opened.
 */
static int in_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1) {
        return -1;
    }
    a->emit = *emit;

    /* QMP is a serial channel: one worker thread, request queue capped at 64. */
    a->worker = vmsig_worker_new(input_job, a, 1, 64);
    if (a->worker == NULL) {
        return -1;
    }

#ifdef VMSIG_WITH_VMCTL
    if (a->stub == 0) {
        /* Armed: build vmctl_config from the values carried over from open and
         * open the actuator. UINPUT (host uinput + optional
         * virtio-input-host-pci passthrough via QMP) is the primary input
         * driver; QMP input-send-event is the fallback. */
        vmctl_config vcfg;
        memset(&vcfg, 0, sizeof vcfg);
        vcfg.driver    = (a->driver == 1) ? VMCTL_DRIVER_UINPUT : VMCTL_DRIVER_QMP;
        vcfg.qmp_path  = a->qmp_path;
        vcfg.input_bus = a->input_bus;
        vcfg.ptr_mode  = a->ptr_mode;
        vcfg.uinput_id = NULL; /* built-in HID identity defaults */

        a->vmctl = vmctl_open(&vcfg);
        if (a->vmctl == NULL) {
            vmsig_worker_free(a->worker);
            a->worker = NULL;
            return -1;
        }
    }
#endif

    /* The worker's completion eventfd is the only readiness source. */
    vmsig_fd_reg* slot = &reg[0];
    slot->fd           = vmsig_worker_evfd(a->worker);
    slot->epoll_events = EPOLLIN;
    slot->shape        = VMSIG_RDY_EVENTFD;
    slot->cookie       = 0;

    /* Announce the seam. */
    vmsig_event seam_up;
    memset(&seam_up, 0, sizeof seam_up);
    seam_up.kind     = VMSIG_EV_SEAM_UP;
    seam_up.source   = VMSIG_SRC_INPUT;
    seam_up.dir      = VMSIG_DIR_UP;
    seam_up.prio     = VMSIG_PRIO_NORMAL;
    seam_up.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &seam_up);

    return 1;
}
|
||||
|
||||
/*
 * in_on_ready — readiness handler for the worker's completion eventfd.
 *
 * Drains the eventfd, then turns every finished worker result into an
 * ACT_ACK event carrying corr/origin, with the input_res copied into the
 * inline payload (truncated to the inline capacity if smaller).
 * cookie and events are unused. Always returns 0.
 */
static int in_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie;
    (void)events;

    vmsig_worker_ack(a->worker);

    for (;;) {
        input_res done;
        int code;
        if (vmsig_worker_poll(a->worker, &done, sizeof done, &code) != 1) {
            break;
        }

        vmsig_event ack;
        memset(&ack, 0, sizeof ack);
        ack.kind          = VMSIG_EV_ACT_ACK;
        ack.source        = VMSIG_SRC_INPUT;
        ack.dir           = VMSIG_DIR_UP;
        ack.prio          = VMSIG_PRIO_NORMAL;
        ack.endpoint      = a->endpoint;
        ack.corr          = done.corr;
        ack.origin        = done.origin;
        ack.payload.flags = VMSIG_PL_INLINE;

        /* Copy no more than the inline area can hold. */
        size_t ncopy = sizeof done;
        if (sizeof ack.inln < ncopy) {
            ncopy = sizeof ack.inln;
        }
        memcpy(ack.inln, &done, ncopy);

        a->emit.emit(a->emit.token, &ack);
    }
    return 0;
}
|
||||
|
||||
/*
 * in_submit — handle a downstream command addressed to the input seam.
 *
 *  - VMSIG_EV_CMD_QUERY_INPUT: answered on the calling thread with an
 *    INPUT_HELD event addressed to ev->origin. The held set is read from the
 *    vmctl actuator's record (signaling does NOT track held state itself);
 *    without an actuator (stub) the set is empty. Entries beyond the capacity
 *    of the held record set VMSIG_INPUT_HELD_TRUNC.
 *  - VMSIG_EV_CMD_INPUT: decodes the public vmsig_input from the inline
 *    payload and queues it for the worker.
 *  - VMSIG_EV_CMD_LIFECYCLE: queues the lifecycle op taken from inln[0].
 *
 * Returns 0 when handled/queued, 1 when the event is not for this seam,
 * -1 when the worker rejects the request (e.g. queue full).
 */
static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CMD_QUERY_INPUT) {
        vmsig_input_held h;
        memset(&h, 0, sizeof h);
#ifdef VMSIG_WITH_VMCTL
        if (a->vmctl) {
            const uint32_t capn = (uint32_t)(sizeof h.ent / sizeof h.ent[0]);
            unsigned char bits[VMCTL_KEYS_SNAPSHOT_BYTES];
            /* Zero first: a snapshot that fills fewer bytes than we scan must
             * read as "not pressed", never as indeterminate stack contents. */
            memset(bits, 0, sizeof bits);
            int n = vmctl_keys_snapshot(a->vmctl, bits, sizeof bits);
            for (int code = 0;
                 n > 0 && code <= VMCTL_KEY_CODE_MAX && (size_t)(code >> 3) < sizeof bits;
                 code++) {
                if (!(bits[code >> 3] & (1u << (code & 7)))) {
                    continue;
                }
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_KEY;
                    h.ent[h.count].code = (uint16_t)code;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC;
                }
            }
            unsigned bm = vmctl_btns_snapshot(a->vmctl);
            for (int b = 0; b < 8; b++) {
                if (!(bm & (1u << b))) {
                    continue;
                }
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_BTN;
                    h.ent[h.count].code = (uint16_t)b;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC;
                }
            }
        }
#endif
        vmsig_event up;
        memset(&up, 0, sizeof up);
        up.kind = VMSIG_EV_INPUT_HELD;
        up.source = VMSIG_SRC_INPUT;
        up.dir = VMSIG_DIR_UP;
        up.prio = VMSIG_PRIO_NORMAL;
        up.endpoint = a->endpoint;
        up.origin = ev->origin;
        up.payload.flags = VMSIG_PL_INLINE;
        memcpy(up.inln, &h, sizeof up.inln < sizeof h ? sizeof up.inln : sizeof h);
        a->emit.emit(a->emit.token, &up);
        return 0;
    }

    input_req rq;
    memset(&rq, 0, sizeof rq);
    rq.corr = ev->corr;
    rq.origin = ev->origin;

    if (ev->kind == VMSIG_EV_CMD_INPUT) {
        rq.cmd = 0;
        /* Decode the NEUTRAL public input contract from inln (vmsig_input).
         * Held state is not tracked here — that is the vmctl actuator's record
         * (returned via CMD_QUERY_INPUT). Zero the record first so that any
         * field not covered by the inline payload is 0, not indeterminate. */
        vmsig_input in;
        memset(&in, 0, sizeof in);
        memcpy(&in, ev->inln, sizeof in <= sizeof ev->inln ? sizeof in : sizeof ev->inln);
        rq.kind = (int)in.kind;
        rq.code = (int)in.code;
        rq.value = (int)in.value;
        rq.scroll = in.scroll;
    } else if (ev->kind == VMSIG_EV_CMD_LIFECYCLE) {
        rq.cmd = 1;
        rq.life_op = (int)(unsigned char)ev->inln[0];
    } else {
        return 1; /* not for this seam */
    }

    return vmsig_worker_submit(a->worker, &rq, sizeof rq) == 0 ? 0 : -1;
}
|
||||
|
||||
/*
 * in_close — tear down an input adapter. NULL is accepted.
 * The worker is stopped and joined BEFORE the actuator is closed, so no
 * worker job can still be using the vmctl handle when it goes away.
 */
static void in_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }

    vmsig_worker_free(a->worker);

#ifdef VMSIG_WITH_VMCTL
    if (a->vmctl != NULL) {
        vmctl_close(a->vmctl);
    }
#endif

    free(a);
}
|
||||
|
||||
static const vmsig_adapter_ops IN_OPS = {
|
||||
.name = "input", .source = VMSIG_SRC_INPUT, .codec = VMSIG_CODEC_INPUT,
|
||||
.open = in_open, .attach = in_attach, .on_readiness = in_on_ready,
|
||||
.submit = in_submit, .close = in_close
|
||||
};
|
||||
|
||||
/* Public accessor: returns the input adapter's vtable (static storage). */
const vmsig_adapter_ops* vmsig_input_ops(void) {
    const vmsig_adapter_ops* ops = &IN_OPS;
    return ops;
}
|
||||
@@ -0,0 +1,162 @@
|
||||
/* worker.c — bridge "blocking API -> completion eventfd" (pool of N threads).
|
||||
* MPSC request/result queues under a mutex + condvar; result readiness is
|
||||
* signaled via eventfd, on which the core's epoll loop wakes. N threads share one
|
||||
* request queue (for vmie — parallel read-only readers; for QMP — N=1). */
|
||||
#include "adapter_util.h"
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
typedef struct work_node {
|
||||
struct work_node* next;
|
||||
int rc; /* fn return code (for results) */
|
||||
size_t len;
|
||||
unsigned char buf[VMSIG_WORK_SLOT];
|
||||
} work_node;
|
||||
|
||||
typedef struct { work_node* head; work_node* tail; } work_q;
|
||||
|
||||
struct vmsig_worker {
|
||||
pthread_t* threads;
|
||||
int nthreads;
|
||||
pthread_mutex_t lock;
|
||||
pthread_cond_t cv;
|
||||
work_q req; /* loop -> workers */
|
||||
work_q res; /* workers -> loop */
|
||||
int evfd;
|
||||
int stop;
|
||||
int max_depth; /* cap on req-queue depth */
|
||||
int req_count; /* current req-queue depth */
|
||||
vmsig_work_fn fn;
|
||||
void* user;
|
||||
};
|
||||
|
||||
/* Append n at the tail of q. The caller provides any locking. */
static void q_push(work_q* q, work_node* n) {
    n->next = NULL;
    if (q->tail == NULL) {
        q->head = n; /* queue was empty */
    } else {
        q->tail->next = n;
    }
    q->tail = n;
}
|
||||
/* Detach and return the head of q, or NULL when q is empty. The caller provides any locking. */
static work_node* q_pop(work_q* q) {
    work_node* first = q->head;
    if (first != NULL) {
        q->head = first->next;
        if (q->head == NULL) {
            q->tail = NULL; /* removed the last node */
        }
    }
    return first;
}
|
||||
/* Free every node still in q and leave it empty. */
static void q_drain(work_q* q) {
    for (work_node* cur = q->head; cur != NULL;) {
        work_node* following = cur->next;
        free(cur);
        cur = following;
    }
    q->head = NULL;
    q->tail = NULL;
}
|
||||
|
||||
static void* worker_main(void* arg) {
|
||||
vmsig_worker* w = arg;
|
||||
for (;;) {
|
||||
pthread_mutex_lock(&w->lock);
|
||||
while (!w->stop && !w->req.head) pthread_cond_wait(&w->cv, &w->lock);
|
||||
/* On stop we DRAIN the queue: run the remaining requests so that submitted
|
||||
* work is not silently lost (matters for jobs carrying resource ownership).
|
||||
* We exit only when stop AND the queue is empty. */
|
||||
if (w->stop && !w->req.head) { pthread_mutex_unlock(&w->lock); break; }
|
||||
work_node* rq = q_pop(&w->req);
|
||||
if (rq) w->req_count--;
|
||||
pthread_mutex_unlock(&w->lock);
|
||||
if (!rq) continue;
|
||||
|
||||
work_node* rs = calloc(1, sizeof *rs);
|
||||
if (rs) {
|
||||
rs->rc = w->fn ? w->fn(w->user, rq->buf, rs->buf) : -1;
|
||||
rs->len = VMSIG_WORK_SLOT;
|
||||
pthread_mutex_lock(&w->lock);
|
||||
q_push(&w->res, rs);
|
||||
pthread_mutex_unlock(&w->lock);
|
||||
uint64_t one = 1;
|
||||
ssize_t r = write(w->evfd, &one, sizeof one);
|
||||
(void)r;
|
||||
}
|
||||
free(rq);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * vmsig_worker_new — create a worker pool.
 *
 * fn/user: job callback and its opaque argument.
 * nthreads: requested thread count; values below 1 are treated as 1.
 * max_depth: request-queue cap; values <= 0 select the default of 512.
 *
 * If only some of the threads can be started the pool is still returned with
 * the threads that did start. Returns NULL when an allocation, the eventfd,
 * the mutex/condvar, or the very first thread cannot be created.
 */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth) {
    const int want = (nthreads < 1) ? 1 : nthreads;

    vmsig_worker* w = calloc(1, sizeof *w);
    if (w == NULL) {
        return NULL;
    }
    w->fn = fn;
    w->user = user;
    w->evfd = -1;
    w->max_depth = (max_depth > 0) ? max_depth : 512;

    w->threads = calloc((size_t)want, sizeof *w->threads);
    if (w->threads == NULL) {
        goto fail_self;
    }
    w->evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (w->evfd < 0) {
        goto fail_threads;
    }
    if (pthread_mutex_init(&w->lock, NULL) != 0) {
        goto fail_evfd;
    }
    if (pthread_cond_init(&w->cv, NULL) != 0) {
        goto fail_mutex;
    }

    /* Start threads until the requested count is reached or one fails. */
    while (w->nthreads < want &&
           pthread_create(&w->threads[w->nthreads], NULL, worker_main, w) == 0) {
        w->nthreads++;
    }
    if (w->nthreads != 0) {
        return w;
    }

    /* Not a single thread started: unwind everything. */
    pthread_cond_destroy(&w->cv);
fail_mutex:
    pthread_mutex_destroy(&w->lock);
fail_evfd:
    close(w->evfd);
fail_threads:
    free(w->threads);
fail_self:
    free(w);
    return NULL;
}
|
||||
|
||||
/*
 * vmsig_worker_free — stop the pool and release everything. NULL is accepted.
 * Sets stop, wakes all threads and joins them (they finish queued requests
 * first), then frees any leftover queue nodes, the sync objects, the eventfd
 * and the pool itself.
 */
void vmsig_worker_free(vmsig_worker* w) {
    if (w == NULL) {
        return;
    }

    pthread_mutex_lock(&w->lock);
    w->stop = 1;
    pthread_cond_broadcast(&w->cv);
    pthread_mutex_unlock(&w->lock);

    int joined = 0;
    while (joined < w->nthreads) {
        pthread_join(w->threads[joined], NULL);
        joined++;
    }

    q_drain(&w->req);
    q_drain(&w->res);

    pthread_cond_destroy(&w->cv);
    pthread_mutex_destroy(&w->lock);

    if (w->evfd >= 0) {
        close(w->evfd);
    }
    free(w->threads);
    free(w);
}
|
||||
|
||||
/* Completion eventfd of the pool, or -1 when w is NULL. */
int vmsig_worker_evfd(const vmsig_worker* w) {
    if (w == NULL) {
        return -1;
    }
    return w->evfd;
}
|
||||
|
||||
/*
 * vmsig_worker_submit — queue a request for the pool.
 *
 * The first len bytes of req are copied into a fresh node (nothing is copied
 * when req is NULL or len is 0) and one waiting thread is woken.
 * Returns 0 on success; -1 when w is NULL, len exceeds VMSIG_WORK_SLOT, the
 * queue already holds max_depth requests (flood protection), or the node
 * cannot be allocated.
 */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len) {
    if (w == NULL) {
        return -1;
    }
    if (len > VMSIG_WORK_SLOT) {
        return -1;
    }

    int status = -1;
    pthread_mutex_lock(&w->lock);
    if (w->req_count < w->max_depth) {
        work_node* node = calloc(1, sizeof *node);
        if (node != NULL) {
            if (req != NULL && len != 0) {
                memcpy(node->buf, req, len);
            }
            node->len = len;
            q_push(&w->req, node);
            w->req_count++;
            pthread_cond_signal(&w->cv);
            status = 0;
        }
    }
    pthread_mutex_unlock(&w->lock);
    return status;
}
|
||||
|
||||
/*
 * vmsig_worker_ack — drain the completion eventfd. NULL is accepted.
 * Keeps reading while a full 8-byte counter comes back; the eventfd is
 * created non-blocking, so the loop ends as soon as a read does not.
 */
void vmsig_worker_ack(vmsig_worker* w) {
    if (w == NULL) {
        return;
    }
    uint64_t counter;
    for (;;) {
        if (read(w->evfd, &counter, sizeof counter) != (ssize_t)sizeof counter) {
            break;
        }
    }
}
|
||||
|
||||
/*
 * vmsig_worker_poll — collect one finished result.
 *
 * Copies at most cap bytes of the result into res (skipped when res is NULL
 * or cap is 0) and stores the callback's return code in *rc when rc is given.
 * Returns 1 when a result was consumed, 0 when none is pending, -1 when w is
 * NULL.
 */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc) {
    if (w == NULL) {
        return -1;
    }

    pthread_mutex_lock(&w->lock);
    work_node* done = q_pop(&w->res);
    pthread_mutex_unlock(&w->lock);

    if (done == NULL) {
        return 0;
    }

    if (res != NULL && cap != 0) {
        size_t ncopy = done->len;
        if (cap < ncopy) {
            ncopy = cap;
        }
        memcpy(res, done->buf, ncopy);
    }
    if (rc != NULL) {
        *rc = done->rc;
    }
    free(done);
    return 1;
}
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef VMSIG_MEMCTX_CFG_H
#define VMSIG_MEMCTX_CFG_H

#include <stdint.h>

/*
 * Private configuration of the memctx adapter (vmie). Passed as an opaque
 * pointer to open(); NOT public (layout per reference:
 * src/<module>/include/). A NULL cfg selects stub mode.
 */
typedef struct {
    int         stub;     /* 1 => synthetic kcr3 / RO-fd (spine without a VM) */
    const char* ram_path; /* armed: path to the guest RAM backing (NOT published outward) */
    uint64_t    low;      /* below-4G split (vmie_win32_open / locator.low) */
    int         ro_fd;    /* >= 0 => infra supplied a pre-sealed RO-fd (policy);
                           * <  0 => default: open(ram_path, O_RDONLY) / stub memfd */
} vmsig_memctx_cfg;

/*
 * Maximum SRC bytes per atomic gva_write. It bounds the worker POD slot: the
 * mc_req header plus src must stay <= VMSIG_WORK_SLOT. Private to the adapter
 * (an executor bound), NOT part of the neutral control contract — control
 * only needs VMSIG_MEMWRITE_INLINE for inline SRC.
 */
#define VMSIG_MEMWRITE_MAX 192u

#endif /* VMSIG_MEMCTX_CFG_H */
|
||||
@@ -0,0 +1,407 @@
|
||||
/* memctx.c — vmie sensor adapter: vends ONE coherent guest address-space context —
|
||||
* the permanent System DirectoryTableBase (`kcr3`) PAIRED with a RAM-region locator
|
||||
* and a pre-opened O_RDONLY fd. This is NOT perception and NOT semantics: signaling
|
||||
* multicasts the datum + RO-fd, while the holder (an S-lib / any control) opens ITS OWN
|
||||
* read-only vmie_mem from the fd and does gva_read/scan/pmap itself.
|
||||
*
|
||||
* Cold bring-up (host_bootstrap) is CPU-bound and blocking, so it runs on an off-loop
|
||||
* worker; the loop thread only assembles the locator on the completion-eventfd and emits
|
||||
* the MEMCTX trigger. The epoch is stamped by the CORE (retained-context); on an epoch
|
||||
* change the core calls reg.invalidate, the adapter re-bootstraps and re-emits MEMCTX.
|
||||
*
|
||||
* RO outward is physical: O_RDONLY fd => mmap(PROT_WRITE) -> EACCES, so a write into the
|
||||
* guest on the holder side is structurally impossible. stub mode (without VMSIG_WITH_VMIE
|
||||
* or ram_path==NULL) synthesizes a kcr3 and a genuinely RO-mappable fd (memfd + seal) —
|
||||
* the seam is provable without a VM. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig_adapter.h"
|
||||
#include "memctx.h"
|
||||
#include "adapter_util.h" /* vmsig_worker (off-loop bootstrap) */
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
|
||||
#endif
|
||||
|
||||
/*
 * memfd_create / sealing — ABI fallbacks for an old glibc or kernel headers
 * (used for the stub RO-fd backing). Each piece is supplied only when the
 * system headers did not already provide it.
 */
#ifndef MFD_CLOEXEC
#include <sys/syscall.h>
#include <linux/memfd.h>
/* libc has no wrapper: issue the raw syscall. */
static int memfd_create(const char* name, unsigned int flags) {
    long fd = syscall(SYS_memfd_create, name, flags);
    return (int)fd;
}
#endif

#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif

#ifndef F_ADD_SEALS
#define F_ADD_SEALS   (1024 + 9)
#define F_SEAL_SHRINK 0x0002
#define F_SEAL_GROW   0x0004
#endif

#ifndef F_SEAL_FUTURE_WRITE
/* kernel 5.1+: forbid future writable mappings */
#define F_SEAL_FUTURE_WRITE 0x0010
#endif
|
||||
|
||||
/* Size of the synthetic RAM image used in stub mode: 64 KB. */
#define MC_STUB_SIZE 0x10000u

/* Capacity of the persistent segment table (cur_segs). */
#define MC_MAX_SEG 8

/* Request-queue depth of the single off-loop thread (rare bootstrap + writes). */
#define MC_WORKER_DEPTH 16

/* Job types multiplexed over the one worker. */
enum {
    MC_JOB_BOOTSTRAP = 0,
    MC_JOB_WRITE     = 1
};
|
||||
|
||||
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
|
||||
* bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap).
|
||||
* boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend
|
||||
* on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src. */
|
||||
typedef struct {
|
||||
uint32_t op; /* MC_JOB_* */
|
||||
uint32_t boot_count; /* MC_JOB_BOOTSTRAP */
|
||||
/* --- MC_JOB_WRITE --- */
|
||||
uint64_t gva;
|
||||
uint32_t len;
|
||||
uint32_t corr;
|
||||
uint32_t origin;
|
||||
uint8_t src[VMSIG_MEMWRITE_MAX]; /* SRC bytes copied off-loop (gva_write reads this) */
|
||||
} mc_req;
|
||||
typedef struct {
|
||||
uint32_t op; /* echoes the job type so on_ready demuxes */
|
||||
int ok; /* MC_JOB_WRITE result */
|
||||
uint32_t corr;
|
||||
uint32_t origin;
|
||||
uint64_t kcr3; /* MC_JOB_BOOTSTRAP result */
|
||||
} mc_res;
|
||||
|
||||
struct vmsig_adapter {
|
||||
uint32_t endpoint;
|
||||
int stub;
|
||||
const char* ram_path; /* armed: RAM-backing path (NOT published outward) */
|
||||
uint64_t low;
|
||||
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (policy); <0 => default */
|
||||
vmsig_emit emit;
|
||||
int registered; /* register_memctx already called */
|
||||
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
|
||||
uint32_t boot_count; /* incremented on each (re-)bootstrap */
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
vmie_win32* win; /* held RW handle across the epoch (kcr3 source + gva_write target) */
|
||||
vmie_mem* mem; /* vmie_win32_mem(win); borrowed, valid until vmie_win32_close */
|
||||
#endif
|
||||
uint64_t kcr3; /* current System DTB (also published in cur_pod.kcr3) */
|
||||
|
||||
/* persistent locator: owned by the loop thread; worker only yields kcr3 into scratch. */
|
||||
int have_ctx;
|
||||
vmsig_memctx cur_pod; /* kcr3/low/nseg/flags (epoch stamped by the core) */
|
||||
vmsig_memseg cur_segs[MC_MAX_SEG];
|
||||
uint32_t cur_nseg;
|
||||
|
||||
int stub_fd; /* stub: memfd of synth RAM (+seal); share_fd reopens it */
|
||||
};
|
||||
|
||||
/* Forward declaration of the MEMWRITE completion ACK (defined below mc_submit;
 * used by the mc_on_ready demultiplexer). */
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin);
|
||||
|
||||
/* ---- stub RO-fd: memfd + deterministic contents + seal of future writes ---- */
|
||||
static int mc_make_stub_fd(uint32_t size) {
|
||||
int fd = memfd_create("vmsig_memctx", MFD_CLOEXEC | MFD_ALLOW_SEALING);
|
||||
if (fd < 0) fd = memfd_create("vmsig_memctx", MFD_CLOEXEC);
|
||||
if (fd < 0) return -1;
|
||||
if (ftruncate(fd, (off_t)size) != 0) { close(fd); return -1; }
|
||||
/* deterministic contents via a temporary RW mapping BEFORE the seal */
|
||||
uint8_t* p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if (p != MAP_FAILED) {
|
||||
for (uint32_t i = 0; i < size; i++) p[i] = (uint8_t)(i & 0xFFu);
|
||||
munmap(p, size);
|
||||
}
|
||||
/* FUTURE_WRITE: even if the holder reopens the fd as O_RDWR, it gets no writable mapping.
|
||||
* best-effort (kernel 5.1+); on older kernels only the O_RDONLY fd protects. */
|
||||
if (fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_FUTURE_WRITE) != 0)
|
||||
(void)fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW);
|
||||
return fd;
|
||||
}
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
/*
 * mc_bootstrap_armed — armed bring-up, run on the off-loop worker.
 *
 * Opens the RAM backing (RW is vmie's internal concern), runs host_bootstrap
 * and takes the cr3 of the process named "System" as the permanent System DTB
 * (kcr3 — the root of the guest address space). A stale handle from a prior
 * epoch is dropped first; FIFO order on the single worker serializes that
 * against in-flight writes.
 *
 * On success the RW handle is HELD across the epoch (kcr3 source + gva_write
 * target); only the RO-fd (share_fd) ever leaves outward — writes go through
 * this command plane, never a writable mmap.
 *
 * Returns 0 and stores the DTB in *out_kcr3, or -1 on any failure (the new
 * handle is closed and a->win / a->mem stay NULL).
 */
static int mc_bootstrap_armed(struct vmsig_adapter* a, uint64_t* out_kcr3) {
    /* Drop the previous epoch's handle before opening a new one. */
    if (a->win != NULL) {
        vmie_win32_close(a->win);
        a->win = NULL;
        a->mem = NULL;
    }

    vmie_win32* handle = vmie_win32_open(a->ram_path, a->low);
    if (handle == NULL) {
        return -1;
    }
    if (host_bootstrap(handle) != 0) {
        vmie_win32_close(handle);
        return -1;
    }

    process procs[16];
    const int found = proc_list(handle, 0, procs, 16);
    uint64_t dtb = 0;
    for (int i = 0; i < 16 && i < found; i++) {
        if (strcmp(procs[i].name, "System") != 0) {
            continue;
        }
        dtb = procs[i].cr3;
        break;
    }
    if (dtb == 0) {
        vmie_win32_close(handle);
        return -1;
    }

    a->win  = handle;                 /* HOLD: RW handle lives across the epoch */
    a->mem  = vmie_win32_mem(handle); /* borrowed; valid until vmie_win32_close(handle) */
    a->kcr3 = dtb;
    *out_kcr3 = dtb;
    return 0;
}
#endif
|
||||
|
||||
/* ---- worker job: cold bring-up OR atomic write, off-loop ----------------- *
|
||||
* Demultiplexed by rq->op. BOTH run on the SAME single worker thread, so a write on the
|
||||
* held handle never races the close-on-rebootstrap (FIFO). The job MUST NOT touch core
|
||||
* structures — it only reads a->mem/a->kcr3 (stable between re-bootstraps on this thread). */
|
||||
static int mc_job(void* user, const void* req, void* res) {
|
||||
struct vmsig_adapter* a = user;
|
||||
const mc_req* rq = req;
|
||||
mc_res* rs = res;
|
||||
memset(rs, 0, sizeof *rs);
|
||||
rs->op = rq->op;
|
||||
|
||||
if (rq->op == MC_JOB_WRITE) {
|
||||
rs->corr = rq->corr; rs->origin = rq->origin;
|
||||
if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
/* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it):
|
||||
* the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. */
|
||||
rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)a->kcr3, (uintptr_t)rq->gva,
|
||||
rq->src, rq->len) == 0);
|
||||
return rs->ok ? 0 : -1;
|
||||
#else
|
||||
rs->ok = 0;
|
||||
return -1; /* armed without the build flag: write impossible */
|
||||
#endif
|
||||
}
|
||||
|
||||
/* MC_JOB_BOOTSTRAP */
|
||||
if (a->stub) {
|
||||
rs->kcr3 = 0xC0DE0000ull + (uint64_t)rq->boot_count * 0x1000ull; /* changes per epoch */
|
||||
return 0;
|
||||
}
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
uint64_t kcr3 = 0;
|
||||
if (mc_bootstrap_armed(a, &kcr3) != 0) return -1;
|
||||
rs->kcr3 = kcr3;
|
||||
return 0;
|
||||
#else
|
||||
return -1; /* armed without the build flag: bootstrap impossible -> ERROR */
|
||||
#endif
|
||||
}
|
||||
|
||||
static void mc_kick_bootstrap(struct vmsig_adapter* a) {
|
||||
a->boot_count++;
|
||||
mc_req rq;
|
||||
memset(&rq, 0, sizeof rq);
|
||||
rq.op = MC_JOB_BOOTSTRAP; rq.boot_count = a->boot_count;
|
||||
(void)vmsig_worker_submit(a->worker, &rq, sizeof rq); /* full => drop (rare) */
|
||||
}
|
||||
|
||||
/* ---- reg hooks (vmsig_memctx_reg.ctx = a; called by the core on the loop thread) ---- */
|
||||
static void mc_reg_describe(void* ctx, vmsig_memctx* out_pod,
|
||||
const vmsig_memseg** out_segs, uint32_t* out_nseg) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
*out_pod = a->cur_pod; /* kcr3/low/nseg/flags; the core overwrites the epoch */
|
||||
*out_segs = a->cur_segs;
|
||||
*out_nseg = a->cur_nseg;
|
||||
}
|
||||
|
||||
static int mc_reg_share_fd(void* ctx) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
if (a->cfg_ro_fd >= 0)
|
||||
return fcntl(a->cfg_ro_fd, F_DUPFD_CLOEXEC, 0); /* infra-sealed RO-fd: dup */
|
||||
if (a->stub) {
|
||||
if (a->stub_fd < 0) return -1;
|
||||
char path[64];
|
||||
snprintf(path, sizeof path, "/proc/self/fd/%d", a->stub_fd);
|
||||
return open(path, O_RDONLY | O_CLOEXEC); /* fresh O_RDONLY on the backing */
|
||||
}
|
||||
if (!a->ram_path) return -1;
|
||||
return open(a->ram_path, O_RDONLY | O_CLOEXEC); /* armed default */
|
||||
}
|
||||
|
||||
static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
(void)epoch; /* the core owns the epoch; the adapter must re-bootstrap */
|
||||
a->have_ctx = 0; /* the previous context is invalid */
|
||||
mc_kick_bootstrap(a); /* off-loop; on_ready re-emits MEMCTX (new epoch) */
|
||||
}
|
||||
|
||||
/* ---- vtable ---- */
|
||||
static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
|
||||
const vmsig_memctx_cfg* c = cfg;
|
||||
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||
if (!a) return NULL;
|
||||
a->endpoint = endpoint;
|
||||
a->stub = c ? c->stub : 1;
|
||||
a->ram_path = c ? c->ram_path : NULL;
|
||||
a->low = c ? c->low : 0;
|
||||
a->cfg_ro_fd = (c && c->ro_fd >= 0) ? c->ro_fd : -1;
|
||||
if (!a->ram_path && a->cfg_ro_fd < 0) a->stub = 1; /* no path/fd => stub */
|
||||
a->stub_fd = -1;
|
||||
return a;
|
||||
}
|
||||
|
||||
/* attach: wire the adapter into the loop.
 *  - creates the worker; its completion eventfd is the only readiness source (cookie 0);
 *  - in stub mode without a configured RO-fd, creates the stub backing fd;
 *  - registers the memctx hooks when the emit table provides register_memctx
 *    (before the first bootstrap, so the core slot already has the hooks);
 *  - emits SEAM_UP and queues the first bootstrap.
 * Returns 1 (one fd registration filled in) or -1 on failure. */
static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    vmsig_fd_reg* slot;
    vmsig_event seam_up;

    if (cap < 1) {
        return -1;
    }
    a->emit = *emit;

    a->worker = vmsig_worker_new(mc_job, a, 1, MC_WORKER_DEPTH);
    if (!a->worker) {
        return -1;
    }

    if (a->stub && a->cfg_ro_fd < 0) {
        a->stub_fd = mc_make_stub_fd(MC_STUB_SIZE);
        if (a->stub_fd < 0) {
            vmsig_worker_free(a->worker);
            a->worker = NULL;
            return -1;
        }
    }

    /* worker completion-eventfd as the readiness source (cookie=0). */
    slot = &reg[0];
    slot->fd = vmsig_worker_evfd(a->worker);
    slot->epoll_events = EPOLLIN;
    slot->shape = VMSIG_RDY_EVENTFD;
    slot->cookie = 0;

    if (a->emit.register_memctx) {
        vmsig_memctx_reg hooks;

        memset(&hooks, 0, sizeof hooks);
        hooks.endpoint = a->endpoint;
        hooks.source = VMSIG_SRC_MEMCTX;
        hooks.ctx = a;
        hooks.describe = mc_reg_describe;
        hooks.share_fd = mc_reg_share_fd;
        hooks.invalidate = mc_reg_invalidate;
        if (a->emit.register_memctx(a->emit.token, &hooks) == 0) {
            a->registered = 1;
        }
    }

    memset(&seam_up, 0, sizeof seam_up);
    seam_up.kind = VMSIG_EV_SEAM_UP;
    seam_up.source = VMSIG_SRC_MEMCTX;
    seam_up.dir = VMSIG_DIR_UP;
    seam_up.prio = VMSIG_PRIO_NORMAL;
    seam_up.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &seam_up);

    mc_kick_bootstrap(a); /* first bootstrap off-loop; the locator is assembled on completion */
    return 1;
}
|
||||
|
||||
/* on_readiness: acknowledge the worker eventfd and drain every finished job.
 *  - MC_JOB_WRITE result -> addressed ACT_ACK to the initiator (ok iff rs.ok and rc == 0);
 *  - failed bootstrap    -> urgent ERROR event, nothing is published;
 *  - good bootstrap      -> rebuild the single-segment locator and emit MEMCTX.
 * Always returns 0. */
static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    mc_res done;
    int status;

    (void)cookie;
    (void)events;
    vmsig_worker_ack(a->worker);

    for (;;) {
        vmsig_event out;

        if (vmsig_worker_poll(a->worker, &done, sizeof done, &status) != 1) {
            break;
        }

        if (done.op == MC_JOB_WRITE) {
            /* atomic write completed: addressed ACT_ACK to the initiator. */
            mc_memwrite_ack(a, done.ok && status == 0, done.corr, done.origin);
            continue;
        }

        /* Both remaining outcomes share the same UP event header. */
        memset(&out, 0, sizeof out);
        out.source = VMSIG_SRC_MEMCTX;
        out.dir = VMSIG_DIR_UP;
        out.endpoint = a->endpoint;

        if (status != 0) {
            /* bootstrap failed: ERROR (source MEMCTX); do NOT publish an invalid kcr3. */
            out.kind = VMSIG_EV_ERROR;
            out.prio = VMSIG_PRIO_URGENT;
            a->emit.emit(a->emit.token, &out);
            continue;
        }

        /* Assemble the locator on the loop thread from the job result. Only cur_pod.kcr3
         * (loop-only, used for delivery) is written here; a->kcr3 is the gva_write target
         * owned solely by the worker thread and must not be written from the loop. */
        memset(&a->cur_pod, 0, sizeof a->cur_pod);
        a->cur_pod.kcr3 = done.kcr3;
        a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE;
        a->cur_pod.flags = VMSIG_MEMCTX_RDONLY;

        a->cur_nseg = 1; /* single-low identity (gpa 0 .. low) */
        a->cur_segs[0].gpa = 0;
        a->cur_segs[0].len = a->cur_pod.low;
        a->cur_segs[0].file_off = 0;
        a->cur_pod.nseg = a->cur_nseg;
        a->have_ctx = 1;

        /* MEMCTX trigger: the core authoritatively re-describes + stamps the epoch. */
        out.kind = VMSIG_EV_MEMCTX;
        out.prio = VMSIG_PRIO_NORMAL;
        memcpy(out.inln, &a->cur_pod, sizeof a->cur_pod);
        a->emit.emit(a->emit.token, &out);
    }
    return 0;
}
|
||||
|
||||
/* Emit an addressed ACT_ACK for a MEMWRITE (source MEMCTX, to the initiator). inln carries
|
||||
* {ok,corr,origin} (same shape as the input adapter's ACK), so control reads ok at offset 0.
|
||||
* ok=0 covers extent-deny / no-SRC / queue-full / write failure (default-deny, observable). */
|
||||
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin) {
|
||||
struct { int ok; uint32_t corr; uint32_t origin; } body = { ok, corr, origin };
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_ACT_ACK; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
|
||||
up.corr = corr; up.origin = origin;
|
||||
up.payload.flags = VMSIG_PL_INLINE;
|
||||
memcpy(up.inln, &body, sizeof body);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
}
|
||||
|
||||
/* DOWN MEMWRITE handler: validate extent, copy SRC off-loop, submit the atomic gva_write to
|
||||
* the worker. Default-deny: any invalid path (no SRC flag, len out of bounds, short payload,
|
||||
* queue full) ACKs ok=0 and does NOT actuate. The completion ACK for a queued write arrives
|
||||
* via mc_on_ready. Returns 0 when the event is consumed by this seam, 1 when it is not ours. */
|
||||
static int mc_submit(vmsig_adapter* a, const vmsig_event* ev) {
|
||||
if (ev->kind != VMSIG_EV_CMD_MEMWRITE) return 1; /* not for this seam */
|
||||
|
||||
const vmsig_memwrite* mw = (const vmsig_memwrite*)ev->inln;
|
||||
uint32_t len = mw->len;
|
||||
if (len == 0 || len > VMSIG_MEMWRITE_MAX) { /* extent: bounded */
|
||||
mc_memwrite_ack(a, 0, ev->corr, ev->origin);
|
||||
return 0;
|
||||
}
|
||||
mc_req rq; memset(&rq, 0, sizeof rq);
|
||||
rq.op = MC_JOB_WRITE; rq.gva = mw->gva; rq.len = len;
|
||||
rq.corr = ev->corr; rq.origin = ev->origin;
|
||||
|
||||
/* copy SRC into the worker req (off-loop gva_write reads from rq.src). */
|
||||
if (mw->flags & VMSIG_MW_SRC_INLINE) {
|
||||
if (len > VMSIG_MEMWRITE_INLINE) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
|
||||
memcpy(rq.src, ev->inln + sizeof *mw, len); /* inln tail after the 16-byte header */
|
||||
} else if (mw->flags & VMSIG_MW_SRC_PAYLOAD) {
|
||||
if (!ev->payload.data || ev->payload.len < len) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
|
||||
memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */
|
||||
} else {
|
||||
mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* no SRC flag */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vmsig_worker_submit(a->worker, &rq, sizeof rq) != 0) {
|
||||
mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* queue full -> ACK err */
|
||||
return -1;
|
||||
}
|
||||
return 0; /* completion ACK arrives via mc_on_ready */
|
||||
}
|
||||
|
||||
/* close: tear the adapter down. NULL is accepted and ignored.
 * Order: unregister the memctx hooks, free (join) the worker, then close the vmie handle
 * (armed builds only, after the join so no gva_write is in flight), then the stub fd.
 * cfg_ro_fd belongs to the infrastructure (the open caller) and is NOT closed here. */
static void mc_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }

    if (a->registered && a->emit.unregister_memctx) {
        a->emit.unregister_memctx(a->emit.token, a->endpoint);
    }

    if (a->worker) {
        vmsig_worker_free(a->worker); /* join: bootstrap + write jobs finished */
    }

#ifdef VMSIG_WITH_VMIE
    if (a->win) {
        vmie_win32_close(a->win);     /* AFTER worker join: no in-flight gva_write */
    }
#endif

    if (a->stub_fd >= 0) {
        close(a->stub_fd);
    }

    free(a);
}
|
||||
|
||||
static const vmsig_adapter_ops MC_OPS = {
|
||||
.name = "memctx", .source = VMSIG_SRC_MEMCTX, .codec = VMSIG_CODEC_MEMCTX,
|
||||
.open = mc_open, .attach = mc_attach, .on_readiness = mc_on_ready,
|
||||
.submit = mc_submit, .close = mc_close
|
||||
};
|
||||
|
||||
/* Accessor for the memctx adapter's operation table (static storage). */
const vmsig_adapter_ops* vmsig_memctx_ops(void) {
    return &MC_OPS;
}
|
||||
@@ -0,0 +1,13 @@
|
||||
#ifndef VMSIG_VMHOST_H
|
||||
#define VMSIG_VMHOST_H
|
||||
|
||||
/* Private config of the vmhost adapter (signaling's own QMP client).
|
||||
* cfg==NULL or no qmp_path => stub mode (synthetic events, no QEMU).
|
||||
* qmp_path given => armed: connect to QEMU's QMP socket ('@' prefix = abstract).
|
||||
* No build flag needed — the client depends only on POSIX and its own code. */
|
||||
typedef struct {
    int         stub;     /* stub-mode request flag */
    const char *qmp_path; /* QMP socket path; a leading '@' selects an abstract socket */
} vmsig_vmhost_cfg;
|
||||
|
||||
#endif /* VMSIG_VMHOST_H */
|
||||
@@ -0,0 +1,313 @@
|
||||
/* vmhost.c — QEMU/QMP host-plane: signaling's OWN layer for observing the VM
|
||||
* and its basic control. Not a wrapper over a neighbor repo — an own QMP client;
|
||||
* depends only on POSIX, so it is always functional (no build flag).
|
||||
*
|
||||
* This is the first truly epoll-native source: the QMP socket (VMSIG_RDY_FD) lives
|
||||
* directly in the loop, non-blocking, async events. Up: QMP events -> VM_LIFECYCLE
|
||||
* (broadcast), EOF -> SEAM_DOWN. Down: CMD_VM -> QMP command with id correlation,
|
||||
* reply addressed to the initiator. stub mode (no QEMU) synthesizes events/replies. */
|
||||
#define _GNU_SOURCE
|
||||
#include "vmsig_adapter.h"
|
||||
#include "vmhost.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <sys/timerfd.h>
|
||||
#include <sys/epoll.h>
|
||||
|
||||
#define VMHOST_BUF 4096
|
||||
#define VMHOST_STUB_MS 200
|
||||
#define VMHOST_MAX_PENDING 64
|
||||
|
||||
/* Connection state of the adapter (stored in vmsig_adapter.st). */
enum {
    ST_STUB = 0,    /* stub mode: timerfd-driven synthetic events */
    ST_CONNECTING,  /* socket created, waiting for the QMP greeting */
    ST_NEGOTIATING, /* qmp_capabilities sent, waiting for its reply */
    ST_READY,       /* negotiated: events and command replies are processed */
    ST_DEAD         /* transport closed */
};
|
||||
|
||||
/* One in-flight QMP command: maps the QMP id back to the initiating event. */
typedef struct {
    uint32_t id;     /* QMP "id" sent with the command */
    uint32_t origin; /* origin of the initiating event */
    uint32_t corr;   /* correlation value of the initiating event */
    uint8_t  op;     /* VM operation that was issued */
    int      used;   /* non-zero while the slot is occupied */
} pend_ent;
|
||||
|
||||
struct vmsig_adapter {
|
||||
uint32_t endpoint;
|
||||
int stub;
|
||||
const char* qmp_path;
|
||||
vmsig_emit emit;
|
||||
int fd; /* QMP socket (armed) or timerfd (stub) */
|
||||
int st;
|
||||
uint32_t cur; /* current synthetic state (stub) */
|
||||
char buf[VMHOST_BUF];
|
||||
size_t buflen;
|
||||
uint32_t next_id;
|
||||
pend_ent pend[VMHOST_MAX_PENDING];
|
||||
};
|
||||
|
||||
/* ---- minimal QMP line parse (top-level keys only; full JSON — deferred) ---- */
|
||||
/* Extract the string value that follows the first occurrence of `key` in `line`.
 *
 * `key` is matched verbatim with strstr (callers pass it with its quotes, e.g.
 * "\"event\""). After the key, spaces, tabs and ':' are skipped; the value must then start
 * with '"'. Characters are copied up to the closing '"' (escape sequences are not
 * interpreted) or until `out` is full; the result is always NUL-terminated and silently
 * truncated to cap-1 characters.
 *
 * Returns 1 when a value was stored in `out`, 0 when the key is absent, the value is not a
 * string, or cap == 0 (no room for the terminator; `out` is left untouched). */
static int jstr(const char* line, const char* key, char* out, size_t cap) {
    if (cap == 0) return 0; /* writing the terminator would overflow `out` */

    const char* p = strstr(line, key);
    if (!p) return 0;

    p += strlen(key);
    while (*p == ' ' || *p == '\t' || *p == ':') p++;
    if (*p != '"') return 0;
    p++;

    size_t i = 0;
    while (*p && *p != '"' && i + 1 < cap) out[i++] = *p++;
    out[i] = 0;
    return 1;
}
|
||||
/* Read the non-negative decimal number that follows the first occurrence of `key` in
 * `line` (spaces, tabs and ':' between key and value are skipped).
 * Returns the parsed value, or -1 when the key is missing or no digit follows it. */
static long jnum(const char* line, const char* key) {
    const char* cur = strstr(line, key);

    if (cur == NULL) {
        return -1;
    }
    for (cur += strlen(key); *cur == ' ' || *cur == '\t' || *cur == ':'; cur++) {
        /* skip separators */
    }
    if (!(*cur >= '0' && *cur <= '9')) {
        return -1;
    }
    return strtol(cur, NULL, 10);
}
|
||||
static uint32_t ev_state(const char* n) {
|
||||
if (!strcmp(n, "RESUME")) return VMSIG_VM_RUNNING;
|
||||
if (!strcmp(n, "STOP")) return VMSIG_VM_PAUSED;
|
||||
if (!strcmp(n, "SHUTDOWN")) return VMSIG_VM_SHUTDOWN;
|
||||
if (!strcmp(n, "RESET")) return VMSIG_VM_RESET;
|
||||
if (!strcmp(n, "POWERDOWN")) return VMSIG_VM_POWERDOWN;
|
||||
if (!strcmp(n, "GUEST_PANICKED")) return VMSIG_VM_CRASHED;
|
||||
return VMSIG_VM_UNKNOWN;
|
||||
}
|
||||
static uint32_t status_state(const char* s) {
|
||||
if (!strcmp(s, "running")) return VMSIG_VM_RUNNING;
|
||||
if (!strcmp(s, "paused")) return VMSIG_VM_PAUSED;
|
||||
if (!strcmp(s, "shutdown")) return VMSIG_VM_SHUTDOWN;
|
||||
return VMSIG_VM_UNKNOWN;
|
||||
}
|
||||
static const char* op_qmp(uint32_t op) {
|
||||
switch (op) {
|
||||
case VMSIG_VMOP_QUERY: return "query-status";
|
||||
case VMSIG_VMOP_CONT: return "cont";
|
||||
case VMSIG_VMOP_STOP: return "stop";
|
||||
case VMSIG_VMOP_RESET: return "system_reset";
|
||||
case VMSIG_VMOP_POWERDOWN: return "system_powerdown";
|
||||
case VMSIG_VMOP_QUIT: return "quit";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static pend_ent* pend_alloc(struct vmsig_adapter* a) {
|
||||
for (int i = 0; i < VMHOST_MAX_PENDING; i++) if (!a->pend[i].used) return &a->pend[i];
|
||||
return NULL;
|
||||
}
|
||||
static pend_ent* pend_find(struct vmsig_adapter* a, uint32_t id) {
|
||||
for (int i = 0; i < VMHOST_MAX_PENDING; i++)
|
||||
if (a->pend[i].used && a->pend[i].id == id) return &a->pend[i];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ---- emission of neutral UP events ---- */
|
||||
static void emit_vm(struct vmsig_adapter* a, uint32_t state, uint32_t origin, uint32_t corr) {
|
||||
vmsig_vm_state vs = { state, 0 };
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_VM_LIFECYCLE; up.source = VMSIG_SRC_VMHOST; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = (state == VMSIG_VM_RUNNING || state == VMSIG_VM_PAUSED)
|
||||
? VMSIG_PRIO_NORMAL : VMSIG_PRIO_URGENT;
|
||||
up.endpoint = a->endpoint; up.origin = origin; up.corr = corr;
|
||||
up.payload.flags = VMSIG_PL_INLINE;
|
||||
memcpy(up.inln, &vs, sizeof vs);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
}
|
||||
/* Emit an urgent seam event of kind `k` for this endpoint; no payload is attached. */
static void emit_seam(struct vmsig_adapter* a, vmsig_kind k) {
    vmsig_event ev;

    memset(&ev, 0, sizeof ev);
    ev.kind = k;
    ev.source = VMSIG_SRC_VMHOST;
    ev.dir = VMSIG_DIR_UP;
    ev.prio = VMSIG_PRIO_URGENT;
    ev.endpoint = a->endpoint;

    a->emit.emit(a->emit.token, &ev);
}
|
||||
static void emit_ack(struct vmsig_adapter* a, uint32_t origin, uint32_t corr, int ok) {
|
||||
vmsig_event up;
|
||||
memset(&up, 0, sizeof up);
|
||||
up.kind = VMSIG_EV_ACT_ACK; up.source = VMSIG_SRC_VMHOST; up.dir = VMSIG_DIR_UP;
|
||||
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint; up.origin = origin; up.corr = corr;
|
||||
up.payload.flags = VMSIG_PL_INLINE;
|
||||
up.inln[0] = (uint8_t)(ok ? 1 : 0);
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
}
|
||||
|
||||
/* ---- armed: handle one QMP line ---- */
|
||||
static void handle_line(struct vmsig_adapter* a, const char* line) {
|
||||
switch (a->st) {
|
||||
case ST_CONNECTING:
|
||||
if (strstr(line, "\"QMP\"")) { /* greeting -> negotiate capabilities */
|
||||
static const char cap[] = "{\"execute\":\"qmp_capabilities\"}\r\n";
|
||||
ssize_t r = write(a->fd, cap, sizeof cap - 1); (void)r;
|
||||
a->st = ST_NEGOTIATING;
|
||||
}
|
||||
break;
|
||||
case ST_NEGOTIATING:
|
||||
if (strstr(line, "\"return\"")) { a->st = ST_READY; emit_seam(a, VMSIG_EV_SEAM_UP); }
|
||||
break;
|
||||
case ST_READY:
|
||||
if (strstr(line, "\"event\"")) {
|
||||
char name[64];
|
||||
if (jstr(line, "\"event\"", name, sizeof name)) {
|
||||
uint32_t s = ev_state(name);
|
||||
if (s != VMSIG_VM_UNKNOWN) emit_vm(a, s, 0, 0); /* broadcast */
|
||||
}
|
||||
} else if (strstr(line, "\"return\"") || strstr(line, "\"error\"")) {
|
||||
long id = jnum(line, "\"id\"");
|
||||
pend_ent* p = id >= 0 ? pend_find(a, (uint32_t)id) : NULL;
|
||||
if (p) {
|
||||
if (p->op == VMSIG_VMOP_QUERY && strstr(line, "\"return\"")) {
|
||||
char stbuf[32]; uint32_t s = VMSIG_VM_UNKNOWN;
|
||||
if (jstr(line, "\"status\"", stbuf, sizeof stbuf)) s = status_state(stbuf);
|
||||
emit_vm(a, s, p->origin, p->corr); /* addressed reply */
|
||||
} else {
|
||||
emit_ack(a, p->origin, p->corr, strstr(line, "\"return\"") != NULL);
|
||||
}
|
||||
p->used = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
static void armed_dead(struct vmsig_adapter* a) {
|
||||
emit_seam(a, VMSIG_EV_SEAM_DOWN); /* VM transport died */
|
||||
if (a->fd >= 0) { close(a->fd); a->fd = -1; } /* close removes the fd from epoll */
|
||||
a->st = ST_DEAD;
|
||||
}
|
||||
|
||||
/* ---- vtable ---- */
|
||||
static vmsig_adapter* vh_open(const void* cfg, uint32_t endpoint) {
|
||||
const vmsig_vmhost_cfg* c = cfg;
|
||||
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||
if (!a) return NULL;
|
||||
a->endpoint = endpoint;
|
||||
a->qmp_path = (c && c->qmp_path && c->qmp_path[0]) ? c->qmp_path : NULL;
|
||||
a->stub = (a->qmp_path == NULL); /* path given => armed, otherwise stub */
|
||||
a->fd = -1;
|
||||
a->cur = VMSIG_VM_RUNNING;
|
||||
return a;
|
||||
}
|
||||
|
||||
/* attach: create the readiness source and describe it in reg[0].
 *  stub : a periodic non-blocking timerfd (VMHOST_STUB_MS); SEAM_UP is emitted at once.
 *  armed: a non-blocking AF_UNIX stream socket connected to qmp_path ('@' prefix =
 *         abstract namespace); SEAM_UP follows later, once negotiation reaches READY.
 * Returns 1 (one registration filled in) or -1 on failure. */
static int vh_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1) {
        return -1;
    }
    a->emit = *emit;

    if (a->stub) {
        struct itimerspec period;

        a->fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
        if (a->fd < 0) {
            return -1;
        }

        memset(&period, 0, sizeof period);
        period.it_interval.tv_sec = VMHOST_STUB_MS / 1000u;
        period.it_interval.tv_nsec = (long)(VMHOST_STUB_MS % 1000u) * 1000000L;
        period.it_value = period.it_interval;
        if (timerfd_settime(a->fd, 0, &period, NULL) < 0) {
            close(a->fd);
            a->fd = -1;
            return -1;
        }

        a->st = ST_STUB;
        reg[0].fd = a->fd;
        reg[0].epoll_events = EPOLLIN;
        reg[0].shape = VMSIG_RDY_TIMERFD;
        reg[0].cookie = 0;
        emit_seam(a, VMSIG_EV_SEAM_UP);
        return 1;
    }

    /* armed: connect to QEMU's QMP socket */
    int sock = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
    if (sock < 0) {
        return -1;
    }

    struct sockaddr_un sa;
    memset(&sa, 0, sizeof sa);
    sa.sun_family = AF_UNIX;

    const size_t plen = strlen(a->qmp_path);
    const int is_abstract = (a->qmp_path[0] == '@');
    /* abstract names need no terminator, so they may fill sun_path completely */
    const int too_long = is_abstract ? (plen > sizeof sa.sun_path)
                                     : (plen >= sizeof sa.sun_path);
    socklen_t salen;

    if (too_long) {
        close(sock);
        return -1;
    }

    if (is_abstract) {
        sa.sun_path[0] = 0;                                /* leading NUL replaces '@' */
        memcpy(sa.sun_path + 1, a->qmp_path + 1, plen - 1);
        salen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + plen);
    } else {
        memcpy(sa.sun_path, a->qmp_path, plen);
        salen = (socklen_t)sizeof sa;
    }

    if (connect(sock, (struct sockaddr*)&sa, salen) < 0 && errno != EINPROGRESS) {
        close(sock);
        return -1;
    }

    a->fd = sock;
    a->st = ST_CONNECTING;
    reg[0].fd = sock;
    reg[0].epoll_events = EPOLLIN;
    reg[0].shape = VMSIG_RDY_FD;
    reg[0].cookie = 0;
    /* SEAM_UP is emitted upon reaching READY (after qmp_capabilities) */
    return 1;
}
|
||||
|
||||
/* on_readiness.
 *  stub : drain the timerfd, flip the synthetic state between RUNNING and PAUSED and
 *         broadcast it.
 *  armed: read until the socket would block, split the buffered bytes on '\n' and hand
 *         every complete line to handle_line; an unfinished tail stays buffered. A buffer
 *         that fills up without a newline is discarded. EOF or a read error other than
 *         EAGAIN/EWOULDBLOCK marks the transport dead.
 * Always returns 0. */
static int vh_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie;
    (void)events;

    if (a->stub) {
        uint64_t expirations;
        ssize_t got;

        do {
            got = read(a->fd, &expirations, sizeof expirations);
        } while (got == (ssize_t)sizeof expirations); /* drain */

        a->cur = (a->cur == VMSIG_VM_RUNNING) ? VMSIG_VM_PAUSED : VMSIG_VM_RUNNING;
        emit_vm(a, a->cur, 0, 0); /* broadcast */
        return 0;
    }

    if (a->st == ST_DEAD) {
        return 0;
    }

    for (;;) {
        if (a->buflen >= sizeof a->buf) {
            a->buflen = 0; /* line overflow -> reset */
        }

        ssize_t got = read(a->fd, a->buf + a->buflen, sizeof a->buf - a->buflen);
        if (got < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
            break;
        }
        if (got <= 0) { /* EOF or hard error */
            armed_dead(a);
            return 0;
        }
        a->buflen += (size_t)got;

        /* Hand over every complete line; `consumed` ends just past the last newline. */
        size_t consumed = 0;
        char* nl;
        while ((nl = memchr(a->buf + consumed, '\n', a->buflen - consumed)) != NULL) {
            *nl = 0;
            handle_line(a, a->buf + consumed);
            consumed = (size_t)(nl - a->buf) + 1;
        }

        if (consumed > 0) {
            memmove(a->buf, a->buf + consumed, a->buflen - consumed);
            a->buflen -= consumed;
        }
    }
    return 0;
}
|
||||
|
||||
/* DOWN CMD_VM handler.
 *
 *  stub : answer immediately with a VM_LIFECYCLE addressed to the initiator; CONT/STOP
 *         also update the synthetic state.
 *  armed: claim a pending slot, send {"execute":<cmd>,"id":<n>} to QEMU and return; the
 *         reply is routed back to the initiator by handle_line via the id.
 *
 * Returns:
 *    1  not a CMD_VM event, or an unknown op (not for this seam);
 *    0  accepted (stub: answered; armed: command sent);
 *   -1  armed and not READY, pending table full, or the command could not be sent
 *       completely (the pending slot is released again).
 *
 * The command is sent with send(MSG_NOSIGNAL): if QEMU has already closed the socket the
 * call fails with an error instead of raising SIGPIPE in this process. The formatted
 * length is validated before it is used as the byte count. */
static int vh_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind != VMSIG_EV_CMD_VM) return 1; /* not for this seam */

    vmsig_vm_cmd cmd;
    memcpy(&cmd, ev->inln, sizeof cmd);

    if (a->stub) {
        uint32_t s;
        switch (cmd.op) {
        case VMSIG_VMOP_QUERY:     s = a->cur;                    break;
        case VMSIG_VMOP_CONT:      s = a->cur = VMSIG_VM_RUNNING; break;
        case VMSIG_VMOP_STOP:      s = a->cur = VMSIG_VM_PAUSED;  break;
        case VMSIG_VMOP_RESET:     s = VMSIG_VM_RESET;            break;
        case VMSIG_VMOP_POWERDOWN: s = VMSIG_VM_POWERDOWN;        break;
        case VMSIG_VMOP_QUIT:      s = VMSIG_VM_SHUTDOWN;         break;
        default:                   return 1;
        }
        emit_vm(a, s, ev->origin, ev->corr); /* reply addressed to the initiator */
        return 0;
    }

    if (a->st != ST_READY) return -1;

    const char* q = op_qmp(cmd.op);
    if (!q) return 1;

    pend_ent* p = pend_alloc(a);
    if (!p) return -1; /* backpressure: pending table is full */

    uint32_t id = ++a->next_id;
    p->used = 1;
    p->id = id;
    p->origin = ev->origin;
    p->corr = ev->corr;
    p->op = (uint8_t)cmd.op;

    char line[160];
    int len = snprintf(line, sizeof line, "{\"execute\":\"%s\",\"id\":%u}\r\n", q, (unsigned)id);
    if (len < 0 || (size_t)len >= sizeof line) { /* formatting failed or was truncated */
        p->used = 0;
        return -1;
    }

    ssize_t r = send(a->fd, line, (size_t)len, MSG_NOSIGNAL);
    if (r != (ssize_t)len) { /* error or short write: no reply will be matched */
        p->used = 0;
        return -1;
    }
    return 0;
}
|
||||
|
||||
/* close: release the adapter's descriptor (socket or timerfd, when open) and its memory.
 * NULL is accepted and ignored. */
static void vh_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }
    if (a->fd >= 0) {
        close(a->fd);
    }
    free(a);
}
|
||||
|
||||
static const vmsig_adapter_ops VH_OPS = {
|
||||
.name = "vmhost", .source = VMSIG_SRC_VMHOST, .codec = VMSIG_CODEC_VMHOST,
|
||||
.open = vh_open, .attach = vh_attach, .on_readiness = vh_on_ready,
|
||||
.submit = vh_submit, .close = vh_close
|
||||
};
|
||||
|
||||
/* Accessor for the vmhost adapter's operation table (static storage). */
const vmsig_adapter_ops* vmsig_vmhost_ops(void) {
    return &VH_OPS;
}
|
||||
Reference in New Issue
Block a user