vmsig: a neutral signaling layer between sensors/input and controls

An epoll-driven, neutral transfer-event bus that connects sensors and input
actuators to one or more controls, bidirectionally. It owns the transfer context
and events — delivery order, priority, protocol-level timing, and an
interrupt-driven event model over fd sources (eventfd/timerfd/sockets) — and
stays agnostic to both the sensor/input drivers and the control.

What lives here:
- memctx: a coherent address-space context per endpoint — the guest address-space
  root paired with a pre-opened read-only RAM-region fd, with per-endpoint epoch
  invalidation and retained replay to late subscribers. Perception lives in
  out-of-tree sensor libraries that consume this datum read-only.
- exclusive-ownership leases for destructive resource classes (input, power,
  memory-write).
- write-signaled memory writes (MEMWRITE): an atomic write to guest memory routed
  through the seam under an exclusive lease, never a writable mapping.
- a host-management seam for VM lifecycle/status and a neutral input-injection
  command path.
- multi-VM endpoints; capability-gated, audited control authorization over an
  in-process or unix-socket transport.

Builds against headers only by default (a stub mode that exercises the seam
without a VM); armed builds link the real sensor/input libraries behind flags.
This commit is contained in:
2026-06-20 21:21:20 +03:00
commit e9aee057c7
36 changed files with 5820 additions and 0 deletions
+44
View File
@@ -0,0 +1,44 @@
#ifndef VMSIG_ADAPTER_UTIL_H
#define VMSIG_ADAPTER_UTIL_H
#include <stddef.h>
/* adapter_util.h — shared primitive "blocking API -> completion eventfd".
*
* A bridge turning a synchronous CPU-bound / blocking neighbor call (vmie,
* vmctl) into a readiness source for the epoll core: the loop thread posts a request, a
* separate worker thread runs the blocking work and signals a completion eventfd; on it
* the loop wakes and collects the result in on_readiness. Reused by the memctx
* (off-loop bootstrap) and input adapters. */
/* Opaque worker-pool handle; the struct itself is defined by the implementation. */
typedef struct vmsig_worker vmsig_worker;
#define VMSIG_WORK_SLOT 256 /* req/res slot size in bytes (POD, copied) */
/* Callback run IN the worker thread: req -> res (both POD <= VMSIG_WORK_SLOT).
* `user` is the pointer that was given to vmsig_worker_new(). Returns 0/-1 (the code is
* stored alongside the result, see vmsig_worker_poll). Must not touch core structures —
* only compute res from req. */
typedef int (*vmsig_work_fn)(void* user, const void* req, void* res);
/* Create a worker pool of nthreads threads over a shared queue (nthreads>=1; smaller
* values are treated as 1). vmie allows parallel read-only readers; for a serial channel
* (QMP) use 1. max_depth — the request-queue depth ceiling (<=0 => default): submit
* beyond it is rejected (-1) so an untrusted flood does not grow into OOM. If only some
* of the threads can be started the pool runs with those; NULL on error (including when
* no thread could be started). */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth);
/* Stop the threads (join) and free. Requests still queued are executed before the
* threads exit; results that were never collected are discarded. Safe on NULL. */
void vmsig_worker_free(vmsig_worker* w);
/* completion eventfd: the adapter registers it as a VMSIG_RDY_EVENTFD source.
* Returns -1 when w is NULL. */
int vmsig_worker_evfd(const vmsig_worker* w);
/* loop thread: post a request (copied, len <= VMSIG_WORK_SLOT). 0 on success; -1 when
* w is NULL, len is too large, the queue is at its depth ceiling, or allocation fails. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len);
/* loop thread (in on_readiness): drain the completion eventfd. No-op on NULL. */
void vmsig_worker_ack(vmsig_worker* w);
/* loop thread: collect a ready result. 1 — written to res (+ *rc = fn code),
* 0 — empty, -1 — error (w is NULL). At most cap bytes are copied into res; res and rc
* may each be NULL to skip that output. Drain in a loop until 0. */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc);
#endif /* VMSIG_ADAPTER_UTIL_H */
+18
View File
@@ -0,0 +1,18 @@
#ifndef VMSIG_INPUT_H
#define VMSIG_INPUT_H
/* Private config of the input adapter (vmctl). cfg==NULL => stub mode. Armed mode
* (VMSIG_WITH_VMCTL) opens vmctl_open() and actuates for real. driver is an int so
* as not to pull vmctl.h into this header (values match VMCTL_DRIVER_*).
* The adapter copies the scalars at open() and keeps the two string pointers as-is
* (borrowed), so the strings must stay valid until the adapter has been attached. */
typedef struct {
int stub; /* non-zero => never open vmctl; commands are acked without actuation */
int driver; /* 0=QMP, 1=UINPUT (see VMCTL_DRIVER_*); any value other than 1 selects QMP */
const char* qmp_path; /* forwarded unchanged to vmctl_config.qmp_path */
const char* input_bus; /* forwarded unchanged to vmctl_config.input_bus */
int ptr_mode; /* forwarded unchanged to vmctl_config.ptr_mode */
} vmsig_input_cfg;
/* Input event codes/contract are PUBLIC: vmsig_input / vmsig_input_kind in
* include/vmsig_event.h (external control encodes them into inln). No private duplicate. */
#endif /* VMSIG_INPUT_H */
+230
View File
@@ -0,0 +1,230 @@
/* input.c — input/actuator adapter for vmctl (input + power/lifecycle).
*
* Mechanism (recommended): vmctl is a blocking QMP round-trip; we run it on a
* worker thread, completion ack via a completion-eventfd. The uinput path is a
* local instantaneous write; when armed it would be done inline (see comment in submit).
* Real actuation is under VMSIG_WITH_VMCTL; otherwise the stub acks (spine without a VM). */
#include "vmsig_adapter.h"
#include "adapter_util.h"
#include "input.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/epoll.h>
#ifdef VMSIG_WITH_VMCTL
#include "vmctl.h"
#endif
/* POD request/result of the worker. Both are copied by value through the worker's
* fixed-size slots, so they must stay plain data no larger than VMSIG_WORK_SLOT. */
typedef struct {
int cmd; /* 0 = input event, 1 = lifecycle */
uint32_t corr; /* correlation id copied from the command event, echoed in the ACK */
uint32_t origin; /* initiator (addressed ACK) */
int kind; /* vmsig_input_kind (for cmd==0) */
int code; /* axis/btn/evdev-code */
int value; /* abs/rel/down */
double scroll; /* scroll amount (used only for the scroll kind) */
int life_op; /* VMSIG_LIFE_* (powerdown/reset/wakeup/pause/resume) */
} input_req;
/* Result: ok is 1 when the command was acked as successful; corr/origin echo the request. */
typedef struct { int ok; uint32_t corr; uint32_t origin; } input_res;
/* signaling does NOT track held state: the record of what is pressed lives in the
* ACTUATOR (vmctl); we hand it to control on request (CMD_QUERY_INPUT), release is control's decision.
*
* Per-endpoint state of the input adapter. */
struct vmsig_adapter {
uint32_t endpoint; /* endpoint id stamped on every event this adapter emits */
int stub; /* non-zero => vmctl is never opened */
vmsig_emit emit; /* copy of the core's emit hooks, taken at attach */
vmsig_worker* worker; /* single-thread worker running input_job; NULL before attach */
int driver; /* 0=QMP, 1=UINPUT (VMCTL_DRIVER_*); carried open->attach */
const char* qmp_path; /* borrowed from cfg (valid through attach) */
const char* input_bus; /* borrowed from cfg, same lifetime as qmp_path */
int ptr_mode; /* carried open->attach, forwarded to vmctl_config */
#ifdef VMSIG_WITH_VMCTL
vmctl_t* vmctl; /* actuator handle; NULL in stub mode or before attach */
#endif
};
/* Worker-thread job: perform one input or lifecycle command and fill the result.
 *
 * user  — the adapter (struct vmsig_adapter*).
 * reqp  — an input_req; resp — an input_res to fill (corr/origin are echoed).
 *
 * Armed (VMSIG_WITH_VMCTL and an open actuator): the command is sent through vmctl and
 * rs->ok reflects whether vmctl returned 0; the vmctl return code is the job's return
 * value. An unrecognised input kind or lifecycle op is DENIED (ok=0, -1) without
 * touching the actuator — previously an unknown input kind still sent an empty batch and
 * acked whatever that returned.
 * Stub (no actuator): always acks ok=1 and returns 0. */
static int input_job(void* user, const void* reqp, void* resp) {
    struct vmsig_adapter* a = user;
    const input_req* rq = reqp;
    input_res* rs = resp;
    memset(rs, 0, sizeof *rs);
    rs->corr = rq->corr;
    rs->origin = rq->origin;
#ifdef VMSIG_WITH_VMCTL
    if (a->vmctl) {
        int r = -1; /* default-deny: stays -1 unless a known command is executed */
        if (rq->cmd == 0) {
            vmctl_batch b; vmctl_batch_init(&b);
            int known = 1;
            switch (rq->kind) {
            case VMSIG_INPUT_ABS:    vmctl_batch_abs(&b, rq->code, rq->value); break;
            case VMSIG_INPUT_REL:    vmctl_batch_rel(&b, rq->code, rq->value); break;
            case VMSIG_INPUT_BTN:    vmctl_batch_btn(&b, rq->code, rq->value); break;
            case VMSIG_INPUT_KEY:    vmctl_batch_key(&b, rq->code, rq->value); break;
            case VMSIG_INPUT_SCROLL: vmctl_batch_scroll(&b, rq->code, rq->scroll); break;
            default:                 known = 0; break;
            }
            /* NOTE(review): assumes an initialised-but-unsent vmctl_batch needs no
             * cleanup (it lives on the stack) — confirm against vmctl.h. */
            if (known) r = vmctl_batch_send(a->vmctl, &b);
        } else {
            switch (rq->life_op) {
            case 0: r = vmctl_powerdown(a->vmctl); break;
            case 1: r = vmctl_reset(a->vmctl); break;
            case 2: r = vmctl_wakeup(a->vmctl); break;
            case 3: r = vmctl_pause(a->vmctl); break;
            case 4: r = vmctl_resume(a->vmctl); break;
            default: break;
            }
        }
        rs->ok = (r == 0);
        return r;
    }
#endif
    (void)a;
    rs->ok = 1; /* stub: ack without actuation */
    return 0;
}
/* Allocate the input adapter for `endpoint`.
 * cfg is an optional vmsig_input_cfg: NULL selects stub mode with every other setting
 * zeroed; otherwise the stub flag and the driver selection are copied so that attach
 * (which does not receive cfg) can use them. The two strings are borrowed, not copied.
 * Returns NULL when allocation fails. */
static vmsig_adapter* in_open(const void* cfg, uint32_t endpoint) {
    const vmsig_input_cfg* conf = cfg;
    struct vmsig_adapter* self = calloc(1, sizeof *self);
    if (self == NULL)
        return NULL;

    self->endpoint = endpoint;
    if (conf == NULL) {
        self->stub = 1;
        return self;
    }

    self->stub      = conf->stub;
    self->driver    = conf->driver;
    self->qmp_path  = conf->qmp_path;
    self->input_bus = conf->input_bus;
    self->ptr_mode  = conf->ptr_mode;
    return self;
}
/* Attach the adapter to the core: start the worker, open the actuator when armed,
* publish the worker's completion eventfd as the single readiness source and announce
* SEAM_UP.
* emit — the core's emit hooks (copied into the adapter).
* reg/cap — output array of fd registrations and its capacity; only reg[0] is filled.
* Returns 1 on success (presumably the number of reg entries filled — confirm against
* the adapter contract), -1 when cap < 1, the worker cannot be created, or (armed)
* vmctl_open fails. On the vmctl failure path the worker is freed again. */
static int in_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
if (cap < 1) return -1;
a->emit = *emit;
a->worker = vmsig_worker_new(input_job, a, 1, 64); /* QMP is a serial channel, cap 64 */
if (!a->worker) return -1;
#ifdef VMSIG_WITH_VMCTL
if (!a->stub) {
/* armed: build vmctl_config from the carried cfg and open the actuator. UINPUT
* (host uinput + optional virtio-input-host-pci passthrough via QMP) is the primary
* input driver; QMP input-send-event is the fallback. */
vmctl_config vcfg;
memset(&vcfg, 0, sizeof vcfg);
vcfg.driver = (a->driver == 1) ? VMCTL_DRIVER_UINPUT : VMCTL_DRIVER_QMP;
vcfg.qmp_path = a->qmp_path;
vcfg.input_bus = a->input_bus;
vcfg.ptr_mode = a->ptr_mode;
vcfg.uinput_id = NULL; /* built-in HID identity defaults */
a->vmctl = vmctl_open(&vcfg);
if (!a->vmctl) { vmsig_worker_free(a->worker); a->worker = NULL; return -1; }
}
#endif
/* the worker's completion eventfd is the only fd this adapter asks the loop to watch */
reg[0].fd = vmsig_worker_evfd(a->worker);
reg[0].epoll_events = EPOLLIN;
reg[0].shape = VMSIG_RDY_EVENTFD;
reg[0].cookie = 0;
/* announce the seam on this endpoint */
vmsig_event up;
memset(&up, 0, sizeof up);
up.kind = VMSIG_EV_SEAM_UP; up.source = VMSIG_SRC_INPUT; up.dir = VMSIG_DIR_UP;
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
a->emit.emit(a->emit.token, &up);
return 1;
}
/* Readiness callback for the worker's completion eventfd (loop thread).
 * Drains the eventfd, then turns every finished job into an addressed ACT_ACK whose
 * inline bytes are the input_res (clamped to the inline buffer size). cookie and events
 * are unused. Always returns 0. */
static int in_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    input_res done;
    int job_rc; /* collected but not used: done.ok already carries the outcome */

    (void)cookie;
    (void)events;
    vmsig_worker_ack(a->worker);

    for (;;) {
        if (vmsig_worker_poll(a->worker, &done, sizeof done, &job_rc) != 1)
            break;

        vmsig_event ack;
        memset(&ack, 0, sizeof ack);
        ack.kind     = VMSIG_EV_ACT_ACK;
        ack.source   = VMSIG_SRC_INPUT;
        ack.dir      = VMSIG_DIR_UP;
        ack.prio     = VMSIG_PRIO_NORMAL;
        ack.endpoint = a->endpoint;
        ack.corr     = done.corr;
        ack.origin   = done.origin;
        ack.payload.flags = VMSIG_PL_INLINE;

        size_t nbytes = sizeof done;
        if (sizeof ack.inln < nbytes)
            nbytes = sizeof ack.inln;
        memcpy(ack.inln, &done, nbytes);

        a->emit.emit(a->emit.token, &ack);
    }
    return 0;
}
/* DOWN-direction entry point of the input seam (loop thread).
 *
 *  - CMD_QUERY_INPUT: answered inline with an INPUT_HELD event addressed to the
 *    initiator; returns 0.
 *  - CMD_INPUT / CMD_LIFECYCLE: decoded into an input_req and queued on the worker; the
 *    ACT_ACK is emitted later by in_on_ready. Returns 0 when queued, -1 when the worker
 *    rejected the request.
 *  - anything else: returns 1 (not for this seam).
 *
 * NOTE(review): on a rejected submit no ACT_ACK is emitted here, whereas the memctx
 * adapter acks ok=0 in the same situation — confirm whether the core reports the -1 to
 * the initiator. */
static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CMD_QUERY_INPUT) {
        /* Return what is PRESSED from the vmctl ACTUATOR's record (signaling does NOT track
         * held itself). The read is read-only (no QMP round-trip) => on the loop thread;
         * addressed reply to the initiator. stub without vmctl => empty set (nothing to
         * actuate — nothing to hold). */
        vmsig_input_held h;
        memset(&h, 0, sizeof h);
#ifdef VMSIG_WITH_VMCTL
        if (a->vmctl) {
            const uint32_t capn = (uint32_t)(sizeof h.ent / sizeof h.ent[0]);
            unsigned char bits[VMCTL_KEYS_SNAPSHOT_BYTES];
            int n = vmctl_keys_snapshot(a->vmctl, bits, sizeof bits);
            /* keys: one bit per key code; n <= 0 means no usable snapshot */
            for (int code = 0; n > 0 && code <= VMCTL_KEY_CODE_MAX; code++)
                if (bits[code >> 3] & (1u << (code & 7))) {
                    if (h.count < capn) { h.ent[h.count].kind = VMSIG_INPUT_KEY;
                        h.ent[h.count].code = (uint16_t)code; h.count++; }
                    else h.flags |= VMSIG_INPUT_HELD_TRUNC;
                }
            /* buttons: the low 8 bits of the mask */
            unsigned bm = vmctl_btns_snapshot(a->vmctl);
            for (int b = 0; b < 8; b++) if (bm & (1u << b)) {
                if (h.count < capn) { h.ent[h.count].kind = VMSIG_INPUT_BTN;
                    h.ent[h.count].code = (uint16_t)b; h.count++; }
                else h.flags |= VMSIG_INPUT_HELD_TRUNC;
            }
        }
#endif
        vmsig_event up;
        memset(&up, 0, sizeof up);
        up.kind = VMSIG_EV_INPUT_HELD; up.source = VMSIG_SRC_INPUT; up.dir = VMSIG_DIR_UP;
        up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint; up.origin = ev->origin;
        up.payload.flags = VMSIG_PL_INLINE;
        memcpy(up.inln, &h, sizeof up.inln < sizeof h ? sizeof up.inln : sizeof h);
        a->emit.emit(a->emit.token, &up);
        return 0;
    }

    input_req rq;
    memset(&rq, 0, sizeof rq);
    rq.corr = ev->corr; rq.origin = ev->origin;
    if (ev->kind == VMSIG_EV_CMD_INPUT) {
        rq.cmd = 0;
        /* Decode the NEUTRAL public input contract from inln (vmsig_input). We do NOT track
         * held — that is the vmctl actuator's record (returned via CMD_QUERY_INPUT). */
        vmsig_input in;
        /* Zero first: the copy below is clamped to the inline buffer, so without this a
         * buffer shorter than vmsig_input would leave trailing fields indeterminate. */
        memset(&in, 0, sizeof in);
        memcpy(&in, ev->inln, sizeof in <= sizeof ev->inln ? sizeof in : sizeof ev->inln);
        rq.kind = (int)in.kind;
        rq.code = (int)in.code;
        rq.value = (int)in.value;
        rq.scroll = in.scroll;
    } else if (ev->kind == VMSIG_EV_CMD_LIFECYCLE) {
        rq.cmd = 1;
        rq.life_op = (int)(unsigned char)ev->inln[0]; /* first inline byte is the op */
    } else {
        return 1; /* not for this seam */
    }
    return vmsig_worker_submit(a->worker, &rq, sizeof rq) == 0 ? 0 : -1;
}
/* Tear the adapter down. The worker is stopped and joined first, so no job can still be
 * using the actuator when it is closed afterwards. NULL is accepted and ignored. */
static void in_close(vmsig_adapter* a) {
    if (a != NULL) {
        vmsig_worker_free(a->worker); /* joins the worker thread; fine on NULL */
#ifdef VMSIG_WITH_VMCTL
        if (a->vmctl != NULL)
            vmctl_close(a->vmctl);
#endif
        free(a);
    }
}
/* Operations table of the input adapter: identity (name/source/codec) plus the five
* lifecycle hooks defined in this file. */
static const vmsig_adapter_ops IN_OPS = {
.name = "input", .source = VMSIG_SRC_INPUT, .codec = VMSIG_CODEC_INPUT,
.open = in_open, .attach = in_attach, .on_readiness = in_on_ready,
.submit = in_submit, .close = in_close
};
/* Accessor for the input adapter's operations table; the table is static and never freed. */
const vmsig_adapter_ops* vmsig_input_ops(void) { return &IN_OPS; }
+162
View File
@@ -0,0 +1,162 @@
/* worker.c — bridge "blocking API -> completion eventfd" (pool of N threads).
* MPSC request/result queues under a mutex + condvar; result readiness is
* signaled via eventfd, on which the core's epoll loop wakes. N threads share one
* request queue (for vmie — parallel read-only readers; for QMP — N=1). */
#include "adapter_util.h"
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <sys/eventfd.h>
/* One queued item: used both for requests (loop -> workers) and for results
* (workers -> loop). Heap-allocated, singly linked. */
typedef struct work_node {
struct work_node* next; /* next item in the queue, NULL at the tail */
int rc; /* fn return code (for results) */
size_t len; /* number of meaningful bytes in buf */
unsigned char buf[VMSIG_WORK_SLOT]; /* the POD request or result, stored by value */
} work_node;
/* FIFO of work_node: push at tail, pop at head; both NULL when empty. */
typedef struct { work_node* head; work_node* tail; } work_q;
/* Worker-pool state shared between the loop thread and the worker threads.
* req, res, stop and req_count are only accessed with `lock` held. */
struct vmsig_worker {
pthread_t* threads; /* thread handles; the first nthreads entries were started */
int nthreads; /* number of threads actually started */
pthread_mutex_t lock; /* guards the queues, stop and req_count */
pthread_cond_t cv; /* signalled on submit and on stop */
work_q req; /* loop -> workers */
work_q res; /* workers -> loop */
int evfd; /* completion eventfd (non-blocking), written once per finished job */
int stop; /* set by vmsig_worker_free; threads exit once the req queue is empty */
int max_depth; /* cap on req-queue depth */
int req_count; /* current req-queue depth */
vmsig_work_fn fn; /* job callback run on the worker threads */
void* user; /* opaque pointer passed to fn */
};
/* Append n at the tail of q. The caller holds whatever lock protects q. */
static void q_push(work_q* q, work_node* n) {
    n->next = NULL;
    if (q->tail == NULL)
        q->head = n;          /* queue was empty: n is also the head */
    else
        q->tail->next = n;
    q->tail = n;
}
/* Detach and return the head of q, or NULL when q is empty. The returned node's `next`
 * is left as it was; ownership of the node passes to the caller. */
static work_node* q_pop(work_q* q) {
    work_node* first = q->head;
    if (first != NULL) {
        q->head = first->next;
        if (q->head == NULL)
            q->tail = NULL;   /* that was the last item */
    }
    return first;
}
/* Free every node still in q and leave q empty. */
static void q_drain(work_q* q) {
    for (work_node* cur = q->head; cur != NULL; ) {
        work_node* following = cur->next; /* read before the node is freed */
        free(cur);
        cur = following;
    }
    q->head = NULL;
    q->tail = NULL;
}
/* Thread body of one pool worker.
 *
 * Waits for a request, runs w->fn on it outside the lock, queues the result and signals
 * the completion eventfd. Exits once stop is set AND the request queue is empty, so
 * already-submitted work is still executed during shutdown.
 *
 * The request node is RECYCLED as the result node: fn writes into a zeroed scratch slot
 * on this thread's stack, which is then copied over the node's buffer. There is
 * therefore no allocation on the completion path — previously a failed result
 * allocation dropped the request without running fn and without any completion, so the
 * initiator never heard back.
 *
 * arg — the vmsig_worker. Always returns NULL. */
static void* worker_main(void* arg) {
    vmsig_worker* w = arg;
    /* Scratch result slot; the union keeps it suitably aligned for the POD result
     * structs that fn writes into it. */
    union { uint64_t align64; void* alignp; unsigned char buf[VMSIG_WORK_SLOT]; } out;

    for (;;) {
        pthread_mutex_lock(&w->lock);
        while (!w->stop && !w->req.head) pthread_cond_wait(&w->cv, &w->lock);
        /* On stop we DRAIN the queue: run the remaining requests so that submitted
         * work is not silently lost (matters for jobs carrying resource ownership).
         * We exit only when stop AND the queue is empty. */
        if (w->stop && !w->req.head) { pthread_mutex_unlock(&w->lock); break; }
        work_node* job = q_pop(&w->req);
        if (job) w->req_count--;
        pthread_mutex_unlock(&w->lock);
        if (!job) continue; /* another worker took it between wake-up and pop */

        memset(out.buf, 0, sizeof out.buf); /* fn sees a zero-filled result slot */
        job->rc = w->fn ? w->fn(w->user, job->buf, out.buf) : -1;
        memcpy(job->buf, out.buf, sizeof out.buf); /* request bytes are no longer needed */
        job->len = VMSIG_WORK_SLOT;

        pthread_mutex_lock(&w->lock);
        q_push(&w->res, job); /* ownership passes to the result queue / the poller */
        pthread_mutex_unlock(&w->lock);

        /* Wake the loop. The eventfd is non-blocking; a failed write is ignored. */
        uint64_t one = 1;
        ssize_t r = write(w->evfd, &one, sizeof one);
        (void)r;
    }
    return NULL;
}
/* Build a worker pool.
 * fn/user    — job callback and its opaque argument.
 * nthreads   — requested thread count; anything below 1 is treated as 1.
 * max_depth  — request-queue ceiling; values <= 0 select 512.
 * Threads are started one by one and creation stops at the first failure; the pool is
 * returned as long as at least one thread is running. Returns NULL (with everything
 * released) when an allocation, the eventfd, the mutex, the condvar or the very first
 * thread cannot be created. */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth) {
    const int wanted = nthreads < 1 ? 1 : nthreads;

    vmsig_worker* pool = calloc(1, sizeof *pool);
    if (pool == NULL)
        return NULL;
    pool->fn = fn;
    pool->user = user;
    pool->evfd = -1;
    pool->max_depth = max_depth > 0 ? max_depth : 512;

    pool->threads = calloc((size_t)wanted, sizeof *pool->threads);
    if (pool->threads == NULL)
        goto fail_pool;

    pool->evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (pool->evfd < 0)
        goto fail_threads;

    if (pthread_mutex_init(&pool->lock, NULL) != 0)
        goto fail_evfd;

    if (pthread_cond_init(&pool->cv, NULL) != 0)
        goto fail_lock;

    for (int started = 0; started < wanted; started++) {
        if (pthread_create(&pool->threads[started], NULL, worker_main, pool) != 0)
            break;
        pool->nthreads++;
    }
    if (pool->nthreads != 0)
        return pool;

    /* not a single thread could be started: unwind everything */
    pthread_cond_destroy(&pool->cv);
fail_lock:
    pthread_mutex_destroy(&pool->lock);
fail_evfd:
    close(pool->evfd);
fail_threads:
    free(pool->threads);
fail_pool:
    free(pool);
    return NULL;
}
/* Shut the pool down and release it. Sets stop under the lock, wakes every worker,
 * joins them (they finish the queued requests first), then frees whatever is left in
 * both queues and the pool's own resources. NULL is accepted and ignored. */
void vmsig_worker_free(vmsig_worker* w) {
    if (w == NULL)
        return;

    pthread_mutex_lock(&w->lock);
    w->stop = 1;
    pthread_cond_broadcast(&w->cv);
    pthread_mutex_unlock(&w->lock);

    int idx = 0;
    while (idx < w->nthreads) {
        pthread_join(w->threads[idx], NULL);
        idx++;
    }

    /* no thread is left, so the queues can be emptied without the lock */
    q_drain(&w->req);
    q_drain(&w->res);

    pthread_cond_destroy(&w->cv);
    pthread_mutex_destroy(&w->lock);
    if (!(w->evfd < 0))
        close(w->evfd);
    free(w->threads);
    free(w);
}
/* Completion eventfd of the pool, or -1 when w is NULL. The fd stays owned by the pool. */
int vmsig_worker_evfd(const vmsig_worker* w) {
    if (w == NULL)
        return -1;
    return w->evfd;
}
/* Queue one request for the workers.
 * req/len — request bytes, copied into the node (len must not exceed VMSIG_WORK_SLOT;
 *           a NULL req or a zero len queues an all-zero slot).
 * Returns 0 when queued; -1 when w is NULL, len is too large, the queue already holds
 * max_depth requests, or the node cannot be allocated. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len) {
    if (w == NULL || len > VMSIG_WORK_SLOT)
        return -1;

    int status = -1;
    pthread_mutex_lock(&w->lock);
    if (w->req_count < w->max_depth) { /* below the cap: otherwise reject flooding */
        work_node* node = calloc(1, sizeof *node);
        if (node != NULL) {
            if (req != NULL && len != 0)
                memcpy(node->buf, req, len);
            node->len = len;
            q_push(&w->req, node);
            w->req_count++;
            pthread_cond_signal(&w->cv);
            status = 0;
        }
    }
    pthread_mutex_unlock(&w->lock);
    return status;
}
/* Drain the completion eventfd: keep reading until a read no longer yields a full
 * 8-byte counter value. NULL is accepted and ignored. */
void vmsig_worker_ack(vmsig_worker* w) {
    if (w == NULL)
        return;

    uint64_t counter;
    for (;;) {
        ssize_t got = read(w->evfd, &counter, sizeof counter);
        if (got != (ssize_t)sizeof counter)
            break;
    }
}
/* Collect one finished job.
 * res/cap — destination and its size; at most cap bytes (and never more than the stored
 *           result length) are copied. Skipped when res is NULL or cap is 0.
 * rc      — receives the job callback's return code when non-NULL.
 * Returns 1 when a result was delivered, 0 when none is pending, -1 when w is NULL. */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc) {
    if (w == NULL)
        return -1;

    pthread_mutex_lock(&w->lock);
    work_node* done = q_pop(&w->res);
    pthread_mutex_unlock(&w->lock);

    if (done == NULL)
        return 0;

    if (res != NULL && cap != 0) {
        size_t ncopy = done->len;
        if (cap < ncopy)
            ncopy = cap;
        memcpy(res, done->buf, ncopy);
    }
    if (rc != NULL)
        *rc = done->rc;

    free(done);
    return 1;
}
+20
View File
@@ -0,0 +1,20 @@
#ifndef VMSIG_MEMCTX_CFG_H
#define VMSIG_MEMCTX_CFG_H
#include <stdint.h>
/* Private config of the memctx adapter (vmie). Passed as opaque to open(); NOT
* public (layout per reference: src/<module>/include/). cfg==NULL => stub.
* The adapter also falls back to stub mode when neither ram_path nor a ro_fd is given.
* ram_path is kept as a borrowed pointer; ro_fd stays owned by the caller. */
typedef struct {
int stub; /* 1 => synthetic kcr3/RO-fd (spine without a VM) */
const char* ram_path; /* armed: path to guest RAM backing (NOT published outward) */
uint64_t low; /* below-4G split (vmie_win32_open / locator.low) */
int ro_fd; /* >=0 => infra supplied a pre-sealed RO-fd (policy); */
/* <0 => default: open(ram_path, O_RDONLY) / stub-memfd */
} vmsig_memctx_cfg;
/* Max SRC bytes per atomic gva_write (bounds the worker POD slot; mc_req header + src
* must stay <= VMSIG_WORK_SLOT). Private to the adapter (an executor bound), NOT part of
* the neutral control contract — control only needs VMSIG_MEMWRITE_INLINE for inline SRC. */
#define VMSIG_MEMWRITE_MAX 192u
#endif /* VMSIG_MEMCTX_CFG_H */
+407
View File
@@ -0,0 +1,407 @@
/* memctx.c — vmie sensor adapter: vends ONE coherent guest address-space context —
* the permanent System DirectoryTableBase (`kcr3`) PAIRED with a RAM-region locator
* and a pre-opened O_RDONLY fd. This is NOT perception and NOT semantics: signaling
* multicasts the datum + RO-fd, while the holder (an S-lib / any control) opens ITS OWN
* read-only vmie_mem from the fd and does gva_read/scan/pmap itself.
*
* Cold bring-up (host_bootstrap) is CPU-bound and blocking, so it runs on an off-loop
* worker; the loop thread only assembles the locator on the completion-eventfd and emits
* the MEMCTX trigger. The epoch is stamped by the CORE (retained-context); on an epoch
* change the core calls reg.invalidate, the adapter re-bootstraps and re-emits MEMCTX.
*
* RO outward is physical: O_RDONLY fd => mmap(PROT_WRITE) -> EACCES, so a write into the
* guest on the holder side is structurally impossible. stub mode (without VMSIG_WITH_VMIE
* or ram_path==NULL) synthesizes a kcr3 and a genuinely RO-mappable fd (memfd + seal) —
* the seam is provable without a VM. */
#define _GNU_SOURCE
#include "vmsig_adapter.h"
#include "memctx.h"
#include "adapter_util.h" /* vmsig_worker (off-loop bootstrap) */
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/epoll.h>
#ifdef VMSIG_WITH_VMIE
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
#endif
/* memfd_create / seal — ABI fallbacks for old glibc/kernel (stub RO-fd backing). */
#ifndef MFD_CLOEXEC
#include <sys/syscall.h>
#include <linux/memfd.h>
/* Fallback used only when the libc headers do not provide MFD_CLOEXEC: issues the
* memfd_create system call directly. Returns the syscall's result narrowed to int. */
static int memfd_create(const char* name, unsigned int flags) {
return (int)syscall(SYS_memfd_create, name, flags);
}
#endif
#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif
#ifndef F_ADD_SEALS
#define F_ADD_SEALS (1024 + 9)
#define F_SEAL_SHRINK 0x0002
#define F_SEAL_GROW 0x0004
#endif
#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE 0x0010 /* kernel 5.1+: forbid future writable mappings */
#endif
#define MC_STUB_SIZE 0x10000u /* 64 KB of synthetic RAM image (stub) */
#define MC_MAX_SEG 8
#define MC_WORKER_DEPTH 16 /* one off-loop thread: rare bootstrap + writes */
/* Job selector: carried in mc_req.op and echoed back in mc_res.op. */
enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
* bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap).
* boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend
* on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src. */
typedef struct {
uint32_t op; /* MC_JOB_* */
uint32_t boot_count; /* MC_JOB_BOOTSTRAP */
/* --- MC_JOB_WRITE --- */
uint64_t gva; /* guest virtual address to write to */
uint32_t len; /* number of valid bytes in src (1..VMSIG_MEMWRITE_MAX) */
uint32_t corr; /* correlation id echoed in the ACK */
uint32_t origin; /* initiator the ACK is addressed to */
uint8_t src[VMSIG_MEMWRITE_MAX]; /* SRC bytes copied off-loop (gva_write reads this) */
} mc_req;
/* Result of either job kind; fields not belonging to the job stay zero. */
typedef struct {
uint32_t op; /* echoes the job type so on_ready demuxes */
int ok; /* MC_JOB_WRITE result */
uint32_t corr; /* MC_JOB_WRITE: echoed from the request */
uint32_t origin; /* MC_JOB_WRITE: echoed from the request */
uint64_t kcr3; /* MC_JOB_BOOTSTRAP result */
} mc_res;
/* Per-endpoint state of the memctx adapter.
* Thread ownership as used in this file: win, mem and kcr3 are written by the bootstrap
* job and read by the write job (both on the single worker thread); win is closed by
* mc_close only after the worker has been joined. have_ctx and the cur_* locator fields
* are written in mc_on_ready and read by the reg hooks (loop thread). */
struct vmsig_adapter {
uint32_t endpoint; /* endpoint id stamped on every emitted event */
int stub; /* non-zero => synthetic context, writes are acked without actuation */
const char* ram_path; /* armed: RAM-backing path (NOT published outward) */
uint64_t low; /* below-4G split from the config; 0 => MC_STUB_SIZE is published */
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (policy); <0 => default */
vmsig_emit emit; /* copy of the core's hooks, taken at attach */
int registered; /* register_memctx already called */
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
uint32_t boot_count; /* incremented on each (re-)bootstrap */
#ifdef VMSIG_WITH_VMIE
vmie_win32* win; /* held RW handle across the epoch (kcr3 source + gva_write target) */
vmie_mem* mem; /* vmie_win32_mem(win); borrowed, valid until vmie_win32_close */
#endif
uint64_t kcr3; /* System DTB used as the gva_write root; set by the armed bootstrap */
/* persistent locator: owned by the loop thread; worker only yields kcr3 into scratch. */
int have_ctx; /* 1 once a context has been assembled; cleared on invalidate */
vmsig_memctx cur_pod; /* kcr3/low/nseg/flags (epoch stamped by the core) */
vmsig_memseg cur_segs[MC_MAX_SEG]; /* segment table handed out by describe */
uint32_t cur_nseg; /* number of valid entries in cur_segs */
int stub_fd; /* stub: memfd of synth RAM (+seal); share_fd reopens it */
};
/* fwd: MEMWRITE completion ACK (defined below mc_submit; used in mc_on_ready demux). */
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin);
/* ---- stub RO-fd: memfd + deterministic contents + seal of future writes ----
 * Creates an anonymous memfd of `size` bytes whose byte at offset i is (i & 0xFF), then
 * seals it. Sealing and the content fill are best-effort: if sealing is unavailable the
 * memfd is created without it, and if the temporary mapping fails the file keeps its
 * zero contents. Returns the fd (owned by the caller) or -1 when the memfd cannot be
 * created or sized. */
static int mc_make_stub_fd(uint32_t size) {
    int fd = memfd_create("vmsig_memctx", MFD_CLOEXEC | MFD_ALLOW_SEALING);
    if (fd < 0) {
        /* retry without sealing support */
        fd = memfd_create("vmsig_memctx", MFD_CLOEXEC);
        if (fd < 0)
            return -1;
    }

    if (ftruncate(fd, (off_t)size) != 0) {
        close(fd);
        return -1;
    }

    /* fill through a temporary RW mapping, which must happen BEFORE the seal */
    uint8_t* image = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (image != MAP_FAILED) {
        uint32_t off = 0;
        while (off < size) {
            image[off] = (uint8_t)(off & 0xFFu);
            off++;
        }
        munmap(image, size);
    }

    /* FUTURE_WRITE: even if the holder reopens the fd as O_RDWR, it gets no writable
     * mapping. Best-effort (kernel 5.1+); when it is refused, fall back to the size
     * seals only — then only the O_RDONLY fd protects. */
    const int size_seals = F_SEAL_SHRINK | F_SEAL_GROW;
    if (fcntl(fd, F_ADD_SEALS, size_seals | F_SEAL_FUTURE_WRITE) != 0)
        (void)fcntl(fd, F_ADD_SEALS, size_seals);

    return fd;
}
#ifdef VMSIG_WITH_VMIE
/* armed bring-up: open RAM (RW is vmie's internal concern), host_bootstrap, extract the
* permanent System DTB as the System process cr3 (kcr3 — the root of the guest AS). The RW
* handle is HELD across the epoch (kcr3 source + gva_write target); ONLY the RO-fd (share_fd)
* leaves outward — write goes through this command plane, never a writable mmap. Runs on the
* off-loop worker; a stale handle from a prior epoch is dropped first (serialized FIFO with
* in-flight writes).
*
* out_kcr3 receives the System cr3 on success. Returns 0 on success; -1 when the RAM
* cannot be opened, host_bootstrap fails, or no process named "System" with a non-zero
* cr3 is found among the first 16 listed. On failure no handle is held (a->win and
* a->mem are NULL, because the stale one was already dropped). */
static int mc_bootstrap_armed(struct vmsig_adapter* a, uint64_t* out_kcr3) {
if (a->win) { vmie_win32_close(a->win); a->win = NULL; a->mem = NULL; } /* drop stale epoch handle */
vmie_win32* v = vmie_win32_open(a->ram_path, a->low);
if (!v) return -1;
if (host_bootstrap(v) != 0) { vmie_win32_close(v); return -1; }
process procs[16];
int n = proc_list(v, 0, procs, 16);
uint64_t kcr3 = 0;
/* look only at the entries proc_list reported, never past the local array */
for (int i = 0; i < n && i < 16; i++)
if (!strcmp(procs[i].name, "System")) { kcr3 = procs[i].cr3; break; }
if (!kcr3) { vmie_win32_close(v); return -1; }
a->win = v; /* HOLD: RW handle lives across the epoch */
a->mem = vmie_win32_mem(v); /* borrowed; valid until vmie_win32_close(v) */
a->kcr3 = kcr3;
*out_kcr3 = kcr3;
return 0;
}
#endif
/* ---- worker job: cold bring-up OR atomic write, off-loop ----------------- *
* Demultiplexed by rq->op. BOTH run on the SAME single worker thread, so a write on the
* held handle never races the close-on-rebootstrap (FIFO). The job MUST NOT touch core
* structures — it only reads a->mem/a->kcr3 (stable between re-bootstraps on this thread).
*
* user — the adapter; req — an mc_req; res — an mc_res to fill (op is always echoed).
* Returns 0 on success and -1 on failure:
*   MC_JOB_WRITE:     stub => ok=1; armed => ok reflects gva_write; a build without
*                     VMSIG_WITH_VMIE that is not in stub mode => ok=0.
*   MC_JOB_BOOTSTRAP: stub => a synthetic kcr3 derived from boot_count; armed => the
*                     System cr3; a build without VMSIG_WITH_VMIE that is not in stub
*                     mode => failure. */
static int mc_job(void* user, const void* req, void* res) {
struct vmsig_adapter* a = user;
const mc_req* rq = req;
mc_res* rs = res;
memset(rs, 0, sizeof *rs);
rs->op = rq->op;
if (rq->op == MC_JOB_WRITE) {
rs->corr = rq->corr; rs->origin = rq->origin;
if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */
#ifdef VMSIG_WITH_VMIE
/* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it):
* the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. */
rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)a->kcr3, (uintptr_t)rq->gva,
rq->src, rq->len) == 0);
return rs->ok ? 0 : -1;
#else
rs->ok = 0;
return -1; /* armed without the build flag: write impossible */
#endif
}
/* MC_JOB_BOOTSTRAP (any op other than MC_JOB_WRITE ends up here) */
if (a->stub) {
rs->kcr3 = 0xC0DE0000ull + (uint64_t)rq->boot_count * 0x1000ull; /* changes per epoch */
return 0;
}
#ifdef VMSIG_WITH_VMIE
uint64_t kcr3 = 0;
if (mc_bootstrap_armed(a, &kcr3) != 0) return -1;
rs->kcr3 = kcr3;
return 0;
#else
return -1; /* armed without the build flag: bootstrap impossible -> ERROR */
#endif
}
/* Request a (re-)bootstrap on the worker. Bumps the adapter's boot counter and passes
 * the new value along (the stub derives its synthetic kcr3 from it).
 * NOTE(review): when the worker queue is full the request is dropped without any
 * event, so no MEMCTX (and no ERROR) follows for this kick — confirm this is acceptable. */
static void mc_kick_bootstrap(struct vmsig_adapter* a) {
    mc_req boot;
    memset(&boot, 0, sizeof boot);
    boot.op = MC_JOB_BOOTSTRAP;
    boot.boot_count = ++a->boot_count;
    (void)vmsig_worker_submit(a->worker, &boot, sizeof boot); /* full => drop (rare) */
}
/* ---- reg hooks (vmsig_memctx_reg.ctx = a; called by the core on the loop thread) ---- */
/* describe hook: hand the core the adapter's current locator.
 * out_pod receives a COPY of the context datum (the core overwrites its epoch);
 * out_segs/out_nseg receive a pointer into the adapter's own segment table and its
 * length, so they stay valid only as long as the adapter does. */
static void mc_reg_describe(void* ctx, vmsig_memctx* out_pod,
                            const vmsig_memseg** out_segs, uint32_t* out_nseg) {
    struct vmsig_adapter* self = ctx;

    *out_nseg = self->cur_nseg;
    *out_segs = self->cur_segs;
    *out_pod  = self->cur_pod;
}
/* share_fd hook: produce a fresh read-only fd on the RAM backing for one holder.
 *  - infra-supplied RO-fd: a close-on-exec duplicate of it;
 *  - stub: the stub memfd reopened O_RDONLY through /proc/self/fd;
 *  - armed default: ram_path opened O_RDONLY.
 * Returns the new fd, or -1 when the relevant backing is missing or the call fails. */
static int mc_reg_share_fd(void* ctx) {
    struct vmsig_adapter* self = ctx;

    if (self->cfg_ro_fd >= 0)
        return fcntl(self->cfg_ro_fd, F_DUPFD_CLOEXEC, 0); /* infra-sealed RO-fd: dup */

    if (!self->stub) {
        /* armed default */
        if (self->ram_path == NULL)
            return -1;
        return open(self->ram_path, O_RDONLY | O_CLOEXEC);
    }

    if (self->stub_fd < 0)
        return -1;
    char link[64];
    snprintf(link, sizeof link, "/proc/self/fd/%d", self->stub_fd);
    return open(link, O_RDONLY | O_CLOEXEC); /* fresh O_RDONLY on the backing */
}
/* invalidate hook: the core announced a new epoch. The epoch value itself is not used
 * here (the core owns it); the adapter marks its context invalid and requests a fresh
 * bootstrap, after which the completion handler re-emits MEMCTX. */
static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
    struct vmsig_adapter* self = ctx;

    (void)epoch;
    self->have_ctx = 0;        /* the previous context is invalid */
    mc_kick_bootstrap(self);   /* off-loop */
}
/* ---- vtable ---- */
/* Allocate the memctx adapter for `endpoint`.
 * cfg is an optional vmsig_memctx_cfg; NULL means stub mode with defaults. A negative
 * ro_fd is normalised to -1. Whatever the stub flag says, the adapter is forced into
 * stub mode when it has neither a RAM path nor a supplied RO-fd. No fd is opened here.
 * Returns NULL when allocation fails. */
static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
    const vmsig_memctx_cfg* conf = cfg;
    struct vmsig_adapter* self = calloc(1, sizeof *self);
    if (self == NULL)
        return NULL;

    self->endpoint  = endpoint;
    self->stub_fd   = -1;
    self->cfg_ro_fd = -1;
    self->stub      = 1;

    if (conf != NULL) {
        self->stub     = conf->stub;
        self->ram_path = conf->ram_path;
        self->low      = conf->low;
        if (conf->ro_fd >= 0)
            self->cfg_ro_fd = conf->ro_fd;
    }

    if (self->ram_path == NULL && self->cfg_ro_fd < 0)
        self->stub = 1; /* no path/fd => stub */
    return self;
}
/* Attach the adapter to the core: start the single worker, create the stub backing when
* needed, publish the worker's completion eventfd as the only readiness source, register
* the memctx hooks, announce SEAM_UP and request the first bootstrap.
* emit — the core's hooks (copied); reg/cap — output fd registrations and capacity, only
* reg[0] is filled.
* Returns 1 on success (presumably the number of reg entries filled — confirm against
* the adapter contract); -1 when cap < 1, the worker cannot be created, or the stub
* memfd cannot be created (the worker is freed again in that case). */
static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
if (cap < 1) return -1;
a->emit = *emit;
a->worker = vmsig_worker_new(mc_job, a, 1, MC_WORKER_DEPTH);
if (!a->worker) return -1;
/* stub without an infra-supplied fd: synthesize the backing ourselves */
if (a->stub && a->cfg_ro_fd < 0) {
a->stub_fd = mc_make_stub_fd(MC_STUB_SIZE);
if (a->stub_fd < 0) { vmsig_worker_free(a->worker); a->worker = NULL; return -1; }
}
/* worker completion-eventfd as the readiness source (cookie=0). */
reg[0].fd = vmsig_worker_evfd(a->worker);
reg[0].epoll_events = EPOLLIN;
reg[0].shape = VMSIG_RDY_EVENTFD;
reg[0].cookie = 0;
/* register the reg BEFORE the first bootstrap: the core slot gets the hooks. describe
* is not called until the slot is valid (which only happens after the first MEMCTX). */
if (a->emit.register_memctx) {
vmsig_memctx_reg r;
memset(&r, 0, sizeof r);
r.endpoint = a->endpoint;
r.source = VMSIG_SRC_MEMCTX;
r.ctx = a;
r.describe = mc_reg_describe;
r.share_fd = mc_reg_share_fd;
r.invalidate = mc_reg_invalidate;
/* remember a successful registration so that close can undo it */
if (a->emit.register_memctx(a->emit.token, &r) == 0) a->registered = 1;
}
vmsig_event up;
memset(&up, 0, sizeof up);
up.kind = VMSIG_EV_SEAM_UP; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
a->emit.emit(a->emit.token, &up);
mc_kick_bootstrap(a); /* first bootstrap off-loop; assemble the locator on completion */
return 1;
}
/* Readiness callback for the worker's completion eventfd (loop thread).
 * Drains the eventfd and handles every finished job:
 *  - MC_JOB_WRITE: emits the addressed MEMWRITE ACK (ok only if both the result and the
 *    job return code say so);
 *  - failed bootstrap: emits an urgent ERROR and publishes nothing;
 *  - successful bootstrap: rebuilds the single-segment locator from the returned kcr3
 *    and emits the MEMCTX trigger.
 * cookie and events are unused. Always returns 0. */
static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie; (void)events;
    vmsig_worker_ack(a->worker);
    mc_res rs;
    int rc;
    while (vmsig_worker_poll(a->worker, &rs, sizeof rs, &rc) == 1) {
        if (rs.op == MC_JOB_WRITE) {
            /* atomic write completed: addressed ACT_ACK to the initiator. */
            mc_memwrite_ack(a, rs.ok && rc == 0, rs.corr, rs.origin);
            continue;
        }
        if (rc != 0) {
            /* bootstrap failed: ERROR (source MEMCTX); do NOT publish an invalid kcr3. */
            vmsig_event er;
            memset(&er, 0, sizeof er);
            er.kind = VMSIG_EV_ERROR; er.source = VMSIG_SRC_MEMCTX; er.dir = VMSIG_DIR_UP;
            er.prio = VMSIG_PRIO_URGENT; er.endpoint = a->endpoint;
            a->emit.emit(a->emit.token, &er);
            continue;
        }
        /* assemble the locator on the loop thread from rs.kcr3. a->kcr3 is the gva_write
         * TARGET and is owned SOLELY by the worker thread (set in mc_bootstrap_armed, read
         * by the MC_JOB_WRITE branch of mc_job — same thread, FIFO happens-before); the
         * loop must NOT also write it, or an in-flight write would race it.
         * cur_pod.kcr3 is loop-only (delivery). */
        memset(&a->cur_pod, 0, sizeof a->cur_pod);
        a->cur_pod.kcr3 = rs.kcr3;
        a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE;
        a->cur_pod.flags = VMSIG_MEMCTX_RDONLY;
        a->cur_nseg = 1; /* single-low identity (gpa 0 .. low) */
        a->cur_segs[0].gpa = 0;
        a->cur_segs[0].len = a->cur_pod.low;
        a->cur_segs[0].file_off = 0;
        a->cur_pod.nseg = a->cur_nseg;
        a->have_ctx = 1;
        /* emit the MEMCTX trigger: the core authoritatively re-describes + stamps the epoch. */
        vmsig_event up;
        memset(&up, 0, sizeof up);
        up.kind = VMSIG_EV_MEMCTX; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
        up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
        /* Clamp to the inline buffer like every other inline copy in these adapters, so a
         * grown vmsig_memctx can never write past up.inln. */
        size_t pod_bytes = sizeof a->cur_pod;
        if (sizeof up.inln < pod_bytes)
            pod_bytes = sizeof up.inln;
        memcpy(up.inln, &a->cur_pod, pod_bytes);
        a->emit.emit(a->emit.token, &up);
    }
    return 0;
}
/* Emit the ACT_ACK for one MEMWRITE, addressed to its initiator (source MEMCTX).
 * The inline bytes are the triple {ok, corr, origin} — the same shape as the input
 * adapter's ACK — so control finds ok at offset 0. ok=0 is used for every denied or
 * failed write (extent-deny / no-SRC / queue-full / write failure). */
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin) {
    struct { int ok; uint32_t corr; uint32_t origin; } wire;
    wire.ok     = ok;
    wire.corr   = corr;
    wire.origin = origin;

    vmsig_event ack;
    memset(&ack, 0, sizeof ack);
    ack.kind     = VMSIG_EV_ACT_ACK;
    ack.source   = VMSIG_SRC_MEMCTX;
    ack.dir      = VMSIG_DIR_UP;
    ack.prio     = VMSIG_PRIO_NORMAL;
    ack.endpoint = a->endpoint;
    ack.corr     = corr;
    ack.origin   = origin;
    ack.payload.flags = VMSIG_PL_INLINE;
    memcpy(ack.inln, &wire, sizeof wire);

    a->emit.emit(a->emit.token, &ack);
}
/* DOWN MEMWRITE handler: validate extent, copy SRC off-loop, submit the atomic gva_write to
 * the worker. Default-deny: any invalid path (no SRC flag, len out of bounds, short payload,
 * queue full) ACKs ok=0 and does NOT actuate. The completion ACK for a queued write arrives
 * via mc_on_ready.
 *
 * Returns 1 when the event is not a MEMWRITE (not ours), 0 when it was consumed (queued,
 * or denied with an ok=0 ACK), and -1 when the worker rejected the request (an ok=0 ACK
 * has been emitted in that case too).
 *
 * The request header is decoded with memcpy into a local rather than by casting the
 * inline byte buffer to a struct pointer: the buffer carries no alignment guarantee for
 * vmsig_memwrite, and the input adapter decodes its contract the same way. */
static int mc_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind != VMSIG_EV_CMD_MEMWRITE) return 1; /* not for this seam */

    vmsig_memwrite mw;
    memcpy(&mw, ev->inln, sizeof mw);
    const uint32_t len = mw.len;
    if (len == 0 || len > VMSIG_MEMWRITE_MAX) { /* extent: bounded */
        mc_memwrite_ack(a, 0, ev->corr, ev->origin);
        return 0;
    }

    mc_req rq;
    memset(&rq, 0, sizeof rq);
    rq.op = MC_JOB_WRITE; rq.gva = mw.gva; rq.len = len;
    rq.corr = ev->corr; rq.origin = ev->origin;

    /* copy SRC into the worker req (off-loop gva_write reads from rq.src). */
    if (mw.flags & VMSIG_MW_SRC_INLINE) {
        if (len > VMSIG_MEMWRITE_INLINE) {
            mc_memwrite_ack(a, 0, ev->corr, ev->origin);
            return 0;
        }
        memcpy(rq.src, ev->inln + sizeof mw, len); /* inln tail after the header */
    } else if (mw.flags & VMSIG_MW_SRC_PAYLOAD) {
        if (!ev->payload.data || ev->payload.len < len) {
            mc_memwrite_ack(a, 0, ev->corr, ev->origin);
            return 0;
        }
        memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */
    } else {
        mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* no SRC flag */
        return 0;
    }

    if (vmsig_worker_submit(a->worker, &rq, sizeof rq) != 0) {
        mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* queue full -> ACK err */
        return -1;
    }
    return 0; /* completion ACK arrives via mc_on_ready */
}
/* Tear down a memctx adapter. NULL is accepted and ignored.
 * The order is deliberate: unregister from the core, free (join) the worker, and only
 * then close the vmie handle, so no gva_write can still be in flight against it. */
static void mc_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }

    /* Withdraw the endpoint's memctx only if it was registered and the hook exists. */
    if (a->registered && a->emit.unregister_memctx) {
        a->emit.unregister_memctx(a->emit.token, a->endpoint);
    }

    /* join: bootstrap + write jobs finished */
    if (a->worker) {
        vmsig_worker_free(a->worker);
    }

#ifdef VMSIG_WITH_VMIE
    /* AFTER worker join: no in-flight gva_write */
    if (a->win) {
        vmie_win32_close(a->win);
    }
#endif

    if (a->stub_fd >= 0) {
        close(a->stub_fd);
    }

    /* cfg_ro_fd belongs to the infrastructure (the open caller) — do NOT close it. */
    free(a);
}
/* Operations table of the memctx adapter: identity plus lifecycle and data-path hooks. */
static const vmsig_adapter_ops MC_OPS = {
    .name         = "memctx",
    .source       = VMSIG_SRC_MEMCTX,
    .codec        = VMSIG_CODEC_MEMCTX,
    .open         = mc_open,
    .attach       = mc_attach,
    .on_readiness = mc_on_ready,
    .submit       = mc_submit,
    .close        = mc_close
};

/* Accessor for the memctx operations table; the returned table has static storage. */
const vmsig_adapter_ops* vmsig_memctx_ops(void) {
    return &MC_OPS;
}
+13
View File
@@ -0,0 +1,13 @@
#ifndef VMSIG_VMHOST_H
#define VMSIG_VMHOST_H
/* Private config of the vmhost adapter (signaling's own QMP client).
 * cfg==NULL, or a NULL/empty qmp_path => stub mode (synthetic events, no QEMU).
 * A non-empty qmp_path => armed: connect to QEMU's QMP socket. A leading '@' selects an
 * abstract socket: the '@' is replaced by a NUL byte when the address is built.
 * No build flag needed — the client depends only on POSIX and its own code. */
typedef struct {
/* NOTE(review): vh_open() in vmhost.c derives stub/armed from qmp_path alone and never
 * reads this field — confirm whether it is meant to force stub mode. */
int stub;
/* Path of the QMP unix socket. vh_open() stores this pointer without copying the string,
 * so the string must stay valid for as long as the adapter uses it. */
const char* qmp_path;
} vmsig_vmhost_cfg;
#endif /* VMSIG_VMHOST_H */
+313
View File
@@ -0,0 +1,313 @@
/* vmhost.c — QEMU/QMP host-plane: signaling's OWN layer for observing the VM
* and its basic control. Not a wrapper over a neighbor repo — an own QMP client;
* depends only on POSIX, so it is always functional (no build flag).
*
* This is the first truly epoll-native source: the QMP socket (VMSIG_RDY_FD) lives
* directly in the loop, non-blocking, async events. Up: QMP events -> VM_LIFECYCLE
* (broadcast), EOF -> SEAM_DOWN. Down: CMD_VM -> QMP command with id correlation,
* reply addressed to the initiator. stub mode (no QEMU) synthesizes events/replies. */
#define _GNU_SOURCE
#include "vmsig_adapter.h"
#include "vmhost.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/timerfd.h>
#include <sys/epoll.h>
/* Size in bytes of the adapter's QMP receive buffer; a line that fills it without a
 * newline is discarded by vh_on_ready. */
#define VMHOST_BUF 4096
/* Period of the stub-mode timerfd, in milliseconds (initial expiry and interval). */
#define VMHOST_STUB_MS 200
/* Capacity of the table of QMP commands awaiting a reply. */
#define VMHOST_MAX_PENDING 64
/* Adapter state.
 *   ST_STUB        stub mode, driven by the timerfd;
 *   ST_CONNECTING  armed, socket created, waiting for the QMP greeting;
 *   ST_NEGOTIATING qmp_capabilities sent, waiting for its "return";
 *   ST_READY       negotiation finished, events and command replies are processed;
 *   ST_DEAD        transport lost, socket closed (set by armed_dead). */
enum { ST_STUB = 0, ST_CONNECTING, ST_NEGOTIATING, ST_READY, ST_DEAD };
/* One in-flight QMP command: `id` is the QMP id sent on the wire, `origin`/`corr` are
 * copied from the initiating event so the reply can be addressed back, `op` is the
 * VMSIG_VMOP_* code (narrowed to 8 bits), `used` marks the slot as occupied. */
typedef struct { uint32_t id, origin, corr; uint8_t op; int used; } pend_ent;
/* Per-instance state of the vmhost adapter (allocated zeroed by vh_open, freed by vh_close). */
struct vmsig_adapter {
uint32_t endpoint; /* endpoint id stamped on every emitted event */
int stub; /* non-zero when no qmp_path was configured (stub mode) */
const char* qmp_path; /* borrowed from the config, not copied; NULL in stub mode */
vmsig_emit emit; /* emit callback + token, copied in vh_attach */
int fd; /* QMP socket (armed) or timerfd (stub); -1 when not open */
int st; /* one of the ST_* states */
uint32_t cur; /* current synthetic state (stub) */
char buf[VMHOST_BUF]; /* receive buffer holding not-yet-complete QMP line data */
size_t buflen; /* number of valid bytes at the start of buf */
uint32_t next_id; /* last QMP command id handed out; pre-incremented per command */
pend_ent pend[VMHOST_MAX_PENDING]; /* commands awaiting a QMP reply */
};
/* ---- minimal QMP line parse (top-level keys only; full JSON — deferred) ---- */
/* Extract the string value that follows the first occurrence of `key` in `line`.
 *
 * This is a deliberately minimal scanner, not a JSON parser: `key` is matched as a plain
 * substring (callers pass it with its quotes, e.g. "\"event\""), any run of spaces, tabs
 * and colons after it is skipped, and the following double-quoted text is copied up to
 * the closing quote. Escape sequences are not interpreted.
 *
 *   line - NUL-terminated text to scan
 *   key  - substring to look for
 *   out  - destination buffer; always NUL-terminated when 1 is returned
 *   cap  - size of `out` in bytes; longer values are silently truncated to cap-1 bytes
 *
 * Returns 1 when a string value was found and stored, 0 otherwise. `out` is left
 * untouched on 0. A zero `cap` returns 0: there is no room even for the terminator, and
 * storing it would write past the buffer. */
static int jstr(const char* line, const char* key, char* out, size_t cap) {
    if (cap == 0) return 0;
    const char* p = strstr(line, key);
    if (!p) return 0;
    p += strlen(key);
    while (*p == ' ' || *p == '\t' || *p == ':') p++;
    if (*p != '"') return 0;              /* value is not a string */
    p++;
    size_t i = 0;
    while (*p && *p != '"' && i + 1 < cap) out[i++] = *p++;
    out[i] = 0;
    return 1;
}
/* Read the non-negative decimal number that follows the first occurrence of `key`.
 * `key` is matched as a plain substring; spaces, tabs and colons after it are skipped.
 * Returns the parsed value, or -1 when the key is absent or the next character is not a
 * digit (so negative numbers and non-numeric values also yield -1). */
static long jnum(const char* line, const char* key) {
    const char* hit = strstr(line, key);
    if (hit == NULL) {
        return -1;
    }

    const char* cur = hit + strlen(key);
    for (;;) {
        char c = *cur;
        if (c != ' ' && c != '\t' && c != ':') {
            break;
        }
        cur++;
    }

    if (!(*cur >= '0' && *cur <= '9')) {
        return -1;
    }
    return strtol(cur, NULL, 10);
}
/* Map a QMP event name to a VMSIG_VM_* state.
 * Names other than the six listed here yield VMSIG_VM_UNKNOWN. */
static uint32_t ev_state(const char* n) {
    uint32_t state = VMSIG_VM_UNKNOWN;

    if (strcmp(n, "RESUME") == 0) {
        state = VMSIG_VM_RUNNING;
    } else if (strcmp(n, "STOP") == 0) {
        state = VMSIG_VM_PAUSED;
    } else if (strcmp(n, "SHUTDOWN") == 0) {
        state = VMSIG_VM_SHUTDOWN;
    } else if (strcmp(n, "RESET") == 0) {
        state = VMSIG_VM_RESET;
    } else if (strcmp(n, "POWERDOWN") == 0) {
        state = VMSIG_VM_POWERDOWN;
    } else if (strcmp(n, "GUEST_PANICKED") == 0) {
        state = VMSIG_VM_CRASHED;
    }
    return state;
}
/* Map the "status" string of a query-status reply to a VMSIG_VM_* state.
 * Only "running", "paused" and "shutdown" are recognised; anything else is UNKNOWN. */
static uint32_t status_state(const char* s) {
    uint32_t state = VMSIG_VM_UNKNOWN;

    if (strcmp(s, "running") == 0) {
        state = VMSIG_VM_RUNNING;
    } else if (strcmp(s, "paused") == 0) {
        state = VMSIG_VM_PAUSED;
    } else if (strcmp(s, "shutdown") == 0) {
        state = VMSIG_VM_SHUTDOWN;
    }
    return state;
}
/* Translate a VMSIG_VMOP_* code into the QMP command name sent on the wire.
 * Returns NULL for an operation this adapter does not know. */
static const char* op_qmp(uint32_t op) {
    const char* cmd = NULL;

    switch (op) {
    case VMSIG_VMOP_QUERY:     cmd = "query-status";     break;
    case VMSIG_VMOP_CONT:      cmd = "cont";             break;
    case VMSIG_VMOP_STOP:      cmd = "stop";             break;
    case VMSIG_VMOP_RESET:     cmd = "system_reset";     break;
    case VMSIG_VMOP_POWERDOWN: cmd = "system_powerdown"; break;
    case VMSIG_VMOP_QUIT:      cmd = "quit";             break;
    default:                                             break;
    }
    return cmd;
}
static pend_ent* pend_alloc(struct vmsig_adapter* a) {
for (int i = 0; i < VMHOST_MAX_PENDING; i++) if (!a->pend[i].used) return &a->pend[i];
return NULL;
}
static pend_ent* pend_find(struct vmsig_adapter* a, uint32_t id) {
for (int i = 0; i < VMHOST_MAX_PENDING; i++)
if (a->pend[i].used && a->pend[i].id == id) return &a->pend[i];
return NULL;
}
/* ---- emission of neutral UP events ---- */
/* Emit a VM_LIFECYCLE event carrying `state` inline.
 * RUNNING and PAUSED go out at normal priority; every other state is urgent.
 * origin/corr are copied into the event header; callers pass 0,0 for a broadcast and the
 * initiator's values for an addressed reply. */
static void emit_vm(struct vmsig_adapter* a, uint32_t state, uint32_t origin, uint32_t corr) {
    vmsig_vm_state vs = { state, 0 };
    int routine = (state == VMSIG_VM_RUNNING) || (state == VMSIG_VM_PAUSED);

    vmsig_event ev;
    memset(&ev, 0, sizeof ev);
    ev.kind     = VMSIG_EV_VM_LIFECYCLE;
    ev.source   = VMSIG_SRC_VMHOST;
    ev.dir      = VMSIG_DIR_UP;
    ev.prio     = routine ? VMSIG_PRIO_NORMAL : VMSIG_PRIO_URGENT;
    ev.endpoint = a->endpoint;
    ev.origin   = origin;
    ev.corr     = corr;
    ev.payload.flags = VMSIG_PL_INLINE;
    memcpy(ev.inln, &vs, sizeof vs);

    a->emit.emit(a->emit.token, &ev);
}
/* Emit a body-less, urgent seam event of kind `k` for this endpoint.
 * The callers in this file pass VMSIG_EV_SEAM_UP or VMSIG_EV_SEAM_DOWN. */
static void emit_seam(struct vmsig_adapter* a, vmsig_kind k) {
    vmsig_event ev;
    memset(&ev, 0, sizeof ev);

    ev.kind     = k;
    ev.source   = VMSIG_SRC_VMHOST;
    ev.dir      = VMSIG_DIR_UP;
    ev.prio     = VMSIG_PRIO_URGENT;
    ev.endpoint = a->endpoint;

    a->emit.emit(a->emit.token, &ev);
}
/* Emit an ACT_ACK addressed by origin/corr. The first inline byte carries the result:
 * 1 when `ok` is non-zero, 0 otherwise. */
static void emit_ack(struct vmsig_adapter* a, uint32_t origin, uint32_t corr, int ok) {
    vmsig_event ack;
    memset(&ack, 0, sizeof ack);

    ack.kind     = VMSIG_EV_ACT_ACK;
    ack.source   = VMSIG_SRC_VMHOST;
    ack.dir      = VMSIG_DIR_UP;
    ack.prio     = VMSIG_PRIO_NORMAL;
    ack.endpoint = a->endpoint;
    ack.origin   = origin;
    ack.corr     = corr;
    ack.payload.flags = VMSIG_PL_INLINE;
    ack.inln[0]  = (uint8_t)(ok != 0);

    a->emit.emit(a->emit.token, &ack);
}
/* ---- armed: handle one QMP line ---- */
/* Process one complete, NUL-terminated QMP line according to the adapter state.
 *
 *   ST_CONNECTING  - on the greeting (a line containing "QMP") send qmp_capabilities and
 *                    move to ST_NEGOTIATING.
 *   ST_NEGOTIATING - the first line containing "return" completes negotiation: the
 *                    adapter becomes ST_READY and SEAM_UP is emitted.
 *   ST_READY       - a line containing "event" is mapped through ev_state() and, when
 *                    recognised, broadcast as VM_LIFECYCLE. A "return"/"error" line is
 *                    matched by "id" against the pending table: a successful QUERY is
 *                    answered with an addressed VM_LIFECYCLE, every other reply with an
 *                    ACT_ACK (ok=1 for "return", ok=0 for "error"). Replies with no
 *                    matching pending entry are ignored.
 *   other states   - the line is ignored.
 *
 * Matching is by substring (see jstr/jnum), not by full JSON parsing. */
static void handle_line(struct vmsig_adapter* a, const char* line) {
    switch (a->st) {
    case ST_CONNECTING:
        if (strstr(line, "\"QMP\"")) {          /* greeting -> negotiate capabilities */
            static const char cap[] = "{\"execute\":\"qmp_capabilities\"}\r\n";
            /* send() with MSG_NOSIGNAL instead of write(): if QEMU has already gone away
             * a plain write would raise SIGPIPE; here the failure is just a return code. */
            ssize_t r = send(a->fd, cap, sizeof cap - 1, MSG_NOSIGNAL);
            /* Advance only when the whole command went out. Otherwise stay in
             * ST_CONNECTING: vh_submit keeps refusing commands and a dead peer is
             * detected by the next read in vh_on_ready. */
            if (r == (ssize_t)(sizeof cap - 1)) a->st = ST_NEGOTIATING;
        }
        break;
    case ST_NEGOTIATING:
        if (strstr(line, "\"return\"")) { a->st = ST_READY; emit_seam(a, VMSIG_EV_SEAM_UP); }
        break;
    case ST_READY:
        if (strstr(line, "\"event\"")) {
            char name[64];
            if (jstr(line, "\"event\"", name, sizeof name)) {
                uint32_t s = ev_state(name);
                if (s != VMSIG_VM_UNKNOWN) emit_vm(a, s, 0, 0);   /* broadcast */
            }
        } else if (strstr(line, "\"return\"") || strstr(line, "\"error\"")) {
            long id = jnum(line, "\"id\"");
            pend_ent* p = id >= 0 ? pend_find(a, (uint32_t)id) : NULL;
            if (p) {
                if (p->op == VMSIG_VMOP_QUERY && strstr(line, "\"return\"")) {
                    char stbuf[32]; uint32_t s = VMSIG_VM_UNKNOWN;
                    if (jstr(line, "\"status\"", stbuf, sizeof stbuf)) s = status_state(stbuf);
                    emit_vm(a, s, p->origin, p->corr);            /* addressed reply */
                } else {
                    emit_ack(a, p->origin, p->corr, strstr(line, "\"return\"") != NULL);
                }
                p->used = 0;                    /* slot is free for the next command */
            }
        }
        break;
    default: break;
    }
}
/* Declare the QMP transport dead: announce SEAM_DOWN, close the socket if it is open,
 * and park the adapter in ST_DEAD. */
static void armed_dead(struct vmsig_adapter* a) {
    emit_seam(a, VMSIG_EV_SEAM_DOWN);       /* VM transport died */

    int sock = a->fd;
    if (sock >= 0) {
        a->fd = -1;
        close(sock);                        /* close removes the fd from epoll */
    }

    a->st = ST_DEAD;
}
/* ---- vtable ---- */
/* Allocate a vmhost adapter for `endpoint`.
 * `cfg` is read as a vmsig_vmhost_cfg and may be NULL. A non-empty qmp_path selects armed
 * mode; otherwise the adapter runs as a stub. The path pointer is stored, not copied.
 * No descriptor is opened here (fd starts at -1); the synthetic state starts as RUNNING.
 * Returns NULL when the allocation fails. */
static vmsig_adapter* vh_open(const void* cfg, uint32_t endpoint) {
    const vmsig_vmhost_cfg* conf = cfg;

    const char* path = NULL;
    if (conf != NULL && conf->qmp_path != NULL && conf->qmp_path[0] != '\0') {
        path = conf->qmp_path;
    }

    struct vmsig_adapter* self = calloc(1, sizeof *self);
    if (self == NULL) {
        return NULL;
    }

    self->endpoint = endpoint;
    self->qmp_path = path;
    self->stub     = (path == NULL);        /* path given => armed, otherwise stub */
    self->fd       = -1;
    self->cur      = VMSIG_VM_RUNNING;
    return self;
}
/* Stub half of vh_attach: arm a periodic timerfd (VMHOST_STUB_MS for both the first
 * expiry and the interval), describe it in reg[0] and announce SEAM_UP at once.
 * Returns 1 (one descriptor registered) or -1 on failure, leaving a->fd at -1. */
static int vh_attach_stub(vmsig_adapter* a, vmsig_fd_reg* reg) {
    a->fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    if (a->fd < 0) {
        return -1;
    }

    struct itimerspec period;
    memset(&period, 0, sizeof period);
    period.it_interval.tv_sec  = VMHOST_STUB_MS / 1000u;
    period.it_interval.tv_nsec = (long)(VMHOST_STUB_MS % 1000u) * 1000000L;
    period.it_value            = period.it_interval;
    if (timerfd_settime(a->fd, 0, &period, NULL) < 0) {
        close(a->fd);
        a->fd = -1;
        return -1;
    }

    a->st = ST_STUB;
    reg[0].fd           = a->fd;
    reg[0].epoll_events = EPOLLIN;
    reg[0].shape        = VMSIG_RDY_TIMERFD;
    reg[0].cookie       = 0;
    emit_seam(a, VMSIG_EV_SEAM_UP);
    return 1;
}

/* Armed half of vh_attach: open a non-blocking unix stream socket and start connecting
 * to a->qmp_path. A leading '@' means an abstract address: the '@' becomes a NUL byte and
 * the address length covers exactly the name. EINPROGRESS from connect() is accepted.
 * Returns 1 with the socket described in reg[0], or -1 on failure (socket closed). */
static int vh_attach_armed(vmsig_adapter* a, vmsig_fd_reg* reg) {
    int sock = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
    if (sock < 0) {
        return -1;
    }

    struct sockaddr_un sa;
    memset(&sa, 0, sizeof sa);
    sa.sun_family = AF_UNIX;

    const char* path = a->qmp_path;
    size_t plen = strlen(path);
    socklen_t salen;
    if (path[0] == '@') {
        /* '@' is replaced in place by the NUL, so the name occupies plen bytes. */
        if (plen > sizeof sa.sun_path) goto fail;
        sa.sun_path[0] = 0;
        memcpy(sa.sun_path + 1, path + 1, plen - 1);
        salen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + plen);
    } else {
        /* Strictly shorter than sun_path, so the zeroed struct supplies the terminator. */
        if (plen >= sizeof sa.sun_path) goto fail;
        memcpy(sa.sun_path, path, plen);
        salen = (socklen_t)sizeof sa;
    }

    if (connect(sock, (struct sockaddr*)&sa, salen) < 0 && errno != EINPROGRESS) goto fail;

    a->fd = sock;
    a->st = ST_CONNECTING;
    reg[0].fd           = sock;
    reg[0].epoll_events = EPOLLIN;
    reg[0].shape        = VMSIG_RDY_FD;
    reg[0].cookie       = 0;
    /* SEAM_UP is emitted upon reaching READY (after qmp_capabilities) */
    return 1;

fail:
    close(sock);
    return -1;
}

/* Attach the adapter to the loop: remember the emit callback and create the single
 * readiness source (timerfd in stub mode, QMP socket when armed).
 *   emit - callback set, copied into the adapter
 *   reg  - output array; reg[0] receives the descriptor registration
 *   cap  - capacity of `reg`; must be at least 1
 * Returns the number of registrations written (1), or -1 on failure. */
static int vh_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1) {
        return -1;
    }
    a->emit = *emit;
    return a->stub ? vh_attach_stub(a, reg) : vh_attach_armed(a, reg);
}
/* Readiness callback for the adapter's single descriptor. `cookie` and `events` are not
 * inspected. Always returns 0.
 *
 * Stub mode: drain the timerfd, toggle the synthetic state between RUNNING and PAUSED and
 * broadcast it.
 *
 * Armed mode: read everything currently available from the non-blocking QMP socket,
 * split it on '\n' and pass each complete line to handle_line(); an unfinished tail is
 * kept at the start of the buffer for the next call. EOF or a hard read error declares
 * the transport dead (SEAM_DOWN, socket closed). A read interrupted by a signal (EINTR)
 * is simply retried: it says nothing about the peer and must not tear the seam down. */
static int vh_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie; (void)events;
    if (a->stub) {
        uint64_t ticks;
        for (;;) {                                       /* drain */
            ssize_t n = read(a->fd, &ticks, sizeof ticks);
            if (n == (ssize_t)sizeof ticks) continue;
            if (n < 0 && errno == EINTR) continue;
            break;
        }
        a->cur = (a->cur == VMSIG_VM_RUNNING) ? VMSIG_VM_PAUSED : VMSIG_VM_RUNNING;
        emit_vm(a, a->cur, 0, 0);                        /* broadcast */
        return 0;
    }
    if (a->st == ST_DEAD) return 0;
    for (;;) {
        /* A full buffer with no newline means an over-long line: drop what is held. */
        if (a->buflen >= sizeof a->buf) a->buflen = 0;   /* line overflow -> reset */
        ssize_t r = read(a->fd, a->buf + a->buflen, sizeof a->buf - a->buflen);
        if (r == 0) { armed_dead(a); return 0; }         /* peer closed */
        if (r < 0) {
            if (errno == EINTR) continue;                /* interrupted: retry */
            if (errno == EAGAIN || errno == EWOULDBLOCK) break;   /* drained for now */
            armed_dead(a);
            return 0;
        }
        a->buflen += (size_t)r;
        size_t start = 0;
        for (size_t i = 0; i < a->buflen; i++) {
            /* Terminate the line in place; a preceding '\r' stays in the line text. */
            if (a->buf[i] == '\n') { a->buf[i] = 0; handle_line(a, a->buf + start); start = i + 1; }
        }
        /* Shift the incomplete tail to the front of the buffer. */
        if (start > 0) { memmove(a->buf, a->buf + start, a->buflen - start); a->buflen -= start; }
    }
    return 0;
}
/* DOWN handler for CMD_VM.
 *
 * Stub mode: the command is answered immediately with an addressed VM_LIFECYCLE; CONT and
 * STOP also update the synthetic state.
 *
 * Armed mode: the command is translated to a QMP "execute" line tagged with a fresh id,
 * and the id is recorded in the pending table together with the initiator's origin/corr,
 * so that handle_line() can address the reply.
 *
 * Returns:
 *    1  not a CMD_VM event, or an operation code this adapter does not know;
 *    0  answered (stub) or sent to QEMU (armed);
 *   -1  armed but not READY yet, pending table full, or the command could not be
 *       written in full (the pending slot is released again). */
static int vh_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind != VMSIG_EV_CMD_VM) return 1;      /* not for this seam */
    vmsig_vm_cmd cmd;
    memcpy(&cmd, ev->inln, sizeof cmd);
    if (a->stub) {
        uint32_t s;
        switch (cmd.op) {
        case VMSIG_VMOP_QUERY:     s = a->cur; break;
        case VMSIG_VMOP_CONT:      s = a->cur = VMSIG_VM_RUNNING; break;
        case VMSIG_VMOP_STOP:      s = a->cur = VMSIG_VM_PAUSED; break;
        case VMSIG_VMOP_RESET:     s = VMSIG_VM_RESET; break;
        case VMSIG_VMOP_POWERDOWN: s = VMSIG_VM_POWERDOWN; break;
        case VMSIG_VMOP_QUIT:      s = VMSIG_VM_SHUTDOWN; break;
        default: return 1;
        }
        emit_vm(a, s, ev->origin, ev->corr);        /* reply addressed to the initiator */
        return 0;
    }
    if (a->st != ST_READY) return -1;
    const char* q = op_qmp(cmd.op);
    if (!q) return 1;
    pend_ent* p = pend_alloc(a);
    if (!p) return -1;                              /* backpressure: pending table is full */
    uint32_t id = ++a->next_id;
    p->used = 1; p->id = id; p->origin = ev->origin; p->corr = ev->corr; p->op = (uint8_t)cmd.op;
    char line[160];
    int len = snprintf(line, sizeof line, "{\"execute\":\"%s\",\"id\":%u}\r\n", q, (unsigned)id);
    /* Never send a truncated command. */
    if (len < 0 || (size_t)len >= sizeof line) { p->used = 0; return -1; }
    /* send() with MSG_NOSIGNAL instead of write(): if QEMU closed the socket since the
     * last wake-up, a plain write would raise SIGPIPE; here it is an ordinary error. */
    ssize_t r = send(a->fd, line, (size_t)len, MSG_NOSIGNAL);
    if (r != (ssize_t)len) { p->used = 0; return -1; }
    return 0;
}
/* Release a vmhost adapter: close its descriptor (socket or timerfd) when one is open,
 * then free the instance. NULL is accepted and ignored. */
static void vh_close(vmsig_adapter* a) {
    if (a == NULL) {
        return;
    }
    if (a->fd >= 0) {
        close(a->fd);
    }
    free(a);
}
/* Operations table of the vmhost adapter: identity plus lifecycle and data-path hooks. */
static const vmsig_adapter_ops VH_OPS = {
    .name         = "vmhost",
    .source       = VMSIG_SRC_VMHOST,
    .codec        = VMSIG_CODEC_VMHOST,
    .open         = vh_open,
    .attach       = vh_attach,
    .on_readiness = vh_on_ready,
    .submit       = vh_submit,
    .close        = vh_close
};

/* Accessor for the vmhost operations table; the returned table has static storage. */
const vmsig_adapter_ops* vmsig_vmhost_ops(void) {
    return &VH_OPS;
}