vmsig: a neutral signaling layer between sensors/input and controls

An epoll-driven, neutral transfer-event bus that connects sensors and input
actuators to one or more controls, bidirectionally. It owns the transfer context
and events — delivery order, priority, protocol-level timing, and an
interrupt-driven event model over fd sources (eventfd/timerfd/sockets) — and
stays agnostic to both the sensor/input drivers and the control.

What lives here:
- memctx: a coherent address-space context per endpoint — the guest address-space
  root paired with a pre-opened read-only RAM-region fd, with per-endpoint epoch
  invalidation and retained replay to late subscribers. Perception lives in
  out-of-tree sensor libraries that consume this datum read-only.
- exclusive-ownership leases for destructive resource classes (input, power,
  memory-write).
- write-signaled memory writes (MEMWRITE): an atomic write to guest memory routed
  through the seam under an exclusive lease, never a writable mapping.
- a host-management seam for VM lifecycle/status and a neutral input-injection
  command path.
- multi-VM endpoints; capability-gated, audited control authorization over an
  in-process or unix-socket transport.

Builds against headers only by default (a stub mode that exercises the seam
without a VM); armed builds link the real sensor/input libraries behind flags.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-20 18:46:31 +03:00
commit 709f4b586a
36 changed files with 5820 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
.*/
cmake-*/
compile*
Testing/
CLAUDE.md
+137
View File
@@ -0,0 +1,137 @@
# Top-level build description for vmsig (C only).
#
# The minimum is 3.21, not 3.16: CMAKE_C_STANDARD 17 (set below) is only a valid
# C_STANDARD value from CMake 3.21 on; older releases reject "17" at configure
# time, so a lower declared minimum could never actually work.
cmake_minimum_required(VERSION 3.21)
project(vmsig LANGUAGES C)

# C17 with compiler extensions enabled (e.g. -std=gnu17 on GCC/Clang).
set(CMAKE_C_STANDARD 17)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_C_EXTENSIONS ON) # epoll/eventfd/timerfd/clock_gettime: gnu ext

# Opt-in link-time optimisation; consumed where the vmsig target's flags are set.
option(VMSIG_LTO "Enable LTO" OFF)
# Thread library used by the vmsig target and by several tests.
find_package(Threads REQUIRED)

# ---- Armed-build switches ----------------------------------------------------
# OFF (default): the spine builds against headers only; the SI calls stay hidden
# behind these flags and the stub mode proves the seam without a real VM.
# ON: link the real sibling library (its static .a, built with -fPIC).
option(VMSIG_WITH_VMIE
  "Link real vmie (libvmie.a, PIC) for armed memctx"
  OFF)
option(VMSIG_WITH_VMCTL
  "Link real vmctl (libvmctl.a, PIC) for armed input"
  OFF)

# ---- Sibling library checkouts -----------------------------------------------
# Point these at your local checkouts. They are read only by the armed builds;
# the default stub build needs neither.
set(LIBVMIE_PATH
  ""
  CACHE PATH "Path to the vmie library sources (for VMSIG_WITH_VMIE)")
set(LIBVMCTL_PATH
  ""
  CACHE PATH "Path to the vmctl library sources (for VMSIG_WITH_VMCTL)")
# ---- signaling library ------------------------------------------------------
# The shared library target is declared first; its translation units are then
# attached with target_sources() (paths are relative to this directory).
add_library(vmsig SHARED)
target_sources(vmsig
  PRIVATE
    src/core/core.c
    src/core/linux/loop.c
    src/ctx/ctx.c
    src/adapter/linux/worker.c
    src/adapter/memctx/memctx.c
    src/adapter/input/input.c
    src/adapter/vmhost/vmhost.c
    src/control/inproc.c
    src/control/socket.c
)

# include/ is the public API and propagates to consumers; the per-component
# include directories under src/ are visible only while building vmsig itself.
target_include_directories(vmsig
  PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/include
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/core/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/ctx/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/memctx/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/input/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/vmhost/include
)

target_link_libraries(vmsig
  PRIVATE
    Threads::Threads
)
# ---- armed builds: real sibling libraries as pre-built PIC archives ----------
#
# vmsig_link_armed_lib(<lib> <option> <path_var>)
#   <lib>       imported target name; the archive is <root>/.build/lib<lib>.a
#   <option>    the enabling option; also added as a PRIVATE compile definition
#   <path_var>  NAME of the cache variable holding the sibling checkout root
#
# Imports the archive as a STATIC IMPORTED target (headers from <root>/include),
# links it PRIVATE into vmsig and defines <option> for vmsig's sources.
#
# The checkout path defaults to "", which would otherwise silently resolve to
# "/.build/lib<lib>.a" and "/include". Both the path and the archive are therefore
# validated here so a misconfigured armed build fails at configure time with an
# actionable message instead of later with an unrelated-looking error.
function(vmsig_link_armed_lib lib option path_var)
  set(root "${${path_var}}")
  if("${root}" STREQUAL "")
    message(FATAL_ERROR
      "${option}=ON requires ${path_var} to point at the ${lib} checkout "
      "(e.g. -D${path_var}=/path/to/${lib}).")
  endif()

  set(archive "${root}/.build/lib${lib}.a")
  if(NOT EXISTS "${archive}")
    message(FATAL_ERROR
      "${option}=ON: '${archive}' does not exist; build ${lib} "
      "(static archive, -fPIC) before configuring vmsig.")
  endif()

  add_library(${lib} STATIC IMPORTED)
  set_target_properties(${lib} PROPERTIES
    IMPORTED_LOCATION "${archive}"
    INTERFACE_INCLUDE_DIRECTORIES "${root}/include")
  target_link_libraries(vmsig PRIVATE ${lib})
  target_compile_definitions(vmsig PRIVATE ${option})
endfunction()

# armed: the real vmie (headers and symbols).
if(VMSIG_WITH_VMIE)
  vmsig_link_armed_lib(vmie VMSIG_WITH_VMIE LIBVMIE_PATH)
endif()

# armed: the real vmctl.
if(VMSIG_WITH_VMCTL)
  vmsig_link_armed_lib(vmctl VMSIG_WITH_VMCTL LIBVMCTL_PATH)
endif()
# Optimisation and warnings for the library; -flto is appended to both the
# compile and the link line only when VMSIG_LTO is enabled.
# NOTE(review): -O2 is added for every build type, Debug included — confirm intended.
target_compile_options(vmsig
  PRIVATE
    -O2
    -Wall
    -Wextra
    $<$<BOOL:${VMSIG_LTO}>:-flto>
)
target_link_options(vmsig
  PRIVATE
    $<$<BOOL:${VMSIG_LTO}>:-flto>
)
# ---- demonstrator on top of the library (like vmie_cli / vmctl) -------------
add_executable(vmsig_cli)
target_sources(vmsig_cli
  PRIVATE
    src/cli.c
)
target_compile_options(vmsig_cli
  PRIVATE
    -Wall
    -Wextra
)
target_link_libraries(vmsig_cli
  PRIVATE
    vmsig
)
# ---- transfer-context tests (ctest) -----------------------------------------
enable_testing()

# vmsig_add_ctest(<test-name> <target> <source> [THREADS] [INCLUDES <dir>...])
#   <test-name>  name registered with ctest
#   <target>     executable target built from <source>
#   THREADS      additionally link Threads::Threads
#   INCLUDES     extra PRIVATE include directories (library-internal headers)
# Every test executable links vmsig and is compiled with -Wall -Wextra.
function(vmsig_add_ctest test_name target source)
  cmake_parse_arguments(PARSE_ARGV 3 arg "THREADS" "" "INCLUDES")
  add_executable(${target} ${source})
  if(arg_THREADS)
    target_link_libraries(${target} PRIVATE vmsig Threads::Threads)
  else()
    target_link_libraries(${target} PRIVATE vmsig)
  endif()
  if(arg_INCLUDES)
    target_include_directories(${target} PRIVATE ${arg_INCLUDES})
  endif()
  target_compile_options(${target} PRIVATE -Wall -Wextra)
  add_test(NAME ${test_name} COMMAND ${target})
endfunction()

vmsig_add_ctest(ctx      vmsig_test         src/test/test_ctx.c)
vmsig_add_ctest(sec      vmsig_sectest      src/test/test_sec.c)
vmsig_add_ctest(sock     vmsig_socktest     src/test/test_sock.c THREADS)
vmsig_add_ctest(mvm      vmsig_mvmtest      src/test/test_mvm.c)
vmsig_add_ctest(authz    vmsig_authztest    src/test/test_authz.c)
vmsig_add_ctest(memctx   vmsig_memctxtest   src/test/test_memctx.c THREADS
  INCLUDES
    ${CMAKE_CURRENT_SOURCE_DIR}/src/core/include)
vmsig_add_ctest(vmhost   vmsig_vmhosttest   src/test/test_vmhost.c THREADS
  INCLUDES
    ${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/vmhost/include)
vmsig_add_ctest(lease    vmsig_leasetest    src/test/test_lease.c THREADS
  INCLUDES
    ${CMAKE_CURRENT_SOURCE_DIR}/src/core/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/ctx/include)
vmsig_add_ctest(inputobs vmsig_inputobstest src/test/test_inputobs.c THREADS)
vmsig_add_ctest(memwrite vmsig_memwritetest src/test/test_memwrite.c THREADS
  INCLUDES
    ${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/memctx/include)

# the demonstrator doubles as an end-to-end seam test (self-terminates rc=0)
add_test(NAME cli COMMAND vmsig_cli)
+21
View File
@@ -0,0 +1,21 @@
#ifndef VMSIG_H
#define VMSIG_H
/* vmsig.h — umbrella header for the signaling layer of the SISC paradigm.
*
* vmsig binds the three SI repos (sensors vmie/vgpustream + input vmctl) to the control
* (algorithm OR human), bidirectionally translating transfer events. The layer itself
* contains NO sensing, actuation, or decision/behavioral-timing logic.
*
* Application wiring: create the context (vmsig_ctx_new) -> core (vmsig_core_new) ->
* register per-VM adapters (vmsig_core_add_adapter) and control
* (vmsig_core_add_control) -> vmsig_core_run. */
#include "vmsig_event.h" /* neutral transfer-event model + payload */
#include "vmsig_memctx.h" /* address-space context handoff contract (kcr3+locator) */
#include "vmsig_ctx.h" /* transfer context: priority/seq/protocol timing */
#include "vmsig_adapter.h" /* unified SI adapter interface + factories */
#include "vmsig_control.h" /* control-agnostic seam + reference in-proc */
#include "vmsig_core.h" /* epoll core */
#endif /* VMSIG_H */
+81
View File
@@ -0,0 +1,81 @@
#ifndef VMSIG_ADAPTER_H
#define VMSIG_ADAPTER_H
#include "vmsig_event.h"
#include "vmsig_memctx.h" /* vmsig_memctx_reg — address-space context registration seam */
/* vmsig_adapter.h — unified SI adapter interface. One vtable, three readiness
* shapes behind it. The adapter is the ONLY place that includes neighbor headers
* (memmodel.h/win32.h/vgpu_stream.h/vmctl.h). It registers 0..N fds with the core;
* the core does not know whether it is a socket, eventfd or timerfd. SI specifics
* never leave these functions. */
/* Opaque adapter instance handle. */
typedef struct vmsig_adapter vmsig_adapter;

/* Readiness shape: how an adapter signals that it has work. To the core all
 * three are ordinary epoll fds; the enum serves as documentation and selects
 * the default epoll flags. */
typedef enum {
    VMSIG_RDY_FD      = 0,  /* native pollable fd (socket) */
    VMSIG_RDY_TIMERFD = 1,  /* timerfd; adapter samples shared memory */
    VMSIG_RDY_EVENTFD = 2   /* worker thread bridges a blocking API -> eventfd */
} vmsig_readiness;
/* Emit sink: handed by the core to an adapter so it can push UP events without
 * knowing the internals of the context.
 *   - emit() is thread-safe; it is also called from worker threads.
 *   - register_memctx()/unregister_memctx() are called ONLY on the loop thread.
 *   - The registration hooks may be NULL (adapters/tests need not call them). */
typedef struct {
    /* UP (thread-safe) */
    int   (*emit)(void* token, vmsig_event* ev);
    /* loop thread: AS context; 0/-1 */
    int   (*register_memctx)(void* token, const vmsig_memctx_reg* reg);
    /* loop thread: context gone */
    void  (*unregister_memctx)(void* token, uint32_t endpoint);
    /* passed back as the first argument of every hook above */
    void*   token;
} vmsig_emit;
/* Registration record for a single fd that an adapter contributes to the core:
 * the fd itself, its epoll flags, and a cookie used for demultiplexing. */
typedef struct {
    int             fd;
    uint32_t        epoll_events;  /* EPOLLIN / EPOLLOUT / ... */
    vmsig_readiness shape;
    uint32_t        cookie;        /* adapter-private fd discriminator */
} vmsig_fd_reg;
/* Adapter vtable. Each SI adapter implements this; SI specifics do not leak.
 * Call order as described by the members below: open() creates the instance,
 * attach() hands its fds to the core, on_readiness()/submit() carry UP/DOWN
 * traffic, close() detaches and frees. */
typedef struct vmsig_adapter_ops {
const char* name; /* "memctx"/"input"/"vmhost" — diagnostics */
vmsig_source source; /* neutral seam role */
uint32_t codec; /* vmsig_codec owned by the adapter */
/* Create an instance from opaque cfg (the adapter parses it; the core passes
 * it through as-is). `endpoint` is the id of the VM the instance binds to.
 * Returns an instance, or NULL on failure. */
vmsig_adapter* (*open)(const void* cfg, uint32_t endpoint);
/* Attach: open the SI contract, bring up workers, hand fds into reg[]
 * (at most `cap` entries), and store `emit` for UP events.
 * Returns the number of registered fds (>= 0), or -1 on error. */
int (*attach)(vmsig_adapter* a, const vmsig_emit* emit,
vmsig_fd_reg* reg, int cap);
/* Readiness of one of the adapter's fds: `cookie` identifies the fd, `events`
 * are the epoll flags. The adapter does NON-blocking work (reads the socket /
 * drains the eventfd / reads the timerfd + samples counters) and calls emit
 * for each UP event.
 * Returns 0 on success, -1 on a fatal error (the core detaches the adapter). */
int (*on_readiness)(vmsig_adapter* a, uint32_t cookie, uint32_t events);
/* Consume a DOWN event (a control decision): encode it into the contract
 * (vmctl_batch / vmctl power; write the vgpu control block; read request to vmie).
 * For blocking sinks it hands the work to a worker and returns immediately;
 * completion arrives later as an UP VMSIG_EV_ACT_ACK (keyed by ev->corr).
 * Returns 0 = accepted, 1 = rejected (not for this seam), -1 = error. */
int (*submit)(vmsig_adapter* a, const vmsig_event* ev);
/* Detach + free: stop workers, close SI handles and fds. */
void (*close)(vmsig_adapter* a);
} vmsig_adapter_ops;
/* Adapter factories. Each is defined in its adapter's TU and returns that
 * adapter's vtable; it is the only symbol the build/cli layer needs, which
 * keeps neighbor headers out of the core's include path. */
const vmsig_adapter_ops* vmsig_memctx_ops(void); /* vmie: address-space context (kcr3+locator) */
const vmsig_adapter_ops* vmsig_input_ops(void); /* vmctl */
const vmsig_adapter_ops* vmsig_vmhost_ops(void); /* QEMU/QMP (its own signaling) */
/* (The vgpu frame sensor is no longer a signaling adapter: vgpu perception lives
 * in an out-of-repo S-lib that consumes memctx; see vgpu-perception-handoff.) */
#endif /* VMSIG_ADAPTER_H */
+126
View File
@@ -0,0 +1,126 @@
#ifndef VMSIG_CONTROL_H
#define VMSIG_CONTROL_H
#include "vmsig_event.h"
/* vmsig_control.h — control-agnostic seam. Control (an algorithm OR a human)
* attaches via ONE neutral interface: a command queue (down) + an event
* subscription (up). In-process implements the vtable with direct callbacks
* (fd = -1); out-of-process is a socket whose fd is registered with the core like
* any source. The core treats both the same. Orchestration is NOT wired in here —
* only the seam. */
/* Subscription filter: the UP events a control WANTS to receive. It can only
 * NARROW delivery; the real ceiling is set by the grant
 * (effective = sub ∩ grant). */
typedef struct {
    /* bit (1u<<vmsig_source) for each seam of interest */
    uint32_t   source_mask;
    /* drop UP below this priority */
    vmsig_prio prio_min;
    /* 0 = all VMs; otherwise bit (1ull<<endpoint).
     * NOTE(review): the bit form implies endpoint < 64, as stated for
     * vmsig_grant.endpoint_mask — confirm. */
    uint64_t   endpoint_mask;
} vmsig_sub;
/* ===== Security layer: a control's grant (capability set) =====
 * Neutral ceiling of a poller's rights: which VMs, which UP sources, which classes
 * of DOWN commands. The enforcement mechanism is in the core (admission/pump_up/
 * emit_down); the policy (who gets what) is set by the embedding program/
 * orchestrator. Default DENY: an empty grant => not a valid poller (receives and
 * sends nothing).
 *
 * The VMSIG_CAP_* bits below are OR-ed into vmsig_grant.cap_mask. The bit
 * positions 0x4 and 0x10 are retired and intentionally left as holes. */
#define VMSIG_CAP_OBSERVE 0x1u /* UP of SEAM/generic coherent state (observation) */
#define VMSIG_CAP_INPUT 0x2u /* CMD_INPUT */
/* (0x4 is the freed bit of the removed CAP_STREAM; the future vgpu-control down-path
 * returns via write-signaled/MEMWRITE. Do NOT reuse.) */
#define VMSIG_CAP_LIFECYCLE 0x8u /* CMD_LIFECYCLE safe ones (pause/resume/wakeup) */
/* (0x10 is the freed bit of the removed CAP_MEMREAD; do NOT reuse: a stale grant
 * with this bit must not silently alias to the privileged memory cap.) */
#define VMSIG_CAP_POWER 0x20u /* destructive lifecycle/VM (powerdown/reset/quit) */
#define VMSIG_CAP_VM 0x40u /* CMD_VM safe ones (query/cont/stop), VMHOST seam */
#define VMSIG_CAP_MEMCTX 0x80u /* SUBSCRIPTION to a coherent AS context (UP MEMCTX*, re-share RO-fd).
 * NOT an access broker (that is OS-DAC on the fd) — gates RECEIVING the datum. */
#define VMSIG_CAP_MEMWRITE 0x100u /* CMD_MEMWRITE: atomic write-signaled mutation of shared guest memory
 * (separate from the freed CAP_MEMREAD bit — read != write; a fresh bit
 * avoids stale-grant aliasing to this privileged cap). */
/* Grant: the capability set attached to one control (see VMSIG_CAP_*). */
typedef struct {
    /* id for auditing (uid/token) */
    uint32_t principal;
    /* which VMs (bit 1ull<<endpoint, endpoint<64); 0=none */
    uint64_t endpoint_mask;
    /* which UP sources (bit 1u<<vmsig_source) */
    uint32_t source_mask;
    /* VMSIG_CAP_* */
    uint32_t cap_mask;
    /* Lease arbitration priority: higher=stronger; supervisor=max. Separate
     * from vmsig_prio (on-wire ordering). The default arbitration policy
     * compares it (STRICTLY higher preempts, ties=owner). */
    uint32_t arb_prio;
} vmsig_grant;
/* ===== Lease arbitration policy (orchestrator; §5) =====
 * Signaling owns the MECHANISM (exclusivity, preemption, fencing, finalization).
 * The POLICY — preempt or deny on conflict — is set by the orchestrator through
 * a pluggable callback. Default (cb==NULL) is an arb_prio comparison:
 * STRICTLY higher -> PREEMPT, otherwise DENY. */
typedef enum {
    /* deny the contender, the owner keeps it */
    VMSIG_ARB_DENY    = 0,
    /* take it from the owner, give it to the contender (QUEUE — reserved) */
    VMSIG_ARB_PREEMPT = 1
} vmsig_arb_decision;
/* Lease arbitration callback type.
 * Called ONLY when (endpoint,class) is held by a LIVE owner (incumbent) and an
 * ACQUIRE arrives from another contender. incumbent/contender are the parties'
 * grants (live, not copies); incumbent is NEVER NULL (a dead owner is treated as a
 * free slot and the policy is not called). Called on the loop thread.
 *   ud        — user pointer supplied together with the callback
 *   endpoint  — VM the contested lease belongs to
 *   cls       — contested lease class
 * Returns the decision: VMSIG_ARB_DENY or VMSIG_ARB_PREEMPT. */
typedef vmsig_arb_decision (*vmsig_arb_policy)(void* ud, uint32_t endpoint, uint32_t cls,
const vmsig_grant* incumbent,
const vmsig_grant* contender);
/* Control endpoint vtable. The core calls deliver() for UP; the control sends
 * DOWN via the emit hook that the core installs through set_emit_down(). */
typedef struct vmsig_control_ops {
const char* name;
/* fd for an out-of-process control (socket). -1 => in-process, callbacks only
 * (no registration in epoll). */
int (*fd)(void* ctl);
/* Declare interest (called once at attach); the filter is written to `out`. */
int (*subscribe)(void* ctl, vmsig_sub* out);
/* Core -> control: an UP event for the subscriber. For in-process, a direct
 * call; for socket-control, serialization onto the wire. `ev` is borrowed:
 * whatever must outlive the call must be copied. */
int (*deliver)(void* ctl, const vmsig_event* ev);
/* Core -> control (socket only): the control-fd is readable; the implementation
 * parses the wire into DOWN events and calls the installed down-emit. */
int (*on_readable)(void* ctl);
/* The core installs the hook by which the control sends DOWN commands; the core
 * routes them into vmsig_ctx_submit(ctx, VMSIG_DIR_DOWN, ev). `token` is the
 * value to pass back as the hook's first argument. */
void (*set_emit_down)(void* ctl, int (*emit)(void* token, vmsig_event*),
void* token);
/* Release the control instance (the reference in-proc control is freed here). */
void (*close)(void* ctl);
/* Core -> control: deliver a coherent address-space context (UP MEMCTX) + RO-fd
 * of the RAM region.
 *   Socket:  a vmsig_wire frame (kind=MEMCTX, inln=vmsig_memctx) + fd in cmsg
 *            (SCM_RIGHTS); the segs payload does NOT go on the wire (the holder
 *            opens via `low`).
 *   In-proc: direct fd + event (segs in payload, decode with vmsig_memctx_segs).
 * The fd is BORROWED for the duration of the call (the core closes it afterwards);
 * the holder dup's/mmap's it to keep it.
 * Optional: NULL => the control does not accept MEMCTX. Returns 0/-1. */
int (*attach_memctx)(void* ctl, const vmsig_event* ev, int fd);
} vmsig_control_ops;
/* Configuration of the reference in-process control: a thin shim that turns
 * C callbacks into a control vtable, for embedding an algorithm directly. */
typedef struct {
    /* core -> algorithm */
    int     (*on_event)(void* user, const vmsig_event* up);
    /* passed back as the first argument of the callbacks */
    void*     user;
    /* subscription filter */
    vmsig_sub sub;
    /* Core -> algorithm: a coherent AS context (UP MEMCTX) + RO-fd as a direct
     * int. The fd is borrowed (dup/mmap to keep it). NULL => does not accept.
     * 0/-1. */
    int     (*on_memctx)(void* user, const vmsig_event* ev, int fd);
} vmsig_inproc_cfg;
/* Vtable of the reference in-proc control; pass it, together with a ctl from
 * vmsig_inproc_control_new(), to vmsig_core_add_control(). */
const vmsig_control_ops* vmsig_inproc_control_ops(void);
/* Create a reference in-proc control over cfg (which is copied). Returns an opaque
 * ctl for vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl). Freed via
 * ops->close(ctl). NULL on OOM. */
void* vmsig_inproc_control_new(const vmsig_inproc_cfg* cfg);
/* Send a DOWN command from an in-proc control (after attach). 0 — ok, -1 — error. */
int vmsig_inproc_send(void* ctl, vmsig_event* down);
#endif /* VMSIG_CONTROL_H */
+70
View File
@@ -0,0 +1,70 @@
#ifndef VMSIG_CORE_H
#define VMSIG_CORE_H
#include "vmsig_event.h"
#include "vmsig_ctx.h"
#include "vmsig_adapter.h"
#include "vmsig_control.h"
/* vmsig_core.h — non-blocking epoll core. It knows a single vocabulary: "here is
* an fd — call the neutral handler on readiness; the handler produces/consumes
* neutral events". All neighbor mechanisms are just different ways to spawn an
* fd. The core structurally cannot name a neighbor's type: neighbor headers are
* visible only from the adapter TUs. */
/* Opaque core handle. */
typedef struct vmsig_core vmsig_core;
/* Create the core over a transfer context. The core does NOT own ctx: ctx's
 * lifetime must cover the core's. Returns NULL on error. */
vmsig_core* vmsig_core_new(vmsig_ctx* ctx);
/* Stop, detach all adapters/control, free. Safe on NULL. */
void vmsig_core_free(vmsig_core* c);
/* ===== Audit (observability of admissions/denials) ===== */
typedef enum {
    VMSIG_AUDIT_ADMIT           = 0,  /* poller admitted (socket accept) */
    VMSIG_AUDIT_REJECT          = 1,  /* poller rejected at accept (empty grant) */
    VMSIG_AUDIT_DOWN_DENIED     = 2,  /* DOWN command denied by grant/cap */

    /* --- lease arbitration --- */
    VMSIG_AUDIT_LEASE_GRANTED   = 3,  /* lease granted/preempted */
    VMSIG_AUDIT_LEASE_DENIED    = 4,  /* ACQUIRE denied OR destructive dropped by lease gate */
    VMSIG_AUDIT_LEASE_REVOKED   = 5,  /* lease revoked by preemption */
    VMSIG_AUDIT_LEASE_RECLAIMED = 6,  /* lease reclaimed on owner death (reclaim) */

    VMSIG_AUDIT_MEMCTX_GRANTED  = 7   /* address-space context granted/replayed to holder */
} vmsig_audit_kind;
/* One audit record, as passed to the audit callback. */
typedef struct {
    vmsig_audit_kind kind;
    uint32_t         principal;  /* uid/token (grant.principal or peer uid) */
    uint32_t         endpoint;
    uint32_t         cmd;        /* vmsig_kind for DOWN_DENIED */
    uint32_t         detail;     /* extra (e.g. peer pid) */
} vmsig_audit;
/* Set the audit callback (NULL = off). The callback is called on the loop
 * thread; `ud` is handed back to it as its first argument. */
void vmsig_core_set_audit(vmsig_core* c,
void (*cb)(void* ud, const vmsig_audit* a), void* ud);
/* Set the lease arbitration policy. NULL => default:
 * contender.arb_prio > incumbent.arb_prio ? PREEMPT : DENY.
 * The policy is called on the loop thread. */
void vmsig_core_set_arb_policy(vmsig_core* c, vmsig_arb_policy cb, void* ud);
/* Register an adapter for VM `endpoint`: open(cfg,endpoint) -> attach(...), then
 * enroll each yielded fd into epoll and into the dispatch table
 * fd->(adapter,cookie). `cfg` is passed to the adapter's open() as-is.
 * Returns the adapter id (>=0) or -1. */
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
const void* cfg, uint32_t endpoint);
/* Attach a control endpoint (in-process or socket) with a GRANT (capability set).
 * grant == NULL => default-deny (poller inert). The core sees only the neutral
 * vtable + grant + (opt.) fd.
 * Returns the control id (>=0) or -1. */
int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ctl,
const vmsig_grant* grant);
/* Spin the loop until a stop is requested. Returns 0 on a clean stop, -1 on a
 * fatal error. */
int vmsig_core_run(vmsig_core* c);
/* Asynchronous, signal-safe stop request: writes the wakeup eventfd. */
void vmsig_core_stop(vmsig_core* c);
#endif /* VMSIG_CORE_H */
+48
View File
@@ -0,0 +1,48 @@
#ifndef VMSIG_CTX_H
#define VMSIG_CTX_H
#include "vmsig_event.h"
/* vmsig_ctx.h — the "transfer context": the SISC-critical seam owning PRIORITY,
* SEQUENCING and PROTOCOL timing of delivery. Behavioral timing does NOT belong
* here — commands arrive already decided from control; the context merely
* orders and paces them on the "wire". */
/* Opaque transfer context: queues, seq, timing. */
typedef struct vmsig_ctx vmsig_ctx;

/* Protocol (RS232-like) transmission timings. These describe the transport
 * ONLY, never behavior. All zeros = pass-through (no pacing). */
typedef struct {
    uint32_t min_gap_ns;    /* min. gap between channel events (rate-cap) */
    uint32_t coalesce_ns;   /* collapse bursts of one kind within a window */
    uint32_t max_inflight;  /* backpressure depth on a channel before drop */
    uint8_t  drop_policy;   /* VMSIG_DROP_* */
} vmsig_timing;
/* Values for vmsig_timing.drop_policy.
 * NOTE(review): the names suggest drop-oldest / drop-newest / block-the-producer
 * once max_inflight is reached — confirm against the context implementation. */
#define VMSIG_DROP_OLDEST 0
#define VMSIG_DROP_NEWEST 1
#define VMSIG_DROP_BLOCK 2
/* Create / destroy a transfer context. */
vmsig_ctx* vmsig_ctx_new(void);
void vmsig_ctx_free(vmsig_ctx* c);
/* Set the policy for one (source,dir) pair: the default priority plus the
 * protocol timing. These live here, NOT in adapters and NOT in control. */
int vmsig_ctx_set_policy(vmsig_ctx* c, vmsig_source src, vmsig_dir dir,
vmsig_prio default_prio, const vmsig_timing* t);
/* Enqueue an event into the `dir`-direction context (assigns seq, applies
 * priority/timing/coalescing/backpressure).
 * Returns 0 = enqueued, 1 = coalesced/dropped by policy, -1 = error.
 * On success takes ownership of ev->payload.
 * Thread-safe (the UP side is called from worker threads). */
int vmsig_ctx_submit(vmsig_ctx* c, vmsig_dir dir, vmsig_event* ev);
/* Fetch the next event of direction `dir` that is ready for delivery, honoring
 * priority + protocol timing.
 * Returns 1 = event written to `out`, 0 = nothing yet (the caller arms
 * timing_fd), -1 = error. */
int vmsig_ctx_next(vmsig_ctx* c, vmsig_dir dir, vmsig_event* out);
/* timerfd by which the context wakes the loop when a paced/coalesced event
 * has matured. Registered in the core like any source. -1 if not needed. */
int vmsig_ctx_timing_fd(vmsig_ctx* c, vmsig_dir dir);
#endif /* VMSIG_CTX_H */
+280
View File
@@ -0,0 +1,280 @@
#ifndef VMSIG_EVENT_H
#define VMSIG_EVENT_H
#include <stdint.h>
#include <stddef.h>
/* vmsig_event.h — neutral "transfer event" + "payload" model.
*
* This is the ONLY type that crosses the signaling core. The taxonomy names the
* transfer SEMANTICS, not neighbor types: a TU compiled against this header
* cannot name vmctl_batch, vgpu_producer_t, or vmie_mem. The SI data body lives
* in an opaque payload owned by the source adapter's codec; the core does NOT
* dereference it — it only routes the event and carries the payload. */
/* Direction of a transfer, seen from the control's side. */
typedef enum {
    VMSIG_DIR_UP   = 0,  /* sensor/state -> control */
    VMSIG_DIR_DOWN = 1   /* control decision -> actuation/SI */
} vmsig_dir;
/* Logical seam (SI role) an event crosses. These are NEUTRAL roles, not driver
 * names: a role is assigned at adapter registration and is used only for
 * routing, for the priority default, and for the subscription filter. */
typedef enum {
    VMSIG_SRC_NONE    = 0,
    /* vgpu desktop sensor role; reserved: no signaling adapter, the future
     * vgpu-perception shell-as-control carries it (CURSOR_STATE) */
    VMSIG_SRC_FRAME   = 1,
    /* input/actuation + lifecycle (vmctl role) */
    VMSIG_SRC_INPUT   = 2,
    /* originated by a control endpoint */
    VMSIG_SRC_CONTROL = 3,
    /* core-internal (shutdown/error/tick) */
    VMSIG_SRC_CORE    = 4,
    /* VM substrate / QEMU: lifecycle + events (own QMP) */
    VMSIG_SRC_VMHOST  = 5,
    /* coherent guest address-space context (kcr3+locator) */
    VMSIG_SRC_MEMCTX  = 6,
    /* sentinel: one past the last role */
    VMSIG_SRC_MAX
} vmsig_source;
/* Delivery priority class: a higher value is delivered earlier. This is
 * ordering on the "wire", NOT a behavioral timing. The default is assigned per
 * source at registration; the emitter may override it per event. */
typedef enum {
    VMSIG_PRIO_BULK   = 0,  /* frames, large state deltas */
    VMSIG_PRIO_NORMAL = 1,  /* routine ack/samples */
    VMSIG_PRIO_HIGH   = 2,  /* input commands (latency-sensitive) */
    VMSIG_PRIO_URGENT = 3,  /* lifecycle, seam-down, errors */
    VMSIG_PRIO_MAX          /* sentinel: one past the last class */
} vmsig_prio;
/* NEUTRAL event taxonomy. Each kind is a transfer MEANING that exactly one
 * adapter codec decodes from / encodes into its contract. The core routes by
 * kind + source + dir + prio and does not interpret the payload.
 *
 * Retired numbers are documented inline and must never be reused: on a version
 * skew an old kind must not alias a new kind on the wire. */
typedef enum {
    /* --- generic / lifecycle (any seam) --- */
    VMSIG_EV_NONE               = 0,
    VMSIG_EV_SEAM_UP            = 1,   /* SI seam came up (attach/bootstrap ok) */
    VMSIG_EV_SEAM_DOWN          = 2,   /* seam lost (heartbeat stale, socket closed) */
    VMSIG_EV_ERROR              = 3,   /* adapter/core error, details in payload */

    /* (16..19 — retired STATE_* of the MEMSTATE seam; do NOT reuse numbers: on a
     * version skew an old STATE kind must not alias a new kind on the wire.) */
    /* (32..36 — retired FRAME_READY/FRAME_STATE/BULK_ATTACHED/BULK_READY/
     * BULK_DETACHED of the removed FRAME adapter + bulk data-plane (vgpu
     * perception moved to an S-lib); do NOT reuse numbers — wire-skew safety.) */

    /* --- UP: cursor (vgpu sensor; emitted by the vgpu-perception shell-as-control) --- */
    VMSIG_EV_CURSOR_STATE       = 37,  /* cursor position/visibility; inln=vmsig_cursor; cap OBSERVE|INPUT */

    /* --- UP: input/lifecycle ack (INPUT seam) --- */
    VMSIG_EV_ACT_ACK            = 48,  /* down-command completed (ok/err) */
    VMSIG_EV_VM_LIFECYCLE       = 49,  /* power/lifecycle state report */

    /* --- UP: lease arbitration (all addressed, origin=initiator; source=CORE) --- */
    VMSIG_EV_LEASE_GRANTED      = 50,  /* lease granted (CMD_ACQUIRE succeeded) */
    VMSIG_EV_LEASE_DENIED       = 51,  /* lease denied (reason in vmsig_lease_req) */
    VMSIG_EV_LEASE_RELEASED     = 52,  /* lease released by owner (CMD_RELEASE) */
    VMSIG_EV_LEASE_REVOKED      = 53,  /* lease taken away by preemption/death */
    VMSIG_EV_LEASE_STATUS       = 54,  /* response to CMD_LEASE_STATUS (vmsig_lease_status) */

    /* --- UP: response to a held-input query (INPUT seam, addressed to initiator) --- */
    VMSIG_EV_INPUT_HELD         = 55,  /* set of held KEY/BTN from the vmctl record; inln=vmsig_input_held */

    /* --- DOWN: control decisions --- */
    VMSIG_EV_CMD_INPUT          = 64,  /* input injection (abs/rel/btn/key/scroll) */
    VMSIG_EV_CMD_LIFECYCLE      = 65,  /* powerdown/reset/wakeup/pause/resume */
    /* (66 — retired CMD_STREAM of the removed FRAME adapter; the future
     * vgpu-control down-path returns via write-signaled/MEMWRITE. 67..69 —
     * retired CMD_QUERY/WATCH/UNWATCH; do NOT reuse numbers.) */
    VMSIG_EV_CMD_VM             = 70,  /* base VM control (vmsig_vm_cmd; VMHOST seam) */
    /* (71..72 — retired CMD_SUBSCRIBE_BULK/UNSUBSCRIBE_BULK of the bulk
     * data-plane; do NOT reuse numbers.) */

    /* --- DOWN: lease arbitration (intercepted by the core, not forwarded to the adapter) --- */
    VMSIG_EV_CMD_ACQUIRE        = 73,  /* request an exclusive lease of a class: inln=vmsig_lease_req */
    VMSIG_EV_CMD_RELEASE        = 74,  /* release your own lease of a class: inln=vmsig_lease_req */
    VMSIG_EV_CMD_LEASE_STATUS   = 75,  /* query lease status of a class: inln=vmsig_lease_req */
    VMSIG_EV_CMD_QUERY_INPUT    = 76,  /* query held KEY/BTN (from the vmctl record); reply UP INPUT_HELD; cap INPUT */

    /* --- UP: address-space context (MEMCTX seam; coherent kcr3+locator datum) --- */
    VMSIG_EV_MEMCTX             = 77,  /* context multicast/replay: inln=vmsig_memctx,
                                        * payload=vmsig_memseg[] (owned), RO-fd alongside */
    VMSIG_EV_MEMCTX_INVALIDATED = 78,  /* epoch invalidation: inln=vmsig_memctx_inv (URGENT) */

    /* --- DOWN: coherent memory write (write-signaled; MEMCTX seam) --- */
    VMSIG_EV_CMD_MEMWRITE       = 79,  /* atomic gva_write under the held lease; inln=vmsig_memwrite
                                        * (+tail/payload bytes); cap MEMWRITE + lease MEMWRITE +
                                        * extent. ACK via ACT_ACK{ok,corr}. */

    VMSIG_EV_KIND_MAX                  /* sentinel: one past the last kind */
} vmsig_kind;
/* ===== Lease arbitration (exclusive-ownership layer for destructive resources) =====
 * A destructive VM resource is owned by EXACTLY one control per
 * (endpoint, class) pair. The class is generic; INPUT, POWER and MEMWRITE are
 * active. MEMWRITE is the write-signaled atomic guest-memory write on the
 * MEMCTX seam. */
typedef enum {
    /* exclusive grab of input (CMD_INPUT) */
    VMSIG_LEASE_INPUT    = 0,
    /* exclusive destructive power (lifecycle/VM) */
    VMSIG_LEASE_POWER    = 1,
    /* exclusive atomic guest-memory write (gva_write); NO finalization */
    VMSIG_LEASE_MEMWRITE = 2,
    /* sentinel: one past the last class */
    VMSIG_LEASE_CLASS_MAX
} vmsig_lease_class;
/* Reason a lease was denied; carried in vmsig_lease_req.reason of an UP
 * LEASE_DENIED event. */
enum {
    /* held by an equal/higher; the owner holds it */
    VMSIG_LEASE_DENY_HELD       = 0,
    /* no cap for the class (CAP_INPUT/CAP_POWER) */
    VMSIG_LEASE_DENY_NOCAP      = 1,
    /* endpoint outside the grant (endpoint_mask) */
    VMSIG_LEASE_DENY_NOGRANT    = 2,
    /* class out of range */
    VMSIG_LEASE_DENY_BADCLASS   = 3,
    /* contender priority not above the owner's */
    VMSIG_LEASE_DENY_LOWER_PRIO = 4
};
/* Lease request/response body, carried in inln by DOWN
 * CMD_ACQUIRE/RELEASE/LEASE_STATUS and by UP LEASE_*. */
typedef struct {
    uint32_t cls;     /* vmsig_lease_class */
    uint32_t reason;  /* DOWN: 0; UP LEASE_DENIED: VMSIG_LEASE_DENY_* */
} vmsig_lease_req;
/* Reply body for CMD_LEASE_STATUS, carried in inln of an UP LEASE_STATUS. */
typedef struct {
    uint32_t cls;              /* requested class */
    uint32_t busy;             /* 1=held by a live owner, 0=free */
    uint32_t owner_principal;  /* owner principal (for audit/UI); 0 if free */
} vmsig_lease_status;
/* Operation codes for CMD_LIFECYCLE (the code travels in inln[0]).
 * Destructive ones (POWERDOWN/RESET) require CAP_POWER; the safe ones require
 * CAP_LIFECYCLE. */
enum {
    VMSIG_LIFE_POWERDOWN = 0,
    VMSIG_LIFE_RESET     = 1,
    VMSIG_LIFE_WAKEUP    = 2,
    VMSIG_LIFE_PAUSE     = 3,
    VMSIG_LIFE_RESUME    = 4
};
/* ===== Input (DOWN VMSIG_EV_CMD_INPUT, in inln) — NEUTRAL =====
 * control describes input abstractly (axis/button/key/scroll), WITHOUT knowing the driver
 * (uinput/QMP): the input adapter translates it into its contract. Requires CAP_INPUT. This
 * is the ONLY public input-encoding contract — an external control encodes vmsig_input into
 * vmsig_event.inln.
 * Each kind selects which vmsig_input fields are meaningful (see per-value notes). */
typedef enum {
    VMSIG_INPUT_ABS = 0, /* absolute axis: code=axis, value=coordinate */
    VMSIG_INPUT_REL = 1, /* relative axis: code=axis, value=delta */
    VMSIG_INPUT_BTN = 2, /* button: code=button, value=pressed(1)/released(0) */
    VMSIG_INPUT_KEY = 3, /* key: code=evdev code, value=pressed/released */
    VMSIG_INPUT_SCROLL = 4 /* scroll: code=axis, scroll=magnitude (value unused) */
} vmsig_input_kind;
/* One neutral input event, encoded by the control at the start of vmsig_event.inln
 * for DOWN VMSIG_EV_CMD_INPUT. Which of value/scroll is read depends on kind. */
typedef struct {
    uint32_t kind; /* vmsig_input_kind */
    int32_t code; /* axis / button / evdev code (neutral event code) */
    int32_t value; /* abs coordinate / rel delta / pressed(1)|released(0) */
    double scroll; /* scroll magnitude (VMSIG_INPUT_SCROLL only) */
} vmsig_input; /* fits in vmsig_event.inln[48] */
/* ===== Memory write (DOWN VMSIG_EV_CMD_MEMWRITE) — NEUTRAL, write-signaled =====
 * control describes an ATOMIC write into guest memory abstractly (guest VA + length),
 * WITHOUT knowing vmie/cr3: the memctx adapter resolves it under the held kcr3 and does
 * ONE gva_write. Requires CAP_MEMWRITE + an exclusive MEMWRITE lease + an extent check.
 * SRC bytes: inline (<= VMSIG_MEMWRITE_INLINE) ride in the inln tail right after this header
 * (flags & INLINE); larger in-proc writes ride in the borrowed payload (flags & PAYLOAD).
 * NOTE: the upper bound VMSIG_MEMWRITE_MAX named below is an executor bound defined by the
 * memctx adapter's private header, not by this public header. */
#define VMSIG_MEMWRITE_INLINE 32u /* inln tail capacity for SRC (48 - 16 header) */
#define VMSIG_MW_SRC_INLINE 0x1u /* SRC bytes are in inln tail (len<=INLINE) */
#define VMSIG_MW_SRC_PAYLOAD 0x2u /* SRC bytes are in ev->payload.data (in-proc) */
typedef struct {
    uint64_t gva; /* guest virtual address to write (resolved under the adapter's kcr3) */
    uint32_t len; /* number of bytes to write (1..VMSIG_MEMWRITE_MAX) */
    uint32_t flags; /* VMSIG_MW_SRC_INLINE | VMSIG_MW_SRC_PAYLOAD */
    /* inline SRC tail (when VMSIG_MW_SRC_INLINE): up to VMSIG_MEMWRITE_INLINE bytes follow */
} vmsig_memwrite; /* header = 8+4+4 = 16 bytes; +32 tail = 48 (exactly inln[48]) */
/* ===== Cursor (UP VMSIG_EV_CURSOR_STATE, in inln) — NEUTRAL =====
 * Cursor position from the SCREEN sensor (vgpu). NEUTRAL payload format only: emitted by the
 * out-of-repo vgpu-perception shell-as-control (source VMSIG_SRC_FRAME), not by a signaling
 * adapter — signaling just fans it out. x,y signed (multi-monitor -> negative). cap OBSERVE|INPUT.
 * Four 32-bit fields = 16 bytes, encoded at the start of vmsig_event.inln. */
typedef struct {
    int32_t x; /* screen coordinate X (signed) */
    int32_t y; /* screen coordinate Y (signed) */
    uint32_t visible; /* 1=shown, 0=hidden */
    uint32_t seq; /* monotonic cursor-publication counter (vgpu) */
} vmsig_cursor;
/* ===== Held input (UP VMSIG_EV_INPUT_HELD, in inln) — response to CMD_QUERY_INPUT =====
 * Set of held KEY/BTN from the ACTUATOR record (vmctl): signaling only returns it on request,
 * does NOT track it itself and does NOT decide release (that is control). flags & TRUNC => more
 * held than ent.
 * The struct fills inln completely, so the entry capacity (10) cannot grow without
 * changing the inline zone size. */
#define VMSIG_INPUT_HELD_TRUNC 0x1u
typedef struct {
    uint32_t count; /* number of valid entries in ent[] */
    uint32_t flags; /* VMSIG_INPUT_HELD_TRUNC if more held than capacity */
    struct { uint16_t kind; uint16_t code; } ent[10]; /* kind=VMSIG_INPUT_KEY/BTN; code */
} vmsig_input_held; /* 4+4+10*4 = 48 (exactly inln[48]) */
/* ===== QEMU/QMP host-plane (VMHOST seam) — VM-substrate control =====
 * VM state (UP VMSIG_EV_VM_LIFECYCLE, in inln).
 * Enumerators are implicitly numbered from 0: RUNNING=0, PAUSED=1, SHUTDOWN=2,
 * RESET=3, POWERDOWN=4, CRASHED=5, UNKNOWN=6. Inserting a value in the middle
 * renumbers the ones after it — append only. */
enum {
    VMSIG_VM_RUNNING = 0, VMSIG_VM_PAUSED, VMSIG_VM_SHUTDOWN,
    VMSIG_VM_RESET, VMSIG_VM_POWERDOWN, VMSIG_VM_CRASHED, VMSIG_VM_UNKNOWN
};
/* state: one of VMSIG_VM_*; detail: extra code whose meaning is not defined in this
 * header (NOTE(review): presumably adapter-specific — confirm against the VMHOST adapter). */
typedef struct { uint32_t state; uint32_t detail; } vmsig_vm_state;
/* VM control operations (DOWN VMSIG_EV_CMD_VM, in inln). Destructive ones
 * (RESET/POWERDOWN/QUIT) require CAP_POWER; safe ones — CAP_VM.
 * Implicitly numbered from 0: QUERY=0, CONT=1, STOP=2, RESET=3, POWERDOWN=4, QUIT=5. */
enum {
    VMSIG_VMOP_QUERY = 0, /* query-status */
    VMSIG_VMOP_CONT, /* cont (resume) */
    VMSIG_VMOP_STOP, /* stop (pause) */
    VMSIG_VMOP_RESET, /* system_reset (destructive) */
    VMSIG_VMOP_POWERDOWN, /* system_powerdown (destructive) */
    VMSIG_VMOP_QUIT /* quit (destructive) */
};
/* op: one of VMSIG_VMOP_*. */
typedef struct { uint32_t op; } vmsig_vm_cmd;
/* Codec tags: which adapter owns the payload body (for release/diagnostics).
 * Stored in vmsig_payload.codec as a uint32_t. */
typedef enum {
    VMSIG_CODEC_NONE = 0, /* no codec / no payload owner */
    VMSIG_CODEC_INPUT = 1, /* input adapter */
    VMSIG_CODEC_VMHOST = 2, /* VMHOST (host-plane) adapter */
    VMSIG_CODEC_MEMCTX = 3 /* owned-payload locator (vmsig_memseg[]) of the MEMCTX seam */
} vmsig_codec;
/* Payload ownership flags (bitmask in vmsig_payload.flags). */
#define VMSIG_PL_OWNED 0x1u /* core frees it via release() on drop */
#define VMSIG_PL_BORROWED 0x2u /* borrowed (e.g. a seqlock frame): copy */
/* or revalidate before release() */
#define VMSIG_PL_INLINE 0x4u /* small body lives in vmsig_event.inln */
/* Opaque, releasable payload. The body is owned by the emitting adapter's codec
 * (mmap'd frame slot, vmie heap diff, ...). The core carries the bearer and calls
 * release() EXACTLY once on consumption/drop. The core never dereferences data.
 * Use vmsig_payload_release() rather than calling release directly: it clears the
 * callback after the call, which is what makes a second release a no-op. */
typedef struct vmsig_payload {
    void* data; /* opaque body, codec-defined */
    size_t len; /* bytes in data (0 if borrowed) */
    uint32_t codec; /* vmsig_codec: whose payload it is */
    uint32_t flags; /* VMSIG_PL_* */
    void (*release)(struct vmsig_payload*); /* idempotent; may be NULL */
    void* owner; /* codec context for release() */
} vmsig_payload;
/* TRANSFER EVENT. Fixed-size header + a small inline zone; large bodies hang off
 * the payload. Emitters zero the whole struct (memset) and then fill only the fields
 * they need, so every field's zero value must mean "unset/none". */
typedef struct vmsig_event {
    vmsig_kind kind; /* VMSIG_EV_*: what this event is; selects the inln layout */
    vmsig_source source; /* source seam */
    vmsig_dir dir; /* direction: UP (adapter -> control) or DOWN (control -> adapter) */
    vmsig_prio prio; /* delivery priority (e.g. VMSIG_PRIO_NORMAL) */
    uint32_t endpoint; /* VM/endpoint id — multi-VM-ready */
    uint32_t seq; /* monotonic sequence (set by the context) */
    uint32_t corr; /* correlation: links an ACK to its CMD */
    uint32_t origin; /* INTERNAL: id+1 of the control that initiated DOWN (0=none/broadcast). */
    /* Set by the core in emit_down; NOT serialized onto the wire */
    /* (a poller cannot forge it). Addressed reply delivery. */
    uint64_t ts_ns; /* CLOCK_MONOTONIC at emit time */
    vmsig_payload payload; /* opaque body (may be empty) */
    uint8_t inln[48]; /* inline zone for small events (VMSIG_PL_INLINE) */
} vmsig_event;
/* Drop the payload carried by `ev`.
 * Invokes the payload's release callback (when one is set) and then clears it, so a
 * repeated call on the same event does nothing. A NULL event is ignored. */
static inline void vmsig_payload_release(vmsig_event* ev) {
    if (!ev) return;
    vmsig_payload* body = &ev->payload;
    if (!body->release) return; /* nothing to release, or already released */
    body->release(body);
    body->release = NULL; /* makes the next call a no-op */
}
#endif /* VMSIG_EVENT_H */
+101
View File
@@ -0,0 +1,101 @@
#ifndef VMSIG_MEMCTX_H
#define VMSIG_MEMCTX_H
#include <stdint.h>
#include <stddef.h>
#include "vmsig_event.h"
/* vmsig_memctx.h — NEUTRAL handoff contract for the guest address-space context.
*
* signaling is a COHERENCE layer for shared state, not perception. Over memory it
* vends ONE coherent datum: the root of the guest address space (the permanent System
* DirectoryTableBase, `kcr3`) PAIRED with a RAM-region locator — a pre-opened `O_RDONLY`
* fd. The holder (an S library / any control, including a human operator via their shim)
* subscribes to this datum, opens ITS OWN read-only context FROM the received fd (keyed on
* `kcr3`), and does proc_list/gva_read/scan/pmap itself. Perception and semantics are NOT here.
*
* Holder invariants:
* - The locator is valid ONLY against the received `O_RDONLY` fd. From it the holder opens
* its own read-only context, keyed on the vended `kcr3`:
* * raw reads under a cr3 it already holds — vmie_mem_from_ro_fd(fd, low) (nseg==0) or
* vmie_mem_from_ro_fd_segs(fd, segs, nseg) (nseg>0); gva_read keyed on (mem, kcr3);
* * FULL read context WITH process/module discovery — vmie_win32_open_ro_fd(fd, low,
* kcr3): builds the offset profile read-only from the image (no beacon/ACK) and
* enables proc_list/proc_modules plus the section/import/export/scan surfaces. A
* sensor that must FIND a process (then read its private AS) needs this one — kcr3
* alone gives reads-under-a-known-cr3, not discovery.
* Both map PROT_READ (gva_write -> -1). `kcr3` is valid ONLY within its `epoch`.
* - On UP MEMCTX_INVALIDATED{endpoint,epoch} the holder closes its context/fd-mmap
* and waits for the next MEMCTX{epoch+1} (re-multicast with a new kcr3 and a fresh fd).
* - The fd is always `O_RDONLY` (VMSIG_MEMCTX_RDONLY set by this layer): mmap(PROT_WRITE)
* through it -> EACCES. Writing into the guest is structurally impossible on the holder
* side — it goes through the write-signaled MEMWRITE command (CMD_MEMWRITE under the
* MEMWRITE lease), never this RO mapping. */
/* Locator-POD flag: the region is vended read-only (always set by signaling). */
#define VMSIG_MEMCTX_RDONLY 0x1u
/* Address-space context locator-POD (rides in vmsig_event.inln; <=48 bytes).
 * Flat self-describing encoding: nseg explicit, no offset magic.
 * Only this POD crosses the socket wire; the segment array (when nseg>0) rides in the
 * event payload and is decoded with vmsig_memctx_segs(). */
typedef struct {
    uint64_t kcr3; /* permanent System DirectoryTableBase (guest AS root) */
    uint64_t low; /* below-4G RAM size (PCI-hole split point; single-low open) */
    uint32_t epoch; /* VM-session epoch; kcr3 valid ONLY within it */
    uint32_t nseg; /* number of segments in the owned-payload (0 => single-low by `low`) */
    uint32_t flags; /* VMSIG_MEMCTX_RDONLY */
    uint32_t _pad; /* explicit padding: keeps the POD at 32 bytes with no implicit tail */
} vmsig_memctx; /* 8+8+4+4+4+4 = 32 bytes */
/* One GPA->file segment (mirrors the neighbor's gpa_seg from memmodel.h, but self-contained:
 * this header does NOT pull in the neighbor's contract). Rides in the owned-payload of the
 * MEMCTX event when nseg>0. For a single-low image nseg==0 and the holder opens by `low`.
 * Three uint64_t fields = 24 bytes per segment. */
typedef struct {
    uint64_t gpa; /* GPA of the window */
    uint64_t len; /* window length in bytes */
    uint64_t file_off; /* offset into the RAM-backing file */
} vmsig_memseg;
/* Epoch invalidation (UP VMSIG_EV_MEMCTX_INVALIDATED, in inln).
 * On receipt the holder drops its context for `endpoint` and waits for the next MEMCTX. */
typedef struct {
    uint32_t endpoint; /* endpoint whose context became invalid */
    uint32_t epoch; /* new epoch; the previous one's context is invalid */
} vmsig_memctx_inv;
/* Decode the MEMCTX event's owned-payload into segs[] (pointer + nseg). A pure function over
 * the event: no ownership, no allocations.
 *
 * ev       — MEMCTX event whose inln holds a vmsig_memctx; NULL is treated as "no segments".
 * out_nseg — optional; receives the segment count, or 0 whenever NULL is returned.
 *
 * Returns a pointer to the segments (borrowed from ev->payload.data), or NULL if there are
 * none — e.g. a single-low image OR socket delivery, where the payload does not cross the
 * wire and the holder opens by `low` — or if the payload is too short for the declared nseg. */
static inline const vmsig_memseg* vmsig_memctx_segs(const vmsig_event* ev,
                                                     uint32_t* out_nseg) {
    if (out_nseg) *out_nseg = 0; /* default for every NULL-returning path */
    if (!ev) return NULL;
    const vmsig_memctx* m = (const vmsig_memctx*)ev->inln;
    uint32_t n = m->nseg;
    if (!n || !ev->payload.data) return NULL;
    /* Compare in units of segments: n * sizeof(vmsig_memseg) can wrap where size_t is
     * 32 bits, which would let an oversized nseg pass a byte-length check. */
    if (ev->payload.len / sizeof(vmsig_memseg) < n) return NULL;
    if (out_nseg) *out_nseg = n;
    return (const vmsig_memseg*)ev->payload.data;
}
/* ===== Registration seam adapter -> core =====
 *
 * The memctx adapter registers THIS in the core via vmsig_emit.register_memctx. The core
 * keeps the registration per-endpoint (retained-context) and does NOT store a copy of the
 * locator: on delivery/replay it calls describe() (current locator snapshot) + share_fd()
 * (fresh O_RDONLY fd). The epoch is stamped by the CORE (single source of truth); describe
 * does NOT fill it. invalidate() — the core asks the adapter to re-bootstrap on an epoch
 * change (the adapter re-emits MEMCTX once ready). All callbacks are called on the loop
 * thread. ctx — the adapter's private context. */
typedef struct vmsig_memctx_reg {
    uint32_t endpoint; /* endpoint this registration serves */
    uint32_t source; /* VMSIG_SRC_MEMCTX */
    void* ctx; /* adapter's private context */
    /* Current locator snapshot: kcr3/low/nseg/flags + segs (borrowed, owned by the
     * adapter; lives across epochs). The core overwrites epoch with its own value. */
    void (*describe)(void* ctx, vmsig_memctx* out_pod,
                     const vmsig_memseg** out_segs, uint32_t* out_nseg);
    int (*share_fd)(void* ctx); /* fresh O_RDONLY fd of the RAM region (caller closes) */
    void (*invalidate)(void* ctx, uint32_t epoch); /* re-bootstrap for the new epoch */
} vmsig_memctx_reg;
#endif /* VMSIG_MEMCTX_H */
+42
View File
@@ -0,0 +1,42 @@
#ifndef VMSIG_SOCKET_H
#define VMSIG_SOCKET_H
#include "vmsig_event.h"
#include "vmsig_control.h" /* vmsig_grant */
#include "vmsig_core.h" /* vmsig_core */
/* vmsig_socket.h — out-of-process control over a unix socket (human/service poller).
* signaling LISTENS; each accepted connection is authenticated (SO_PEERCRED) and,
* per policy, receives a grant -> becomes a distinct control behind the same seam. */
/* Wire format: fixed-size, pointer-free — the same contract on the external
 * poller. Single host (unix socket) => native byte order. Only the event's
 * inline part is serialized (payload pointers do not go on the wire).
 * Not carried: seq, ts_ns, origin and the payload — the frame has no field for them. */
#define VMSIG_WIRE_MAGIC 0x47495356u /* 'VSIG' (bytes 'V','S','I','G' in little-endian memory order) */
#define VMSIG_WIRE_VERSION 1u
typedef struct {
    uint32_t magic; /* VMSIG_WIRE_MAGIC */
    uint32_t version; /* VMSIG_WIRE_VERSION */
    uint32_t kind; /* vmsig_kind */
    uint32_t source; /* vmsig_source */
    uint32_t dir; /* vmsig_dir */
    uint32_t prio; /* vmsig_prio */
    uint32_t endpoint; /* VM/endpoint id */
    uint32_t corr; /* correlation id (links an ACK to its CMD) */
    uint8_t inln[48]; /* inline event payload */
} vmsig_wire; /* 8*4 + 48 = 80 bytes, no padding */
/* Frame <-> event codec (for external clients too).
 * encode: fills *w from *ev. decode: fills *ev from *w. */
void vmsig_wire_encode(vmsig_wire* w, const vmsig_event* ev);
int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev); /* 0 ok, -1 bad magic/ver */
/* Admission policy: given the authenticated peer (SO_PEERCRED), return a grant.
 * An empty grant (cap_mask==0 || endpoint_mask==0) => connection is rejected.
 * ud is the opaque pointer handed to vmsig_socket_attach. */
typedef vmsig_grant (*vmsig_socket_policy)(uint32_t uid, uint32_t pid, void* ud);
/* Bring up a unix-socket control listener on `path` (prefix '@' => abstract socket).
 * Driven by the epoll core: accept -> SO_PEERCRED -> policy -> grant -> per-conn
 * control. Returns 0/-1. */
int vmsig_socket_attach(vmsig_core* core, const char* path,
                        vmsig_socket_policy policy, void* ud);
#endif /* VMSIG_SOCKET_H */
+44
View File
@@ -0,0 +1,44 @@
#ifndef VMSIG_ADAPTER_UTIL_H
#define VMSIG_ADAPTER_UTIL_H
#include <stddef.h>
/* adapter_util.h — shared primitive "blocking API -> completion eventfd".
*
* A bridge turning a synchronous CPU-bound / blocking neighbor call (vmie,
* vmctl) into a readiness source for the epoll core: the loop thread posts a request, a
* separate worker thread runs the blocking work and signals a completion eventfd; on it
* the loop wakes and collects the result in on_readiness. Reused by the memctx
* (off-loop bootstrap) and input adapters. */
/* Opaque worker pool handle; created by vmsig_worker_new, destroyed by vmsig_worker_free. */
typedef struct vmsig_worker vmsig_worker;
#define VMSIG_WORK_SLOT 256 /* req/res slot size (POD, copied) */
/* Callback run IN the worker thread: req -> res (both POD <= VMSIG_WORK_SLOT).
 * Returns 0/-1 (the code is stored alongside, see vmsig_worker_poll). Must not touch
 * core structures — only compute res from req.
 * res points at a zero-filled VMSIG_WORK_SLOT-byte buffer. */
typedef int (*vmsig_work_fn)(void* user, const void* req, void* res);
/* Create a worker pool of nthreads threads over a shared queue (nthreads>=1). vmie
 * allows parallel read-only readers; for a serial channel (QMP) use 1. max_depth — the
 * request-queue depth ceiling (<=0 => default): submit beyond it is rejected (-1) so an
 * untrusted flood does not grow into OOM. NULL on error.
 * nthreads < 1 is clamped to 1. If only some threads can be started the pool still
 * comes up with the ones that did; it fails only when none could be created. */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth);
/* Stop the threads (join) and free. Safe on NULL.
 * Requests still queued are executed before the threads exit; their results are discarded. */
void vmsig_worker_free(vmsig_worker* w);
/* completion eventfd: the adapter registers it as a VMSIG_RDY_EVENTFD source.
 * Returns -1 for a NULL worker. */
int vmsig_worker_evfd(const vmsig_worker* w);
/* loop thread: post a request (copied, len <= VMSIG_WORK_SLOT). 0/-1.
 * -1 covers: NULL worker, len too large, queue at max_depth, or allocation failure. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len);
/* loop thread (in on_readiness): drain the completion eventfd. */
void vmsig_worker_ack(vmsig_worker* w);
/* loop thread: collect a ready result. 1 — written to res (+ *rc = fn code),
 * 0 — empty, -1 — error. Drain in a loop until 0.
 * At most cap bytes are copied into res; -1 is returned only for a NULL worker. */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc);
#endif /* VMSIG_ADAPTER_UTIL_H */
+18
View File
@@ -0,0 +1,18 @@
#ifndef VMSIG_INPUT_H
#define VMSIG_INPUT_H
/* Private config of the input adapter (vmctl). cfg==NULL => stub mode. Armed mode
 * (VMSIG_WITH_VMCTL) opens vmctl_open() and actuates for real. driver is an int so
 * as not to pull vmctl.h into this header (values match VMCTL_DRIVER_*).
 * The string pointers are borrowed, not copied: they must stay valid until the
 * adapter has been attached. */
typedef struct {
    int stub; /* nonzero => stub mode (ack without actuating) even in an armed build */
    int driver; /* 0=QMP, 1=UINPUT (see VMCTL_DRIVER_*) */
    const char* qmp_path; /* passed through to vmctl_config.qmp_path */
    const char* input_bus; /* passed through to vmctl_config.input_bus */
    int ptr_mode; /* passed through to vmctl_config.ptr_mode */
} vmsig_input_cfg;
/* Input event codes/contract are PUBLIC: vmsig_input / vmsig_input_kind in
* include/vmsig_event.h (external control encodes them into inln). No private duplicate. */
#endif /* VMSIG_INPUT_H */
+230
View File
@@ -0,0 +1,230 @@
/* input.c — input/actuator adapter for vmctl (input + power/lifecycle).
*
* Mechanism (recommended): vmctl is a blocking QMP round-trip; we run it on a
* worker thread, completion ack via a completion-eventfd. The uinput path is a
* local instantaneous write; when armed it would be done inline (see comment in submit).
* Real actuation is under VMSIG_WITH_VMCTL; otherwise the stub acks (spine without a VM). */
#include "vmsig_adapter.h"
#include "adapter_util.h"
#include "input.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/epoll.h>
#ifdef VMSIG_WITH_VMCTL
#include "vmctl.h"
#endif
/* POD request/result of the worker. Both are copied by value through the worker's
 * fixed-size slots, so they must stay plain data and no larger than VMSIG_WORK_SLOT. */
typedef struct {
    int cmd; /* 0 = input event, 1 = lifecycle */
    uint32_t corr; /* echoed into the ACK so control can match it to its CMD */
    uint32_t origin; /* initiator (addressed ACK) */
    int kind; /* vmsig_input_kind (for cmd==0) */
    int code; /* axis/btn/evdev-code */
    int value; /* abs/rel/down */
    double scroll; /* scroll magnitude (VMSIG_INPUT_SCROLL only) */
    int life_op; /* VMSIG_LIFE_* (powerdown/reset/wakeup/pause/resume) */
} input_req;
/* Worker result: ok=1 on success (or stub ack), plus the request's corr/origin echoed back. */
typedef struct { int ok; uint32_t corr; uint32_t origin; } input_res;
/* signaling does NOT track held state: the record of what is pressed lives in the
 * ACTUATOR (vmctl); we hand it to control on request (CMD_QUERY_INPUT), release is control's decision. */
/* Per-endpoint state of the input adapter. Allocated zeroed in open, completed in
 * attach (emit/worker/vmctl), torn down in close. */
struct vmsig_adapter {
    uint32_t endpoint; /* endpoint id stamped on every event this adapter emits */
    int stub; /* nonzero => never open vmctl; the worker just acks */
    vmsig_emit emit; /* copy of the core's emit interface (set in attach) */
    vmsig_worker* worker; /* single-thread worker running the blocking vmctl calls */
    int driver; /* 0=QMP, 1=UINPUT (VMCTL_DRIVER_*); carried open->attach */
    const char* qmp_path; /* borrowed from cfg (valid through attach) */
    const char* input_bus; /* borrowed from cfg, like qmp_path */
    int ptr_mode; /* carried open->attach into vmctl_config.ptr_mode */
#ifdef VMSIG_WITH_VMCTL
    vmctl_t* vmctl; /* actuator handle; NULL in stub mode */
#endif
};
#ifdef VMSIG_WITH_VMCTL
/* 1 if `kind` is one of the VMSIG_INPUT_* values the actuator can translate, else 0. */
static int input_kind_known(int kind) {
    switch (kind) {
        case VMSIG_INPUT_ABS:
        case VMSIG_INPUT_REL:
        case VMSIG_INPUT_BTN:
        case VMSIG_INPUT_KEY:
        case VMSIG_INPUT_SCROLL:
            return 1;
        default:
            return 0;
    }
}
#endif
/* Worker-thread job: perform one input or lifecycle request and fill the result.
 *
 * user — the adapter (struct vmsig_adapter*); reqp — input_req; resp — input_res.
 * The result always echoes corr/origin so the loop thread can address the ACK.
 * Armed (vmctl open): returns the vmctl call's code and sets ok = (code == 0). An
 * unknown input kind or lifecycle op is rejected (-1, ok=0) WITHOUT touching the
 * actuator, so control never receives a success ACK for a command that did nothing.
 * Stub (no vmctl): acks ok=1 without actuating and returns 0. */
static int input_job(void* user, const void* reqp, void* resp) {
    struct vmsig_adapter* a = user;
    const input_req* rq = reqp;
    input_res* rs = resp;
    memset(rs, 0, sizeof *rs);
    rs->corr = rq->corr;
    rs->origin = rq->origin;
#ifdef VMSIG_WITH_VMCTL
    if (a->vmctl) {
        int r = -1; /* stays -1 for anything we do not recognise */
        if (rq->cmd == 0) {
            if (input_kind_known(rq->kind)) {
                vmctl_batch b;
                vmctl_batch_init(&b);
                switch (rq->kind) {
                    case VMSIG_INPUT_ABS: vmctl_batch_abs(&b, rq->code, rq->value); break;
                    case VMSIG_INPUT_REL: vmctl_batch_rel(&b, rq->code, rq->value); break;
                    case VMSIG_INPUT_BTN: vmctl_batch_btn(&b, rq->code, rq->value); break;
                    case VMSIG_INPUT_KEY: vmctl_batch_key(&b, rq->code, rq->value); break;
                    case VMSIG_INPUT_SCROLL: vmctl_batch_scroll(&b, rq->code, rq->scroll); break;
                    default: break; /* unreachable: filtered by input_kind_known */
                }
                r = vmctl_batch_send(a->vmctl, &b);
            }
        } else {
            /* Dispatch on the public lifecycle codes rather than bare numbers. */
            switch (rq->life_op) {
                case VMSIG_LIFE_POWERDOWN: r = vmctl_powerdown(a->vmctl); break;
                case VMSIG_LIFE_RESET: r = vmctl_reset(a->vmctl); break;
                case VMSIG_LIFE_WAKEUP: r = vmctl_wakeup(a->vmctl); break;
                case VMSIG_LIFE_PAUSE: r = vmctl_pause(a->vmctl); break;
                case VMSIG_LIFE_RESUME: r = vmctl_resume(a->vmctl); break;
                default: break;
            }
        }
        rs->ok = (r == 0);
        return r;
    }
#endif
    (void)a;
    rs->ok = 1; /* stub: ack without actuation */
    return 0;
}
/* Allocate the input adapter for `endpoint`.
 * cfg is an optional vmsig_input_cfg: without it the adapter is a stub; with it the
 * stub flag and the driver selection are remembered for attach (which is not handed
 * the cfg again). Returns NULL when the allocation fails. */
static vmsig_adapter* in_open(const void* cfg, uint32_t endpoint) {
    struct vmsig_adapter* self = calloc(1, sizeof *self);
    if (self == NULL) return NULL;

    const vmsig_input_cfg* conf = cfg;
    self->endpoint = endpoint;
    if (conf == NULL) {
        self->stub = 1; /* no config => stub mode */
        return self;
    }
    self->stub = conf->stub;
    self->driver = conf->driver;
    self->qmp_path = conf->qmp_path;
    self->input_bus = conf->input_bus;
    self->ptr_mode = conf->ptr_mode;
    return self;
}
/* Wire the adapter into the core.
 * Stores the emit interface, starts the single-thread worker (request queue capped at
 * 64), opens the vmctl actuator when armed and not stubbed, publishes the worker's
 * completion eventfd in reg[0], and announces the seam with a SEAM_UP event.
 * Returns the number of fd registrations filled (1), or -1 on failure. */
static int in_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1) return -1; /* need room for one registration */

    a->emit = *emit;
    a->worker = vmsig_worker_new(input_job, a, 1, 64); /* QMP is a serial channel, cap 64 */
    if (a->worker == NULL) return -1;

#ifdef VMSIG_WITH_VMCTL
    if (!a->stub) {
        /* armed: build vmctl_config from the carried cfg and open the actuator. UINPUT
         * (host uinput + optional virtio-input-host-pci passthrough via QMP) is the primary
         * input driver; QMP input-send-event is the fallback. */
        vmctl_config vcfg;
        memset(&vcfg, 0, sizeof vcfg);
        if (a->driver == 1) vcfg.driver = VMCTL_DRIVER_UINPUT;
        else vcfg.driver = VMCTL_DRIVER_QMP;
        vcfg.qmp_path = a->qmp_path;
        vcfg.input_bus = a->input_bus;
        vcfg.ptr_mode = a->ptr_mode;
        vcfg.uinput_id = NULL; /* built-in HID identity defaults */
        a->vmctl = vmctl_open(&vcfg);
        if (a->vmctl == NULL) {
            /* undo the worker so a failed attach leaves nothing running */
            vmsig_worker_free(a->worker);
            a->worker = NULL;
            return -1;
        }
    }
#endif

    vmsig_fd_reg* slot = &reg[0];
    slot->fd = vmsig_worker_evfd(a->worker);
    slot->epoll_events = EPOLLIN;
    slot->shape = VMSIG_RDY_EVENTFD;
    slot->cookie = 0;

    vmsig_event seam_up;
    memset(&seam_up, 0, sizeof seam_up);
    seam_up.kind = VMSIG_EV_SEAM_UP;
    seam_up.source = VMSIG_SRC_INPUT;
    seam_up.dir = VMSIG_DIR_UP;
    seam_up.prio = VMSIG_PRIO_NORMAL;
    seam_up.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &seam_up);
    return 1;
}
/* Completion-eventfd handler (loop thread).
 * Drains the eventfd, then turns every finished worker result into an UP ACT_ACK
 * addressed to the initiator (corr/origin echoed); the result POD is copied into the
 * inline zone, truncated to its size if needed. Always returns 0. */
static int in_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie;
    (void)events;
    vmsig_worker_ack(a->worker);

    for (;;) {
        input_res done;
        int job_rc;
        if (vmsig_worker_poll(a->worker, &done, sizeof done, &job_rc) != 1) break;

        vmsig_event ack;
        memset(&ack, 0, sizeof ack);
        ack.kind = VMSIG_EV_ACT_ACK;
        ack.source = VMSIG_SRC_INPUT;
        ack.dir = VMSIG_DIR_UP;
        ack.prio = VMSIG_PRIO_NORMAL;
        ack.endpoint = a->endpoint;
        ack.corr = done.corr;
        ack.origin = done.origin;
        ack.payload.flags = VMSIG_PL_INLINE;

        size_t ncopy = sizeof done;
        if (sizeof ack.inln < ncopy) ncopy = sizeof ack.inln;
        memcpy(ack.inln, &done, ncopy);

        a->emit.emit(a->emit.token, &ack);
    }
    return 0;
}
/* Answer CMD_QUERY_INPUT: emit an UP INPUT_HELD with what is PRESSED according to the
 * vmctl ACTUATOR's record (signaling does NOT track held itself). The read is read-only
 * (no QMP round-trip) => done on the loop thread. Stub without vmctl => empty set
 * (nothing to actuate — nothing to hold).
 * The reply is addressed to the initiator (origin) and carries the query's corr so the
 * control can match it to its request, like ACT_ACK does. */
static void in_reply_held(vmsig_adapter* a, const vmsig_event* ev) {
    vmsig_input_held h;
    memset(&h, 0, sizeof h);
#ifdef VMSIG_WITH_VMCTL
    if (a->vmctl) {
        const uint32_t capn = (uint32_t)(sizeof h.ent / sizeof h.ent[0]);
        unsigned char bits[VMCTL_KEYS_SNAPSHOT_BYTES];
        int n = vmctl_keys_snapshot(a->vmctl, bits, sizeof bits);
        /* n <= 0 => no usable snapshot: the loop is skipped and bits is never read */
        for (int code = 0; n > 0 && code <= VMCTL_KEY_CODE_MAX; code++)
            if (bits[code >> 3] & (1u << (code & 7))) {
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_KEY;
                    h.ent[h.count].code = (uint16_t)code;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC; /* more held than fits in ent[] */
                }
            }
        unsigned bm = vmctl_btns_snapshot(a->vmctl);
        for (int b = 0; b < 8; b++)
            if (bm & (1u << b)) {
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_BTN;
                    h.ent[h.count].code = (uint16_t)b;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC;
                }
            }
    }
#endif
    vmsig_event up;
    memset(&up, 0, sizeof up);
    up.kind = VMSIG_EV_INPUT_HELD; up.source = VMSIG_SRC_INPUT; up.dir = VMSIG_DIR_UP;
    up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
    up.corr = ev->corr; /* link the reply to its CMD_QUERY_INPUT */
    up.origin = ev->origin;
    up.payload.flags = VMSIG_PL_INLINE;
    memcpy(up.inln, &h, sizeof up.inln < sizeof h ? sizeof up.inln : sizeof h);
    a->emit.emit(a->emit.token, &up);
}
/* DOWN command entry point of the input seam.
 *
 * CMD_QUERY_INPUT — answered synchronously on the loop thread (see in_reply_held).
 * CMD_INPUT       — decodes the NEUTRAL vmsig_input from inln and queues it to the worker.
 * CMD_LIFECYCLE   — takes the VMSIG_LIFE_* code from inln[0] and queues it to the worker.
 *
 * Returns 0 when handled/queued, 1 when the event is not for this seam, -1 when the
 * worker rejected the request (queue full or allocation failure). */
static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CMD_QUERY_INPUT) {
        in_reply_held(a, ev);
        return 0;
    }
    input_req rq;
    memset(&rq, 0, sizeof rq);
    rq.corr = ev->corr; rq.origin = ev->origin;
    if (ev->kind == VMSIG_EV_CMD_INPUT) {
        rq.cmd = 0;
        /* Decode the NEUTRAL public input contract from inln (vmsig_input). We do NOT track
         * held — that is the vmctl actuator's record (returned via CMD_QUERY_INPUT). */
        vmsig_input in;
        memcpy(&in, ev->inln, sizeof in <= sizeof ev->inln ? sizeof in : sizeof ev->inln);
        rq.kind = (int)in.kind;
        rq.code = (int)in.code;
        rq.value = (int)in.value;
        rq.scroll = in.scroll;
    } else if (ev->kind == VMSIG_EV_CMD_LIFECYCLE) {
        rq.cmd = 1;
        rq.life_op = (int)(unsigned char)ev->inln[0];
    } else {
        return 1; /* not for this seam */
    }
    return vmsig_worker_submit(a->worker, &rq, sizeof rq) == 0 ? 0 : -1;
}
/* Tear the adapter down. NULL is accepted.
 * The worker is stopped first (its threads are joined) so that no job can still be
 * using the actuator when it is closed afterwards. */
static void in_close(vmsig_adapter* a) {
    if (a == NULL) return;
    vmsig_worker_free(a->worker);
#ifdef VMSIG_WITH_VMCTL
    if (a->vmctl != NULL) vmctl_close(a->vmctl);
#endif
    free(a);
}
/* Operation table of the input adapter: identity (name/source/codec) plus the five
 * lifecycle hooks implemented in this file. */
static const vmsig_adapter_ops IN_OPS = {
    .name = "input",
    .source = VMSIG_SRC_INPUT,
    .codec = VMSIG_CODEC_INPUT,
    .open = in_open,
    .attach = in_attach,
    .on_readiness = in_on_ready,
    .submit = in_submit,
    .close = in_close
};
/* Public accessor: hands the core the input adapter's operation table. */
const vmsig_adapter_ops* vmsig_input_ops(void) {
    return &IN_OPS;
}
+162
View File
@@ -0,0 +1,162 @@
/* worker.c — bridge "blocking API -> completion eventfd" (pool of N threads).
* MPSC request/result queues under a mutex + condvar; result readiness is
* signaled via eventfd, on which the core's epoll loop wakes. N threads share one
* request queue (for vmie — parallel read-only readers; for QMP — N=1). */
#include "adapter_util.h"
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <sys/eventfd.h>
/* One queue entry. The same node type is used for requests (loop -> workers) and for
 * results (workers -> loop); buf holds the POD copied in or produced by the callback. */
typedef struct work_node {
    struct work_node* next; /* singly-linked FIFO link; NULL at the tail */
    int rc; /* fn return code (for results) */
    size_t len; /* valid bytes in buf */
    unsigned char buf[VMSIG_WORK_SLOT]; /* request or result POD */
} work_node;
/* Intrusive FIFO: push at tail, pop at head. Empty when head == tail == NULL. */
typedef struct { work_node* head; work_node* tail; } work_q;
/* Worker pool state. The two queues, stop and req_count are accessed under `lock`. */
struct vmsig_worker {
    pthread_t* threads; /* array sized for the requested thread count */
    int nthreads; /* threads actually started (and to be joined) */
    pthread_mutex_t lock; /* guards req/res/stop/req_count */
    pthread_cond_t cv; /* signaled on new request and on stop */
    work_q req; /* loop -> workers */
    work_q res; /* workers -> loop */
    int evfd; /* completion eventfd (nonblocking); written once per result */
    int stop; /* set by vmsig_worker_free; workers exit once the queue is empty */
    int max_depth; /* cap on req-queue depth */
    int req_count; /* current req-queue depth */
    vmsig_work_fn fn; /* job callback; NULL => every job yields rc -1 */
    void* user; /* opaque first argument for fn */
};
/* Append node n at the tail of queue q. The caller holds whatever lock protects q. */
static void q_push(work_q* q, work_node* n) {
    n->next = NULL;
    if (q->tail == NULL) {
        q->head = n; /* queue was empty: n is also the head */
    } else {
        q->tail->next = n;
    }
    q->tail = n;
}
/* Detach and return the head of queue q, or NULL when q is empty. */
static work_node* q_pop(work_q* q) {
    work_node* front = q->head;
    if (front != NULL) {
        q->head = front->next;
        if (q->head == NULL) q->tail = NULL; /* removed the last node */
    }
    return front;
}
/* Free every node still linked in q and leave q empty. */
static void q_drain(work_q* q) {
    for (work_node* cur = q->head; cur != NULL; ) {
        work_node* following = cur->next; /* read the link before freeing */
        free(cur);
        cur = following;
    }
    q->head = NULL;
    q->tail = NULL;
}
/* Thread body of one pool worker.
 *
 * Loop: wait for a request (or stop), run the callback outside the lock, queue the
 * result and bump the completion eventfd so the epoll loop wakes up.
 *
 * The request node itself is recycled as the result node: the callback writes into a
 * zeroed stack buffer which is then copied over the node's slot. No allocation happens
 * on this path, so a job can no longer be dropped (without running and without any
 * completion) because a result node could not be allocated. */
static void* worker_main(void* arg) {
    vmsig_worker* w = arg;
    for (;;) {
        pthread_mutex_lock(&w->lock);
        while (!w->stop && !w->req.head) pthread_cond_wait(&w->cv, &w->lock);
        /* On stop we DRAIN the queue: run the remaining requests so that submitted
         * work is not silently lost (matters for jobs carrying resource ownership).
         * We exit only when stop AND the queue is empty. */
        if (w->stop && !w->req.head) { pthread_mutex_unlock(&w->lock); break; }
        work_node* node = q_pop(&w->req);
        if (node) w->req_count--;
        pthread_mutex_unlock(&w->lock);
        if (!node) continue;

        /* The callback gets distinct req/res buffers; res starts zero-filled. */
        unsigned char out[VMSIG_WORK_SLOT];
        memset(out, 0, sizeof out);
        int rc = w->fn ? w->fn(w->user, node->buf, out) : -1;

        /* Turn the request node into the result node. */
        memcpy(node->buf, out, sizeof out);
        node->rc = rc;
        node->len = VMSIG_WORK_SLOT;

        pthread_mutex_lock(&w->lock);
        q_push(&w->res, node);
        pthread_mutex_unlock(&w->lock);

        /* Wake the loop; a failed write is ignored (best effort, as before). */
        uint64_t one = 1;
        ssize_t r = write(w->evfd, &one, sizeof one);
        (void)r;
    }
    return NULL;
}
/* Build a worker pool.
 * fn/user    — job callback and its opaque argument.
 * nthreads   — desired thread count; anything below 1 is treated as 1.
 * max_depth  — request-queue cap; values <= 0 select the default of 512.
 * Returns the pool, or NULL if a resource could not be set up or no thread at all
 * could be started (a partially started pool is kept and returned). */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth) {
    const int want = nthreads < 1 ? 1 : nthreads;

    vmsig_worker* wk = calloc(1, sizeof *wk);
    if (!wk) return NULL;
    wk->fn = fn;
    wk->user = user;
    wk->evfd = -1;
    wk->max_depth = max_depth > 0 ? max_depth : 512;

    wk->threads = calloc((size_t)want, sizeof *wk->threads);
    if (!wk->threads) goto fail_self;

    wk->evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (wk->evfd < 0) goto fail_threads;

    if (pthread_mutex_init(&wk->lock, NULL) != 0) goto fail_evfd;
    if (pthread_cond_init(&wk->cv, NULL) != 0) goto fail_mutex;

    for (int slot = 0; slot < want; slot++) {
        if (pthread_create(&wk->threads[slot], NULL, worker_main, wk) != 0) break;
        wk->nthreads++;
    }
    if (wk->nthreads > 0) return wk;

    /* not a single thread came up: unwind everything, newest resource first */
    pthread_cond_destroy(&wk->cv);
fail_mutex:
    pthread_mutex_destroy(&wk->lock);
fail_evfd:
    close(wk->evfd);
fail_threads:
    free(wk->threads);
fail_self:
    free(wk);
    return NULL;
}
/* Shut the pool down and release it. NULL is accepted.
 * Raises the stop flag, wakes every worker, joins them, then frees whatever is left in
 * both queues, the synchronisation objects, the eventfd and the pool itself. */
void vmsig_worker_free(vmsig_worker* w) {
    if (w == NULL) return;

    pthread_mutex_lock(&w->lock);
    w->stop = 1;
    pthread_cond_broadcast(&w->cv);
    pthread_mutex_unlock(&w->lock);

    int idx = 0;
    while (idx < w->nthreads) {
        pthread_join(w->threads[idx], NULL);
        idx++;
    }

    /* all threads are gone: the queues can be touched without the lock */
    q_drain(&w->req);
    q_drain(&w->res);
    pthread_cond_destroy(&w->cv);
    pthread_mutex_destroy(&w->lock);
    if (w->evfd >= 0) close(w->evfd);
    free(w->threads);
    free(w);
}
/* Completion eventfd of the pool, or -1 when w is NULL. */
int vmsig_worker_evfd(const vmsig_worker* w) {
    if (!w) return -1;
    return w->evfd;
}
/* Queue one request for the workers (the bytes are copied into a fresh node).
 * Returns 0 when queued; -1 for a NULL pool, an oversized request, a queue already at
 * its depth cap, or an allocation failure. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len) {
    if (!w) return -1;
    if (len > VMSIG_WORK_SLOT) return -1;

    int status = -1;
    pthread_mutex_lock(&w->lock);
    if (w->req_count < w->max_depth) { /* queue cap: reject flooding */
        work_node* item = calloc(1, sizeof *item);
        if (item) {
            if (req && len) memcpy(item->buf, req, len);
            item->len = len;
            q_push(&w->req, item);
            w->req_count++;
            pthread_cond_signal(&w->cv);
            status = 0;
        }
    }
    pthread_mutex_unlock(&w->lock);
    return status;
}
/* Drain the completion eventfd: keep reading until a read no longer yields a full
 * 8-byte counter. NULL is accepted. */
void vmsig_worker_ack(vmsig_worker* w) {
    if (!w) return;
    for (;;) {
        uint64_t counter;
        if (read(w->evfd, &counter, sizeof counter) != (ssize_t)sizeof counter) break;
    }
}
/* Fetch one finished result, if any.
 * Copies at most cap bytes of the result into res (skipped when res is NULL or cap is
 * 0) and stores the callback's return code in *rc when rc is given.
 * Returns 1 when a result was taken, 0 when none is pending, -1 for a NULL pool. */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc) {
    if (!w) return -1;

    pthread_mutex_lock(&w->lock);
    work_node* done = q_pop(&w->res);
    pthread_mutex_unlock(&w->lock);
    if (done == NULL) return 0;

    if (res && cap) {
        size_t take = done->len;
        if (cap < take) take = cap; /* never write past the caller's buffer */
        memcpy(res, done->buf, take);
    }
    if (rc) *rc = done->rc;
    free(done);
    return 1;
}
+20
View File
@@ -0,0 +1,20 @@
#ifndef VMSIG_MEMCTX_CFG_H
#define VMSIG_MEMCTX_CFG_H
#include <stdint.h>
/* Private config of the memctx adapter (vmie). Passed as opaque to open(); NOT
 * public (layout per reference: src/<module>/include/). cfg==NULL => stub. */
typedef struct {
    int stub; /* 1 => synthetic kcr3/RO-fd (spine without a VM) */
    const char* ram_path; /* armed: path to guest RAM backing (NOT published outward) */
    uint64_t low; /* below-4G split (vmie_win32_open / locator.low) */
    int ro_fd; /* >=0 => infra supplied a pre-sealed RO-fd (policy); */
    /* <0 => default: open(ram_path, O_RDONLY) / stub-memfd */
} vmsig_memctx_cfg;
/* Max SRC bytes per atomic gva_write (bounds the worker POD slot; mc_req header + src
 * must stay <= VMSIG_WORK_SLOT). Private to the adapter (an executor bound), NOT part of
 * the neutral control contract — control only needs VMSIG_MEMWRITE_INLINE for inline SRC.
 * With the current mc_req layout the header is 28 bytes, so 28 + 192 = 220 bytes
 * (224 once padded to 8-byte alignment), within the 256-byte VMSIG_WORK_SLOT. */
#define VMSIG_MEMWRITE_MAX 192u
#endif /* VMSIG_MEMCTX_CFG_H */
+407
View File
@@ -0,0 +1,407 @@
/* memctx.c — vmie sensor adapter: vends ONE coherent guest address-space context —
* the permanent System DirectoryTableBase (`kcr3`) PAIRED with a RAM-region locator
* and a pre-opened O_RDONLY fd. This is NOT perception and NOT semantics: signaling
* multicasts the datum + RO-fd, while the holder (an S-lib / any control) opens ITS OWN
* read-only vmie_mem from the fd and does gva_read/scan/pmap itself.
*
* Cold bring-up (host_bootstrap) is CPU-bound and blocking, so it runs on an off-loop
* worker; the loop thread only assembles the locator on the completion-eventfd and emits
* the MEMCTX trigger. The epoch is stamped by the CORE (retained-context); on an epoch
* change the core calls reg.invalidate, the adapter re-bootstraps and re-emits MEMCTX.
*
* RO outward is physical: O_RDONLY fd => mmap(PROT_WRITE) -> EACCES, so a write into the
* guest on the holder side is structurally impossible. stub mode (without VMSIG_WITH_VMIE
* or ram_path==NULL) synthesizes a kcr3 and a genuinely RO-mappable fd (memfd + seal) —
* the seam is provable without a VM. */
#define _GNU_SOURCE
#include "vmsig_adapter.h"
#include "memctx.h"
#include "adapter_util.h" /* vmsig_worker (off-loop bootstrap) */
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/epoll.h>
#ifdef VMSIG_WITH_VMIE
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
#endif
/* memfd_create / seal — ABI fallbacks for old glibc/kernel (stub RO-fd backing). */
#ifndef MFD_CLOEXEC
#include <sys/syscall.h>
#include <linux/memfd.h>
/* Fallback for C libraries that lack a memfd_create wrapper: issue the raw system
 * call. Returns the new descriptor, or -1 (errno set by syscall) on failure. */
static int memfd_create(const char* name, unsigned int flags) {
    long fd = syscall(SYS_memfd_create, name, flags);
    return (int)fd;
}
#endif
#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif
#ifndef F_ADD_SEALS
#define F_ADD_SEALS (1024 + 9)
#define F_SEAL_SHRINK 0x0002
#define F_SEAL_GROW 0x0004
#endif
#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE 0x0010 /* kernel 5.1+: forbid future writable mappings */
#endif
#define MC_STUB_SIZE 0x10000u /* 64 KB of synthetic RAM image (stub) */
#define MC_MAX_SEG 8
#define MC_WORKER_DEPTH 16 /* one off-loop thread: rare bootstrap + writes */
enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
/* Worker request (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
 * bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap).
 * boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend
 * on it (armed reads the System DTB). For MC_JOB_WRITE, mc_submit copies the SRC bytes into
 * src[] before queueing, so the worker's gva_write reads them from the request itself. */
typedef struct {
uint32_t op; /* MC_JOB_* (selects which of the fields below are meaningful) */
uint32_t boot_count; /* MC_JOB_BOOTSTRAP: adapter boot counter at submit time */
/* --- MC_JOB_WRITE --- */
uint64_t gva; /* destination guest virtual address */
uint32_t len; /* number of valid bytes in src[] (1..VMSIG_MEMWRITE_MAX, checked in mc_submit) */
uint32_t corr; /* correlation id, echoed back in the completion ACK */
uint32_t origin; /* initiator id, echoed back in the completion ACK */
uint8_t src[VMSIG_MEMWRITE_MAX]; /* SRC bytes copied by mc_submit (gva_write reads this) */
} mc_req;
/* Worker result, filled by mc_job and drained by mc_on_ready. mc_job zeroes the whole
 * struct first, so fields not relevant to the job type read as 0. */
typedef struct {
uint32_t op; /* echoes the job type so on_ready demuxes */
int ok; /* MC_JOB_WRITE result (1 = write done / stub ack, 0 = failed) */
uint32_t corr; /* MC_JOB_WRITE: copied from the request */
uint32_t origin; /* MC_JOB_WRITE: copied from the request */
uint64_t kcr3; /* MC_JOB_BOOTSTRAP result */
} mc_res;
/* memctx adapter instance. Configuration fields are set once in mc_open; cur_* and have_ctx
 * are maintained by mc_on_ready / mc_reg_invalidate; win/mem/kcr3 are written by the worker
 * job (mc_bootstrap_armed) in armed builds. */
struct vmsig_adapter {
uint32_t endpoint;
int stub;
const char* ram_path; /* armed: RAM-backing path (NOT published outward) */
uint64_t low;
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (policy); <0 => default */
vmsig_emit emit;
int registered; /* register_memctx already called */
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
uint32_t boot_count; /* incremented on each (re-)bootstrap */
#ifdef VMSIG_WITH_VMIE
vmie_win32* win; /* held RW handle across the epoch (kcr3 source + gva_write target) */
vmie_mem* mem; /* vmie_win32_mem(win); borrowed, valid until vmie_win32_close */
#endif
uint64_t kcr3; /* System DTB used as the gva_write root; set only by mc_bootstrap_armed (the
                * value delivered to controls travels separately in cur_pod.kcr3) */
/* persistent locator: owned by the loop thread; worker only yields kcr3 into scratch. */
int have_ctx;
vmsig_memctx cur_pod; /* kcr3/low/nseg/flags (epoch stamped by the core) */
vmsig_memseg cur_segs[MC_MAX_SEG];
uint32_t cur_nseg;
int stub_fd; /* stub: memfd of synth RAM (+seal); share_fd reopens it */
};
/* fwd: MEMWRITE completion ACK (defined below mc_submit; used in mc_on_ready demux). */
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin);
/* ---- stub RO-fd: memfd + deterministic contents + seal of future writes ----
 * Creates an anonymous memfd of `size` bytes whose byte at offset i is (i & 0xFF), then
 * seals it against resizing and (where the kernel supports it) against future writable
 * mappings. Returns the fd, or -1 if the memfd could not be created or sized. Filling and
 * sealing are best-effort: a failed fill mapping or a refused seal still yields the fd. */
static int mc_make_stub_fd(uint32_t size) {
    static const char tag[] = "vmsig_memctx";
    int fd = memfd_create(tag, MFD_CLOEXEC | MFD_ALLOW_SEALING);
    if (fd < 0) {
        /* retry without sealing support before giving up */
        fd = memfd_create(tag, MFD_CLOEXEC);
        if (fd < 0) return -1;
    }
    if (ftruncate(fd, (off_t)size) != 0) {
        close(fd);
        return -1;
    }
    /* fill through a short-lived RW mapping; this must happen BEFORE the seal below */
    void* map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (map != MAP_FAILED) {
        uint8_t* bytes = map;
        uint32_t off = 0;
        while (off < size) {
            bytes[off] = (uint8_t)(off & 0xFFu);
            off++;
        }
        munmap(map, size);
    }
    /* FUTURE_WRITE (kernel 5.1+) denies writable mappings even to an O_RDWR reopen; if the
     * kernel rejects it, fall back to the resize seals and rely on the O_RDONLY fd. */
    const int resize_seals = F_SEAL_SHRINK | F_SEAL_GROW;
    if (fcntl(fd, F_ADD_SEALS, resize_seals | F_SEAL_FUTURE_WRITE) != 0)
        (void)fcntl(fd, F_ADD_SEALS, resize_seals);
    return fd;
}
#ifdef VMSIG_WITH_VMIE
/* Armed bring-up (worker thread). Drops any handle left from a previous epoch, opens the RAM
 * backing, runs host_bootstrap, and takes the cr3 of the first process named "System" among
 * the first 16 listed as the guest address-space root. On success the RW handle is retained
 * in a->win/a->mem for later gva_write calls, a->kcr3 and *out_kcr3 are set, and 0 is
 * returned. On any failure the new handle is closed and -1 is returned (a->win stays NULL).
 * Only the RO-fd (share_fd) ever leaves the adapter; writes go through the command plane. */
static int mc_bootstrap_armed(struct vmsig_adapter* a, uint64_t* out_kcr3) {
    if (a->win) {
        /* stale epoch handle: FIFO ordering on the worker means no write is using it now */
        vmie_win32_close(a->win);
        a->win = NULL;
        a->mem = NULL;
    }
    vmie_win32* handle = vmie_win32_open(a->ram_path, a->low);
    if (!handle) return -1;

    uint64_t dtb = 0;
    if (host_bootstrap(handle) == 0) {
        process table[16];
        int count = proc_list(handle, 0, table, 16);
        for (int idx = 0; idx < count && idx < 16; idx++) {
            if (strcmp(table[idx].name, "System") != 0) continue;
            dtb = table[idx].cr3;
            break;
        }
    }
    if (dtb == 0) {
        /* bootstrap failed, System not found, or its cr3 is zero */
        vmie_win32_close(handle);
        return -1;
    }
    a->win = handle;                 /* HOLD: RW handle lives across the epoch */
    a->mem = vmie_win32_mem(handle); /* borrowed; valid until vmie_win32_close(handle) */
    a->kcr3 = dtb;
    *out_kcr3 = dtb;
    return 0;
}
#endif
/* ---- worker job: cold bring-up OR atomic write, off-loop ----------------- *
 * Demultiplexed by rq->op. BOTH run on the SAME single worker thread, so a write on the
 * held handle never races the close-on-rebootstrap (FIFO). The job MUST NOT touch core
 * structures — it only reads a->mem/a->kcr3 (stable between re-bootstraps on this thread).
 *
 * user: the adapter; req: an mc_req; res: an mc_res, zeroed here and then filled.
 * Returns 0 on success and -1 on failure; mc_on_ready treats a non-zero completion code as
 * a failed job (presumably this return value, as surfaced by vmsig_worker_poll). */
static int mc_job(void* user, const void* req, void* res) {
struct vmsig_adapter* a = user;
const mc_req* rq = req;
mc_res* rs = res;
memset(rs, 0, sizeof *rs);
rs->op = rq->op;
if (rq->op == MC_JOB_WRITE) {
/* echo the addressing so the loop thread can route the ACK without keeping state */
rs->corr = rq->corr; rs->origin = rq->origin;
if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */
#ifdef VMSIG_WITH_VMIE
/* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it):
 * the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. */
rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)a->kcr3, (uintptr_t)rq->gva,
rq->src, rq->len) == 0);
return rs->ok ? 0 : -1;
#else
rs->ok = 0;
return -1; /* armed without the build flag: write impossible */
#endif
}
/* MC_JOB_BOOTSTRAP */
if (a->stub) {
/* synthetic root: a fixed base plus one page per boot, so each re-bootstrap differs */
rs->kcr3 = 0xC0DE0000ull + (uint64_t)rq->boot_count * 0x1000ull; /* changes per epoch */
return 0;
}
#ifdef VMSIG_WITH_VMIE
uint64_t kcr3 = 0;
if (mc_bootstrap_armed(a, &kcr3) != 0) return -1;
rs->kcr3 = kcr3;
return 0;
#else
return -1; /* armed without the build flag: bootstrap impossible -> ERROR */
#endif
}
/* Queue a (re-)bootstrap on the worker. Bumps the adapter's boot counter and passes the new
 * value along (the stub job derives its synthetic kcr3 from it). A rejected submit is
 * deliberately ignored: the request is simply dropped. */
static void mc_kick_bootstrap(struct vmsig_adapter* a) {
    mc_req job;
    memset(&job, 0, sizeof job);
    job.op = MC_JOB_BOOTSTRAP;
    job.boot_count = ++a->boot_count;
    (void)vmsig_worker_submit(a->worker, &job, sizeof job); /* full => drop (rare) */
}
/* ---- reg hooks (vmsig_memctx_reg.ctx = a; called by the core on the loop thread) ---- */
/* reg.describe hook: hands the core a copy of the current context header plus a borrowed
 * pointer to the adapter-owned segment table and its length. */
static void mc_reg_describe(void* ctx, vmsig_memctx* out_pod,
                            const vmsig_memseg** out_segs, uint32_t* out_nseg) {
    const struct vmsig_adapter* self = ctx;
    *out_nseg = self->cur_nseg;
    *out_segs = self->cur_segs;
    *out_pod = self->cur_pod; /* kcr3/low/nseg/flags; the core overwrites the epoch */
}
static int mc_reg_share_fd(void* ctx) {
struct vmsig_adapter* a = ctx;
if (a->cfg_ro_fd >= 0)
return fcntl(a->cfg_ro_fd, F_DUPFD_CLOEXEC, 0); /* infra-sealed RO-fd: dup */
if (a->stub) {
if (a->stub_fd < 0) return -1;
char path[64];
snprintf(path, sizeof path, "/proc/self/fd/%d", a->stub_fd);
return open(path, O_RDONLY | O_CLOEXEC); /* fresh O_RDONLY on the backing */
}
if (!a->ram_path) return -1;
return open(a->ram_path, O_RDONLY | O_CLOEXEC); /* armed default */
}
/* reg.invalidate hook: the core changed the epoch. The adapter marks its context as gone
 * and queues a re-bootstrap; mc_on_ready re-emits MEMCTX once that completes. The epoch
 * value itself is not used here (the core owns it). */
static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
    struct vmsig_adapter* self = ctx;
    (void)epoch;
    self->have_ctx = 0;
    mc_kick_bootstrap(self);
}
/* ---- vtable ---- */
/* Allocate a memctx adapter from an optional vmsig_memctx_cfg. With cfg==NULL the adapter is
 * a stub with no path, low==0 and no configured RO-fd. A config that supplies neither
 * ram_path nor a non-negative ro_fd is forced into stub mode. Returns NULL on allocation
 * failure.
 * NOTE(review): ro_fd >= 0 counts as "supplied", so a zero-initialised cfg selects fd 0 —
 * confirm callers set ro_fd to -1 when unused. */
static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
    struct vmsig_adapter* self = calloc(1, sizeof *self);
    if (self == NULL) return NULL;

    /* defaults used when no config is given */
    self->endpoint = endpoint;
    self->stub = 1;
    self->ram_path = NULL;
    self->low = 0;
    self->cfg_ro_fd = -1;
    self->stub_fd = -1;

    const vmsig_memctx_cfg* conf = cfg;
    if (conf != NULL) {
        self->stub = conf->stub;
        self->ram_path = conf->ram_path;
        self->low = conf->low;
        if (conf->ro_fd >= 0) self->cfg_ro_fd = conf->ro_fd;
    }
    if (self->ram_path == NULL && self->cfg_ro_fd < 0) self->stub = 1; /* no path/fd => stub */
    return self;
}
/* Attach the adapter to the core: start the single off-loop worker, create the stub backing
 * fd when needed, publish the worker's completion eventfd as the one readiness source,
 * register the memctx hooks, emit SEAM_UP and queue the first bootstrap.
 * Returns 1 (one reg[] entry filled) on success, -1 if cap < 1, the worker could not be
 * created, or the stub fd could not be built. A failed register_memctx is not fatal: the
 * adapter simply stays unregistered. */
static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
if (cap < 1) return -1;
a->emit = *emit;
a->worker = vmsig_worker_new(mc_job, a, 1, MC_WORKER_DEPTH);
if (!a->worker) return -1;
/* the synthetic backing is only needed when stub mode has no externally supplied RO-fd */
if (a->stub && a->cfg_ro_fd < 0) {
a->stub_fd = mc_make_stub_fd(MC_STUB_SIZE);
if (a->stub_fd < 0) { vmsig_worker_free(a->worker); a->worker = NULL; return -1; }
}
/* worker completion-eventfd as the readiness source (cookie=0). */
reg[0].fd = vmsig_worker_evfd(a->worker);
reg[0].epoll_events = EPOLLIN;
reg[0].shape = VMSIG_RDY_EVENTFD;
reg[0].cookie = 0;
/* register the reg BEFORE the first bootstrap: the core slot gets the hooks. describe
 * is not called until the slot is valid (which only happens after the first MEMCTX). */
if (a->emit.register_memctx) {
vmsig_memctx_reg r;
memset(&r, 0, sizeof r);
r.endpoint = a->endpoint;
r.source = VMSIG_SRC_MEMCTX;
r.ctx = a;
r.describe = mc_reg_describe;
r.share_fd = mc_reg_share_fd;
r.invalidate = mc_reg_invalidate;
if (a->emit.register_memctx(a->emit.token, &r) == 0) a->registered = 1;
}
/* announce the seam before any context exists; MEMCTX follows from mc_on_ready */
vmsig_event up;
memset(&up, 0, sizeof up);
up.kind = VMSIG_EV_SEAM_UP; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
a->emit.emit(a->emit.token, &up);
mc_kick_bootstrap(a); /* first bootstrap off-loop; assemble the locator on completion */
return 1;
}
/* Readiness callback for the worker's completion eventfd. Acknowledges the eventfd, then
 * drains every finished job: write completions become addressed ACT_ACKs, failed bootstraps
 * become ERROR events, and successful bootstraps rebuild the locator and emit MEMCTX.
 * cookie/events are unused (there is only one registered source). Always returns 0. */
static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
(void)cookie; (void)events;
vmsig_worker_ack(a->worker);
mc_res rs;
int rc;
while (vmsig_worker_poll(a->worker, &rs, sizeof rs, &rc) == 1) {
if (rs.op == MC_JOB_WRITE) {
/* atomic write completed: addressed ACT_ACK to the initiator. ok requires both the
 * job's own flag and a zero completion code. */
mc_memwrite_ack(a, rs.ok && rc == 0, rs.corr, rs.origin);
continue;
}
if (rc != 0) {
/* bootstrap failed: ERROR (source MEMCTX); do NOT publish an invalid kcr3. */
vmsig_event er;
memset(&er, 0, sizeof er);
er.kind = VMSIG_EV_ERROR; er.source = VMSIG_SRC_MEMCTX; er.dir = VMSIG_DIR_UP;
er.prio = VMSIG_PRIO_URGENT; er.endpoint = a->endpoint;
a->emit.emit(a->emit.token, &er);
continue;
}
/* assemble the locator on the loop thread from rs.kcr3. a->kcr3 is the gva_write
 * TARGET and is owned SOLELY by the worker thread (set in mc_bootstrap_armed, read by
 * MC_JOB_WRITE — same thread, FIFO happens-before); the loop must NOT also write it, or
 * an in-flight gva_write in mc_job would race it. cur_pod.kcr3 is loop-only (delivery). */
memset(&a->cur_pod, 0, sizeof a->cur_pod);
a->cur_pod.kcr3 = rs.kcr3;
a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE; /* unset low => size of the stub image */
a->cur_pod.flags = VMSIG_MEMCTX_RDONLY;
a->cur_nseg = 1; /* single-low identity (gpa 0 .. low) */
a->cur_segs[0].gpa = 0;
a->cur_segs[0].len = a->cur_pod.low;
a->cur_segs[0].file_off = 0;
a->cur_pod.nseg = a->cur_nseg;
a->have_ctx = 1;
/* emit the MEMCTX trigger: the core authoritatively re-describes + stamps the epoch. */
vmsig_event up;
memset(&up, 0, sizeof up);
up.kind = VMSIG_EV_MEMCTX; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
memcpy(up.inln, &a->cur_pod, sizeof a->cur_pod);
a->emit.emit(a->emit.token, &up);
}
return 0;
}
/* Emit an addressed ACT_ACK for a MEMWRITE (source MEMCTX, to the initiator). The inline
 * body is {ok, corr, origin} — the same shape as the input adapter's ACK — so a control can
 * read ok at offset 0. ok=0 covers extent-deny / no-SRC / queue-full / write failure
 * (default-deny, observable). */
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin) {
    struct { int ok; uint32_t corr; uint32_t origin; } reply = {
        .ok = ok, .corr = corr, .origin = origin
    };
    vmsig_event ack;
    memset(&ack, 0, sizeof ack);
    ack.kind = VMSIG_EV_ACT_ACK;
    ack.source = VMSIG_SRC_MEMCTX;
    ack.dir = VMSIG_DIR_UP;
    ack.prio = VMSIG_PRIO_NORMAL;
    ack.endpoint = a->endpoint;
    ack.corr = corr;
    ack.origin = origin;
    ack.payload.flags = VMSIG_PL_INLINE;
    memcpy(ack.inln, &reply, sizeof reply);
    a->emit.emit(a->emit.token, &ack);
}
/* DOWN MEMWRITE handler: validate the extent, copy SRC into the worker request, and queue
 * the atomic gva_write on the worker. Default-deny: every invalid path (len out of bounds,
 * inline SRC longer than the inline capacity, short/missing payload, no SRC flag, queue
 * full) ACKs ok=0 and does NOT actuate. The completion ACK for a queued write arrives via
 * mc_on_ready.
 *
 * Returns 1 when the event is not a CMD_MEMWRITE (not ours), 0 when it was consumed
 * (queued, or denied with an ok=0 ACK), and -1 when the worker queue rejected the request
 * (an ok=0 ACK has already been emitted in that case).
 *
 * The header is copied out of ev->inln with memcpy rather than read through a cast
 * pointer: inln is a byte buffer, so dereferencing it as a vmsig_memwrite would rely on
 * its alignment (vh_submit reads its command the same way). If both SRC flags are set,
 * the inline source wins. */
static int mc_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind != VMSIG_EV_CMD_MEMWRITE) return 1; /* not for this seam */

    vmsig_memwrite mw;
    memcpy(&mw, ev->inln, sizeof mw);

    uint32_t len = mw.len;
    if (len == 0 || len > VMSIG_MEMWRITE_MAX) { /* extent: bounded by rq.src capacity */
        mc_memwrite_ack(a, 0, ev->corr, ev->origin);
        return 0;
    }

    mc_req rq;
    memset(&rq, 0, sizeof rq);
    rq.op = MC_JOB_WRITE;
    rq.gva = mw.gva;
    rq.len = len;
    rq.corr = ev->corr;
    rq.origin = ev->origin;

    /* copy SRC into the worker request (the off-loop gva_write reads from rq.src) */
    if (mw.flags & VMSIG_MW_SRC_INLINE) {
        if (len > VMSIG_MEMWRITE_INLINE) {
            mc_memwrite_ack(a, 0, ev->corr, ev->origin);
            return 0;
        }
        memcpy(rq.src, ev->inln + sizeof mw, len); /* inln tail right after the header */
    } else if (mw.flags & VMSIG_MW_SRC_PAYLOAD) {
        if (!ev->payload.data || ev->payload.len < len) {
            mc_memwrite_ack(a, 0, ev->corr, ev->origin);
            return 0;
        }
        memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */
    } else {
        mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* no SRC flag */
        return 0;
    }

    if (vmsig_worker_submit(a->worker, &rq, sizeof rq) != 0) {
        mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* queue full -> ACK err */
        return -1;
    }
    return 0; /* completion ACK arrives via mc_on_ready */
}
/* Tear the adapter down. Order matters: the core hooks are unregistered first, then the
 * worker is freed (the comment below states this joins it), and only then is the held vmie
 * handle closed, so no gva_write can still be using it. The stub memfd is closed; a
 * configured RO-fd is left open because it belongs to whoever supplied it. NULL is a no-op. */
static void mc_close(vmsig_adapter* a) {
if (!a) return;
if (a->registered && a->emit.unregister_memctx)
a->emit.unregister_memctx(a->emit.token, a->endpoint);
if (a->worker) vmsig_worker_free(a->worker); /* join: bootstrap + write jobs finished */
#ifdef VMSIG_WITH_VMIE
if (a->win) vmie_win32_close(a->win); /* AFTER worker join: no in-flight gva_write */
#endif
if (a->stub_fd >= 0) close(a->stub_fd);
/* cfg_ro_fd belongs to the infrastructure (the open caller) — do NOT close it. */
free(a);
}
/* Adapter vtable for the memctx seam; every entry points at a static function of this file. */
static const vmsig_adapter_ops MC_OPS = {
.name = "memctx", .source = VMSIG_SRC_MEMCTX, .codec = VMSIG_CODEC_MEMCTX,
.open = mc_open, .attach = mc_attach, .on_readiness = mc_on_ready,
.submit = mc_submit, .close = mc_close
};
/* Public accessor: returns the address of the (static, immutable) memctx vtable. */
const vmsig_adapter_ops* vmsig_memctx_ops(void) { return &MC_OPS; }
+13
View File
@@ -0,0 +1,13 @@
#ifndef VMSIG_VMHOST_H
#define VMSIG_VMHOST_H
/* Private config of the vmhost adapter (signaling's own QMP client).
* cfg==NULL or no qmp_path => stub mode (synthetic events, no QEMU).
* qmp_path given => armed: connect to QEMU's QMP socket ('@' prefix = abstract).
* No build flag needed — the client depends only on POSIX and its own code. */
typedef struct {
int stub;
const char* qmp_path;
} vmsig_vmhost_cfg;
#endif /* VMSIG_VMHOST_H */
+313
View File
@@ -0,0 +1,313 @@
/* vmhost.c — QEMU/QMP host-plane: signaling's OWN layer for observing the VM
* and its basic control. Not a wrapper over a neighbor repo — an own QMP client;
* depends only on POSIX, so it is always functional (no build flag).
*
* This is the first truly epoll-native source: the QMP socket (VMSIG_RDY_FD) lives
* directly in the loop, non-blocking, async events. Up: QMP events -> VM_LIFECYCLE
* (broadcast), EOF -> SEAM_DOWN. Down: CMD_VM -> QMP command with id correlation,
* reply addressed to the initiator. stub mode (no QEMU) synthesizes events/replies. */
#define _GNU_SOURCE
#include "vmsig_adapter.h"
#include "vmhost.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/timerfd.h>
#include <sys/epoll.h>
#define VMHOST_BUF 4096
#define VMHOST_STUB_MS 200
#define VMHOST_MAX_PENDING 64
enum { ST_STUB = 0, ST_CONNECTING, ST_NEGOTIATING, ST_READY, ST_DEAD };
/* One in-flight QMP command: the id sent on the wire, the initiator (origin/corr) the reply
 * must be addressed to, the VMSIG_VMOP_* that was issued, and a slot-in-use flag. */
typedef struct { uint32_t id, origin, corr; uint8_t op; int used; } pend_ent;
/* vmhost adapter instance: one QMP connection (armed) or one periodic timer (stub). */
struct vmsig_adapter {
uint32_t endpoint;
int stub; /* 1 when no qmp_path was configured (set in vh_open) */
const char* qmp_path; /* borrowed from the config; NULL in stub mode */
vmsig_emit emit;
int fd; /* QMP socket (armed) or timerfd (stub) */
int st; /* ST_* connection state */
uint32_t cur; /* current synthetic state (stub) */
char buf[VMHOST_BUF]; /* receive buffer holding the not-yet-terminated tail of the stream */
size_t buflen; /* number of valid bytes in buf */
uint32_t next_id; /* last QMP command id handed out (pre-incremented in vh_submit) */
pend_ent pend[VMHOST_MAX_PENDING]; /* commands awaiting their QMP reply */
};
/* ---- minimal QMP line parse (top-level keys only; full JSON — deferred) ---- */
/* Extract the string value that follows the first occurrence of `key` in `line`.
 *
 * `key` is matched as a raw substring (callers pass it with its quotes, e.g. "\"event\"");
 * spaces, tabs and ':' after it are skipped, and the next character must be '"'. The value
 * is copied up to the closing quote, the end of the line, or cap-1 bytes — whichever comes
 * first — and is always NUL-terminated. There is no escape handling and no notion of JSON
 * nesting: this is a flat scan.
 *
 * Returns 1 when a value was written to `out`, 0 otherwise (key missing, value not a
 * string, or cap == 0). `out` is left untouched when 0 is returned. */
static int jstr(const char* line, const char* key, char* out, size_t cap) {
    if (cap == 0) return 0; /* no room even for the terminator: never write out[0] */
    const char* p = strstr(line, key);
    if (!p) return 0;
    p += strlen(key);
    while (*p == ' ' || *p == '\t' || *p == ':') p++;
    if (*p != '"') return 0;
    p++;
    size_t i = 0;
    while (*p && *p != '"' && i + 1 < cap) out[i++] = *p++;
    out[i] = 0;
    return 1;
}
/* Parse the non-negative decimal number that follows the first occurrence of `key` in
 * `line` (raw substring match; spaces, tabs and ':' after the key are skipped).
 * Returns the strtol() value, or -1 when the key is absent or not followed by a digit. */
static long jnum(const char* line, const char* key) {
    const char* hit = strstr(line, key);
    if (hit == NULL) return -1;
    const char* val = hit + strlen(key);
    val += strspn(val, " \t:");
    const int is_digit = (*val >= '0') && (*val <= '9');
    return is_digit ? strtol(val, NULL, 10) : -1;
}
/* Map a QMP event name to a VMSIG_VM_* state; names not listed yield VMSIG_VM_UNKNOWN.
 * The comparison is exact and case-sensitive. */
static uint32_t ev_state(const char* n) {
    const struct { const char* name; uint32_t state; } map[] = {
        { "RESUME",         VMSIG_VM_RUNNING   },
        { "STOP",           VMSIG_VM_PAUSED    },
        { "SHUTDOWN",       VMSIG_VM_SHUTDOWN  },
        { "RESET",          VMSIG_VM_RESET     },
        { "POWERDOWN",      VMSIG_VM_POWERDOWN },
        { "GUEST_PANICKED", VMSIG_VM_CRASHED   },
    };
    for (size_t k = 0; k < sizeof map / sizeof map[0]; k++) {
        if (strcmp(n, map[k].name) == 0) return map[k].state;
    }
    return VMSIG_VM_UNKNOWN;
}
static uint32_t status_state(const char* s) {
if (!strcmp(s, "running")) return VMSIG_VM_RUNNING;
if (!strcmp(s, "paused")) return VMSIG_VM_PAUSED;
if (!strcmp(s, "shutdown")) return VMSIG_VM_SHUTDOWN;
return VMSIG_VM_UNKNOWN;
}
/* Translate a VMSIG_VMOP_* code into the QMP command name to execute.
 * Returns NULL for an operation this adapter does not know. */
static const char* op_qmp(uint32_t op) {
    if (op == VMSIG_VMOP_QUERY)     return "query-status";
    if (op == VMSIG_VMOP_CONT)      return "cont";
    if (op == VMSIG_VMOP_STOP)      return "stop";
    if (op == VMSIG_VMOP_RESET)     return "system_reset";
    if (op == VMSIG_VMOP_POWERDOWN) return "system_powerdown";
    if (op == VMSIG_VMOP_QUIT)      return "quit";
    return NULL;
}
static pend_ent* pend_alloc(struct vmsig_adapter* a) {
for (int i = 0; i < VMHOST_MAX_PENDING; i++) if (!a->pend[i].used) return &a->pend[i];
return NULL;
}
static pend_ent* pend_find(struct vmsig_adapter* a, uint32_t id) {
for (int i = 0; i < VMHOST_MAX_PENDING; i++)
if (a->pend[i].used && a->pend[i].id == id) return &a->pend[i];
return NULL;
}
/* ---- emission of neutral UP events ---- */
/* Emit an UP VM_LIFECYCLE event carrying `state` inline. origin/corr address the event to
 * an initiator (callers pass 0/0 for a broadcast). RUNNING and PAUSED go out at normal
 * priority; every other state is urgent. */
static void emit_vm(struct vmsig_adapter* a, uint32_t state, uint32_t origin, uint32_t corr) {
    const int routine = (state == VMSIG_VM_RUNNING) || (state == VMSIG_VM_PAUSED);
    vmsig_vm_state body = { state, 0 };
    vmsig_event ev;
    memset(&ev, 0, sizeof ev);
    ev.kind = VMSIG_EV_VM_LIFECYCLE;
    ev.source = VMSIG_SRC_VMHOST;
    ev.dir = VMSIG_DIR_UP;
    ev.prio = routine ? VMSIG_PRIO_NORMAL : VMSIG_PRIO_URGENT;
    ev.endpoint = a->endpoint;
    ev.origin = origin;
    ev.corr = corr;
    ev.payload.flags = VMSIG_PL_INLINE;
    memcpy(ev.inln, &body, sizeof body);
    a->emit.emit(a->emit.token, &ev);
}
/* Emit an urgent, body-less UP event of kind `k` (used for SEAM_UP / SEAM_DOWN). */
static void emit_seam(struct vmsig_adapter* a, vmsig_kind k) {
    vmsig_event ev;
    memset(&ev, 0, sizeof ev);
    ev.kind = k;
    ev.source = VMSIG_SRC_VMHOST;
    ev.dir = VMSIG_DIR_UP;
    ev.prio = VMSIG_PRIO_URGENT;
    ev.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &ev);
}
/* Emit an addressed ACT_ACK to (origin, corr). The first inline byte is 1 for success and
 * 0 for failure; the rest of the event is zeroed. */
static void emit_ack(struct vmsig_adapter* a, uint32_t origin, uint32_t corr, int ok) {
    vmsig_event ev;
    memset(&ev, 0, sizeof ev);
    ev.kind = VMSIG_EV_ACT_ACK;
    ev.source = VMSIG_SRC_VMHOST;
    ev.dir = VMSIG_DIR_UP;
    ev.prio = VMSIG_PRIO_NORMAL;
    ev.endpoint = a->endpoint;
    ev.origin = origin;
    ev.corr = corr;
    ev.payload.flags = VMSIG_PL_INLINE;
    if (ok) ev.inln[0] = (uint8_t)1;
    else    ev.inln[0] = (uint8_t)0;
    a->emit.emit(a->emit.token, &ev);
}
/* ---- armed: handle one QMP line ----
 * Drives the connection state machine with one NUL-terminated line from the socket:
 *   CONNECTING  — a line containing "QMP" (the greeting) triggers qmp_capabilities;
 *   NEGOTIATING — a line containing "return" completes the handshake and emits SEAM_UP;
 *   READY       — "event" lines become broadcast VM_LIFECYCLE events (unknown event names
 *                 are dropped); "return"/"error" lines are matched to a pending command by
 *                 "id" and answered to its initiator, after which the slot is released.
 * Matching is by raw substring (see jstr/jnum), not by JSON structure. Replies whose id is
 * missing or not in the pending table are ignored. */
static void handle_line(struct vmsig_adapter* a, const char* line) {
switch (a->st) {
case ST_CONNECTING:
if (strstr(line, "\"QMP\"")) { /* greeting -> negotiate capabilities */
static const char cap[] = "{\"execute\":\"qmp_capabilities\"}\r\n";
/* NOTE(review): the write result is discarded, so a short or failed write on this
 * non-blocking socket goes unnoticed and the state still advances. */
ssize_t r = write(a->fd, cap, sizeof cap - 1); (void)r;
a->st = ST_NEGOTIATING;
}
break;
case ST_NEGOTIATING:
if (strstr(line, "\"return\"")) { a->st = ST_READY; emit_seam(a, VMSIG_EV_SEAM_UP); }
break;
case ST_READY:
if (strstr(line, "\"event\"")) {
char name[64];
if (jstr(line, "\"event\"", name, sizeof name)) {
uint32_t s = ev_state(name);
if (s != VMSIG_VM_UNKNOWN) emit_vm(a, s, 0, 0); /* broadcast */
}
} else if (strstr(line, "\"return\"") || strstr(line, "\"error\"")) {
long id = jnum(line, "\"id\"");
pend_ent* p = id >= 0 ? pend_find(a, (uint32_t)id) : NULL;
if (p) {
if (p->op == VMSIG_VMOP_QUERY && strstr(line, "\"return\"")) {
/* successful status query: reply with the state itself (UNKNOWN if unparsable) */
char stbuf[32]; uint32_t s = VMSIG_VM_UNKNOWN;
if (jstr(line, "\"status\"", stbuf, sizeof stbuf)) s = status_state(stbuf);
emit_vm(a, s, p->origin, p->corr); /* addressed reply */
} else {
/* every other command, and a failed query: plain ok/fail ACK */
emit_ack(a, p->origin, p->corr, strstr(line, "\"return\"") != NULL);
}
p->used = 0;
}
}
break;
default: break;
}
}
/* The QMP transport is gone: announce SEAM_DOWN, close the socket and enter ST_DEAD.
 * NOTE(review): entries still marked used in a->pend are left as they are, so initiators
 * of in-flight commands receive no reply from this adapter — confirm that is intended. */
static void armed_dead(struct vmsig_adapter* a) {
    emit_seam(a, VMSIG_EV_SEAM_DOWN); /* VM transport died */
    const int sock = a->fd;
    if (sock >= 0) {
        a->fd = -1;
        close(sock); /* close removes the fd from epoll */
    }
    a->st = ST_DEAD;
}
/* ---- vtable ---- */
/* Allocate a vmhost adapter. A non-empty cfg->qmp_path selects armed mode; anything else
 * (NULL cfg, NULL or empty path) selects stub mode. The path pointer is borrowed, not
 * copied. The synthetic state starts as RUNNING. Returns NULL on allocation failure. */
static vmsig_adapter* vh_open(const void* cfg, uint32_t endpoint) {
    struct vmsig_adapter* self = calloc(1, sizeof *self);
    if (self == NULL) return NULL;

    const vmsig_vmhost_cfg* conf = cfg;
    const char* path = NULL;
    if (conf != NULL && conf->qmp_path != NULL && conf->qmp_path[0] != '\0')
        path = conf->qmp_path;

    self->endpoint = endpoint;
    self->qmp_path = path;
    self->stub = (path == NULL); /* path given => armed, otherwise stub */
    self->fd = -1;
    self->cur = VMSIG_VM_RUNNING;
    return self;
}
/* Attach to the core and publish one readiness source in reg[0].
 * Stub: a periodic non-blocking timerfd (VMHOST_STUB_MS) is registered and SEAM_UP is
 * emitted immediately. Armed: a non-blocking AF_UNIX stream socket is connected to
 * qmp_path (a leading '@' selects the abstract namespace) and registered; SEAM_UP is
 * deferred until the QMP handshake finishes in handle_line.
 * Returns 1 on success, -1 on cap < 1, any syscall failure, or a path that does not fit
 * in sun_path. */
static int vh_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
if (cap < 1) return -1;
a->emit = *emit;
if (a->stub) {
a->fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
if (a->fd < 0) return -1;
struct itimerspec its;
memset(&its, 0, sizeof its);
its.it_interval.tv_sec = VMHOST_STUB_MS / 1000u;
its.it_interval.tv_nsec = (long)(VMHOST_STUB_MS % 1000u) * 1000000L;
its.it_value = its.it_interval; /* first expiry after one full period */
if (timerfd_settime(a->fd, 0, &its, NULL) < 0) { close(a->fd); a->fd = -1; return -1; }
a->st = ST_STUB;
reg[0].fd = a->fd; reg[0].epoll_events = EPOLLIN;
reg[0].shape = VMSIG_RDY_TIMERFD; reg[0].cookie = 0;
emit_seam(a, VMSIG_EV_SEAM_UP);
return 1;
}
/* armed: connect to QEMU's QMP socket */
int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
if (fd < 0) return -1;
struct sockaddr_un addr;
memset(&addr, 0, sizeof addr);
addr.sun_family = AF_UNIX;
socklen_t alen;
size_t n = strlen(a->qmp_path);
if (a->qmp_path[0] == '@') {
/* abstract name: the '@' is replaced by a leading NUL, so exactly n bytes of sun_path
 * are used and the address length covers only those bytes (no trailing padding) */
if (n > sizeof addr.sun_path) { close(fd); return -1; }
addr.sun_path[0] = 0;
memcpy(addr.sun_path + 1, a->qmp_path + 1, n - 1);
alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
} else {
/* filesystem path: must leave room for the terminating NUL (already there via memset) */
if (n >= sizeof addr.sun_path) { close(fd); return -1; }
memcpy(addr.sun_path, a->qmp_path, n);
alen = (socklen_t)sizeof addr;
}
if (connect(fd, (struct sockaddr*)&addr, alen) < 0 && errno != EINPROGRESS) {
close(fd); return -1;
}
a->fd = fd; a->st = ST_CONNECTING;
reg[0].fd = fd; reg[0].epoll_events = EPOLLIN;
reg[0].shape = VMSIG_RDY_FD; reg[0].cookie = 0;
/* SEAM_UP is emitted upon reaching READY (after qmp_capabilities) */
return 1;
}
/* Readiness callback. Stub: drain the timerfd, flip the synthetic state between RUNNING
 * and PAUSED, and broadcast it. Armed: read until EAGAIN, split the stream on '\n', feed
 * each complete line to handle_line, and keep the unterminated tail at the front of buf for
 * the next call. EOF or a read error other than EAGAIN/EWOULDBLOCK marks the transport
 * dead. cookie/events are unused. Always returns 0. */
static int vh_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
(void)cookie; (void)events;
if (a->stub) {
uint64_t ticks;
while (read(a->fd, &ticks, sizeof ticks) == (ssize_t)sizeof ticks) { /* drain */ }
a->cur = (a->cur == VMSIG_VM_RUNNING) ? VMSIG_VM_PAUSED : VMSIG_VM_RUNNING;
emit_vm(a, a->cur, 0, 0); /* broadcast */
return 0;
}
if (a->st == ST_DEAD) return 0;
for (;;) {
/* a full buffer with no newline means the line exceeds VMHOST_BUF: the bytes collected so
 * far are discarded. NOTE(review): the remainder of that over-long line is then parsed
 * as if it were the start of a new line. */
if (a->buflen >= sizeof a->buf) a->buflen = 0; /* line overflow -> reset */
ssize_t r = read(a->fd, a->buf + a->buflen, sizeof a->buf - a->buflen);
if (r == 0) { armed_dead(a); return 0; }
if (r < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) break; armed_dead(a); return 0; }
a->buflen += (size_t)r;
size_t start = 0;
/* each '\n' is overwritten with NUL so the line can be passed on as a C string */
for (size_t i = 0; i < a->buflen; i++) {
if (a->buf[i] == '\n') { a->buf[i] = 0; handle_line(a, a->buf + start); start = i + 1; }
}
/* shift the unterminated tail (possibly empty) to the front of the buffer */
if (start > 0) { memmove(a->buf, a->buf + start, a->buflen - start); a->buflen -= start; }
}
return 0;
}
/* DOWN CMD_VM handler.
 * Stub: answers at once with an addressed VM_LIFECYCLE; CONT/STOP also change the synthetic
 * state, QUERY reports it, RESET/POWERDOWN/QUIT report the corresponding state without
 * changing it.
 * Armed: allocates a pending slot, sends {"execute":<cmd>,"id":<n>} and returns; the reply
 * is routed to the initiator by handle_line.
 * Returns 1 when the event is not a CMD_VM or the op is unknown, 0 when handled/sent, and
 * -1 when the connection is not READY, the pending table is full, or the write was not
 * complete. */
static int vh_submit(vmsig_adapter* a, const vmsig_event* ev) {
if (ev->kind != VMSIG_EV_CMD_VM) return 1; /* not for this seam */
vmsig_vm_cmd cmd;
memcpy(&cmd, ev->inln, sizeof cmd);
if (a->stub) {
uint32_t s;
switch (cmd.op) {
case VMSIG_VMOP_QUERY: s = a->cur; break;
case VMSIG_VMOP_CONT: s = a->cur = VMSIG_VM_RUNNING; break;
case VMSIG_VMOP_STOP: s = a->cur = VMSIG_VM_PAUSED; break;
case VMSIG_VMOP_RESET: s = VMSIG_VM_RESET; break;
case VMSIG_VMOP_POWERDOWN: s = VMSIG_VM_POWERDOWN; break;
case VMSIG_VMOP_QUIT: s = VMSIG_VM_SHUTDOWN; break;
default: return 1;
}
emit_vm(a, s, ev->origin, ev->corr); /* reply addressed to the initiator */
return 0;
}
if (a->st != ST_READY) return -1;
const char* q = op_qmp(cmd.op);
if (!q) return 1;
pend_ent* p = pend_alloc(a);
if (!p) return -1; /* backpressure: pending table is full */
uint32_t id = ++a->next_id;
p->used = 1; p->id = id; p->origin = ev->origin; p->corr = ev->corr; p->op = (uint8_t)cmd.op;
char line[160];
int len = snprintf(line, sizeof line, "{\"execute\":\"%s\",\"id\":%u}\r\n", q, id);
ssize_t r = write(a->fd, line, (size_t)len);
/* anything but a complete write releases the slot. NOTE(review): after a SHORT write the
 * bytes already sent stay in the stream, so the next command would be appended to a
 * truncated one. */
if (r != (ssize_t)len) { p->used = 0; return -1; }
return 0;
}
/* Release the adapter: close its socket/timerfd if one is open, then free it.
 * A NULL adapter is ignored. */
static void vh_close(vmsig_adapter* a) {
    if (a == NULL) return;
    const int fd = a->fd;
    if (fd >= 0) close(fd);
    free(a);
}
/* Adapter vtable for the vmhost seam; every entry points at a static function of this file. */
static const vmsig_adapter_ops VH_OPS = {
.name = "vmhost", .source = VMSIG_SRC_VMHOST, .codec = VMSIG_CODEC_VMHOST,
.open = vh_open, .attach = vh_attach, .on_readiness = vh_on_ready,
.submit = vh_submit, .close = vh_close
};
/* Public accessor: returns the address of the (static, immutable) vmhost vtable. */
const vmsig_adapter_ops* vmsig_vmhost_ops(void) { return &VH_OPS; }
+182
View File
@@ -0,0 +1,182 @@
/* cli.c — vmsig spine demonstrator (no real VM).
*
* Brings up the context + epoll core, attaches an in-proc control and a set of stub
* adapters (input/vmhost/memctx) on a single endpoint (VM 0). Proves the bidirectional seam:
* UP: SEAM_UP, VM_LIFECYCLE (vmhost stub tick), MEMCTX (kcr3+locator + RO-fd);
* DOWN: CMD_ACQUIRE+CMD_INPUT -> input adapter -> ACT_ACK (correlation);
* CMD_VM QUERY -> vmhost -> VM_LIFECYCLE (addressed reply).
* The address-space context arrives via MULTICAST: control receives kcr3 and a
* pre-opened O_RDONLY fd of the RAM region (control does NOT see ram_path; it mmaps
* the fd itself, write -> EACCES). (vgpu frame perception now lives in an out-of-repo
* S-lib that consumes this MEMCTX seam — not in signaling.)
* Shutdown: on SIGINT or automatically, once all paths are proven. */
#include "vmsig.h"
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
/* Core handle shared with the signal handler; set in main before the handler is installed. */
static vmsig_core* g_core;
/* SIGINT handler: asks the running core to stop.
 * NOTE(review): this assumes vmsig_core_stop is async-signal-safe — confirm against the
 * core implementation. */
static void on_sigint(int s) { (void)s; if (g_core) vmsig_core_stop(g_core); }
/* Demo state passed as `user` to the in-proc control callbacks. */
typedef struct {
vmsig_core* core; /* stopped from on_event once every path has been observed */
void* ctl; /* in-proc control handle used to send DOWN commands */
int total, lifecycles, acks, seams, memctx; /* counters per observed UP event kind */
uint64_t last_kcr3; /* kcr3 of the most recent MEMCTX */
uint32_t last_epoch; /* epoch of the most recent MEMCTX */
int sent_first; /* sent acquire+input+vm on the first lifecycle tick */
} demo;
/* Printable name for an event kind, used by the demo's trace output.
 * VMSIG_EV_ERROR is named explicitly: the memctx adapter emits it when a bootstrap fails,
 * and it previously showed up in the trace as "?". Kinds without a name here still
 * yield "?". */
static const char* kind_name(vmsig_kind k) {
    switch (k) {
    case VMSIG_EV_SEAM_UP:      return "SEAM_UP";
    case VMSIG_EV_SEAM_DOWN:    return "SEAM_DOWN";
    case VMSIG_EV_VM_LIFECYCLE: return "VM_LIFECYCLE";
    case VMSIG_EV_ACT_ACK:      return "ACT_ACK";
    case VMSIG_EV_MEMCTX:       return "MEMCTX";
    case VMSIG_EV_ERROR:        return "ERROR";
    default:                    return "?";
    }
}
/* Core -> control: address-space context plus a pre-opened O_RDONLY fd of the RAM region.
 * Records and prints the context, then demonstrates the read-only guarantee: a PROT_READ
 * mapping of the fd is expected to succeed and a PROT_WRITE mapping to be refused. The fd
 * is borrowed (the core closes it after the call), so both mappings are released before
 * returning. Always returns 0. */
static int on_memctx(void* user, const vmsig_event* ev, int fd) {
    demo* state = user;
    const vmsig_memctx* ctx = (const vmsig_memctx*)ev->inln;
    state->memctx += 1;
    state->last_kcr3 = ctx->kcr3;
    state->last_epoch = ctx->epoch;

    uint32_t seg_count = 0;
    (void)vmsig_memctx_segs(ev, &seg_count); /* only the count is shown by the demo */
    const int rdonly = (ctx->flags & VMSIG_MEMCTX_RDONLY) ? 1 : 0;
    printf(" UP MEMCTX ep=%u kcr3=%#llx low=%#llx epoch=%u nseg=%u rdonly=%d\n",
           (unsigned)ev->endpoint, (unsigned long long)ctx->kcr3,
           (unsigned long long)ctx->low, (unsigned)ctx->epoch, (unsigned)seg_count,
           rdonly);

    if (fd < 0 || !ctx->low) return 0;
    const size_t span = (size_t)ctx->low;
    void* ro = mmap(NULL, span, PROT_READ, MAP_SHARED, fd, 0);
    if (ro == MAP_FAILED) return 0;

    void* rw = mmap(NULL, span, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    const char* verdict = (rw == MAP_FAILED) ? "EACCES (RO enforced)"
                                             : "UNEXPECTEDLY succeeded!";
    printf(" RO-fd: mmap(PROT_READ) ok, mmap(PROT_WRITE) %s\n", verdict);
    if (rw != MAP_FAILED) munmap(rw, span);
    munmap(ro, span);
    return 0;
}
/* UP event callback of the demo control. Counts and prints every event (MEMCTX is printed
 * by on_memctx instead), and on the first broadcast VM_LIFECYCLE tick sends the three DOWN
 * commands that exercise the seam: CMD_ACQUIRE (INPUT lease), CMD_INPUT and CMD_VM QUERY.
 * Once a MEMCTX, an ACK and two lifecycle events have been seen, the core is asked to stop.
 * Always returns 0. */
static int on_event(void* user, const vmsig_event* ev) {
demo* d = user;
d->total++;
switch (ev->kind) {
case VMSIG_EV_SEAM_UP: d->seams++; break;
case VMSIG_EV_ACT_ACK: d->acks++; break;
default: break;
}
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
d->lifecycles++;
vmsig_vm_state vs; memcpy(&vs, ev->inln, sizeof vs);
/* a non-zero origin marks an addressed reply rather than a broadcast */
printf(" UP VM_LIFECYCLE ep=%u state=%u%s\n",
(unsigned)ev->endpoint, (unsigned)vs.state, ev->origin ? " (reply)" : "");
} else if (ev->kind != VMSIG_EV_MEMCTX) { /* MEMCTX is printed in on_memctx */
printf(" UP %-12s src=%u ep=%u seq=%u prio=%u\n",
kind_name(ev->kind), (unsigned)ev->source, (unsigned)ev->endpoint,
(unsigned)ev->seq, (unsigned)ev->prio);
}
/* On the first lifecycle tick: acquire the INPUT lease, send input, and query VM status. */
if (ev->kind == VMSIG_EV_VM_LIFECYCLE && !ev->origin && !d->sent_first) {
d->sent_first = 1;
/* Input is a destructive class: first acquire the exclusive INPUT lease. */
vmsig_event acq;
memset(&acq, 0, sizeof acq);
acq.kind = VMSIG_EV_CMD_ACQUIRE; acq.source = VMSIG_SRC_INPUT; acq.dir = VMSIG_DIR_DOWN;
acq.prio = VMSIG_PRIO_HIGH; acq.endpoint = 0;
((vmsig_lease_req*)acq.inln)->cls = VMSIG_LEASE_INPUT;
printf(" DOWN CMD_ACQUIRE INPUT@ep0\n");
vmsig_inproc_send(d->ctl, &acq);
vmsig_event in;
memset(&in, 0, sizeof in);
in.kind = VMSIG_EV_CMD_INPUT; in.source = VMSIG_SRC_INPUT; in.dir = VMSIG_DIR_DOWN;
in.prio = VMSIG_PRIO_HIGH; in.endpoint = 0; in.corr = 0xC0FFEEu;
in.payload.flags = VMSIG_PL_INLINE;
vmsig_input act; memset(&act, 0, sizeof act); /* neutral public input contract */
act.kind = VMSIG_INPUT_ABS; act.code = 0; act.value = 100; /* demo: abs axis X = 100 */
memcpy(in.inln, &act, sizeof act);
printf(" DOWN CMD_INPUT ABS axis=0 val=100 corr=0x%X\n", (unsigned)in.corr);
vmsig_inproc_send(d->ctl, &in);
vmsig_event vm;
memset(&vm, 0, sizeof vm);
vm.kind = VMSIG_EV_CMD_VM; vm.source = VMSIG_SRC_VMHOST; vm.dir = VMSIG_DIR_DOWN;
vm.prio = VMSIG_PRIO_NORMAL; vm.endpoint = 0; vm.corr = 0x5Au;
vmsig_vm_cmd vc = { VMSIG_VMOP_QUERY };
memcpy(vm.inln, &vc, sizeof vc);
printf(" DOWN CMD_VM QUERY\n");
vmsig_inproc_send(d->ctl, &vm);
}
/* All paths proven — stop (for automated verification). */
if (d->memctx >= 1 && d->acks >= 1 && d->lifecycles >= 2) vmsig_core_stop(d->core);
return 0;
}
/* Demo entry point: builds the context and core, attaches one trusted in-proc control with
 * a full grant on VM 0, adds the three stub adapters (input, vmhost, memctx) on endpoint 0,
 * runs the loop until on_event or SIGINT stops it, prints a summary and tears everything
 * down. Returns 1 on a setup failure, otherwise the value returned by vmsig_core_run. */
int main(void) {
vmsig_ctx* ctx = vmsig_ctx_new();
if (!ctx) { fprintf(stderr, "ctx_new failed\n"); return 1; }
vmsig_core* core = vmsig_core_new(ctx);
if (!core) { fprintf(stderr, "core_new failed\n"); vmsig_ctx_free(ctx); return 1; }
g_core = core; /* must be set before the handler can fire */
signal(SIGINT, on_sigint);
demo d;
memset(&d, 0, sizeof d);
d.core = core;
vmsig_inproc_cfg ccfg;
memset(&ccfg, 0, sizeof ccfg);
ccfg.on_event = on_event;
ccfg.on_memctx = on_memctx;
ccfg.user = &d;
ccfg.sub.source_mask = 0; /* all sources */
ccfg.sub.prio_min = VMSIG_PRIO_BULK;
ccfg.sub.endpoint_mask = 0; /* all VMs */
void* ctl = vmsig_inproc_control_new(&ccfg);
if (!ctl) { fprintf(stderr, "control_new failed\n"); vmsig_core_free(core); vmsig_ctx_free(ctx); return 1; }
d.ctl = ctl;
/* Trusted in-proc control: full grant on VM 0 (the policy is set by the embedding
 * program; for an out-of-process poller the grant would be issued upon authentication). */
vmsig_grant grant;
memset(&grant, 0, sizeof grant);
grant.principal = 1;
grant.endpoint_mask = 1u << 0;
grant.source_mask = 0xFFFFFFFFu;
grant.cap_mask = VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT | VMSIG_CAP_LIFECYCLE |
VMSIG_CAP_MEMCTX | VMSIG_CAP_POWER | VMSIG_CAP_VM;
/* NOTE(review): the result of vmsig_core_add_control is not checked here; if it can fail,
 * the loop would run without a control and only SIGINT would stop it — confirm. */
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
/* Single endpoint (VM 0), stub adapters (cfg = NULL). */
if (vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0) < 0 ||
vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) < 0 || /* stub QEMU plane */
vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) < 0) { /* stub AS context */
fprintf(stderr, "add_adapter failed\n");
vmsig_core_free(core); vmsig_ctx_free(ctx); return 1;
}
printf("vmsig_cli: loop started (Ctrl-C to stop)\n");
int rc = vmsig_core_run(core);
printf("vmsig_cli: loop finished rc=%d (events=%d seams=%d lifecycles=%d acks=%d memctx=%d kcr3=%#llx epoch=%u)\n",
rc, d.total, d.seams, d.lifecycles, d.acks, d.memctx,
(unsigned long long)d.last_kcr3, (unsigned)d.last_epoch);
vmsig_core_free(core);
vmsig_ctx_free(ctx);
return rc;
}
+57
View File
@@ -0,0 +1,57 @@
/* inproc.c — reference in-process control: a thin shim turning a C callback into
* the neutral control vtable. fd = -1 (no epoll registration); UP arrives via a
* direct on_event call, DOWN leaves through the emit hook installed by the core. */
#include "vmsig_control.h"
#include <stdlib.h>
#include <string.h>
/* State of one in-process control (allocated by vmsig_inproc_control_new, released
 * by ip_close). */
typedef struct {
vmsig_inproc_cfg cfg; /* copy of the caller's config; all-zero when cfg was NULL */
int (*emit_down)(void* token, vmsig_event*); /* DOWN hook stored by ip_set_emit_down; NULL until then */
void* token; /* opaque argument handed back to emit_down */
} inproc_ctl;
/* fd hook of the vtable: the in-process control has no descriptor, so always -1. */
static int ip_fd(void* ctl) {
    (void)ctl;
    return -1;
}
/* subscribe hook: copy the subscription filter stored in the config into *out.
 * Always returns 0. */
static int ip_subscribe(void* ctl, vmsig_sub* out) {
    const inproc_ctl* self = ctl;
    *out = self->cfg.sub;
    return 0;
}
/* deliver hook: forward an UP event to the user's on_event callback and return its
 * result; without a callback the event is accepted silently (returns 0). */
static int ip_deliver(void* ctl, const vmsig_event* ev) {
    inproc_ctl* self = ctl;
    if (!self->cfg.on_event)
        return 0;
    return self->cfg.on_event(self->cfg.user, ev);
}
/* set_emit_down hook: remember the DOWN emitter and its token for vmsig_inproc_send. */
static void ip_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*), void* token) {
    inproc_ctl* self = ctl;
    self->token = token;
    self->emit_down = emit;
}
/* close hook: the control is a single heap block, so releasing it is one free(). */
static void ip_close(void* ctl) {
    free(ctl);
}
/* attach_memctx hook (core -> in-proc consumer): pass the address-space context event
 * and the read-only fd straight to the user's on_memctx callback.
 * The fd is borrowed (dup/mmap to retain it); the core closes it after the call.
 * Returns the callback's result, or -1 when no on_memctx callback is configured. */
static int ip_attach_memctx(void* ctl, const vmsig_event* ev, int fd) {
    inproc_ctl* self = ctl;
    return self->cfg.on_memctx ? self->cfg.on_memctx(self->cfg.user, ev, fd) : -1;
}
/* Control vtable of the in-process transport. */
static const vmsig_control_ops IP_OPS = {
    .name          = "inproc",
    /* identity / registration */
    .fd            = ip_fd,
    .subscribe     = ip_subscribe,
    .set_emit_down = ip_set_emit_down,
    /* UP path */
    .deliver       = ip_deliver,
    .attach_memctx = ip_attach_memctx,
    /* no fd, hence nothing to read */
    .on_readable   = NULL,
    /* teardown */
    .close         = ip_close
};
/* Accessor for the in-process control vtable (a single static instance). */
const vmsig_control_ops* vmsig_inproc_control_ops(void) {
    return &IP_OPS;
}
void* vmsig_inproc_control_new(const vmsig_inproc_cfg* cfg) {
inproc_ctl* c = calloc(1, sizeof *c);
if (!c) return NULL;
if (cfg) c->cfg = *cfg;
return c;
}
/* Send a DOWN event through the emitter stored by ip_set_emit_down.
 * Returns the emitter's result, or -1 when ctl is NULL or no emitter is installed. */
int vmsig_inproc_send(void* ctl, vmsig_event* down) {
    inproc_ctl* self = ctl;
    if (self && self->emit_down)
        return self->emit_down(self->token, down);
    return -1;
}
+318
View File
@@ -0,0 +1,318 @@
/* socket.c — out-of-process control over a unix socket.
*
* The listener registers in the core as a SLOT_SOURCE (listen-fd). On accept the
* peer is authenticated via SO_PEERCRED, the policy issues a neutral grant; an empty
* grant => the connection is closed (not a valid poller). Otherwise a per-conn
* control is created: its fd is driven by the epoll core, DOWN frames are parsed and
* dispatched through emit_down (enforced by the grant), UP events are serialized into
* a frame. On EOF — deferred reap.
*
* DoS protection: per-uid limit of concurrent connections (against eviction of
* legitimate ones); a janitor timerfd detaches "stuck" partial frames (slowloris).
* The global ceiling and slot reuse live in the core. */
#define _GNU_SOURCE
#include "vmsig_socket.h"
#include "core_internal.h" /* core_add_source, core_request_drop, add_control */
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/timerfd.h>
#include <sys/stat.h> /* umask */
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>
#include <stdint.h>
#include <time.h>
/* Tunables of the unix-socket transport. */
#define VMSIG_SOCK_PER_UID_MAX 8 /* concurrent connections per uid; on_accept rejects once this many are open */
#define VMSIG_SOCK_IDLE_NS (10ull * 1000000000ull) /* timeout for a stuck partial frame (ns since the last read) */
#define VMSIG_SOCK_JANITOR_S 5 /* sweep period: interval of the janitor timerfd, in seconds */
typedef struct sock_listener sock_listener;
/* Current CLOCK_MONOTONIC reading, flattened to nanoseconds. */
static uint64_t now_ns(void) {
    const uint64_t ns_per_sec = 1000000000ull;
    struct timespec t;
    clock_gettime(CLOCK_MONOTONIC, &t);
    uint64_t whole = (uint64_t)t.tv_sec;
    uint64_t frac = (uint64_t)t.tv_nsec;
    return whole * ns_per_sec + frac;
}
/* ===== wire codec (public — also for external clients) ===== */

/* Serialize an event into a wire frame: the frame is zeroed, stamped with the
 * magic/version, then the routing header and the inline bytes are copied over.
 * Only the fields copied below travel on the wire. */
void vmsig_wire_encode(vmsig_wire* w, const vmsig_event* ev) {
    memset(w, 0, sizeof *w);

    /* framing */
    w->magic = VMSIG_WIRE_MAGIC;
    w->version = VMSIG_WIRE_VERSION;

    /* routing header */
    w->kind = ev->kind;
    w->source = ev->source;
    w->dir = ev->dir;
    w->prio = ev->prio;
    w->endpoint = ev->endpoint;
    w->corr = ev->corr;

    /* inline body */
    memcpy(w->inln, ev->inln, sizeof w->inln);
}
/* Parse a wire frame into an event. A frame with the wrong magic or version is
 * rejected (-1) and *ev is left untouched. On success (0) the event is zeroed, the
 * header and inline bytes are copied, and the payload is flagged as inline. */
int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev) {
    int framing_ok = (w->magic == VMSIG_WIRE_MAGIC) && (w->version == VMSIG_WIRE_VERSION);
    if (!framing_ok)
        return -1;

    memset(ev, 0, sizeof *ev);
    ev->kind = w->kind;
    ev->source = w->source;
    ev->dir = w->dir;
    ev->prio = w->prio;
    ev->endpoint = w->endpoint;
    ev->corr = w->corr;
    memcpy(ev->inln, w->inln, sizeof ev->inln);
    ev->payload.flags = VMSIG_PL_INLINE;
    return 0;
}
/* ===== per-conn control ===== */
/* One accepted peer connection. Created in on_accept, released in conn_close. */
typedef struct sock_conn {
int fd; /* accepted socket (non-blocking, close-on-exec) */
vmsig_core* core;
int id; /* control id returned by vmsig_core_add_control; -1 until then */
uint32_t uid; /* peer uid from SO_PEERCRED; (uint32_t)-1 if the lookup failed */
uint64_t last_ns; /* activity for the janitor */
sock_listener* L;
struct sock_conn* lnext; /* listener's connection list */
int (*emit_down)(void* token, vmsig_event*); /* DOWN hook stored by conn_set_emit_down */
void* token; /* opaque argument handed back to emit_down */
uint8_t buf[sizeof(vmsig_wire)]; /* reassembly buffer for exactly one inbound frame */
size_t buflen; /* bytes of the current frame received so far */
} sock_conn;
static int conn_fd(void* ctl) { return ((sock_conn*)ctl)->fd; }
/* subscribe hook: an all-zero filter, i.e. everything; the grant does the gating. */
static int conn_subscribe(void* ctl, vmsig_sub* out) {
    (void)ctl;
    memset(out, 0, sizeof *out);
    return 0;
}
static int conn_deliver(void* ctl, const vmsig_event* ev) {
sock_conn* c = ctl;
vmsig_wire w;
vmsig_wire_encode(&w, ev);
ssize_t r = write(c->fd, &w, sizeof w); /* best-effort; EAGAIN => frame dropped */
(void)r;
return 0;
}
/* set_emit_down hook: remember the DOWN emitter and its token for conn_on_readable. */
static void conn_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*), void* token) {
    sock_conn* conn = ctl;
    conn->token = token;
    conn->emit_down = emit;
}
/* on_readable hook: drain the socket, reassembling fixed-size frames in c->buf.
 *
 * - EOF or a hard read error schedules a deferred drop of the connection.
 * - EINTR is retried rather than treated as an error.
 * - EAGAIN/EWOULDBLOCK ends the drain; a partial frame stays buffered, and
 *   last_ns records the last successful read for the janitor.
 * - A complete frame is copied into a properly typed vmsig_wire before decoding, so
 *   the byte buffer is never read through a vmsig_wire pointer. Frames that fail
 *   decoding are discarded. Decoded events are forced to DIR_DOWN before being
 *   handed to emit_down.
 * Always returns 0. */
static int conn_on_readable(void* ctl) {
    sock_conn* c = ctl;
    for (;;) {
        ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen);
        if (n == 0) { core_request_drop(c->core, c->id); return 0; }   /* EOF */
        if (n < 0) {
            if (errno == EINTR) continue;
            if (errno == EAGAIN || errno == EWOULDBLOCK) break;
            core_request_drop(c->core, c->id);
            return 0;
        }
        c->last_ns = now_ns();
        c->buflen += (size_t)n;
        if (c->buflen < sizeof c->buf) continue;   /* frame still incomplete */

        vmsig_wire w;
        memcpy(&w, c->buf, sizeof w);              /* buf is exactly sizeof(vmsig_wire) */
        vmsig_event ev;
        if (vmsig_wire_decode(&w, &ev) == 0) {
            ev.dir = VMSIG_DIR_DOWN;               /* from a poller — DOWN only */
            if (c->emit_down) c->emit_down(c->token, &ev);   /* enforced by the grant */
        }
        c->buflen = 0;
    }
    return 0;
}
/* ===== listener ===== */
/* One listening unix socket plus its janitor timer. Allocated in vmsig_socket_attach
 * and released by listener_free. */
struct sock_listener {
int listen_fd; /* listening socket; accept4() is called on it in on_accept */
int janitor_fd; /* periodic timerfd driving on_janitor */
vmsig_core* core;
vmsig_socket_policy policy; /* maps (uid, pid, ud) to a grant; NULL => every peer is rejected */
void* ud; /* opaque argument for policy */
sock_conn* conns; /* singly-linked list of active connections */
};
/* Remove c from the listener's connection list; a no-op if it is not on the list. */
static void listener_unlink(sock_listener* L, sock_conn* c) {
    for (sock_conn** link = &L->conns; *link; link = &(*link)->lnext) {
        if (*link == c) {
            *link = c->lnext;
            return;
        }
    }
}
/* Number of connections on the listener's list that belong to the given uid. */
static int listener_uid_count(sock_listener* L, uint32_t uid) {
    int matches = 0;
    sock_conn* it = L->conns;
    while (it) {
        if (it->uid == uid)
            matches++;
        it = it->lnext;
    }
    return matches;
}
static void conn_close(void* ctl) {
sock_conn* c = ctl;
if (c->L) listener_unlink(c->L, c);
if (c->fd >= 0) close(c->fd);
free(c);
}
/* Send exactly one vmsig_wire frame together with ONE descriptor carried as
 * SCM_RIGHTS ancillary data. The control stream stays fixed-framed at
 * sizeof(vmsig_wire): the client reads one frame via recvmsg and extracts the fd only
 * on an fd-carrying frame. The fd is all-or-nothing, so a short sendmsg counts as a
 * failure. Shared primitive for the memctx handoff (one SCM_RIGHTS mechanism).
 * Returns 0 when the whole frame was sent, -1 otherwise; EINTR is retried. */
static int conn_send_fd_frame(sock_conn* c, const vmsig_wire* w, int fd) {
    /* control buffer, aligned for struct cmsghdr through the union */
    union {
        char buf[CMSG_SPACE(sizeof(int))];
        struct cmsghdr align;
    } ctrl;
    memset(&ctrl, 0, sizeof ctrl);

    struct iovec frame;
    frame.iov_base = (void*)w;
    frame.iov_len = sizeof *w;

    struct msghdr msg;
    memset(&msg, 0, sizeof msg);
    msg.msg_iov = &frame;
    msg.msg_iovlen = 1;
    msg.msg_control = ctrl.buf;
    msg.msg_controllen = sizeof ctrl.buf;

    struct cmsghdr* hdr = CMSG_FIRSTHDR(&msg);
    hdr->cmsg_level = SOL_SOCKET;
    hdr->cmsg_type = SCM_RIGHTS;
    hdr->cmsg_len = CMSG_LEN(sizeof(int));
    memcpy(CMSG_DATA(hdr), &fd, sizeof(int));

    ssize_t sent;
    do {
        sent = sendmsg(c->fd, &msg, MSG_NOSIGNAL);
    } while (sent < 0 && errno == EINTR);

    if (sent < 0)
        return -1;
    return ((size_t)sent == sizeof *w) ? 0 : -1;   /* partial frame -> failure */
}
/* attach_memctx hook (core -> socket control): hand off an address-space context
 * (kind=MEMCTX, inln=vmsig_memctx POD) plus the RO-fd of the RAM region in a cmsg.
 * The segs payload does NOT go on the wire (the fixed-framed vmsig_wire carries only
 * inln); the holder opens it at `low`.
 * Returns -1 for a negative fd or a NULL event, otherwise the send result. */
static int conn_attach_memctx(void* ctl, const vmsig_event* ev, int fd) {
    if (!ev || fd < 0)
        return -1;
    sock_conn* conn = ctl;
    vmsig_wire frame;
    vmsig_wire_encode(&frame, ev);   /* payload is not serialized */
    return conn_send_fd_frame(conn, &frame, fd);
}
/* Control vtable of a per-connection unix-socket control. */
static const vmsig_control_ops CONN_OPS = {
    .name          = "socket",
    .fd            = conn_fd,
    .subscribe     = conn_subscribe,
    .set_emit_down = conn_set_emit_down,
    .deliver       = conn_deliver,
    .attach_memctx = conn_attach_memctx,
    .on_readable   = conn_on_readable,
    .close         = conn_close
};
static void on_accept(void* user, uint32_t events) {
(void)events;
sock_listener* L = user;
for (;;) {
int fd = accept4(L->listen_fd, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
if (fd < 0) break; /* EAGAIN / other — done */
uint32_t uid = (uint32_t)-1, pid = 0;
struct ucred uc; socklen_t ul = sizeof uc;
if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &uc, &ul) == 0) {
uid = (uint32_t)uc.uid; pid = (uint32_t)uc.pid;
}
vmsig_grant g;
if (L->policy) g = L->policy(uid, pid, L->ud);
else memset(&g, 0, sizeof g);
if (g.cap_mask == 0 || g.endpoint_mask == 0) { /* not a valid poller */
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
core_audit(L->core, &a);
close(fd);
continue;
}
if (listener_uid_count(L, uid) >= VMSIG_SOCK_PER_UID_MAX) { /* anti-eviction */
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
core_audit(L->core, &a);
close(fd);
continue;
}
sock_conn* conn = calloc(1, sizeof *conn);
if (!conn) { close(fd); continue; }
conn->fd = fd; conn->core = L->core; conn->id = -1;
conn->uid = uid; conn->last_ns = now_ns(); conn->L = L;
conn->lnext = L->conns; L->conns = conn;
int id = vmsig_core_add_control(L->core, &CONN_OPS, conn, &g);
if (id < 0) { /* no slot — reject */
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
core_audit(L->core, &a);
listener_unlink(L, conn); close(fd); free(conn); continue;
}
conn->id = id;
vmsig_audit a = { VMSIG_AUDIT_ADMIT, g.principal, 0, 0, pid };
core_audit(L->core, &a);
}
}
/* Janitor timer callback: drain the timerfd, then request a drop for every connection
 * holding a partial frame with no read activity for more than VMSIG_SOCK_IDLE_NS
 * (slowloris protection). */
static void on_janitor(void* user, uint32_t events) {
    sock_listener* L = user;
    (void)events;

    uint64_t ticks;
    for (;;) {
        if (read(L->janitor_fd, &ticks, sizeof ticks) != (ssize_t)sizeof ticks)
            break;   /* drained */
    }

    const uint64_t now = now_ns();
    sock_conn* it = L->conns;
    while (it) {
        int stuck = it->buflen > 0 && now - it->last_ns > VMSIG_SOCK_IDLE_NS;
        if (stuck)
            core_request_drop(it->core, it->id);
        it = it->lnext;
    }
}
/* Listener teardown, run from vmsig_core_free through the listen source's on_free:
 * closes both descriptors (when open) and frees the listener. */
static void listener_free(void* user) {
    sock_listener* L = user;
    int fds[2] = { L->janitor_fd, L->listen_fd };
    for (int i = 0; i < 2; i++) {
        if (fds[i] >= 0)
            close(fds[i]);
    }
    free(L);
}
/* Create a listening unix socket at `path` and attach it to the core.
 *
 * A path starting with '@' is bound in the abstract namespace (the '@' becomes the
 * leading NUL); any other path is a filesystem socket, whose previous file is
 * unlinked first. Two fd sources are registered: the listen fd (on_accept, owning the
 * listener through listener_free) and a periodic janitor timerfd (on_janitor).
 *
 * Returns 0 on success, -1 on failure. Every failure except the last one releases
 * everything acquired so far. If registering the janitor source fails, the listener
 * already belongs to the core and is released by vmsig_core_free. */
int vmsig_socket_attach(vmsig_core* core, const char* path,
                        vmsig_socket_policy policy, void* ud) {
    int jfd = -1;
    sock_listener* L = NULL;

    if (!core || !path || !*path) return -1;
    int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
    if (fd < 0) return -1;

    const int is_abstract = (path[0] == '@');
    const size_t n = strlen(path);
    struct sockaddr_un addr;
    socklen_t alen;
    memset(&addr, 0, sizeof addr);
    addr.sun_family = AF_UNIX;
    if (is_abstract) {                        /* abstract namespace */
        if (n > sizeof addr.sun_path) goto fail;
        addr.sun_path[0] = 0;
        memcpy(addr.sun_path + 1, path + 1, n - 1);
        alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    } else {                                  /* filesystem path */
        if (n >= sizeof addr.sun_path) goto fail;
        unlink(path);
        memcpy(addr.sun_path, path, n);
        alen = (socklen_t)sizeof addr;
    }

    /* Create the filesystem socket with restrictive perms (0600): the path must not be
     * the only gate — connect requires write, so we open it to the owner only.
     * (An abstract socket has no FS perms; its access is bounded by the net namespace.) */
    mode_t saved_umask = 0;
    if (!is_abstract) saved_umask = umask(0177);
    int bound = bind(fd, (struct sockaddr*)&addr, alen);
    if (!is_abstract) umask(saved_umask);
    if (bound < 0) goto fail;
    if (listen(fd, 64) < 0) goto fail;

    /* periodic janitor timer: first expiry after one period, then every period */
    jfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    if (jfd < 0) goto fail;
    struct itimerspec period;
    memset(&period, 0, sizeof period);
    period.it_interval.tv_sec = VMSIG_SOCK_JANITOR_S;
    period.it_value = period.it_interval;
    if (timerfd_settime(jfd, 0, &period, NULL) < 0) goto fail;

    L = calloc(1, sizeof *L);
    if (!L) goto fail;
    L->listen_fd = fd;
    L->janitor_fd = jfd;
    L->core = core;
    L->policy = policy;
    L->ud = ud;

    /* the listen source owns the listener (on_free=listener_free closes both fds + free) */
    if (core_add_source(core, fd, on_accept, L, listener_free) < 0) goto fail;
    /* janitor without on_free (L already belongs to the core); on error core_free releases it */
    if (core_add_source(core, jfd, on_janitor, L, NULL) < 0) return -1;
    return 0;

fail:
    if (jfd >= 0) close(jfd);
    close(fd);
    free(L);
    return -1;
}
+224
View File
@@ -0,0 +1,224 @@
/* core.c — core lifecycle and registration of adapters/controls.
* The loop and pumps live in loop.c. */
#include "core_internal.h"
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
/* Add fd to the core's epoll set and return the slot describing it.
 * A detached (SLOT_DEAD) slot is recycled when one exists, so c->slots[] does not
 * grow on every connection; otherwise a new slot is allocated and the table is grown
 * on demand (16 entries first, then doubling). The slot is zeroed and only role/fd
 * are set; the caller fills in the role-specific fields.
 * Returns NULL on allocation failure or when epoll_ctl rejects the fd; in the latter
 * case the slot is left as SLOT_DEAD for later reuse. */
core_slot* core_register_fd(vmsig_core* c, int fd, uint32_t epoll_events, slot_role role) {
    core_slot* slot = NULL;
    for (int i = 0; i < c->nslots && !slot; i++) {
        if (c->slots[i]->role == SLOT_DEAD)
            slot = c->slots[i];
    }

    if (!slot) {
        if (c->nslots == c->cap_slots) {
            int grown = c->cap_slots ? c->cap_slots * 2 : 16;
            core_slot** table = realloc(c->slots, (size_t)grown * sizeof *table);
            if (!table) return NULL;
            c->slots = table;
            c->cap_slots = grown;
        }
        slot = calloc(1, sizeof *slot);
        if (!slot) return NULL;
        c->slots[c->nslots++] = slot;
    }

    memset(slot, 0, sizeof *slot);
    slot->fd = fd;
    slot->role = role;

    struct epoll_event ee;
    memset(&ee, 0, sizeof ee);
    ee.data.ptr = slot;   /* the loop gets the slot back from each epoll event */
    ee.events = epoll_events;
    if (epoll_ctl(c->epfd, EPOLL_CTL_ADD, fd, &ee) >= 0)
        return slot;

    slot->role = SLOT_DEAD;
    return NULL;
}
/* Create a core bound to ctx (which stays owned by the caller).
 * Sets up the epoll instance and the wake eventfd, then registers the context's
 * pacing timerfds (UP and DOWN) as loop sources when the context exposes them.
 * Returns NULL on any failure. All failure paths share one cleanup, which also frees
 * the slot and slot table that core_register_fd may have allocated before its
 * epoll_ctl call failed. */
vmsig_core* vmsig_core_new(vmsig_ctx* ctx) {
    if (!ctx) return NULL;
    vmsig_core* c = calloc(1, sizeof *c);
    if (!c) return NULL;
    c->ctx = ctx;
    c->epfd = -1;
    c->wake_fd = -1;

    c->epfd = epoll_create1(EPOLL_CLOEXEC);
    if (c->epfd < 0) goto fail;
    c->wake_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (c->wake_fd < 0) goto fail;
    if (!core_register_fd(c, c->wake_fd, EPOLLIN, SLOT_WAKEUP)) goto fail;

    /* context pacing timerfds (created in ctx_new) as loop sources; a registration
     * failure here is tolerated, matching the existing behaviour */
    for (int d = VMSIG_DIR_UP; d <= VMSIG_DIR_DOWN; d++) {
        int tfd = vmsig_ctx_timing_fd(ctx, (vmsig_dir)d);
        if (tfd >= 0) core_register_fd(c, tfd, EPOLLIN, SLOT_CTX_TIMING);
    }
    return c;

fail:
    for (int i = 0; i < c->nslots; i++) free(c->slots[i]);
    free(c->slots);
    if (c->wake_fd >= 0) close(c->wake_fd);
    if (c->epfd >= 0) close(c->epfd);
    free(c);
    return NULL;
}
/* Open an adapter for `endpoint`, attach it to the core and register its fds.
 *
 * ops->attach fills reg[] with up to VMSIG_ADAPTER_FDS descriptors; each one becomes
 * a SLOT_ADAPTER slot (EPOLLIN unless the adapter asked for other events).
 * Returns the adapter id (its index in c->adapters), or -1 on failure.
 *
 * On failure the adapter is closed and nothing stays registered: slots created
 * earlier in the same call are removed from epoll and marked SLOT_DEAD, so no slot
 * keeps pointing at the closed adapter. A count from attach larger than the reg[]
 * array is treated as an error instead of being read past the array. */
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
                           const void* cfg, uint32_t endpoint) {
    if (!c || !ops || c->nadapters >= VMSIG_MAX_ADAPTERS) return -1;
    vmsig_adapter* a = ops->open(cfg, endpoint);
    if (!a) return -1;

    vmsig_emit emit = { core_emit_up, core_register_memctx, core_unregister_memctx, c };
    vmsig_fd_reg reg[VMSIG_ADAPTER_FDS];
    memset(reg, 0, sizeof reg);
    int n = ops->attach(a, &emit, reg, VMSIG_ADAPTER_FDS);
    if (n < 0 || n > VMSIG_ADAPTER_FDS) { ops->close(a); return -1; }

    core_slot* added[VMSIG_ADAPTER_FDS];
    for (int i = 0; i < n; i++) {
        uint32_t events = reg[i].epoll_events ? reg[i].epoll_events : (uint32_t)EPOLLIN;
        core_slot* s = core_register_fd(c, reg[i].fd, events, SLOT_ADAPTER);
        if (!s) {
            /* roll back before closing the adapter; slots are individually
             * allocated, so the saved pointers are still valid */
            for (int j = 0; j < i; j++) {
                epoll_ctl(c->epfd, EPOLL_CTL_DEL, added[j]->fd, NULL);
                added[j]->role = SLOT_DEAD;
            }
            ops->close(a);
            return -1;
        }
        s->ops = ops;
        s->adapter = a;
        s->cookie = reg[i].cookie;
        added[i] = s;
    }

    int id = c->nadapters;
    c->adapters[id].ops = ops;
    c->adapters[id].a = a;
    c->adapters[id].endpoint = endpoint;
    c->nadapters++;
    return id;
}
/* Register a control (poller) with the core.
 *
 * Picks a free entry in controls[] (a reaped one if available, otherwise a new one up
 * to VMSIG_MAX_CONTROLS), bumps its generation, stores ops/ctl/grant, asks the control
 * for its subscription, installs the DOWN emitter and, if the control has an fd,
 * registers it in epoll. A NULL grant leaves the grant zeroed (default-deny).
 * Returns the control id, or -1 on failure.
 *
 * Ownership: on failure the control stays with the caller. If fd registration fails
 * the entry is deactivated again and the emit hook is cleared, so the core keeps no
 * pointer to a control the caller is about to free. */
int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ctl,
                           const vmsig_grant* grant) {
    if (!c || !ops) return -1;

    /* reuse a freed (reaped) slot; otherwise grow up to the ceiling */
    int id = -1;
    for (int i = 0; i < c->ncontrols; i++)
        if (!c->controls[i].active) { id = i; break; }
    if (id < 0) {
        if (c->ncontrols >= VMSIG_MAX_CONTROLS) return -1;
        id = c->ncontrols++;
    }

    core_control_ent* e = &c->controls[id];
    uint16_t gen = e->gen;          /* generation survives the slot memset */
    memset(e, 0, sizeof *e);
    e->gen = (uint16_t)(gen + 1);   /* new generation for this (re)use */
    e->ops = ops;
    e->ctl = ctl;
    e->active = 1;
    if (grant) e->grant = *grant;   /* otherwise stays zero => default-deny */
    e->dctx.core = c;
    e->dctx.ctl_id = id;
    if (ops->subscribe) ops->subscribe(ctl, &e->sub);
    /* emit_down token is our down_ctx, so emit_down can find this control's grant */
    if (ops->set_emit_down) ops->set_emit_down(ctl, core_emit_down, &e->dctx);

    int fd = ops->fd ? ops->fd(ctl) : -1;
    if (fd >= 0) {
        core_slot* s = core_register_fd(c, fd, EPOLLIN, SLOT_CONTROL);
        if (!s) {
            /* undo the registration: the entry becomes reusable (its bumped
             * generation is kept) and the control loses the emit hook */
            if (ops->set_emit_down) ops->set_emit_down(ctl, NULL, NULL);
            e->ops = NULL;
            e->ctl = NULL;
            e->active = 0;
            return -1;
        }
        s->cops = ops;
        s->ctl = ctl;
        e->slot = s;
    }

    /* Late subscriber: replay retained MEMCTX (if a context is already published and
     * this control is qualified). For a control added BEFORE the first publication,
     * the cell is not yet valid — it receives MEMCTX via the normal multicast in pump_up. */
    core_memctx_replay(c, id);
    return id;   /* ncontrols already bumped when picking id (on growth); reuse does not grow it */
}
/* ===== MEMCTX registration: per-endpoint retain cell (called by the adapter on the loop thread) =====
 * Stores the address-space context adapter's reg hooks in the endpoint's cell. The
 * core keeps only these hooks, NOT a copy of the locator: on delivery/replay it calls
 * reg.describe/share_fd. valid/epoch are maintained in route/epoch_bump (not here):
 * register only records that "the adapter is connected".
 * Returns 0, or -1 for a NULL argument or an endpoint outside 0..63. */
int core_register_memctx(void* token, const vmsig_memctx_reg* reg) {
    vmsig_core* core = token;
    if (!core || !reg)
        return -1;
    if (reg->endpoint >= 64)
        return -1;
    core_memctx_cell* cell = &core->memctx[reg->endpoint];
    cell->registered = 1;
    cell->reg = *reg;
    return 0;
}
/* Forget the endpoint's context adapter: the cell becomes unregistered and invalid,
 * and its hooks are wiped. Ignores a NULL core or an endpoint outside 0..63. */
void core_unregister_memctx(void* token, uint32_t endpoint) {
    vmsig_core* core = token;
    if (core && endpoint < 64) {
        core_memctx_cell* cell = &core->memctx[endpoint];
        memset(&cell->reg, 0, sizeof cell->reg);
        cell->valid = 0;
        cell->registered = 0;
    }
}
/* Install (or clear, with cb == NULL) the audit callback and its user pointer.
 * A NULL core is ignored. */
void vmsig_core_set_audit(vmsig_core* c, void (*cb)(void* ud, const vmsig_audit* a), void* ud) {
    if (c) {
        c->audit_ud = ud;
        c->audit_cb = cb;
    }
}
/* Emit an audit record through the installed callback; a no-op when the core is NULL
 * or no callback is set. */
void core_audit(vmsig_core* c, const vmsig_audit* a) {
    if (!c || !c->audit_cb)
        return;
    c->audit_cb(c->audit_ud, a);
}
/* Install the lease arbitration (preemption) policy and its user pointer.
 * A NULL core is ignored. The lease table itself needs no setup here: lease[][] is
 * zeroed in vmsig_core_new (calloc) => all cells free. */
void vmsig_core_set_arb_policy(vmsig_core* c, vmsig_arb_policy cb, void* ud) {
    if (c) {
        c->arb_ud = ud;
        c->arb_cb = cb;
    }
}
/* Register an arbitrary fd with a callback as a SLOT_SOURCE (EPOLLIN).
 * `user` is stored for the callback and for on_free, which vmsig_core_free invokes
 * when non-NULL. Returns 0 on success; -1 for a NULL core, a negative fd, a NULL
 * callback, or a failed epoll registration. */
int core_add_source(vmsig_core* c, int fd, void (*cb)(void* user, uint32_t events),
                    void* user, void (*on_free)(void* user)) {
    if (!c || !cb || fd < 0)
        return -1;
    core_slot* slot = core_register_fd(c, fd, EPOLLIN, SLOT_SOURCE);
    if (slot) {
        slot->source_user = user;
        slot->on_free = on_free;
        slot->on_source = cb;
        return 0;
    }
    return -1;
}
/* Flag a control for deferred reaping and wake the loop so a reap pass runs (the loop
 * is not stopped). Ignores a NULL core and ids outside [0, ncontrols). */
void core_request_drop(vmsig_core* c, int ctl_id) {
    if (!c)
        return;
    int in_range = (ctl_id >= 0) && (ctl_id < c->ncontrols);
    if (!in_range)
        return;
    c->controls[ctl_id].reap = 1;
    core_wake(c);
}
/* Tear the core down. Order matters:
 *   1. adapters are closed FIRST: their close stops off-loop workers and unregisters
 *      their seams (e.g. memctx) BEFORE destruction;
 *   2. every active control is closed;
 *   3. fd sources run their on_free (e.g. the unix listener closes its listen/janitor
 *      fds and frees itself);
 *   4. slots, the slot table, the wake eventfd and the epoll fd are released.
 * The ctx is not owned by the core and is left to its owner. NULL is accepted. */
void vmsig_core_free(vmsig_core* c) {
    if (!c) return;

    for (int i = 0; i < c->nadapters; i++) {
        core_adapter_ent* ad = &c->adapters[i];
        if (ad->ops->close)
            ad->ops->close(ad->a);
    }

    for (int i = 0; i < c->ncontrols; i++) {
        core_control_ent* ent = &c->controls[i];
        if (!ent->active)
            continue;
        if (ent->ops->close)
            ent->ops->close(ent->ctl);
    }

    for (int i = 0; i < c->nslots; i++) {
        core_slot* slot = c->slots[i];
        if (slot->role == SLOT_SOURCE && slot->on_free)
            slot->on_free(slot->source_user);
    }

    for (int i = 0; i < c->nslots; i++)
        free(c->slots[i]);
    free(c->slots);

    if (c->wake_fd >= 0) close(c->wake_fd);
    if (c->epfd >= 0) close(c->epfd);
    free(c);
}
+170
View File
@@ -0,0 +1,170 @@
#ifndef VMSIG_CORE_INTERNAL_H
#define VMSIG_CORE_INTERNAL_H
#include "vmsig_core.h"
#include <signal.h>
/* Private internals of the epoll core. Each registered fd carries a
* core_slot* in epoll_event.data.ptr; the slot's role decides how to handle it. */
/* Compile-time limits of the core. */
#define VMSIG_MAX_EVENTS 64 /* NOTE(review): presumably the epoll_wait batch size — confirm in loop.c */
#define VMSIG_MAX_ADAPTERS 256 /* size of adapters[]: up to ~64 VMs * 3 adapters + slack (mode A) */
#define VMSIG_MAX_CONTROLS 64 /* size of controls[]: concurrent pollers; more => processes (C) */
#define VMSIG_ADAPTER_FDS 8 /* max fds per adapter (size of the reg[] array passed to attach) */
#define VMSIG_DOWN_PENDING_MAX 256 /* ceiling of DOWN commands per poller in ctx (fairness) */
/* Role of a registered fd; decides how the loop handles events on it. */
typedef enum {
SLOT_WAKEUP, /* wake/stop eventfd */
SLOT_ADAPTER, /* adapter fd (timerfd/eventfd/socket) */
SLOT_CTX_TIMING, /* context pacing timerfd */
SLOT_CONTROL, /* out-of-process control socket */
SLOT_SOURCE, /* arbitrary fd + callback (e.g. listen-fd) */
SLOT_DEAD /* detached (reaped); loop ignores it; core_register_fd recycles it */
} slot_role;
/* One registered fd. A pointer to the slot is stored in epoll_event.data.ptr.
 * core_register_fd zeroes the slot and sets role/fd; the caller then fills in the
 * group of fields that matches the role. */
typedef struct core_slot {
slot_role role;
int fd;
/* for SLOT_ADAPTER */
const vmsig_adapter_ops* ops;
vmsig_adapter* adapter;
uint32_t cookie; /* per-fd value supplied by the adapter in its fd registration */
/* for SLOT_CONTROL */
const vmsig_control_ops* cops;
void* ctl;
/* for SLOT_SOURCE */
void (*on_source)(void* user, uint32_t events);
void (*on_free)(void* user); /* invoked at core_free (source cleanup) */
void* source_user;
} core_slot;
/* One attached adapter, as recorded by vmsig_core_add_adapter. */
typedef struct {
const vmsig_adapter_ops* ops; /* adapter vtable */
vmsig_adapter* a; /* instance returned by ops->open */
uint32_t endpoint; /* endpoint (VM) the adapter was opened for */
} core_adapter_ent;
/* ===== Retained address-space context (MEMCTX seam) =====
 * The core retains per-endpoint "a current context exists in the current epoch" + the
 * adapter's reg hooks (describe/share_fd/invalidate). It replays to a late qualified
 * subscriber (CAP_MEMCTX + source_mask + endpoint), re-sharing the RO-fd. It does NOT
 * store a copy of the locator: on delivery/replay it calls reg.describe (adapter
 * snapshot) + reg.share_fd (fresh RO-fd). Invalidated on epoch change; cleared on
 * unregister/free. */
typedef struct {
int registered; /* adapter called register_memctx (reg valid) */
int valid; /* a published context exists in the current epoch */
uint32_t epoch; /* snapshot epoch (== core epoch[ep] when valid) */
vmsig_memctx_reg reg; /* valid when registered; zeroed again by core_unregister_memctx */
} core_memctx_cell;
/* ===== Lease layer (arbitration of exclusive ownership of destructive resources) =====
 * One cell per (endpoint, lease-class): who owns it (origin) + a snapshot of arb_prio at
 * acquisition time. owner=0 => free. The snapshot (rather than the live grant) makes the
 * policy resilient to the owner's grant changing after acquisition. */
#define VMSIG_LEASE_CLASSES 3 /* INPUT, POWER, MEMWRITE (== VMSIG_LEASE_CLASS_MAX) */
typedef struct {
uint32_t owner; /* origin (gen<<16)|(id+1) of the owner; 0 = free */
uint32_t owner_prio; /* owner's arb_prio at acquisition time (snapshot) */
} core_lease_cell;
struct vmsig_core; /* fwd for core_down_ctx */
/* DOWN emission context: handed to a control in set_emit_down so emit_down knows WHICH
 * control issued the command (for grant lookup and enforcement). Stable: lives in the
 * fixed controls[] array. */
typedef struct {
struct vmsig_core* core; /* owning core */
int ctl_id; /* index of the control in core->controls[] */
} core_down_ctx;
/* One entry of controls[]: a registered control (poller) and its bookkeeping.
 * vmsig_core_add_control zeroes the entry on each (re)use, preserving only gen. */
typedef struct {
const vmsig_control_ops* ops;
void* ctl;
vmsig_sub sub; /* subscription filter obtained from ops->subscribe */
vmsig_grant grant; /* poller's rights ceiling (default-deny) */
core_down_ctx dctx; /* token for emit_down */
int active; /* 0 = detached/reaped (slot free) */
int reap; /* reap requested (deferred) */
core_slot* slot; /* SLOT_CONTROL fd slot (or NULL) */
uint32_t pending; /* DOWN commands of this poller in ctx (fairness cap) */
uint16_t gen; /* slot generation: +1 on each (re)use */
} core_control_ent;
/* The epoll core: descriptors, registered adapters/controls, fd slots and the
 * per-endpoint state (epoch, retained context, leases). Allocated zeroed by
 * vmsig_core_new and released by vmsig_core_free. */
struct vmsig_core {
int epfd; /* epoll instance */
int wake_fd; /* eventfd: nudge + stop */
vmsig_ctx* ctx; /* transfer context; not owned (its owner frees it) */
volatile sig_atomic_t stopping;
core_adapter_ent adapters[VMSIG_MAX_ADAPTERS];
int nadapters;
core_control_ent controls[VMSIG_MAX_CONTROLS];
int ncontrols; /* highest used index + 1; entries below it may be inactive */
core_slot** slots; /* all allocated slots (for free) */
int nslots;
int cap_slots; /* allocated length of slots[] */
uint32_t epoch[64]; /* per-endpoint VM session epoch */
core_memctx_cell memctx[64]; /* per-endpoint retained context */
core_lease_cell lease[64][VMSIG_LEASE_CLASSES]; /* lease per (endpoint, class) */
vmsig_arb_policy arb_cb; /* preemption policy (NULL=default) */
void* arb_ud;
void (*audit_cb)(void* ud, const vmsig_audit* a);
void* audit_ud;
};
/* Emit an audit record (no-op if no callback is set). Defined in core.c. */
void core_audit(vmsig_core* c, const vmsig_audit* a);
/* Register an fd in epoll + create a slot (see core.c). */
core_slot* core_register_fd(vmsig_core* c, int fd, uint32_t epoll_events, slot_role role);
/* Register an arbitrary fd source with a callback (e.g. a socket listen-fd).
* The callback is called on the loop thread when the fd is ready. on_free (may be NULL)
* is called at vmsig_core_free to clean up the source's resource. 0/-1. */
int core_add_source(vmsig_core* c, int fd, void (*cb)(void* user, uint32_t events),
void* user, void (*on_free)(void* user));
/* Request detaching a control by id (deferred reap after the batch: epoll DEL,
* close fd, ops->close). Safe to call from the control's own on_readable. */
void core_request_drop(vmsig_core* c, int ctl_id);
/* emit hooks handed to adapters (UP) and controls (DOWN). Defined in loop.c. */
int core_emit_up (void* token, vmsig_event* ev);
int core_emit_down(void* token, vmsig_event* ev);
/* ===== Address-space context (MEMCTX seam; retained context) ===== */
/* Context registration hooks (handed to the adapter in vmsig_emit; defined in core.c). */
int core_register_memctx (void* token, const vmsig_memctx_reg* reg);
void core_unregister_memctx(void* token, uint32_t endpoint);
/* Multicast MEMCTX to qualified subscribers + mark the retain cell valid
* (from pump_up on the VMSIG_EV_MEMCTX trigger; defined in loop.c). */
void core_memctx_route(vmsig_core* c, const vmsig_event* trigger);
/* Replay retained MEMCTX to a single (late) subscriber (from vmsig_core_add_control;
* defined in loop.c). */
void core_memctx_replay(vmsig_core* c, int ctl_id);
/* Bump the endpoint's epoch on a destructive lifecycle transition: epoch++, invalidate
* the retain cell, emit MEMCTX_INVALIDATED, request re-bootstrap from the adapter.
* Observed by the core in pump_up on UP VM_LIFECYCLE (defined in loop.c). */
void core_epoch_bump(vmsig_core* c, uint32_t endpoint);
/* ===== Lease layer (defined in loop.c) ===== */
/* Intercept CMD_ACQUIRE/RELEASE/LEASE_STATUS (synchronously from core_emit_down, not in ctx). */
void core_lease_acquire(vmsig_core* c, int ctl_id, const vmsig_event* ev);
void core_lease_release(vmsig_core* c, int ctl_id, const vmsig_event* ev);
void core_lease_status (vmsig_core* c, int ctl_id, const vmsig_event* ev);
/* Reclaim the lease of a dead control (from core_reap, BEFORE e->active=0). */
void core_lease_reap_control(vmsig_core* c, int ctl_id);
/* Wake the loop (eventfd nudge). Defined in loop.c. */
void core_wake(vmsig_core* c);
#endif /* VMSIG_CORE_INTERNAL_H */
+620
View File
@@ -0,0 +1,620 @@
/* loop.c — non-blocking epoll loop, dispatch, pump up/down, emit hooks,
* graceful shutdown. No sleep/polling/busy-wait: every wakeup is an fd. */
#include "core_internal.h"
#include <unistd.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/epoll.h>
/* Read 8-byte counter values from fd until a read no longer returns a full value. */
static void drain_counter_fd(int fd) {
    uint64_t sink;
    for (;;) {
        ssize_t got = read(fd, &sink, sizeof sink);
        if (got != (ssize_t)sizeof sink)
            break;
    }
}
/* Nudge the loop by adding 1 to the wake eventfd. The write result is deliberately
 * ignored. */
void core_wake(vmsig_core* c) {
    const uint64_t tick = 1;
    ssize_t ignored = write(c->wake_fd, &tick, sizeof tick);
    (void)ignored;
}
/* UP emit hook handed to adapters: submit the event to the context's UP direction,
 * then wake the loop (covers emission off the loop thread). Returns the submit result. */
int core_emit_up(void* token, vmsig_event* ev) {
    vmsig_core* core = token;
    int rc = vmsig_ctx_submit(core->ctx, VMSIG_DIR_UP, ev);
    core_wake(core);
    return rc;
}
/* Build an origin tag: the low 16 bits hold the control's id+1 (so 0 never names a
 * control), the high 16 bits the slot generation. Lets a reply be addressed to the
 * initiator and stale slot reuse be filtered out. */
static uint32_t origin_pack(int id, uint16_t gen) {
    uint32_t id_part = (uint32_t)(id + 1) & 0xFFFFu;
    uint32_t gen_part = (uint32_t)gen << 16;
    return gen_part | id_part;
}
/* Resolve an origin tag to its live control entry. Returns NULL for origin 0, an id
 * outside [0, ncontrols), an inactive entry, or a generation mismatch (the slot has
 * been reused since the tag was made). */
static core_control_ent* origin_ctl(vmsig_core* c, uint32_t origin) {
    if (origin == 0)
        return NULL;

    uint16_t want_gen = (uint16_t)(origin >> 16);
    int idx = (int)(origin & 0xFFFFu) - 1;
    if (idx < 0 || idx >= c->ncontrols)
        return NULL;

    core_control_ent* ent = &c->controls[idx];
    return (ent->active && ent->gen == want_gen) ? ent : NULL;
}
/* Capability required by a DOWN command; 0 (deny) for unknown kinds.
 *   CMD_INPUT / CMD_QUERY_INPUT -> CAP_INPUT (injection / held-key query)
 *   CMD_LIFECYCLE               -> CAP_POWER for powerdown/reset (code in inln[0]),
 *                                  CAP_LIFECYCLE for the safe ones
 *   CMD_VM                      -> CAP_POWER for reset/powerdown/quit (op in inln[0],
 *                                  vmsig_vm_cmd, op<256), otherwise CAP_VM
 *   CMD_MEMWRITE                -> CAP_MEMWRITE (atomic guest-memory write) */
static uint32_t cap_for_down(const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CMD_INPUT || ev->kind == VMSIG_EV_CMD_QUERY_INPUT)
        return VMSIG_CAP_INPUT;

    if (ev->kind == VMSIG_EV_CMD_LIFECYCLE) {
        int destructive = ev->inln[0] == VMSIG_LIFE_POWERDOWN ||
                          ev->inln[0] == VMSIG_LIFE_RESET;
        return destructive ? VMSIG_CAP_POWER : VMSIG_CAP_LIFECYCLE;
    }

    if (ev->kind == VMSIG_EV_CMD_VM) {
        int destructive = ev->inln[0] == VMSIG_VMOP_RESET ||
                          ev->inln[0] == VMSIG_VMOP_POWERDOWN ||
                          ev->inln[0] == VMSIG_VMOP_QUIT;
        return destructive ? VMSIG_CAP_POWER : VMSIG_CAP_VM;
    }

    if (ev->kind == VMSIG_EV_CMD_MEMWRITE)
        return VMSIG_CAP_MEMWRITE;

    return 0;
}
/* ===== Lease layer: classification and helpers ===== */
/* Lease class a DOWN command must hold, or -1 when it is not lease-gated.
 * MIRRORS cap_for_down by destructiveness:
 *   - CMD_INPUT                        -> INPUT;
 *   - CMD_LIFECYCLE powerdown/reset    -> POWER;
 *   - CMD_VM reset/powerdown/quit      -> POWER;
 *   - CMD_MEMWRITE                     -> MEMWRITE (always destructive: a write to
 *                                         shared guest memory);
 *   - everything else (safe/read-only/stream/query) -> -1.
 * CMD_LIFECYCLE and CMD_VM route to DIFFERENT adapters (INPUT/VMHOST) but share ONE
 * POWER class per endpoint: a single owner of VM destruction (intentional). */
static int lease_class_for_down(const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CMD_INPUT)
        return VMSIG_LEASE_INPUT;

    if (ev->kind == VMSIG_EV_CMD_MEMWRITE)
        return VMSIG_LEASE_MEMWRITE;

    if (ev->kind == VMSIG_EV_CMD_LIFECYCLE) {
        if (ev->inln[0] == VMSIG_LIFE_POWERDOWN || ev->inln[0] == VMSIG_LIFE_RESET)
            return VMSIG_LEASE_POWER;
        return -1;
    }

    if (ev->kind == VMSIG_EV_CMD_VM) {
        if (ev->inln[0] == VMSIG_VMOP_RESET || ev->inln[0] == VMSIG_VMOP_POWERDOWN ||
            ev->inln[0] == VMSIG_VMOP_QUIT)
            return VMSIG_LEASE_POWER;
        return -1;
    }

    return -1;
}
/* Capability required to lease a class (probing/holding a class without the cap is
 * forbidden); 0 for an unknown class. */
static uint32_t cap_for_lease_class(int cls) {
    if (cls == VMSIG_LEASE_INPUT) return VMSIG_CAP_INPUT;
    if (cls == VMSIG_LEASE_POWER) return VMSIG_CAP_POWER;
    if (cls == VMSIG_LEASE_MEMWRITE) return VMSIG_CAP_MEMWRITE;
    return 0u;
}
/* Source bits that entitle a grant to hold a lease class; 0 for an unknown class.
 * Mirrors the grant's source ceiling (which grant_allows_down enforces on the command
 * itself). Leasing is intercepted BEFORE grant_allows_down, so the source is checked
 * HERE — otherwise a principal without the required seam could hold someone else's
 * cell (DoS), bypassing source_mask.
 *   INPUT    -> SRC_INPUT
 *   POWER    -> SRC_INPUT (lifecycle) OR SRC_VMHOST (vm): one destructive path suffices
 *   MEMWRITE -> SRC_MEMCTX (lives on the MEMCTX seam) */
static uint32_t source_mask_for_lease_class(int cls) {
    const uint32_t input_bit = 1u << VMSIG_SRC_INPUT;
    const uint32_t vmhost_bit = 1u << VMSIG_SRC_VMHOST;
    const uint32_t memctx_bit = 1u << VMSIG_SRC_MEMCTX;

    if (cls == VMSIG_LEASE_INPUT) return input_bit;
    if (cls == VMSIG_LEASE_POWER) return input_bit | vmhost_bit;
    if (cls == VMSIG_LEASE_MEMWRITE) return memctx_bit;
    return 0u;
}
/* Capability mask that entitles a control to receive an UP event:
 *   - cursor state is screen data, available to a GUI observer (OBSERVE) OR an input
 *     actor (INPUT);
 *   - anything from the MEMCTX source (MEMCTX/MEMCTX_INVALIDATED) -> CAP_MEMCTX;
 *   - otherwise CAP_OBSERVE (frames/SEAM/generic).
 * grant_allows_up tests for intersection, so OBSERVE|INPUT means "either of the two". */
static uint32_t cap_for_up(const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CURSOR_STATE)
        return VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT;
    if (ev->source == VMSIG_SRC_MEMCTX)
        return VMSIG_CAP_MEMCTX;
    return VMSIG_CAP_OBSERVE;
}
/* Authorize a DOWN command against a grant (default-deny).
 * Passes only when the endpoint is in endpoint_mask, the source is in source_mask
 * and the grant holds the capability cap_for_down() requires for the command.
 * Returns 1 when allowed, 0 otherwise. */
static int grant_allows_down(const vmsig_grant* g, const vmsig_event* ev) {
    if (ev->endpoint >= 64) return 0;          /* 64-bit mask: <=64 VMs/cores */
    /* source is supplied by the control; shifting a 32-bit mask by >= 32 is
     * undefined, so an out-of-range source is rejected before the shift. */
    if ((uint32_t)ev->source >= 32u) return 0;
    if (!(g->endpoint_mask & (1ull << ev->endpoint))) return 0;
    if (!(g->source_mask & (1u << ev->source))) return 0; /* source ceiling on DOWN too */
    uint32_t need = cap_for_down(ev);
    return need && (g->cap_mask & need);
}
/* Authorize delivery of an UP event to the holder of a grant (default-deny).
 * Requires a capability from cap_for_up(), the endpoint in endpoint_mask and the
 * source in source_mask. Returns 1 when allowed, 0 otherwise. */
static int grant_allows_up(const vmsig_grant* g, const vmsig_event* ev) {
    if (ev->endpoint >= 64) return 0;
    /* Shifting a 32-bit mask by >= 32 is undefined; an out-of-range source can
     * never be covered by source_mask, so reject it before the shift. */
    if ((uint32_t)ev->source >= 32u) return 0;
    if (!(g->cap_mask & cap_for_up(ev))) return 0;
    if (!(g->endpoint_mask & (1ull << ev->endpoint))) return 0;
    if (!(g->source_mask & (1u << ev->source))) return 0;
    return 1;
}
/* Look up the adapter registered for (endpoint, source). pump_down uses this to
 * route a DOWN command. Returns the first match in registration order, or NULL. */
static core_adapter_ent* core_find_adapter(vmsig_core* c, uint32_t endpoint,
                                           vmsig_source source) {
    int idx = 0;
    while (idx < c->nadapters) {
        core_adapter_ent* cand = &c->adapters[idx];
        if (cand->ops->source == source && cand->endpoint == endpoint)
            return cand;
        idx++;
    }
    return NULL;
}
/* ===== Lease layer: grant / release / status / reclaim =====
 * Lease commands are intercepted in core_emit_down before grant_allows_down; they
 * are handled synchronously, never enter the ctx queue and do not count toward
 * pending. Replies travel UP, addressed to the initiator through its origin. */

/* Emit an addressed UP reply for a lease request.
 *   req    - request being answered; only its endpoint and origin are read
 *   kind   - reply kind (GRANTED / DENIED / REVOKED / RELEASED)
 *   cls    - lease class the reply refers to
 *   reason - second field of the vmsig_lease_req body; callers pass a deny reason or 0 */
static void lease_reply(vmsig_core* c, const vmsig_event* req, vmsig_kind kind,
                        uint32_t cls, uint32_t reason) {
    const vmsig_lease_req body = { cls, reason };
    vmsig_event reply;
    memset(&reply, 0, sizeof reply);
    reply.dir = VMSIG_DIR_UP;
    reply.source = VMSIG_SRC_CORE;
    reply.kind = kind;
    reply.prio = VMSIG_PRIO_URGENT;
    reply.origin = req->origin;
    reply.endpoint = req->endpoint;
    memcpy(reply.inln, &body, sizeof body);
    core_emit_up(c, &reply);
}
/* Refuse a lease request: record the denial in the audit trail first (so
 * capability/endpoint probing via ACQUIRE stays observable), then send an
 * addressed LEASE_DENIED carrying the same class and reason to the initiator. */
static void lease_deny(vmsig_core* c, const vmsig_event* req, uint32_t principal,
                       uint32_t cls, uint32_t reason) {
    vmsig_audit rec = { VMSIG_AUDIT_LEASE_DENIED, principal, req->endpoint, cls, reason };
    core_audit(c, &rec);
    lease_reply(c, req, VMSIG_EV_LEASE_DENIED, cls, reason);
}
/* Resolve an owner key (packed origin) to the principal of its grant.
 * Returns 0 when origin_ctl() finds no control for the key. */
static uint32_t lease_owner_principal(vmsig_core* c, uint32_t owner) {
    core_control_ent* holder = origin_ctl(c, owner);
    if (!holder) return 0u;
    return holder->grant.principal;
}
/* IMPORTANT (layer isolation): signaling does NOT release held keys on lease loss and
 * does NOT track held state at all. held is the ACTUATOR's record (vmctl); release is the
 * control's decision. On owner change/reset the cell is simply freed; stuck keys remain
 * the control's concern (it can issue CMD_QUERY_INPUT and release its own while owner). */

/* Handle CMD_ACQUIRE from control slot ctl_id.
 *   ev->inln     carries a vmsig_lease_req (class to acquire)
 *   ev->endpoint selects the per-endpoint cell
 *   ev->origin   is the caller's packed origin (set by core_emit_down); it is the
 *                owner key stored in the cell and the address of the reply
 * Outcome is always reported to the initiator as LEASE_GRANTED or LEASE_DENIED;
 * a preempted owner additionally receives LEASE_REVOKED. */
void core_lease_acquire(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
    core_control_ent* e = &c->controls[ctl_id];
    /* Copy the request out of the inline buffer instead of dereferencing a cast
     * pointer: the buffer is not guaranteed to be aligned/typed as vmsig_lease_req,
     * and the reply path serializes the same struct with memcpy. */
    vmsig_lease_req rq;
    memcpy(&rq, ev->inln, sizeof rq);
    uint32_t cls = rq.cls;
    uint32_t ep = ev->endpoint;

    /* 1. validate class/endpoint/grant (default-deny; every denial is audited). */
    if (cls >= VMSIG_LEASE_CLASS_MAX) {
        lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_BADCLASS);
        return;
    }
    if (ep >= 64 || !(e->grant.endpoint_mask & (1ull << ep))) {
        lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOGRANT);
        return;
    }
    if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) {
        lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOCAP);
        return;
    }
    /* source ceiling: holding a class without rights to its seam is forbidden (else a
     * DoS hold of someone else's cell bypassing source_mask, since interception is
     * BEFORE grant_allows_down). */
    if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) {
        lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOGRANT);
        return;
    }

    core_lease_cell* cell = &c->lease[ep][cls];
    uint32_t me = ev->origin;

    /* 2a. free OR dead owner (origin_ctl==NULL) => take as if free. */
    core_control_ent* owner_e = cell->owner ? origin_ctl(c, cell->owner) : NULL;
    if (cell->owner == 0 || !owner_e) {
        cell->owner = me;
        cell->owner_prio = e->grant.arb_prio;
        vmsig_audit a = { VMSIG_AUDIT_LEASE_GRANTED, e->grant.principal, ep, cls, 0 };
        core_audit(c, &a);
        lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
        return;
    }

    /* 2b. owner is the caller itself => idempotent GRANTED (no second audit record). */
    if (cell->owner == me) {
        lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
        return;
    }

    /* 2c. held by a LIVE other owner => policy. The arbitration callback, when
     * installed, sees the incumbent's live grant and the challenger's grant;
     * otherwise a strictly higher arb_prio preempts. */
    vmsig_arb_decision dec;
    if (c->arb_cb) {
        dec = c->arb_cb(c->arb_ud, ep, cls, &owner_e->grant, &e->grant);
    } else {
        dec = (e->grant.arb_prio > cell->owner_prio) ? VMSIG_ARB_PREEMPT : VMSIG_ARB_DENY;
    }
    if (dec != VMSIG_ARB_PREEMPT) {
        /* strictly lower priority => LOWER_PRIO; otherwise the owner keeps it (HELD). */
        uint32_t reason = (e->grant.arb_prio < cell->owner_prio)
                              ? VMSIG_LEASE_DENY_LOWER_PRIO
                              : VMSIG_LEASE_DENY_HELD;
        lease_deny(c, ev, e->grant.principal, cls, reason);
        return;
    }

    /* PREEMPT: notify the old owner (REVOKED), switch owner, grant to the new one.
     * signaling does NOT release held keys (that is the control's decision): the
     * ex-owner is responsible for its stuck keys; the new owner can query held
     * (CMD_QUERY_INPUT) and release them. */
    uint32_t old_owner = cell->owner;
    {
        /* Synthetic request addressed to the old owner so lease_reply targets it. */
        vmsig_event rv;
        memset(&rv, 0, sizeof rv);
        rv.endpoint = ep;
        rv.origin = old_owner;
        lease_reply(c, &rv, VMSIG_EV_LEASE_REVOKED, cls, 0);
    }
    {
        vmsig_audit a = { VMSIG_AUDIT_LEASE_REVOKED, owner_e->grant.principal, ep, cls, 0 };
        core_audit(c, &a);
    }
    cell->owner = me;
    cell->owner_prio = e->grant.arb_prio;
    {
        vmsig_audit a = { VMSIG_AUDIT_LEASE_GRANTED, e->grant.principal, ep, cls, 0 };
        core_audit(c, &a);
    }
    lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
}
/* Handle CMD_RELEASE from control slot ctl_id: free the (endpoint, class) cell if
 * the caller owns it and answer with an addressed LEASE_RELEASED. Any request
 * that fails validation, or comes from a non-owner, is a silent no-op. */
void core_lease_release(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
    core_control_ent* e = &c->controls[ctl_id];
    /* Copy the request out of the inline buffer rather than dereferencing a cast
     * pointer (no alignment/effective-type assumption on inln). */
    vmsig_lease_req rq;
    memcpy(&rq, ev->inln, sizeof rq);
    uint32_t cls = rq.cls;
    uint32_t ep = ev->endpoint;

    /* cross-endpoint isolation + cap/source gate BEFORE any action (like acquire). */
    if (cls >= VMSIG_LEASE_CLASS_MAX || ep >= 64) return;
    if (!(e->grant.endpoint_mask & (1ull << ep))) return;
    if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) return;
    if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) return;

    core_lease_cell* cell = &c->lease[ep][cls];
    if (cell->owner != ev->origin) return; /* not owner => no-op */

    /* signaling does NOT release held keys — that is the control's decision (it releases
     * its own keys before release if needed). Here we only free the cell. */
    cell->owner = 0;
    cell->owner_prio = 0;
    lease_reply(c, ev, VMSIG_EV_LEASE_RELEASED, cls, 0);
}
/* Handle CMD_LEASE_STATUS from control slot ctl_id: report whether the
 * (endpoint, class) cell is held by a live owner and, if so, that owner's
 * principal. The answer is an addressed LEASE_STATUS event; requests failing
 * validation get no answer at all. */
void core_lease_status(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
    core_control_ent* e = &c->controls[ctl_id];
    /* Copy the request out of the inline buffer rather than dereferencing a cast
     * pointer (no alignment/effective-type assumption on inln). */
    vmsig_lease_req rq;
    memcpy(&rq, ev->inln, sizeof rq);
    uint32_t cls = rq.cls;
    uint32_t ep = ev->endpoint;

    /* busy-state can be probed only within one's own endpoint and with the class cap
     * (else a principal without CAP_INPUT/CAP_POWER would leak busy-state/other principal). */
    if (cls >= VMSIG_LEASE_CLASS_MAX || ep >= 64) return;
    if (!(e->grant.endpoint_mask & (1ull << ep))) return;
    if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) return;
    if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) return;

    core_lease_cell* cell = &c->lease[ep][cls];
    /* A cell whose owner no longer resolves to a control is reported as free. */
    uint32_t busy = (cell->owner && origin_ctl(c, cell->owner)) ? 1u : 0u;

    vmsig_event up;
    memset(&up, 0, sizeof up);
    up.kind = VMSIG_EV_LEASE_STATUS;
    up.source = VMSIG_SRC_CORE;
    up.dir = VMSIG_DIR_UP;
    up.prio = VMSIG_PRIO_URGENT;
    up.endpoint = ep;
    up.origin = ev->origin;
    vmsig_lease_status st = { cls, busy, busy ? lease_owner_principal(c, cell->owner) : 0u };
    memcpy(up.inln, &st, sizeof st);
    core_emit_up(c, &up);
}
/* Free every lease cell owned by control slot ctl_id and audit each one as
 * LEASE_RECLAIMED. Must run while the slot's gen is still valid (core_reap calls
 * it before clearing active), because ownership is matched on the packed
 * (slot, gen) origin. Only the cell is freed: keys the departing owner left held
 * are the actuator's record, and the next owner can inspect them with
 * CMD_QUERY_INPUT.
 * NOTE(review): the loop bound is VMSIG_LEASE_CLASSES while acquire/release/status
 * validate against VMSIG_LEASE_CLASS_MAX — confirm the two constants agree. */
void core_lease_reap_control(vmsig_core* c, int ctl_id) {
    core_control_ent* leaving = &c->controls[ctl_id];
    const uint32_t key = origin_pack(ctl_id, leaving->gen);

    for (uint32_t ep = 0; ep < 64; ep++) {
        for (int cls = 0; cls < VMSIG_LEASE_CLASSES; cls++) {
            core_lease_cell* cell = &c->lease[ep][cls];
            if (cell->owner == key) {
                cell->owner_prio = 0;
                cell->owner = 0;
                vmsig_audit rec = { VMSIG_AUDIT_LEASE_RECLAIMED,
                                    leaving->grant.principal, ep, (uint32_t)cls, 0 };
                core_audit(c, &rec);
            }
        }
    }
}
/* DOWN emit from a control: enforcement against THIS control's grant.
 *   token - core_down_ctx identifying the core and the emitting control slot
 *   ev    - command; its payload is consumed on every path (queued, coalesced,
 *           dropped or released here), so the caller must not release it again
 * Returns 0 when the command was queued (or a lease command was handled), 1 when
 * the ctx coalesced/dropped it, -1 when it was rejected or could not be queued. */
int core_emit_down(void* token, vmsig_event* ev) {
    core_down_ctx* d = token;
    vmsig_core* c = d->core;
    core_control_ent* e = &c->controls[d->ctl_id];
    if (!e->active) { vmsig_payload_release(ev); return -1; }

    /* Lease arbitration is intercepted HERE (synchronous, not in ctx, does not touch
     * pending). origin is needed for the addressed reply and as the owner key. */
    if (ev->kind == VMSIG_EV_CMD_ACQUIRE || ev->kind == VMSIG_EV_CMD_RELEASE ||
        ev->kind == VMSIG_EV_CMD_LEASE_STATUS) {
        ev->origin = origin_pack(d->ctl_id, e->gen);
        if (ev->kind == VMSIG_EV_CMD_ACQUIRE) core_lease_acquire(c, d->ctl_id, ev);
        else if (ev->kind == VMSIG_EV_CMD_RELEASE) core_lease_release(c, d->ctl_id, ev);
        else core_lease_status(c, d->ctl_id, ev);
        vmsig_payload_release(ev);
        return 0;
    }

    if (!grant_allows_down(&e->grant, ev)) {
        vmsig_audit a = { VMSIG_AUDIT_DOWN_DENIED, e->grant.principal,
                          ev->endpoint, (uint32_t)ev->kind, 0 };
        core_audit(c, &a); /* rejected by policy (endpoint/source/class) */
        vmsig_payload_release(ev);
        return -1;
    }

    /* Lease GATE: destruction is passed ONLY by the class's current owner.
     * A non-owner (or an owner whose slot is dead) => drop + audit LEASE_DENIED
     * (distinguishable from grant-deny). A free cell => also drop: destruction cannot be
     * used without an explicit lease. Safe/read-only commands (cls<0) are not gated. */
    {
        int cls = lease_class_for_down(ev);
        if (cls >= 0 && ev->endpoint < 64) {
            uint32_t me = origin_pack(d->ctl_id, e->gen);
            uint32_t owner = c->lease[ev->endpoint][cls].owner;
            if (owner != me || !origin_ctl(c, owner)) {
                vmsig_audit a = { VMSIG_AUDIT_LEASE_DENIED, e->grant.principal,
                                  ev->endpoint, (uint32_t)ev->kind, 0 };
                core_audit(c, &a);
                vmsig_payload_release(ev);
                return -1;
            }
        }
    }

    if (e->pending >= VMSIG_DOWN_PENDING_MAX) { /* fairness/DoS: DOWN cap per poller */
        vmsig_audit a = { VMSIG_AUDIT_DOWN_DENIED, e->grant.principal,
                          ev->endpoint, (uint32_t)ev->kind, 0 };
        core_audit(c, &a);
        vmsig_payload_release(ev);
        return -1;
    }

    ev->origin = origin_pack(d->ctl_id, e->gen); /* addressed reply + pending accounting */
    e->pending++;
    int r = vmsig_ctx_submit(c->ctx, VMSIG_DIR_DOWN, ev);
    if (r != 0) e->pending--; /* not enqueued as a new entry (coalesce/drop/err) */
    /* On a negative return the ctx neither queued nor released the payload
     * (argument error or node allocation failure). Every other -1 path of this
     * function releases it, so release here too instead of leaking it. */
    if (r < 0) vmsig_payload_release(ev);
    core_wake(c);
    return r;
}
/* Test an event against a control's subscription filter. A zero source_mask or
 * endpoint_mask means "no filtering on that dimension"; prio_min is always
 * applied. Returns 1 on match, 0 otherwise. */
static int sub_match(const vmsig_sub* sub, const vmsig_event* ev) {
    if (sub->source_mask) {
        /* Shifting a 32-bit mask by >= 32 is undefined; an out-of-range source
         * cannot be part of any explicit source filter. */
        if ((uint32_t)ev->source >= 32u) return 0;
        if (!(sub->source_mask & (1u << ev->source))) return 0;
    }
    if (ev->prio < sub->prio_min) return 0;
    if (sub->endpoint_mask) {
        if (ev->endpoint >= 64 || !(sub->endpoint_mask & (1ull << ev->endpoint))) return 0;
    }
    return 1;
}
/* ===== Address-space context (MEMCTX seam): multicast / retain-replay / epoch =====
 * Per endpoint the core vends one datum: the locator described by the adapter,
 * stamped with the core's epoch and forced read-only, delivered together with a
 * freshly shared RO-fd. A MEMCTX trigger from the adapter and a replay to a late
 * subscriber both go through the same build + deliver path. */

/* Build the MEMCTX delivery event for endpoint ep into *ev.
 * The segment array is borrowed from the adapter (payload flagged
 * VMSIG_PL_BORROWED, release = NULL): the event must be consumed synchronously
 * and never queued. Returns 1 when built; 0 when ep is out of range, the cell is
 * unregistered or the adapter has no describe callback. */
static int core_memctx_build(vmsig_core* c, uint32_t ep, vmsig_event* ev) {
    if (ep >= 64) return 0;
    core_memctx_cell* cell = &c->memctx[ep];
    if (!cell->registered) return 0;
    if (!cell->reg.describe) return 0;

    /* Snapshot from the adapter. */
    const vmsig_memseg* seg_tab = NULL;
    uint32_t seg_cnt = 0;
    vmsig_memctx desc;
    memset(&desc, 0, sizeof desc);
    cell->reg.describe(cell->reg.ctx, &desc, &seg_tab, &seg_cnt);

    /* Fields the core is authoritative for. */
    desc.flags |= VMSIG_MEMCTX_RDONLY;   /* outward — always read-only */
    desc.nseg = seg_cnt;
    desc.epoch = c->epoch[ep];           /* core stamps the epoch */

    memset(ev, 0, sizeof *ev);
    ev->dir = VMSIG_DIR_UP;
    ev->source = VMSIG_SRC_MEMCTX;
    ev->kind = VMSIG_EV_MEMCTX;
    ev->endpoint = ep;
    ev->prio = VMSIG_PRIO_NORMAL;
    memcpy(ev->inln, &desc, sizeof desc);

    ev->payload.release = NULL;
    ev->payload.flags = VMSIG_PL_BORROWED;
    ev->payload.codec = VMSIG_CODEC_MEMCTX;
    ev->payload.len = (size_t)seg_cnt * sizeof(vmsig_memseg);
    ev->payload.data = (void*)seg_tab;   /* borrowed: owner is the adapter */
    return 1;
}
/* Hand a built MEMCTX event to one already-qualified control.
 * A fresh fd is requested from reg.share_fd for this delivery (-1 when the
 * adapter has no share_fd callback), passed to the control's attach_memctx and
 * then closed here: the core never keeps the fd. A successful attach (return 0)
 * is audited as MEMCTX_GRANTED. Controls without attach_memctx are skipped. */
static void core_memctx_deliver_one(vmsig_core* c, core_memctx_cell* cell,
                                    core_control_ent* e, const vmsig_event* ev) {
    if (!e->ops->attach_memctx) return;

    int ro_fd = -1;
    if (cell->reg.share_fd) ro_fd = cell->reg.share_fd(cell->reg.ctx);

    const int rc = e->ops->attach_memctx(e->ctl, ev, ro_fd);
    if (ro_fd >= 0) close(ro_fd);
    if (rc != 0) return;

    vmsig_audit rec = { VMSIG_AUDIT_MEMCTX_GRANTED, e->grant.principal,
                        ev->endpoint, 0, 0 };
    core_audit(c, &rec);
}
/* React to a MEMCTX trigger from an adapter: build the delivery event for the
 * trigger's endpoint, mark the cell valid for the current epoch (which enables
 * replay) and multicast to every active control whose grant and subscription
 * both admit the event. */
void core_memctx_route(vmsig_core* c, const vmsig_event* trigger) {
    const uint32_t ep = trigger->endpoint;
    if (ep >= 64) return;
    core_memctx_cell* cell = &c->memctx[ep];
    if (!cell->registered) return;

    vmsig_event built;
    if (!core_memctx_build(c, ep, &built)) return;

    cell->epoch = c->epoch[ep];
    cell->valid = 1;                  /* epoch context published */

    for (int idx = 0; idx < c->ncontrols; idx++) {
        core_control_ent* ctl = &c->controls[idx];
        if (!ctl->active) continue;
        if (!grant_allows_up(&ctl->grant, &built)) continue;
        if (!sub_match(&ctl->sub, &built)) continue;
        core_memctx_deliver_one(c, cell, ctl, &built);
    }
}
/* Replay the retained context to a single control (late subscriber): for every
 * endpoint whose cell is registered and valid, rebuild the MEMCTX event and
 * deliver it if the control's grant and subscription admit it. An out-of-range
 * or inactive ctl_id is ignored. */
void core_memctx_replay(vmsig_core* c, int ctl_id) {
    if (ctl_id < 0 || ctl_id >= c->ncontrols) return;
    core_control_ent* target = &c->controls[ctl_id];
    if (!target->active) return;

    for (uint32_t ep = 0; ep < 64; ep++) {
        core_memctx_cell* cell = &c->memctx[ep];
        if (!(cell->registered && cell->valid)) continue;

        vmsig_event built;
        if (!core_memctx_build(c, ep, &built)) continue;

        if (!grant_allows_up(&target->grant, &built)) continue;
        if (!sub_match(&target->sub, &built)) continue;
        core_memctx_deliver_one(c, cell, target, &built);
    }
}
/* Advance the epoch of an endpoint: the retained context stops being replayable,
 * an urgent MEMCTX_INVALIDATED carrying the new epoch is emitted UP
 * (unaddressed), and the adapter, if it registered an invalidate callback, is
 * asked to re-bootstrap for the new epoch. */
void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
    if (endpoint >= 64) return;

    core_memctx_cell* cell = &c->memctx[endpoint];
    c->epoch[endpoint]++;
    cell->valid = 0;                  /* prior-epoch context is not replayed */

    vmsig_memctx_inv body = { endpoint, c->epoch[endpoint] };
    vmsig_event note;
    memset(&note, 0, sizeof note);
    note.dir = VMSIG_DIR_UP;
    note.source = VMSIG_SRC_MEMCTX;
    note.kind = VMSIG_EV_MEMCTX_INVALIDATED;
    note.prio = VMSIG_PRIO_URGENT;
    note.endpoint = endpoint;
    memcpy(note.inln, &body, sizeof body);
    core_emit_up(c, &note);           /* broadcast to holders (CAP_MEMCTX gate) */

    /* The adapter is expected to emit a fresh MEMCTX once it is ready again. */
    if (cell->registered && cell->reg.invalidate)
        cell->reg.invalidate(cell->reg.ctx, c->epoch[endpoint]);
}
/* UP: drain the context queue and dispatch to subscribed controls.
 * Each dequeued event takes exactly one of three routes:
 *   1. MEMCTX trigger            -> core_memctx_route (rebuilt + multicast), then dropped;
 *   2. addressed (origin != 0)   -> delivered only to the control the origin resolves to;
 *   3. unaddressed (origin == 0) -> broadcast to every active control that passes both
 *                                   grant_allows_up and sub_match.
 * An unaddressed VM_LIFECYCLE reporting RESET/POWERDOWN/SHUTDOWN additionally bumps the
 * endpoint epoch before being broadcast. The payload of every dequeued event is released
 * here once dispatch is done. */
static void pump_up(vmsig_core* c) {
vmsig_event ev;
while (vmsig_ctx_next(c->ctx, VMSIG_DIR_UP, &ev) == 1) {
if (ev.kind == VMSIG_EV_MEMCTX) {
/* Context trigger: the core builds the authoritative locator (adapter snapshot
 * + epoch stamp) and distributes to qualified holders with re-sharing of the
 * RO-fd. The trigger itself is NOT delivered as an ordinary event. */
core_memctx_route(c, &ev);
vmsig_payload_release(&ev); /* inline trigger (release=NULL) — harmless */
continue;
}
if (ev.kind == VMSIG_EV_VM_LIFECYCLE && ev.origin == 0) {
/* Epoch-transition observation: a destructive async transition (VMHOST
 * broadcast) invalidates the address-space context. NOT continue — VM_LIFECYCLE
 * still goes to subscribers below via the normal broadcast. */
const vmsig_vm_state* vs = (const vmsig_vm_state*)ev.inln;
if (vs->state == VMSIG_VM_RESET || vs->state == VMSIG_VM_POWERDOWN ||
vs->state == VMSIG_VM_SHUTDOWN)
core_epoch_bump(c, ev.endpoint);
}
if (ev.origin) {
/* addressed reply ONLY to the initiator (origin+generation). The command was
 * already authorized by the grant => we deliver the reply without re-check; if
 * the initiator is gone/slot reused — we drop (private data, not broadcast). */
core_control_ent* e = origin_ctl(c, ev.origin);
if (e && e->ops->deliver) e->ops->deliver(e->ctl, &ev);
} else {
/* unaddressed event — broadcast; effective = grant ∩ sub */
for (int i = 0; i < c->ncontrols; i++) {
core_control_ent* e = &c->controls[i];
if (!e->active) continue;
if (grant_allows_up(&e->grant, &ev) && sub_match(&e->sub, &ev) && e->ops->deliver)
e->ops->deliver(e->ctl, &ev);
}
}
/* Single release point for routes 2 and 3 (route 1 released above). */
vmsig_payload_release(&ev);
}
}
/* DOWN: drain the queue and hand each command to the adapter registered for its
 * (endpoint, source).
 *  - The initiator's pending counter is decremented as soon as the command leaves
 *    the ctx; this is the only place it is decremented for a dequeued command.
 *  - In-flight fencing: a lease-gated command whose origin is no longer the owner
 *    of its class (lease lost between the core_emit_down gate and dequeue) is
 *    dropped before actuation and audited as LEASE_DENIED. Nothing else is done
 *    for it here; freeing cells is the job of acquire/release/reap.
 *  - A command with no matching adapter, or whose adapter has no submit callback,
 *    is dropped silently. */
static void pump_down(vmsig_core* c) {
    vmsig_event cmd;
    for (;;) {
        if (vmsig_ctx_next(c->ctx, VMSIG_DIR_DOWN, &cmd) != 1) break;

        core_control_ent* sender = origin_ctl(c, cmd.origin); /* command has left ctx */
        if (sender && sender->pending) sender->pending--;

        const int cls = lease_class_for_down(&cmd);
        const int fenced = cls >= 0 && cmd.endpoint < 64 &&
                           c->lease[cmd.endpoint][cls].owner != cmd.origin;
        if (fenced) {
            /* Observable drop, attributed to the principal behind the origin. */
            vmsig_audit rec = { VMSIG_AUDIT_LEASE_DENIED,
                                lease_owner_principal(c, cmd.origin),
                                cmd.endpoint, (uint32_t)cmd.kind, (uint32_t)cls };
            core_audit(c, &rec);
        } else {
            core_adapter_ent* dst = core_find_adapter(c, cmd.endpoint, cmd.source);
            if (dst && dst->ops->submit) dst->ops->submit(dst->a, &cmd);
        }
        vmsig_payload_release(&cmd);
    }
}
/* Deferred teardown of controls flagged for reaping. Runs at the end of a loop
 * iteration, i.e. never from inside the control's own on_readable callback.
 * Per control, in order: remove its fd from epoll and mark the slot dead, free
 * its lease cells (while the slot is still active, so its origin still matches),
 * call ops->close, then clear the active and reap flags. */
static void core_reap(vmsig_core* c) {
    for (int idx = 0; idx < c->ncontrols; idx++) {
        core_control_ent* ctl = &c->controls[idx];
        if (!(ctl->reap && ctl->active)) continue;

        if (ctl->slot) {
            epoll_ctl(c->epfd, EPOLL_CTL_DEL, ctl->slot->fd, NULL);
            ctl->slot->role = SLOT_DEAD;
        }
        core_lease_reap_control(c, idx);   /* return leases BEFORE active=0 */
        if (ctl->ops->close) ctl->ops->close(ctl->ctl);
        ctl->reap = 0;
        ctl->active = 0;
    }
}
/* Run the event loop until vmsig_core_stop() is observed.
 * Each iteration blocks in epoll_wait, dispatches every ready slot by role, then
 * pumps the UP queue, the DOWN queue and finally reaps detached controls.
 * Returns 0 on a requested stop, -1 when c is NULL or epoll_wait fails with
 * anything other than EINTR. */
int vmsig_core_run(vmsig_core* c) {
    if (c == NULL) return -1;

    struct epoll_event ready[VMSIG_MAX_EVENTS];
    for (;;) {
        if (__atomic_load_n(&c->stopping, __ATOMIC_ACQUIRE)) break;

        int nready = epoll_wait(c->epfd, ready, VMSIG_MAX_EVENTS, -1);
        if (nready < 0) {
            if (errno == EINTR) continue;
            return -1;
        }

        for (int k = 0; k < nready; k++) {
            core_slot* slot = (core_slot*)ready[k].data.ptr;
            switch (slot->role) {
            case SLOT_WAKEUP:      /* stop request is noticed at the top of the loop */
            case SLOT_CTX_TIMING:  /* pacing timer fired; the pumps below do the work */
                drain_counter_fd(slot->fd);
                break;
            case SLOT_ADAPTER:
                if (slot->ops->on_readiness)
                    slot->ops->on_readiness(slot->adapter, slot->cookie, ready[k].events);
                break;
            case SLOT_CONTROL:
                if (slot->cops->on_readable)
                    slot->cops->on_readable(slot->ctl);
                break;
            case SLOT_SOURCE:
                if (slot->on_source)
                    slot->on_source(slot->source_user, ready[k].events);
                break;
            case SLOT_DEAD:
                break;             /* detached — ignore */
            }
        }

        pump_up(c);
        pump_down(c);
        core_reap(c);
    }
    return 0;
}
/* Ask the loop to stop. The flag is published with release ordering (the loop
 * reads it with acquire) and the loop is woken so a blocked epoll_wait returns.
 * A NULL core is ignored. */
void vmsig_core_stop(vmsig_core* c) {
    if (c != NULL) {
        __atomic_store_n(&c->stopping, 1, __ATOMIC_RELEASE); /* cross-thread stop signal */
        core_wake(c);
    }
}
+203
View File
@@ -0,0 +1,203 @@
/* ctx.c — transfer context: priority, ordering, protocol timing.
* This is the SISC-critical seam. No behavioral timing here: commands arrive
* already decided by control; the context only orders and paces them. */
#include "ctx_internal.h"
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/timerfd.h>
#include <unistd.h>
/* Depth ceiling applied to a band when the (source,dir) policy sets no max_inflight. */
#define VMSIG_CTX_DEFAULT_INFLIGHT 4096

/* Current CLOCK_MONOTONIC time in nanoseconds. */
static uint64_t now_ns(void) {
    struct timespec mono;
    clock_gettime(CLOCK_MONOTONIC, &mono);
    const uint64_t whole = (uint64_t)mono.tv_sec * 1000000000ull;
    return whole + (uint64_t)mono.tv_nsec;
}
/* ---- node recycling (free-list under the shared mutex) ------------------- */

/* Obtain a queue node: pop one from the free-list when available, otherwise
 * allocate. Returns NULL only when the allocation fails. The node's contents
 * are unspecified; the caller fills them in. */
static ev_node* node_get(vmsig_ctx* c) {
    ev_node* recycled = c->freelist;
    if (!recycled) return malloc(sizeof *recycled);
    c->freelist = recycled->next;
    return recycled;
}
/* Return a node to the free-list (pushed at the head). The node's payload must
 * already have been released or handed over by the caller. */
static void node_put(vmsig_ctx* c, ev_node* n) {
    ev_node* old_head = c->freelist;
    c->freelist = n;
    n->next = old_head;
}
/* Create a transfer context: zeroed state, an initialized mutex and one
 * non-blocking, close-on-exec monotonic timerfd per direction for pacing.
 * Returns NULL on any failure, with everything created so far torn down. */
vmsig_ctx* vmsig_ctx_new(void) {
    vmsig_ctx* ctx = calloc(1, sizeof *ctx);
    if (ctx == NULL) return NULL;

    if (pthread_mutex_init(&ctx->lock, NULL) != 0) {
        free(ctx);
        return NULL;
    }

    int made = 0;
    while (made < 2) {
        int fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
        ctx->dir[made].timing_fd = fd;
        if (fd < 0) break;
        made++;
    }
    if (made == 2) return ctx;

    /* Partial construction: close the timers that did get created. */
    for (int k = 0; k < made; k++) close(ctx->dir[k].timing_fd);
    pthread_mutex_destroy(&ctx->lock);
    free(ctx);
    return NULL;
}
/* Destroy a context. Events still queued have their payloads released and their
 * nodes freed, both timing fds are closed, the recycled nodes on the free-list
 * are freed, then the mutex and the context itself. NULL is ignored. */
void vmsig_ctx_free(vmsig_ctx* c) {
    if (c == NULL) return;

    for (int d = 0; d < 2; d++) {
        ctx_dir* side = &c->dir[d];
        for (int p = 0; p < VMSIG_PRIO_MAX; p++) {
            ev_node* cur = side->band[p].head;
            while (cur != NULL) {
                ev_node* following = cur->next;
                vmsig_payload_release(&cur->ev);
                free(cur);
                cur = following;
            }
        }
        if (side->timing_fd >= 0) close(side->timing_fd);
    }

    /* Recycled nodes carry no payload; only the memory is returned. */
    for (ev_node* spare = c->freelist; spare != NULL; ) {
        ev_node* following = spare->next;
        free(spare);
        spare = following;
    }

    pthread_mutex_destroy(&c->lock);
    free(c);
}
/* Install the policy for one (source, direction) pair.
 *   default_prio - floor applied to the priority of submitted events
 *   t            - timing parameters; NULL clears them to all-zero
 * Returns 0 on success, -1 for a NULL context or an out-of-range src/dir. */
int vmsig_ctx_set_policy(vmsig_ctx* c, vmsig_source src, vmsig_dir dir,
                         vmsig_prio default_prio, const vmsig_timing* t) {
    if (!c || src >= VMSIG_SRC_MAX || dir > VMSIG_DIR_DOWN) return -1;

    pthread_mutex_lock(&c->lock);
    ctx_policy* slot = &c->policy[src][dir];
    slot->default_prio = default_prio;
    if (t == NULL) {
        memset(&slot->timing, 0, sizeof slot->timing);
    } else {
        slot->timing = *t;
    }
    slot->policy_set = 1;
    pthread_mutex_unlock(&c->lock);
    return 0;
}
/* Append node n to the tail of band b and bump its count. */
static void band_push_tail(ev_band* b, ev_node* n) {
    n->next = NULL;
    if (b->tail == NULL) {
        b->head = n;          /* first element of an empty band */
    } else {
        b->tail->next = n;
    }
    b->tail = n;
    b->count += 1;
}
/* Submit an event to the queue of direction dir.
 * The event is stamped with a sequence number and, if ts_ns is 0, a timestamp;
 * its priority is raised to the (source,dir) policy default and clamped to the
 * highest band. An out-of-range source falls back to the VMSIG_SRC_NONE policy.
 * Returns:
 *    0  queued as a new entry (payload ownership taken);
 *    1  coalesced into an existing entry (ownership taken) or dropped by
 *       backpressure (payload released here);
 *   -1  invalid arguments or node allocation failure; the payload is NOT touched. */
int vmsig_ctx_submit(vmsig_ctx* c, vmsig_dir dir, vmsig_event* ev) {
    if (!c || !ev || dir > VMSIG_DIR_DOWN) return -1;
    vmsig_source src = ev->source < VMSIG_SRC_MAX ? ev->source : VMSIG_SRC_NONE;

    pthread_mutex_lock(&c->lock);
    ctx_policy* pol = &c->policy[src][dir];

    /* effective priority = max(policy default, emitter request) */
    vmsig_prio eff = ev->prio > pol->default_prio ? ev->prio : pol->default_prio;
    if (eff >= VMSIG_PRIO_MAX) eff = VMSIG_PRIO_MAX - 1;
    ev->seq = ++c->seq;
    if (ev->ts_ns == 0) ev->ts_ns = now_ns();
    ev->prio = eff;
    ev_band* band = &c->dir[dir].band[eff];

    /* coalescing: a burst of the same kind+endpoint is collapsed (newest wins).
     * A band is shared by every source and initiator at this priority, so the
     * match also requires the same source and the same origin: otherwise an event
     * could overwrite one from a source with a different policy, or replace
     * another control's addressed command/reply, which would then never arrive. */
    if (pol->timing.coalesce_ns) {
        for (ev_node* n = band->head; n; n = n->next) {
            if (n->ev.kind == ev->kind && n->ev.endpoint == ev->endpoint &&
                n->ev.source == ev->source && n->ev.origin == ev->origin) {
                vmsig_payload_release(&n->ev);
                uint32_t keep_seq = n->ev.seq; /* keep position in the order */
                n->ev = *ev;
                n->ev.seq = keep_seq;
                pthread_mutex_unlock(&c->lock);
                return 1;
            }
        }
    }

    /* backpressure: channel depth is bounded. When no policy is set
     * (max_inflight==0), a BUILT-IN default ceiling applies (drop newest),
     * so the queue does not grow without bound under a command flood. */
    uint32_t cap = pol->timing.max_inflight ? pol->timing.max_inflight
                                            : VMSIG_CTX_DEFAULT_INFLIGHT;
    uint8_t dp = pol->timing.max_inflight ? pol->timing.drop_policy
                                          : VMSIG_DROP_NEWEST;
    /* Compare unsigned: count is never negative, and casting a cap above INT_MAX
     * to int would produce a negative bound that rejects every event. */
    if ((uint32_t)band->count >= cap) {
        if (dp == VMSIG_DROP_OLDEST) {
            /* NOTE(review): the evicted entry disappears without the submitter of
             * THAT entry being told; core_emit_down/pump_down track a per-control
             * pending counter that is only decremented on dequeue — confirm that
             * DROP_OLDEST on the DOWN direction cannot strand that counter. */
            ev_node* old = band->head; /* drop the oldest */
            if (old) {
                band->head = old->next;
                if (!band->head) band->tail = NULL;
                band->count--;
                vmsig_payload_release(&old->ev);
                node_put(c, old);
            }
        } else {
            /* NEWEST / BLOCK (the loop must not block) — drop the incoming event */
            vmsig_payload_release(ev);
            pthread_mutex_unlock(&c->lock);
            return 1;
        }
    }

    ev_node* node = node_get(c);
    if (!node) { pthread_mutex_unlock(&c->lock); return -1; }
    node->ev = *ev; /* take ownership of the payload */
    band_push_tail(band, node);
    pthread_mutex_unlock(&c->lock);
    return 0;
}
/* Dequeue the next deliverable event of direction dir into *out.
 * Returns 1 when an event was returned (payload ownership passes to the caller),
 * 0 when nothing is deliverable right now, -1 on invalid arguments.
 * When events are queued but all of them are held back by their source's min_gap,
 * the direction's timing fd is armed (one-shot, relative) for the shortest
 * remaining wait. */
int vmsig_ctx_next(vmsig_ctx* c, vmsig_dir dir, vmsig_event* out) {
if (!c || !out || dir > VMSIG_DIR_DOWN) return -1;
pthread_mutex_lock(&c->lock);
ctx_dir* d = &c->dir[dir];
uint64_t now = now_ns();
uint64_t min_rem = 0; /* shortest remaining wait among held-back events */
int have_rem = 0; /* set once at least one held-back event was seen */
/* Walk bands from highest priority to lowest, and within a band from head
 * to tail, returning the FIRST event "matured" against its protocol min_gap.
 * A paced source thus waits without blocking ready events of other sources.
 * Within one source the order is preserved (its earlier events come first). */
for (int p = VMSIG_PRIO_MAX - 1; p >= 0; p--) {
ev_band* b = &d->band[p];
ev_node* prev = NULL;
ev_node* n = b->head;
while (n) {
/* out-of-range sources share the VMSIG_SRC_NONE policy, as in submit */
vmsig_source src = n->ev.source < VMSIG_SRC_MAX ? n->ev.source : VMSIG_SRC_NONE;
ctx_policy* pol = &c->policy[src][dir];
int due = 1;
uint64_t rem = 0;
if (pol->timing.min_gap_ns) {
uint64_t due_at = pol->last_emit_ns + pol->timing.min_gap_ns;
if (now < due_at) { due = 0; rem = due_at - now; }
}
if (due) {
/* unlink n from the band, keeping head/tail consistent */
if (prev) prev->next = n->next; else b->head = n->next;
if (b->tail == n) b->tail = prev;
b->count--;
pol->last_emit_ns = now; /* start of the next min_gap window for this (source,dir) */
*out = n->ev; /* payload ownership -> caller */
node_put(c, n);
pthread_mutex_unlock(&c->lock);
return 1;
}
if (!have_rem || rem < min_rem) { min_rem = rem; have_rem = 1; }
prev = n;
n = n->next;
}
}
/* nothing matured: arm the timing-fd for the nearest due time (if any waiting) */
if (have_rem) {
struct itimerspec its;
memset(&its, 0, sizeof its);
its.it_value.tv_sec = (time_t)(min_rem / 1000000000ull);
its.it_value.tv_nsec = (long)(min_rem % 1000000000ull);
/* an all-zero it_value would disarm the timer instead of firing it */
if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0) its.it_value.tv_nsec = 1;
timerfd_settime(d->timing_fd, 0, &its, NULL);
}
pthread_mutex_unlock(&c->lock);
return 0;
}
/* Pacing timerfd of a direction, for registration with a poller.
 * Returns -1 for a NULL context or an out-of-range direction. */
int vmsig_ctx_timing_fd(vmsig_ctx* c, vmsig_dir dir) {
    const int usable = c != NULL && !(dir > VMSIG_DIR_DOWN);
    return usable ? c->dir[dir].timing_fd : -1;
}
+41
View File
@@ -0,0 +1,41 @@
#ifndef VMSIG_CTX_INTERNAL_H
#define VMSIG_CTX_INTERNAL_H
#include "vmsig_ctx.h"
#include <pthread.h>
#include <stdint.h>
/* Private internals of the transfer context. Priority bands are simple
 * singly linked FIFOs, one node per event; spent nodes are recycled through
 * vmsig_ctx.freelist. */
/* One queued event plus its link to the next node in the band (or free-list). */
typedef struct ev_node {
vmsig_event ev;
struct ev_node* next;
} ev_node;
/* FIFO of one priority band: dequeue from head, append at tail. */
typedef struct {
ev_node* head;
ev_node* tail;
int count; /* number of nodes currently linked in this band */
} ev_band;
/* Policy and pacing state of one (source, direction) pair. */
typedef struct {
vmsig_prio default_prio; /* floor for the priority of submitted events */
vmsig_timing timing; /* coalesce / min_gap / max_inflight / drop_policy */
uint64_t last_emit_ns; /* for protocol min_gap (per source,dir) */
int policy_set; /* set to 1 by vmsig_ctx_set_policy */
} ctx_policy;
/* Per-direction queue state. */
typedef struct {
ev_band band[VMSIG_PRIO_MAX]; /* 4 priority bands */
int timing_fd; /* pacing timerfd (created in ctx_new) */
} ctx_dir;
struct vmsig_ctx {
pthread_mutex_t lock; /* guards every field below */
uint32_t seq; /* last sequence number handed out by submit */
ev_node* freelist; /* ev_node recycling */
ctx_dir dir[2]; /* indexed by VMSIG_DIR_UP/DOWN */
ctx_policy policy[VMSIG_SRC_MAX][2]; /* [source][dir] */
};
#endif /* VMSIG_CTX_INTERNAL_H */
+95
View File
@@ -0,0 +1,95 @@
/* test_authz.c — authorization to receive the coherent address-space context (MEMCTX).
* Several co-resident controls on one endpoint; MEMCTX is multicast upward and reaches
* ONLY the qualified holder. We check the least-privilege matrix:
* GOOD (CAP_MEMCTX + source_mask MEMCTX + endpoint) -> receives MEMCTX;
* NOCAP (CAP_OBSERVE, no MEMCTX) -> does NOT receive (cap);
* NOSRC (CAP_MEMCTX, source_mask without MEMCTX bit) -> does NOT receive (source_mask);
* NOEP (CAP_MEMCTX, but endpoint outside the mask) -> does NOT receive (endpoint).
* A vmhost stub provides watchdog ticks (VM_LIFECYCLE) — a termination guarantee if
* MEMCTX somehow never arrives (then the asserts catch its absence). In-proc, under ASAN. */
#define _GNU_SOURCE
#include "vmsig.h"
#include <stdio.h>
#include <string.h>
#include <stdint.h>
/* Set to 1 by any failed CHECK; decides the process exit code. */
static int g_fail = 0;
/* Non-fatal assertion: print the message and mark the run failed, keep going. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* The single endpoint all adapters in this test are attached to. */
#define EP 0u
/* Per-control bookkeeping handed to the in-proc callbacks as `user`. */
typedef struct {
vmsig_core* core;
const char* tag;
int memctx; /* how many MEMCTX this control received */
int ticks; /* watchdog: VM_LIFECYCLE (only on GOOD) */
int is_good; /* GOOD stops the loop on the first MEMCTX */
} holder;
/* Ordinary-event callback: counts VM_LIFECYCLE ticks. On the GOOD holder the
 * count doubles as a watchdog that stops the loop once it exceeds 20, in case
 * MEMCTX never shows up. Always returns 0. */
static int on_ev(void* user, const vmsig_event* ev) {
    holder* self = user;
    if (ev->kind != VMSIG_EV_VM_LIFECYCLE) return 0;

    self->ticks += 1;
    if (self->is_good && self->ticks > 20)
        vmsig_core_stop(self->core);   /* failsafe */
    return 0;
}
/* MEMCTX callback: counts deliveries; the GOOD holder stops the loop on its
 * first one. The event and the fd are not inspected. Always returns 0. */
static int on_memctx(void* user, const vmsig_event* ev, int fd) {
    (void)fd;
    (void)ev;
    holder* self = user;
    self->memctx += 1;
    if (self->is_good) {
        vmsig_core_stop(self->core);
    }
    return 0;
}
/* Register one in-proc control with the given capability set, source ceiling
 * and endpoint mask. Every holder uses principal 7; only the three masks differ
 * between the test's cases. */
static void add_holder(vmsig_core* core, holder* h, uint32_t cap,
                       uint32_t source_mask, uint64_t endpoint_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user = h;
    cfg.on_memctx = on_memctx;
    cfg.on_event = on_ev;
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.cap_mask = cap;
    grant.source_mask = source_mask;
    grant.endpoint_mask = endpoint_mask;
    grant.principal = 7;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
}
/* Least-privilege matrix for MEMCTX delivery: four co-resident controls on one
 * endpoint, of which only GOOD may receive the context. Exit code 0 = PASS. */
int main(void) {
    printf("test_authz (memctx least-privilege)\n");

    /* Fail fast if the fixtures cannot be created instead of running on NULL. */
    vmsig_ctx* ctx = vmsig_ctx_new();
    if (!ctx) {
        printf(" FAIL: %s\n", "vmsig_ctx_new");
        return 1;
    }
    vmsig_core* core = vmsig_core_new(ctx);
    if (!core) {
        printf(" FAIL: %s\n", "vmsig_core_new");
        vmsig_ctx_free(ctx);
        return 1;
    }

    holder good = { core, "GOOD", 0, 0, 1 };
    holder nocap = { core, "NOCAP", 0, 0, 0 };
    holder nosrc = { core, "NOSRC", 0, 0, 0 };
    holder noep = { core, "NOEP", 0, 0, 0 };
    /* GOOD: CAP_MEMCTX (+OBSERVE for watchdog lifecycle ticks), source MEMCTX, ep0 -> receives. */
    add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << EP);
    /* NOCAP: OBSERVE only (no CAP_MEMCTX) -> deny by cap. */
    add_holder(core, &nocap, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << EP);
    /* NOSRC: CAP_MEMCTX, but source_mask without the MEMCTX bit -> deny by source_mask. */
    add_holder(core, &nosrc, VMSIG_CAP_MEMCTX, ~(1u << VMSIG_SRC_MEMCTX), 1ull << EP);
    /* NOEP: CAP_MEMCTX, source ok, but endpoint outside the mask (ep1) -> deny by endpoint. */
    add_holder(core, &noep, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 1);

    /* watchdog lifecycle ticks + address-space context on one endpoint (stub). */
    CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, EP) >= 0, "add vmhost adapter");
    CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, EP) >= 0, "add memctx adapter");

    /* The loop is only stopped by the callbacks, which are driven by the adapters:
     * if adapter setup failed, entering the loop could block forever. */
    if (!g_fail) {
        int rc = vmsig_core_run(core);
        printf(" rc=%d GOOD.memctx=%d NOCAP=%d NOSRC=%d NOEP=%d\n",
               rc, good.memctx, nocap.memctx, nosrc.memctx, noep.memctx);
        CHECK(rc == 0, "core loop exits via stop (rc=0)");
        CHECK(good.memctx >= 1, "GOOD receives MEMCTX (cap+source+endpoint)");
        CHECK(nocap.memctx == 0, "NOCAP does not receive (no CAP_MEMCTX)");
        CHECK(nosrc.memctx == 0, "NOSRC does not receive (source_mask without MEMCTX)");
        CHECK(noep.memctx == 0, "NOEP does not receive (endpoint outside mask)");
    }

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
    printf("authz tests: %s\n", g_fail ? "FAIL" : "PASS");
    return g_fail ? 1 : 0;
}
+125
View File
@@ -0,0 +1,125 @@
/* test_ctx.c — unit tests for the transfer context (public vmsig_ctx_* API):
* priority->seq, coalescing, backpressure (drop oldest/newest), protocol
* pacing via timing-fd. Links against libvmsig; run through ctest. */
#include "vmsig.h"
#include <stdio.h>
#include <string.h>
#include <poll.h>
/* Set to 1 by any failed CHECK. */
static int g_fail = 0;
/* Non-fatal assertion: print the message and mark the run failed, keep going. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* Submit a DOWN event from VMSIG_SRC_NONE with the given priority, kind,
 * endpoint and correlation tag; all other fields are zero. Returns whatever
 * vmsig_ctx_submit returns. */
static int put(vmsig_ctx* c, vmsig_prio p, vmsig_kind k, uint32_t ep, uint32_t corr) {
    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir = VMSIG_DIR_DOWN;
    cmd.source = VMSIG_SRC_NONE;
    cmd.kind = k;
    cmd.prio = p;
    cmd.corr = corr;
    cmd.endpoint = ep;
    return vmsig_ctx_submit(c, VMSIG_DIR_DOWN, &cmd);
}
/* ---- 1. priority first, then FIFO by seq within a band ------------------- */
/* Submits five events across three priority bands in mixed order and checks that
 * vmsig_ctx_next() returns them URGENT first, then NORMAL, then BULK, keeping
 * submission order inside a band. A sixth call must report an empty queue. */
static void test_priority_seq(void) {
    printf("test_priority_seq\n");
    vmsig_ctx* c = vmsig_ctx_new();
    put(c, VMSIG_PRIO_BULK,   VMSIG_EV_CMD_INPUT, 0, 0xA);
    put(c, VMSIG_PRIO_URGENT, VMSIG_EV_CMD_INPUT, 0, 0xB);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 0xC);
    put(c, VMSIG_PRIO_BULK,   VMSIG_EV_CMD_INPUT, 0, 0xD);
    put(c, VMSIG_PRIO_URGENT, VMSIG_EV_CMD_INPUT, 0, 0xE);
    uint32_t want[5] = { 0xB, 0xE, 0xC, 0xA, 0xD }; /* URGENT(seq) -> NORMAL -> BULK(seq) */
    vmsig_event o;
    for (int i = 0; i < 5; i++) {
        /* Clear the out-event before every call: if next() fails without filling
         * it, the corr comparison below must read a defined value rather than
         * stack garbage or the previous iteration's event. */
        memset(&o, 0, sizeof o);
        int r = vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o);
        CHECK(r == 1, "next must return an event");
        CHECK(o.corr == want[i], "priority->seq order");
    }
    CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "queue drained");
    vmsig_ctx_free(c);
}
/* ---- 2. coalescing: a burst of one kind+endpoint collapses (newest wins) - */
/* Installs a policy with a non-zero coalesce window, submits two events of the
 * same kind and endpoint, and expects the second submit to report 1 and a single
 * event carrying the newer correlation tag to come out. */
static void test_coalesce(void) {
    printf("test_coalesce\n");
    vmsig_ctx* c = vmsig_ctx_new();
    vmsig_timing t; memset(&t, 0, sizeof t); t.coalesce_ns = 1;
    vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_VM, 0, 0x11);
    int r2 = put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_VM, 0, 0x22); /* should coalesce */
    CHECK(r2 == 1, "second submit coalesced (=1)");
    /* Zeroed up front so the corr check reads a defined value even when next()
     * fails and leaves the out-event untouched. */
    vmsig_event o;
    memset(&o, 0, sizeof o);
    CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "one event after coalescing");
    CHECK(o.corr == 0x22, "newest data after coalescing");
    CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "nothing more");
    vmsig_ctx_free(c);
}
/* ---- 3. backpressure drop_oldest --------------------------------------- */
/* With max_inflight=2 and VMSIG_DROP_OLDEST, three submits must leave exactly the
 * two newest events (2 then 3) in the queue. */
static void test_backpressure_oldest(void) {
    printf("test_backpressure_oldest\n");
    vmsig_ctx* c = vmsig_ctx_new();
    vmsig_timing t; memset(&t, 0, sizeof t);
    t.max_inflight = 2; t.drop_policy = VMSIG_DROP_OLDEST;
    vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 3); /* evicts 1 */
    vmsig_event o;
    int got[8], n = 0;
    const int got_cap = (int)(sizeof got / sizeof got[0]);
    /* The drain is bounded by the capacity of got[]: if the context under test
     * misbehaves and keeps producing events, the test must fail a CHECK rather
     * than write past the end of the array. */
    while (n < got_cap && vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1) got[n++] = (int)o.corr;
    CHECK(n == 2, "2 remain after drop_oldest");
    CHECK(n == 2 && got[0] == 2 && got[1] == 3, "oldest evicted (1)");
    vmsig_ctx_free(c);
}
/* ---- 4. backpressure drop_newest --------------------------------------- */
/* With max_inflight=2 and VMSIG_DROP_NEWEST, the third submit must report 1 and
 * the queue must still hold the first two events (1 then 2). */
static void test_backpressure_newest(void) {
    printf("test_backpressure_newest\n");
    vmsig_ctx* c = vmsig_ctx_new();
    vmsig_timing t; memset(&t, 0, sizeof t);
    t.max_inflight = 2; t.drop_policy = VMSIG_DROP_NEWEST;
    vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
    int r3 = put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 3); /* dropped */
    CHECK(r3 == 1, "third submit dropped (=1)");
    vmsig_event o;
    int got[8], n = 0;
    const int got_cap = (int)(sizeof got / sizeof got[0]);
    /* The drain is bounded by the capacity of got[]: a context that keeps
     * producing events must show up as a failed CHECK, not as a stack overrun. */
    while (n < got_cap && vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1) got[n++] = (int)o.corr;
    CHECK(n == 2 && got[0] == 1 && got[1] == 2, "newest dropped (3)");
    vmsig_ctx_free(c);
}
/* ---- 5. protocol pacing via timing-fd ---------------------------------- */
/* With a 20 ms min_gap_ns policy, the first of two queued events is delivered at
 * once and the second is withheld. The test then polls the context's timing fd
 * (bounded to 1 s) and expects the second event to be deliverable afterwards. */
static void test_pacing(void) {
    printf("test_pacing\n");
    vmsig_ctx* c = vmsig_ctx_new();
    vmsig_timing t; memset(&t, 0, sizeof t);
    t.min_gap_ns = 20u * 1000000u; /* 20 ms gap */
    vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
    put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
    /* Zeroed so the final corr check never reads an uninitialized struct when an
     * earlier next() call failed. */
    vmsig_event o;
    memset(&o, 0, sizeof o);
    CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "first delivered immediately");
    CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "second paced (0 for now)");
    int tfd = vmsig_ctx_timing_fd(c, VMSIG_DIR_DOWN);
    CHECK(tfd >= 0, "timing-fd valid");
    struct pollfd pfd = { .fd = tfd, .events = POLLIN };
    int pr = poll(&pfd, 1, 1000); /* wait for it to fire, no longer than 1s */
    /* poll() also returns 1 when it only reports POLLNVAL/POLLERR/POLLHUP for the
     * fd; require POLLIN so that a broken descriptor does not count as "ready". */
    CHECK(pr == 1 && (pfd.revents & POLLIN) != 0, "timing-fd became ready within the gap");
    CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "after the gap the second matured");
    CHECK(o.corr == 2, "this is exactly the second event");
    vmsig_ctx_free(c);
}
int main(void) {
test_priority_seq();
test_coalesce();
test_backpressure_oldest();
test_backpressure_newest();
test_pacing();
printf("ctx tests: %s\n", g_fail ? "FAIL" : "PASS");
return g_fail ? 1 : 0;
}
+93
View File
@@ -0,0 +1,93 @@
/* test_inputobs.c — input observation:
* held-query: a control with CAP_INPUT, on CMD_QUERY_INPUT, receives INPUT_HELD from the
* vmctl record (stub without vmctl => count=0); without CAP_INPUT — DOWN_DENIED.
* (The cursor sensor moved out of signaling with the FRAME adapter: CURSOR_STATE is now
* emitted by the out-of-repo vgpu-perception shell-as-control, not by a signaling adapter.)
* In-proc, under ASAN. Links against libvmsig. */
#define _GNU_SOURCE
#include "vmsig.h"
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
/* Sticky failure flag: latched by CHECK, read by main() for the exit code. */
static int g_fail = 0;
/* CHECK(c, m): when `c` is false, print a FAIL line carrying `m` and set g_fail.
 * It does not abort; later checks still run. */
#define CHECK(c, m) do { if (!(c)) { printf(" FAIL: %s\n", (m)); g_fail = 1; } } while (0)
/* The single endpoint index that every control and adapter in this file uses. */
#define EP 0u
/* Per-control observer state, passed to on_ev() as the in-proc control's user pointer. */
typedef struct {
vmsig_core* core; /* core to stop once stop_held is reached */
void* ctl; /* in-proc control handle, filled in by add_ctl(); used as the send target */
int held; /* number of INPUT_HELD events received */
int last_held_count; /* `count` field of the most recent INPUT_HELD payload */
int stop_held; /* stop the loop once `held` reaches this value (0 = never auto-stop) */
} obs;
/* UP-event callback of the in-proc control. Only INPUT_HELD is of interest: it
 * is counted, its payload `count` is remembered, and the core is stopped once
 * the configured number of replies has been seen. Always returns 0. */
static int on_ev(void* u, const vmsig_event* ev) {
    if (ev->kind != VMSIG_EV_INPUT_HELD)
        return 0;

    obs* st = u;
    const vmsig_input_held* payload = (const vmsig_input_held*)ev->inln;

    st->held += 1;
    st->last_held_count = (int)payload->count;

    if (st->stop_held != 0 && st->held >= st->stop_held)
        vmsig_core_stop(st->core);
    return 0;
}
/* Create an in-proc control that reports to `o` through on_ev(), record its
 * handle in o->ctl, and register it with the core under a grant limited to
 * endpoint EP with the given capability and source masks. The subscription uses
 * the same source mask and accepts every priority from BULK upward. */
static void add_ctl(vmsig_core* core, obs* o, uint32_t cap, uint32_t src_mask) {
    vmsig_inproc_cfg config;
    memset(&config, 0, sizeof config);
    config.on_event        = on_ev;
    config.user            = o;
    config.sub.source_mask = src_mask;
    config.sub.prio_min    = VMSIG_PRIO_BULK;

    void* handle = vmsig_inproc_control_new(&config);
    o->ctl = handle;

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = 1ull << EP;
    grant.source_mask   = src_mask;
    grant.cap_mask      = cap;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), handle, &grant);
}
/* Send a HIGH-priority CMD_QUERY_INPUT DOWN event for endpoint EP through the
 * given in-proc control. The send result is intentionally not inspected here. */
static void send_query_input(void* ctl) {
    vmsig_event query;

    memset(&query, 0, sizeof query);
    query.kind     = VMSIG_EV_CMD_QUERY_INPUT;
    query.source   = VMSIG_SRC_INPUT;
    query.dir      = VMSIG_DIR_DOWN;
    query.endpoint = EP;
    query.prio     = VMSIG_PRIO_HIGH;

    vmsig_inproc_send(ctl, &query);
}
/* Number of DOWN_DENIED audit records observed so far. */
static int g_down_denied = 0;

/* Audit sink: counts DOWN_DENIED records and ignores every other audit kind. */
static void audit_cb(void* ud, const vmsig_audit* a) {
    (void)ud;
    if (a->kind != VMSIG_AUDIT_DOWN_DENIED)
        return;
    g_down_denied++;
}
/* ---- held-query: CAP_INPUT -> INPUT_HELD (stub count=0); without cap -> DOWN_DENIED ---- */
/* Part 1: a control holding CAP_INPUT sends CMD_QUERY_INPUT and the loop runs until
 * one INPUT_HELD arrives; the stub input adapter is expected to report count=0.
 * Part 2: a control with only CAP_OBSERVE sends the same query; no INPUT_HELD may
 * arrive and exactly one DOWN_DENIED audit record must be emitted, without running
 * the loop again. */
static void test_held_query(void) {
    printf("test_held_query\n");
    g_down_denied = 0;
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    vmsig_core_set_audit(core, audit_cb, NULL);
    /* stub input (no vmctl). Registration is verified before the loop is started:
     * the run below only stops on an INPUT_HELD reply, so a failed registration is
     * reported here instead of risking a run that never stops.
     * NOTE(review): assumes a negative return means failure, as the other tests'
     * `>= 0` adapter checks imply. */
    int adapter_rc = vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, EP);
    CHECK(adapter_rc >= 0, "held: add input adapter");
    if (adapter_rc < 0) {
        vmsig_core_free(core);
        vmsig_ctx_free(ctx);
        return;
    }
    obs a; memset(&a, 0, sizeof a); a.core = core; a.stop_held = 1;
    add_ctl(core, &a, VMSIG_CAP_INPUT, 0xFFFFFFFFu);
    send_query_input(a.ctl);
    vmsig_core_run(core); /* pump_down -> INPUT_HELD -> pump_up */
    CHECK(a.held == 1, "held: CAP_INPUT receives INPUT_HELD");
    CHECK(a.last_held_count == 0, "held: stub without vmctl -> count=0");
    /* without CAP_INPUT (OBSERVE only): CMD_QUERY_INPUT rejected BEFORE ctx (synchronously). */
    obs b; memset(&b, 0, sizeof b); b.core = core;
    add_ctl(core, &b, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu);
    int before = g_down_denied;
    send_query_input(b.ctl);
    CHECK(b.held == 0, "held: without CAP_INPUT -> no INPUT_HELD");
    CHECK(g_down_denied == before + 1, "held: without CAP_INPUT -> DOWN_DENIED");
    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* Entry point: prints the suite banner, runs the held-query test, prints the
 * summary line, and exits with 1 if any CHECK failed. */
int main(void) {
    printf("test_inputobs\n");
    test_held_query();

    const char* verdict = g_fail ? "FAIL" : "PASS";
    printf("inputobs tests: %s\n", verdict);
    return g_fail != 0;
}
+525
View File
@@ -0,0 +1,525 @@
/* test_lease.c — arbitration layer (exclusive lease of destructive resources).
*
 * 13 cases from the contract (docs/plans/lease-arbitration.md §Tests). In-proc.
 * The suite runs without a live loop where synchronous interception suffices
 * (ACQUIRE/RELEASE/STATUS and the lease gate are synchronous in core_emit_down);
 * a live vmsig_core_run() is used to check finalization/fencing/reclaim, where the
 * input-adapter worker is involved (actuation on the worker thread, ACK via the loop).
*
* Observability of finalization ordering: the input-worker FIFO => ACT_ACK order ==
* submit order. A synthesized key-up has origin=0 (broadcast), the owner's CMD_INPUT
* is an addressed ACK. A shared monotonic log records the relative ordering. */
#include "vmsig.h"
#include "core_internal.h" /* core_request_drop: deterministic reclaim of an in-proc control */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/* Sticky failure flag for the whole binary: latched by CHECK, read by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when `cond` is false, print a FAIL line carrying `msg` and set
 * g_fail. It never aborts, so later checks in the same test still run. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* ---------- shared control infrastructure ---------- */
/* Per-control tally of lease UP replies, filled in by on_lease_ev(). */
typedef struct {
    int      granted;            /* LEASE_GRANTED events seen */
    int      denied;             /* LEASE_DENIED events seen */
    int      released;           /* LEASE_RELEASED events seen */
    int      revoked;            /* LEASE_REVOKED events seen */
    int      status;             /* LEASE_STATUS events seen */
    int      last_deny_reason;   /* `reason` of the most recent LEASE_DENIED */
    int      last_status_busy;   /* `busy` of the most recent LEASE_STATUS */
    uint32_t last_status_owner;  /* `owner_principal` of the most recent LEASE_STATUS */
} lease_counts;
/* State shared by all controls of one test: reply tallies, the actuation log and the
 * loop stop conditions. Every control reaches it through its cref user pointer. */
typedef struct {
void* core; /* the vmsig_core under test (stored untyped; cast back in pump_n) */
lease_counts cnt[4]; /* indexed by control */
/* log of ACT_ACK (actuations) in arrival order: tag = corr (0 = synthesized up) */
int ack_log[64];
int nack; /* number of valid entries in ack_log (logging stops at 64) */
int stop_after_acks; /* stop the loop after N actuations (0=not auto) */
int total_replies; /* count of all lease UP events (GRANTED/DENIED/RELEASED/STATUS/REVOKED) */
int stop_replies; /* stop the loop when total_replies>=this (0=not auto) */
/* phase orchestration for finalization/fencing (on the loop thread via on_ev) */
void* ctl_a; /* in-proc control handle of the initial owner */
void* ctl_b; /* in-proc control handle of the preempting control */
int phase; /* count of scenario phases passed */
int scenario; /* 0=none, 1=preempt-finalize, 2=in-flight-fence */
} lease_state;
/* User data attached to each in-proc control: the shared test state plus the
 * index of that control's slot in lease_state.cnt[]. */
typedef struct {
    lease_state* s;
    int          idx;
} cref;
/* Registry of allocated crefs (control user-data): inproc close() frees the
 * ctl itself but not the user-data => we free them centrally at the end (ASAN cleanliness). */
static cref* g_crefs[64];
static int g_ncrefs = 0;

/* Allocate a cref binding control slot `idx` to the shared state `s` and record
 * it in g_crefs so cref_free_all() can release it.
 *
 * Allocation failure aborts with a message instead of dereferencing NULL. If the
 * registry is already full the cref is still returned, as before, but a warning
 * is printed because that cref will not be released by cref_free_all(). */
static cref* cref_new(lease_state* s, int idx) {
    const int capacity = (int)(sizeof g_crefs / sizeof g_crefs[0]);

    cref* r = calloc(1, sizeof *r);
    if (r == NULL) {
        fprintf(stderr, "cref_new: out of memory\n");
        abort();
    }
    r->s = s; r->idx = idx;

    if (g_ncrefs < capacity) {
        g_crefs[g_ncrefs++] = r;
    } else {
        fprintf(stderr, "cref_new: registry full (%d entries); this cref will leak\n", capacity);
    }
    return r;
}
static void cref_free_all(void) {
for (int i = 0; i < g_ncrefs; i++) free(g_crefs[i]);
g_ncrefs = 0;
}
/* Forward declarations of the send helpers defined further down. on_ev() calls
 * both of them for phase orchestration, so they must be declared before it. */
static int acquire(void* ctl, uint32_t cls, uint32_t ep);
static int send_key(void* ctl, int code, int value, uint32_t corr, uint32_t ep);
/* Fold one UP event into the tallies of control `idx`. The five lease reply kinds
 * (GRANTED/DENIED/RELEASED/REVOKED/STATUS) bump their own counter and the shared
 * total_replies; DENIED and STATUS also remember the payload fields the tests
 * inspect. Any other kind leaves the tallies untouched. Afterwards the loop is
 * stopped if a reply target is armed and has been reached. */
static void on_lease_ev(lease_state* s, int idx, const vmsig_event* ev) {
    lease_counts* tally = &s->cnt[idx];
    int is_lease_reply = 1;

    if (ev->kind == VMSIG_EV_LEASE_GRANTED) {
        tally->granted++;
    } else if (ev->kind == VMSIG_EV_LEASE_DENIED) {
        const vmsig_lease_req* req = (const vmsig_lease_req*)ev->inln;
        tally->denied++;
        tally->last_deny_reason = (int)req->reason;
    } else if (ev->kind == VMSIG_EV_LEASE_RELEASED) {
        tally->released++;
    } else if (ev->kind == VMSIG_EV_LEASE_REVOKED) {
        tally->revoked++;
    } else if (ev->kind == VMSIG_EV_LEASE_STATUS) {
        const vmsig_lease_status* st = (const vmsig_lease_status*)ev->inln;
        tally->status++;
        tally->last_status_busy = (int)st->busy;
        tally->last_status_owner = st->owner_principal;
    } else {
        is_lease_reply = 0;
    }

    if (is_lease_reply)
        s->total_replies++;

    if (s->stop_replies && s->total_replies >= s->stop_replies)
        vmsig_core_stop(s->core);
}
/* UP-event callback shared by every in-proc control of this file. Lease replies
 * are tallied first; ACT_ACK events are then appended to the actuation log and
 * drive the scenario-1 orchestration and the ack-count stop condition.
 * Always returns 0. */
static int on_ev(void* user, const vmsig_event* ev) {
    cref* ref = user;
    lease_state* st = ref->s;

    on_lease_ev(st, ref->idx, ev);
    if (ev->kind != VMSIG_EV_ACT_ACK)
        return 0;

    if (st->nack < 64)
        st->ack_log[st->nack++] = (int)ev->corr;

    /* Phase orchestration (loop thread): B may only preempt once A's key-down
     * (corr=11) has actually been actuated, i.e. once its ACK has arrived. Per
     * the original note, the held-set is filled during pump_down, so preempting
     * earlier would act on an empty held-set. */
    const int a_down_acked = st->scenario == 1 && ev->corr == 11 && st->phase == 0;
    if (a_down_acked) {
        st->phase = 1;
        acquire(st->ctl_b, VMSIG_LEASE_INPUT, 0); /* B preempts AFTER A's actuation */
        send_key(st->ctl_b, 31, 1, 22, 0);
        send_key(st->ctl_b, 31, 0, 23, 0);
    }

    if (st->stop_after_acks && st->nack >= st->stop_after_acks)
        vmsig_core_stop(st->core);
    return 0;
}
/* ---------- DOWN send helpers ---------- */
/* Send a lease command of the given kind (ACQUIRE/RELEASE/LEASE_STATUS) for
 * resource class `cls` on endpoint `ep`. The request payload is copied into the
 * event's inline area. Returns the result of vmsig_inproc_send(). */
static int send_lease(void* ctl, vmsig_kind kind, uint32_t cls, uint32_t ep) {
    vmsig_lease_req request = { cls, 0 };
    vmsig_event cmd;

    memset(&cmd, 0, sizeof cmd);
    cmd.kind     = kind;
    cmd.source   = VMSIG_SRC_INPUT;
    cmd.dir      = VMSIG_DIR_DOWN;
    cmd.endpoint = ep;
    cmd.prio     = VMSIG_PRIO_HIGH;
    memcpy(cmd.inln, &request, sizeof request);

    return vmsig_inproc_send(ctl, &cmd);
}
/* Send CMD_ACQUIRE for lease class `cls` on endpoint `ep`; returns send_lease()'s result. */
static int acquire(void* ctl, uint32_t cls, uint32_t ep) {
return send_lease(ctl, VMSIG_EV_CMD_ACQUIRE, cls, ep);
}
/* Send CMD_RELEASE for lease class `cls` on endpoint `ep`; returns send_lease()'s result. */
static int release_(void* ctl, uint32_t cls, uint32_t ep) {
return send_lease(ctl, VMSIG_EV_CMD_RELEASE, cls, ep);
}
/* Send CMD_LEASE_STATUS for lease class `cls` on endpoint `ep`; returns send_lease()'s result. */
static int status(void* ctl, uint32_t cls, uint32_t ep) {
return send_lease(ctl, VMSIG_EV_CMD_LEASE_STATUS, cls, ep);
}
/* Send a HIGH-priority CMD_INPUT carrying a KEY input record (code + value) on
 * endpoint `ep`. `corr` tags the command so its ACT_ACK can be recognised in the
 * actuation log. Returns the result of vmsig_inproc_send(). */
static int send_key(void* ctl, int code, int value, uint32_t corr, uint32_t ep) {
    vmsig_input key;
    memset(&key, 0, sizeof key);
    key.kind  = VMSIG_INPUT_KEY;
    key.code  = code;
    key.value = value;

    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.kind     = VMSIG_EV_CMD_INPUT;
    cmd.source   = VMSIG_SRC_INPUT;
    cmd.dir      = VMSIG_DIR_DOWN;
    cmd.endpoint = ep;
    cmd.prio     = VMSIG_PRIO_HIGH;
    cmd.corr     = corr;
    memcpy(cmd.inln, &key, sizeof key);

    return vmsig_inproc_send(ctl, &cmd);
}
/* Send an URGENT CMD_LIFECYCLE on endpoint `ep`; the operation code travels in
 * the first inline byte and `corr` tags the command. Returns the result of
 * vmsig_inproc_send(). */
static int send_life(void* ctl, int op, uint32_t corr, uint32_t ep) {
    vmsig_event cmd;

    memset(&cmd, 0, sizeof cmd);
    cmd.kind     = VMSIG_EV_CMD_LIFECYCLE;
    cmd.source   = VMSIG_SRC_INPUT;
    cmd.dir      = VMSIG_DIR_DOWN;
    cmd.endpoint = ep;
    cmd.prio     = VMSIG_PRIO_URGENT;
    cmd.corr     = corr;
    cmd.inln[0]  = (uint8_t)op;

    return vmsig_inproc_send(ctl, &cmd);
}
/* Run the core loop until `n` further lease replies have been tallied. The lease
 * DOWN commands are intercepted synchronously, but their UP replies are delivered
 * through the context, so a live loop is needed to observe them. The reply target
 * is disarmed again before returning. */
static void pump_n(lease_state* s, int n) {
    vmsig_core* core = (vmsig_core*)s->core;
    const int target = s->total_replies + n;

    core->stopping = 0;          /* white-box: lets the same core be run again between phases */
    s->stop_replies = target;
    vmsig_core_run(core);
    s->stop_replies = 0;
}
/* Create an in-proc control bound to counter slot `idx` of `s` and register it
 * with the core. The subscription accepts every source and every priority from
 * BULK upward, with a zero endpoint mask (all VMs). The grant covers `epmask`,
 * every source, `cap` plus CAP_OBSERVE, and the given arbitration priority.
 * Returns the in-proc control handle. */
static void* add_ctl(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
                     uint32_t arb_prio, uint64_t epmask) {
    cref* ref = cref_new(s, idx);

    vmsig_inproc_cfg config;
    memset(&config, 0, sizeof config);
    config.on_event          = on_ev;
    config.user              = ref;
    config.sub.source_mask   = 0xFFFFFFFFu;
    config.sub.prio_min      = VMSIG_PRIO_BULK;
    config.sub.endpoint_mask = 0; /* all VMs */
    void* handle = vmsig_inproc_control_new(&config);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = epmask;
    grant.source_mask   = 0xFFFFFFFFu;
    grant.cap_mask      = cap | VMSIG_CAP_OBSERVE; /* OBSERVE => sees the broadcast finalization ACT_ACK */
    grant.arb_prio      = arb_prio;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), handle, &grant);
    return handle;
}
/* ===== Synchronous test group (no loop): ACQUIRE/RELEASE/STATUS interception ===== */
/* Walks contract cases 1, 2, 3, 7, 8, 11, 6 and 12 (in that order) against a single
 * core with no adapter attached. Four long-lived controls are created up front; the
 * extra controls created inside the numbered sections reuse counter slots 1 and 2 of
 * s.cnt[], which is why those sections compare against a `before` snapshot rather
 * than an absolute count. The sections depend on the lease state left behind by the
 * previous ones, so their order matters. */
static void test_sync_group(void) {
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
lease_state s; memset(&s, 0, sizeof s); s.core = core;
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
void* Lo= add_ctl(core, &s, 2, VMSIG_CAP_INPUT, 5, 1ull << 0); /* low priority */
void* X = add_ctl(core, &s, 3, VMSIG_CAP_INPUT, 10, 1ull << 1); /* grant on ep1, not ep0 */
/* Lease DOWN intercepts are synchronous, but the UP reply is delivered via ctx => after
 * each lease request we pump the loop until the corresponding UP arrives. */
/* 1) acquire -> GRANTED; the owner's CMD_INPUT passes the gate (==0). */
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[0].granted == 1, "1: A gets GRANTED");
CHECK(send_key(A, 30, 1, 1, 0) == 0, "1: owner's CMD_INPUT passes the gate");
send_key(A, 30, 0, 2, 0); /* release, so as not to leave held for the next tests */
/* 2) gate: non-owner B -> CMD_INPUT dropped (-1). */
CHECK(send_key(B, 30, 1, 3, 0) == -1, "2: non-owner: CMD_INPUT dropped by the gate");
/* 3) equal priorities: a contender of the same prio -> DENIED{HELD}. */
{
/* C shares counter slot 1 with B, hence the `before` snapshot. */
void* C = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 10, 1ull << 0); (void)C;
int before = s.cnt[1].denied;
acquire(C, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[1].denied == before + 1, "3: equal prio -> DENIED");
CHECK(s.cnt[1].last_deny_reason == VMSIG_LEASE_DENY_HELD, "3: reason=HELD");
}
/* 7) without cap -> NOCAP; foreign endpoint -> NOGRANT. */
{
/* NC shares counter slot 2 with Lo. */
void* NC = add_ctl(core, &s, 2, 0u /* without INPUT */, 10, 1ull << 0);
int before = s.cnt[2].denied;
acquire(NC, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[2].denied == before + 1, "7: without cap -> DENIED");
CHECK(s.cnt[2].last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "7: reason=NOCAP");
}
{
int before = s.cnt[3].denied;
acquire(X, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* X has a grant on ep1, requests ep0 */
CHECK(s.cnt[3].denied == before + 1, "7: foreign endpoint -> DENIED");
CHECK(s.cnt[3].last_deny_reason == VMSIG_LEASE_DENY_NOGRANT, "7: reason=NOGRANT");
}
/* 8) per-endpoint / per-class independence. */
{
/* P0 shares counter slot 1 with B and C. */
void* P0 = add_ctl(core, &s, 1, VMSIG_CAP_POWER, 50, 1ull << 0);
int gb = s.cnt[1].granted, gx = s.cnt[3].granted;
acquire(X, VMSIG_LEASE_INPUT, 1); pump_n(&s, 1); /* X on its own ep1 — free */
CHECK(s.cnt[3].granted == gx + 1, "8: X owns INPUT@ep1 independently");
acquire(P0, VMSIG_LEASE_POWER, 0); pump_n(&s, 1); /* POWER@ep0 is free, even though INPUT@ep0 is held by A */
CHECK(s.cnt[1].granted == gb + 1, "8: POWER@ep0 independent of INPUT@ep0");
}
/* 11) STATUS: busy=1 while A holds INPUT@ep0. */
{
int before = s.cnt[1].status;
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[1].status == before + 1, "11: STATUS replies");
CHECK(s.cnt[1].last_status_busy == 1, "11: busy=1 while A owns");
}
/* 6) release -> reacquire: A releases, Lo (low prio) now gets it. */
{
int rb = s.cnt[0].released;
release_(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[0].released == rb + 1, "6: A gets RELEASED");
int gb = s.cnt[2].granted;
acquire(Lo, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* free -> even low prio takes it */
CHECK(s.cnt[2].granted == gb + 1, "6: reacquire after release succeeds");
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[1].last_status_busy == 1, "11: busy=1 after reacquire");
release_(Lo, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[1].last_status_busy == 0, "11: busy=0 after releasing all");
}
/* 12) safe commands are NOT gated by the lease (nobody holds INPUT@ep0). */
{
void* SAFE = add_ctl(core, &s, 1, VMSIG_CAP_LIFECYCLE | VMSIG_CAP_INPUT, 1, 1ull << 0);
/* PAUSE = safe lifecycle: lease_class_for_down -> -1 => not gated. */
CHECK(send_life(SAFE, VMSIG_LIFE_PAUSE, 90, 0) == 0,
"12: safe lifecycle (PAUSE) is not gated by the lease");
}
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ===== 13) policy seam: custom "always DENY" ===== */
/* Arbitration policy callback that ignores all of its arguments (user data, endpoint,
 * lease class, and both grants) and unconditionally answers VMSIG_ARB_DENY.
 * Installed through vmsig_core_set_arb_policy() by test_policy_group(). */
static vmsig_arb_decision policy_always_deny(void* ud, uint32_t ep, uint32_t cls,
const vmsig_grant* inc, const vmsig_grant* con) {
(void)ud; (void)ep; (void)cls; (void)inc; (void)con;
return VMSIG_ARB_DENY;
}
/* Case 13: with the always-DENY policy installed, a contender with a much higher
 * arbitration priority must be denied and the current owner must not be revoked. */
static void test_policy_group(void) {
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    vmsig_core_set_arb_policy(core, policy_always_deny, NULL);

    lease_state s;
    memset(&s, 0, sizeof s);
    s.core = core;

    void* owner      = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10,  1ull << 0);
    void* challenger = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 999, 1ull << 0); /* highest prio */

    acquire(owner, VMSIG_LEASE_INPUT, 0);
    pump_n(&s, 1);
    CHECK(s.cnt[0].granted == 1, "13: A owns");

    /* high prio, but policy=DENY */
    acquire(challenger, VMSIG_LEASE_INPUT, 0);
    pump_n(&s, 1);
    CHECK(s.cnt[1].granted == 0, "13: custom DENY => high prio does NOT preempt");
    CHECK(s.cnt[1].denied == 1, "13: B got DENIED");
    CHECK(s.cnt[0].revoked == 0, "13: A not revoked");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* ===== 4) preemption: high prio takes the lease away (REVOKED to the old, GRANTED to the new).
 * signaling does NOT release what is held (rolling back finalization is the control's decision). ===== */
/* A (prio 10) acquires and presses a key (corr=11). When that key's ACK arrives,
 * on_ev() (scenario 1) makes B (prio 100) acquire and send its own key down/up
 * (corr 22/23). The loop stops after three actuations. */
static void test_preempt(void) {
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);

    lease_state s;
    memset(&s, 0, sizeof s);
    s.core = core;

    void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10,  1ull << 0);
    void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
    vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0); /* stub input */

    s.ctl_a = A;
    s.ctl_b = B;
    s.scenario = 1;

    acquire(A, VMSIG_LEASE_INPUT, 0);
    send_key(A, 30, 1, 11, 0); /* A injects a KEY (corr=11) */

    /* B preempts from on_ev AFTER ack corr=11. There is NO finalization => wait for 3 actuations:
     * A-down(11), B-down(22), B-up(23). */
    s.stop_after_acks = 3;
    vmsig_core_run(core);

    CHECK(s.cnt[1].granted == 1, "4: B gets GRANTED on preemption");
    CHECK(s.cnt[0].revoked == 1, "4: A gets LEASE_REVOKED");

    /* Look for B's key-down (corr=22) anywhere in the actuation log. */
    int at = 0;
    while (at < s.nack && s.ack_log[at] != 22)
        at++;
    const int saw22 = at < s.nack;
    CHECK(saw22, "4: the new owner's (B) input is actuated after preemption");

    /* in-flight fencing of the ex-owner — covered separately in test_inflight_fence. */
    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* ===== reacquire by the owner with a key held down does NOT synthesize an up (self-preemption) ===== */
/* A acquires, holds key 30 down, acquires again, then presses key 31 and releases
 * both. After four actuations the test expects two GRANTED replies and no entry
 * with corr=0 (the tag of a synthesized key-up) in the actuation log. */
static void test_self_reacquire(void) {
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);

    lease_state s;
    memset(&s, 0, sizeof s);
    s.core = core;

    void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
    vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0);

    acquire(A, VMSIG_LEASE_INPUT, 0);
    send_key(A, 30, 1, 11, 0);        /* hold down */
    acquire(A, VMSIG_LEASE_INPUT, 0); /* reacquire by the same owner -> idempotent */
    send_key(A, 31, 1, 22, 0);        /* another key */
    send_key(A, 30, 0, 33, 0);
    send_key(A, 31, 0, 44, 0);

    s.stop_after_acks = 4; /* there must be NO synthesized up (corr=0) */
    vmsig_core_run(core);

    CHECK(s.cnt[0].granted == 2, "self: repeated ACQUIRE -> another GRANTED");

    /* Scan the actuation log for a corr=0 entry. */
    int at = 0;
    while (at < s.nack && s.ack_log[at] != 0)
        at++;
    const int saw_zero = at < s.nack;
    CHECK(!saw_zero, "self: reacquire by the owner does NOT synthesize a key-up");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* ===== 9) reclaim-on-death: the slot is freed, RECLAIMED, B GRANTED =====
 * We model death via core_request_drop(id): core_reap -> core_lease_reap_control
 * frees the owner's slot (held is NOT finalized — that's a vmctl write / control's decision). */
static int g_reclaimed = 0;    /* LEASE_RECLAIMED audit records seen */
static int g_lease_denied = 0; /* LEASE_DENIED audit records seen */

/* Audit sink that only counts: one counter for LEASE_RECLAIMED records, one for
 * LEASE_DENIED records. The user-data pointer is unused. */
static void audit_cb(void* ud, const vmsig_audit* a) {
    (void)ud;
    g_reclaimed    += (a->kind == VMSIG_AUDIT_LEASE_RECLAIMED);
    g_lease_denied += (a->kind == VMSIG_AUDIT_LEASE_DENIED);
}
/* Audit sink for the reclaim test: each LEASE_RECLAIMED record is counted and,
 * when `ud` is non-NULL, it is treated as the core and the loop is stopped so
 * the reap run ends deterministically. Other audit kinds are ignored. */
static void reclaim_audit_cb(void* ud, const vmsig_audit* a) {
    if (a->kind != VMSIG_AUDIT_LEASE_RECLAIMED)
        return;

    g_reclaimed++;
    if (ud != NULL)
        vmsig_core_stop((vmsig_core*)ud);
}
/* Same setup as add_ctl(), but additionally reports the value returned by
 * vmsig_core_add_control() (the control's id) through `out_id` when that pointer
 * is non-NULL. Returns the in-proc control handle. */
static void* add_ctl_id(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
                        uint32_t arb_prio, uint64_t epmask, int* out_id) {
    cref* ref = cref_new(s, idx);

    vmsig_inproc_cfg config;
    memset(&config, 0, sizeof config);
    config.on_event        = on_ev;
    config.user            = ref;
    config.sub.source_mask = 0xFFFFFFFFu;
    config.sub.prio_min    = VMSIG_PRIO_BULK;
    void* handle = vmsig_inproc_control_new(&config);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = epmask;
    grant.source_mask   = 0xFFFFFFFFu;
    grant.cap_mask      = cap | VMSIG_CAP_OBSERVE;
    grant.arb_prio      = arb_prio;

    const int control_id = vmsig_core_add_control(core, vmsig_inproc_control_ops(), handle, &grant);
    if (out_id != NULL)
        *out_id = control_id;
    return handle;
}
/* Case 9: A (prio 10) owns INPUT@ep0 and is then dropped via core_request_drop(); the
 * loop is run until the audit callback sees LEASE_RECLAIMED and stops it. Afterwards
 * B, whose priority is lower than A's, must be able to acquire the lease. */
static void test_reclaim(void) {
g_reclaimed = 0;
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
vmsig_core_set_audit(core, reclaim_audit_cb, core); /* RECLAIMED -> stop the loop */
lease_state s; memset(&s, 0, sizeof s); s.core = core;
int a_id = -1;
void* A = add_ctl_id(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0, &a_id);
void* B = add_ctl_id(core, &s, 1, VMSIG_CAP_INPUT, 5, 1ull << 0, NULL); /* LOW prio */
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[0].granted == 1, "9: A owns");
/* A's death: reap frees its slot (RECLAIMED); the audit-cb stops the loop. */
core_request_drop(core, a_id);
core->stopping = 0; /* white-box: reuse the loop (like pump_n) */
vmsig_core_run(core);
CHECK(g_reclaimed == 1, "9: audit RECLAIMED on owner death");
/* the slot is free: B (low prio) takes it without preemption */
acquire(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[1].granted == 1, "9: B GRANTED after reclaim (slot is free)");
(void)A; (void)B;
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ===== 10) in-flight fencing: losing the lease BEFORE pump_down -> drop ===== */
/* A owns it, queues CMD_INPUT into ctx (via emit_down -> ctx), then B preempts
 * SYNCHRONOUSLY (acquire does not go through ctx). By the time pump_down reaches A's CMD_INPUT,
 * the owner is already B => the fence drops A's command (there must be NO actuation of A). */
static void test_inflight_fence(void) {
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);

    lease_state s;
    memset(&s, 0, sizeof s);
    s.core = core;

    void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10,  1ull << 0);
    void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
    vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0);

    acquire(A, VMSIG_LEASE_INPUT, 0);
    /* A queues a command into ctx (corr=55) — it passes the gate (A owns it), lands in the DOWN queue */
    send_key(A, 30, 1, 55, 0);
    /* B preempts SYNCHRONOUSLY (before the loop has called pump_down) */
    acquire(B, VMSIG_LEASE_INPUT, 0);
    /* B sends its own command (corr=66) */
    send_key(B, 31, 1, 66, 0);
    send_key(B, 31, 0, 67, 0);

    /* Expected actuations: finalization on preemption (corr=0, but A held nothing by the
     * moment of preemption — A's down is still in ctx, the held-set is empty => finalize=0 ups),
     * then B's 66 and 67. A's 55 MUST be dropped by the fence (no corr=55). */
    s.stop_after_acks = 2; /* B's down + up */
    vmsig_core_run(core);

    /* Scan the actuation log for A's fenced command. */
    int at = 0;
    while (at < s.nack && s.ack_log[at] != 55)
        at++;
    const int saw55 = at < s.nack;
    CHECK(!saw55, "10: in-flight ex-owner's command dropped by the fence");
    CHECK(s.cnt[1].granted == 1, "10: B owns after preemption");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* ===== lease source gate + audit on acquire-deny =====
 * Regression: ACQUIRE is intercepted BEFORE grant_allows_down, so source_mask and
 * audit of the deny paths must be checked/emitted IN THE lease layer ITSELF (otherwise: holding
 * someone else's slot bypassing source_mask = DoS; audit-invisible probing of caps/endpoints). */
/* add_ctl() variant whose grant carries a caller-chosen source mask and a fixed
 * arbitration priority of 10. The subscription itself still accepts every source.
 * Returns the in-proc control handle. */
static void* add_ctl_src(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
                         uint64_t epmask, uint32_t source_mask) {
    cref* ref = cref_new(s, idx);

    vmsig_inproc_cfg config;
    memset(&config, 0, sizeof config);
    config.on_event        = on_ev;
    config.user            = ref;
    config.sub.source_mask = 0xFFFFFFFFu;
    void* handle = vmsig_inproc_control_new(&config);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = epmask;
    grant.source_mask   = source_mask;
    grant.cap_mask      = cap | VMSIG_CAP_OBSERVE;
    grant.arb_prio      = 10;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), handle, &grant);
    return handle;
}
/* Checks three acquire attempts against the lease layer's own gating and auditing:
 *   NS — has CAP_INPUT but a grant source mask without SRC_INPUT: DENIED{NOGRANT} + audit;
 *   NC — has no capability: DENIED{NOCAP} + audit;
 *   OK — has capability and source: GRANTED, with no LEASE_DENIED audit record.
 * The audit counter is snapshotted before each attempt so the checks are relative. */
static void test_audit_and_source(void) {
g_lease_denied = 0;
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
vmsig_core_set_audit(core, audit_cb, NULL);
lease_state s; memset(&s, 0, sizeof s); s.core = core;
/* cap=INPUT, but source_mask WITHOUT SRC_INPUT: the INPUT lease cannot be acquired (DoS bypass). */
void* NS = add_ctl_src(core, &s, 0, VMSIG_CAP_INPUT, 1ull << 0, ~(1u << VMSIG_SRC_INPUT));
int before = g_lease_denied;
acquire(NS, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[0].denied == 1, "src: acquire without the SRC_INPUT bit -> DENIED");
CHECK(s.cnt[0].last_deny_reason == VMSIG_LEASE_DENY_NOGRANT, "src: reason=NOGRANT");
CHECK(g_lease_denied == before + 1, "audit: source-deny emits LEASE_DENIED");
/* without cap: NOCAP + audit (previously acquire-deny was invisible to the audit). */
void* NC = add_ctl_src(core, &s, 1, 0u, 1ull << 0, 0xFFFFFFFFu);
before = g_lease_denied;
acquire(NC, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[1].last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "src: without cap -> NOCAP");
CHECK(g_lease_denied == before + 1, "audit: NOCAP-deny emits LEASE_DENIED");
/* control case: cap+source -> GRANTED, without a spurious deny audit. */
void* OK = add_ctl_src(core, &s, 2, VMSIG_CAP_INPUT, 1ull << 0, 0xFFFFFFFFu);
before = g_lease_denied;
acquire(OK, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
CHECK(s.cnt[2].granted == 1, "src: cap+source -> GRANTED");
CHECK(g_lease_denied == before, "audit: successful acquire does not emit a deny");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
int main(void) {
printf("test_lease\n");
test_sync_group();
test_policy_group();
test_preempt();
test_self_reacquire();
test_inflight_fence();
test_reclaim();
test_audit_and_source();
cref_free_all();
printf("lease tests: %s\n", g_fail ? "FAIL" : "PASS");
return g_fail ? 1 : 0;
}
+339
View File
@@ -0,0 +1,339 @@
/* test_memctx.c — seam for the coherent address-space context (MEMCTX).
*
* 1) multicast + RO-fd + decode: a holder with CAP_MEMCTX receives MEMCTX, kcr3,
* epoch=0, nseg=1; the received fd mmaps PROT_READ, while PROT_WRITE -> EACCES
* (RO physically enforced); the vmsig_memctx_segs helper reconstructs segs[];
* a co-resident holder without CAP_MEMCTX does NOT receive it (deny);
* 2) epoch: a synthetic destructive VM_LIFECYCLE -> MEMCTX_INVALIDATED ->
* re-multicast at epoch+1 with a NEW kcr3;
* 3) retain/replay: a LATE subscriber (attached AFTER publication) receives
* the retained MEMCTX with a valid re-shared RO-fd (synchronously on add_control);
* 4) multi-VM: two endpoints, isolation (a VM holder does not see another's context);
* 5) socket E2E: MEMCTX travels as a vmsig_wire frame + RO-fd in cmsg (SCM_RIGHTS), the
* client mmaps RO via the received fd.
* In-proc (except 5) and under ASAN. SISC: not a single control name in the adapter. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "vmsig_socket.h" /* vmsig_wire, vmsig_socket_attach */
#include "core_internal.h" /* core_emit_up (synthetic lifecycle injection) */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <time.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stddef.h>
/* Sticky failure flag for the whole binary: latched by CHECK. */
static int g_fail = 0;
/* CHECK(cond, msg): when `cond` is false, print a FAIL line carrying `msg` and set
 * g_fail. It never aborts, so later checks in the same test still run. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* ===== in-proc holder ===== */
/* Observer state of one in-proc MEMCTX holder; passed as the control's user pointer
 * to h_on_ev() and h_on_memctx(). */
typedef struct holder holder;
struct holder {
    vmsig_core* core;         /* core to stop when this holder's stop condition is met */
    holder*     peer;         /* multi-VM: stop when both are ready (or NULL) */
    int         is_driver;    /* non-zero: this holder is allowed to stop the loop */
    uint32_t    expect_ep;    /* endpoint every MEMCTX delivery is expected to carry */
    int         memctx;       /* MEMCTX deliveries received */
    int         invalidated;  /* MEMCTX_INVALIDATED events received */
    int         ticks;        /* VM_LIFECYCLE events received (also the failsafe counter) */
    int         bad_ep;       /* MEMCTX deliveries whose endpoint differed from expect_ep */
    uint64_t    last_kcr3;    /* kcr3 of the most recent MEMCTX */
    uint64_t    kcr3_e0;      /* kcr3 seen in a MEMCTX with epoch 0 */
    uint32_t    last_epoch;   /* epoch of the most recent MEMCTX */
    uint32_t    last_nseg;    /* segment count reported by vmsig_memctx_segs() */
    int         ro_ok;        /* set when a PROT_READ mapping of the received fd succeeded */
    int         rw_eacces;    /* set when a PROT_READ|PROT_WRITE mapping of the fd failed */
    int         seg0_ok;      /* set when segs[0] starts at gpa 0 and spans `low` bytes */
    int         inject_reset; /* request: inject a RESET lifecycle on the first epoch-0 MEMCTX */
    int         injected;     /* latch: the injection has already been done */
    int         stop_epoch;   /* >= 0: stop once last_epoch >= stop_epoch and a MEMCTX was seen;
                               * negative: fall back to the peer / first-MEMCTX stop rule */
};
/* Stop the core loop if `h` is a driver and its stop condition holds:
 *   - failsafe: more than 30 lifecycle ticks have been counted; otherwise
 *   - at least one MEMCTX must have arrived, and then
 *       stop_epoch >= 0 : the last seen epoch has reached stop_epoch;
 *       peer set        : the peer has received a MEMCTX as well;
 *       neither         : the first MEMCTX is enough.
 * Non-driver holders never stop the loop. */
static void maybe_stop(holder* h) {
    if (!h->is_driver)
        return;

    int done;
    if (h->ticks > 30) {
        done = 1;                                   /* failsafe (vmhost ticks) */
    } else if (h->memctx < 1) {
        done = 0;                                   /* every remaining rule needs a MEMCTX */
    } else if (h->stop_epoch >= 0) {
        done = (int)h->last_epoch >= h->stop_epoch;
    } else if (h->peer) {
        done = h->peer->memctx >= 1;
    } else {
        done = 1;
    }

    if (done)
        vmsig_core_stop(h->core);
}
/* Generic UP-event callback of a holder: counts VM_LIFECYCLE events as ticks and
 * MEMCTX_INVALIDATED events as invalidations, then re-evaluates the stop
 * condition. Always returns 0. */
static int h_on_ev(void* u, const vmsig_event* ev) {
    holder* self = u;

    switch (ev->kind) {
    case VMSIG_EV_VM_LIFECYCLE:
        self->ticks++;
        break;
    case VMSIG_EV_MEMCTX_INVALIDATED:
        self->invalidated++;
        break;
    default:
        break;
    }

    maybe_stop(self);
    return 0;
}
/* MEMCTX callback: records what the delivered context looks like and probes the fd.
 *
 *   u  - the holder registered as the control's user pointer
 *   ev - the MEMCTX event; its inline payload starts with a vmsig_memctx
 *   fd - RAM-region fd that accompanies the event (negative when absent)
 *
 * The payload is copied out of ev->inln with memcpy rather than read through a
 * casted pointer: inln is a byte buffer, so a cast would rely on its alignment and
 * break strict-aliasing rules. The fd is only mapped and unmapped here; it is not
 * closed (NOTE(review): fd ownership is not visible from this file - confirm).
 * Always returns 0. */
static int h_on_memctx(void* u, const vmsig_event* ev, int fd) {
    holder* h = u;
    vmsig_memctx m;
    memcpy(&m, ev->inln, sizeof m);

    h->memctx++;
    if (ev->endpoint != h->expect_ep) h->bad_ep++;
    h->last_kcr3 = m.kcr3;
    h->last_epoch = m.epoch;
    if (m.epoch == 0) h->kcr3_e0 = m.kcr3;

    uint32_t n = 0;
    const vmsig_memseg* segs = vmsig_memctx_segs(ev, &n);
    h->last_nseg = n;
    if (segs && n >= 1 && segs[0].gpa == 0 && segs[0].len == m.low) h->seg0_ok = 1;

    if (fd >= 0 && m.low) {
        /* A read-only mapping must succeed ... */
        void* ro = mmap(NULL, (size_t)m.low, PROT_READ, MAP_SHARED, fd, 0);
        if (ro != MAP_FAILED) { h->ro_ok = 1; munmap(ro, (size_t)m.low); }
        /* ... and a writable one must fail. Any failure counts; errno is not checked. */
        void* rw = mmap(NULL, (size_t)m.low, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (rw == MAP_FAILED) h->rw_eacces = 1; else munmap(rw, (size_t)m.low);
    }

    /* epoch test: on the first context (epoch0) inject a destructive transition. */
    if (h->inject_reset && !h->injected && m.epoch == 0) {
        h->injected = 1;
        vmsig_event lc; memset(&lc, 0, sizeof lc);
        lc.kind = VMSIG_EV_VM_LIFECYCLE; lc.source = VMSIG_SRC_VMHOST; lc.dir = VMSIG_DIR_UP;
        lc.prio = VMSIG_PRIO_URGENT; lc.endpoint = h->expect_ep; lc.origin = 0;
        vmsig_vm_state vs = { VMSIG_VM_RESET, 0 };
        memcpy(lc.inln, &vs, sizeof vs);
        core_emit_up(h->core, &lc); /* core: epoch++ + invalidate + re-multicast */
    }

    maybe_stop(h);
    return 0;
}
/* Register holder `h` with `core` as an in-proc control.
 * The control gets h_on_ev / h_on_memctx as callbacks with `h` as user pointer, and a
 * grant for principal 5 carrying the given capability, source and endpoint masks. */
static void add_holder(vmsig_core* core, holder* h, uint32_t cap,
                       uint32_t source_mask, uint64_t endpoint_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user = h;
    cfg.on_event = h_on_ev;
    cfg.on_memctx = h_on_memctx;
    void* control = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.principal = 5;
    grant.cap_mask = cap;
    grant.source_mask = source_mask;
    grant.endpoint_mask = endpoint_mask;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), control, &grant);
}
/* ---- 1. multicast + RO-fd + decode + deny ---------------------------------- */
static void test_multicast(void) {
printf("test_multicast\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder good; memset(&good, 0, sizeof good);
good.core = core; good.is_driver = 1; good.expect_ep = 0; good.stop_epoch = -1;
holder deny; memset(&deny, 0, sizeof deny);
deny.core = core; deny.expect_ep = 0; deny.stop_epoch = -1;
add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
add_holder(core, &deny, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); /* no MEMCTX */
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
vmsig_core_run(core);
CHECK(good.memctx >= 1, "GOOD received MEMCTX");
CHECK(good.last_kcr3 != 0, "kcr3 nonzero");
CHECK(good.last_epoch == 0, "first publication is epoch 0");
CHECK(good.last_nseg == 1, "nseg=1 (single-low)");
CHECK(good.seg0_ok, "segs[] decoded by the helper (gpa=0,len=low)");
CHECK(good.ro_ok, "RO-fd: mmap(PROT_READ) ok");
CHECK(good.rw_eacces, "RO-fd: mmap(PROT_WRITE) -> EACCES (RO enforced)");
CHECK(good.bad_ep == 0, "delivery endpoint is correct");
CHECK(deny.memctx == 0, "deny without CAP_MEMCTX does NOT receive MEMCTX");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 2. epoch: invalidation + re-multicast epoch+1 ------------------------- */
static void test_epoch(void) {
printf("test_epoch\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder h; memset(&h, 0, sizeof h);
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.inject_reset = 1; h.stop_epoch = 1;
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
vmsig_core_run(core);
CHECK(h.memctx >= 2, "contexts for epochs 0 and 1 received");
CHECK(h.invalidated >= 1, "MEMCTX_INVALIDATED delivered on epoch change");
CHECK(h.last_epoch == 1, "re-multicast at epoch+1");
CHECK(h.kcr3_e0 != 0 && h.last_kcr3 != 0 && h.last_kcr3 != h.kcr3_e0,
"new kcr3 after re-bootstrap (epoch 1 kcr3 != epoch 0)");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 3. retain/replay to a late subscriber --------------------------------- */
static void test_retain(void) {
printf("test_retain\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder a; memset(&a, 0, sizeof a);
a.core = core; a.is_driver = 1; a.expect_ep = 0; a.stop_epoch = -1;
add_holder(core, &a, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
vmsig_core_run(core); /* A receives MEMCTX, loop stopped */
CHECK(a.memctx >= 1, "early subscriber A received MEMCTX");
/* LATE subscriber B: attaches AFTER publication. Replay of the retained context
* happens SYNCHRONOUSLY in add_control (cell valid) — without a second loop run. */
holder b; memset(&b, 0, sizeof b);
b.core = core; b.expect_ep = 0; b.stop_epoch = -1;
add_holder(core, &b, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0);
CHECK(b.memctx >= 1, "late subscriber B received the retained MEMCTX (replay)");
CHECK(b.last_kcr3 != 0, "B: kcr3 nonzero in the replay");
CHECK(b.ro_ok, "B: re-shared RO-fd mmaps PROT_READ");
CHECK(b.rw_eacces, "B: re-shared fd is RO (PROT_WRITE -> EACCES)");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 4. multi-VM: endpoint isolation --------------------------------------- */
static void test_multivm(void) {
printf("test_multivm\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
holder h0; memset(&h0, 0, sizeof h0);
holder h1; memset(&h1, 0, sizeof h1);
h0.core = core; h0.is_driver = 1; h0.expect_ep = 0; h0.stop_epoch = -1; h0.peer = &h1;
h1.core = core; h1.is_driver = 1; h1.expect_ep = 1; h1.stop_epoch = -1; h1.peer = &h0;
/* each holder is scoped to its OWN endpoint (+OBSERVE for watchdog lifecycle ticks on ep0). */
add_holder(core, &h0, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
add_holder(core, &h1, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 1);
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost ep0 (watchdog)");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx ep0");
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 1) >= 0, "add memctx ep1");
vmsig_core_run(core);
CHECK(h0.memctx >= 1 && h0.bad_ep == 0, "VM0 receives ONLY its own context (ep0)");
CHECK(h1.memctx >= 1 && h1.bad_ep == 0, "VM1 receives ONLY its own context (ep1)");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 5. socket end-to-end: MEMCTX frame + fd in cmsg ----------------------- */
/* Endpoint used by the socket end-to-end test. */
#define SOCK_EP 3u
/* Admission policy for the socket listener: every peer gets a grant for endpoint
 * SOCK_EP with all sources and CAP_MEMCTX, with its uid as principal. pid and ud
 * are unused. */
static vmsig_grant sock_policy(uint32_t uid, uint32_t pid, void* ud) {
    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.cap_mask = VMSIG_CAP_MEMCTX;
    grant.source_mask = 0xFFFFFFFFu;
    grant.endpoint_mask = 1ull << SOCK_EP;
    grant.principal = uid;
    (void)pid;
    (void)ud;
    return grant;
}
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
/* Connect a stream socket to an abstract-namespace unix socket.
 *
 *   name - listener name whose first character is a placeholder (the tests pass
 *          "@..."); it is replaced by the leading NUL byte of the abstract address,
 *          and the remaining characters form the address.
 *
 * Returns the connected fd, or -1 on any failure. A NULL or empty name, or one that
 * does not fit into sun_path, is rejected up front: an empty name would otherwise
 * make `n - 1` wrap around in the copy below, and a long one would overflow sun_path. */
static int connect_abstract(const char* name) {
    if (name == NULL) return -1;

    struct sockaddr_un a;
    size_t n = strlen(name);
    if (n < 1 || n > sizeof a.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0;                         /* abstract namespace marker */
    memcpy(a.sun_path + 1, name + 1, n - 1);   /* NUL + (n - 1) name bytes = n bytes */
    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);

    if (connect(fd, (struct sockaddr*)&a, alen) < 0) { close(fd); return -1; }
    return fd;
}
/* Read exactly one vmsig_wire frame from `fd` into *w, looping over short reads.
 * An fd passed alongside the frame (SCM_RIGHTS) is stored in *out_fd; control data
 * is only requested on the first recvmsg, and *out_fd is -1 when none arrived.
 *
 * Returns 1 when a full frame was read, 0 when the first recvmsg returned <= 0
 * (EOF, timeout or error before any byte), and -1 when the stream failed in the
 * middle of a frame. On -1 a descriptor that was already received is closed and
 * *out_fd is reset to -1, so a failed call never hands out (or leaks) an fd. */
static int recv_wire(int fd, vmsig_wire* w, int* out_fd) {
    *out_fd = -1;
    struct iovec iov = { .iov_base = w, .iov_len = sizeof *w };
    union { char buf[CMSG_SPACE(sizeof(int))]; struct cmsghdr a; } cm;
    memset(&cm, 0, sizeof cm);
    struct msghdr mh; memset(&mh, 0, sizeof mh);
    mh.msg_iov = &iov; mh.msg_iovlen = 1;
    mh.msg_control = cm.buf; mh.msg_controllen = sizeof cm.buf;

    size_t got = 0;
    while (got < sizeof *w) {
        iov.iov_base = (char*)w + got; iov.iov_len = sizeof *w - got;
        ssize_t n = recvmsg(fd, &mh, MSG_CMSG_CLOEXEC);
        if (n <= 0) {
            if (got == 0) return 0;
            if (*out_fd >= 0) { close(*out_fd); *out_fd = -1; }
            return -1;
        }
        for (struct cmsghdr* c = CMSG_FIRSTHDR(&mh); c; c = CMSG_NXTHDR(&mh, c)) {
            if (c->cmsg_level != SOL_SOCKET || c->cmsg_type != SCM_RIGHTS) continue;
            int passed;
            memcpy(&passed, CMSG_DATA(c), sizeof passed);
            if (*out_fd >= 0) close(*out_fd); /* keep only the latest, never leak */
            *out_fd = passed;
        }
        got += (size_t)n;
        mh.msg_control = NULL; mh.msg_controllen = 0; /* fd only on the first recvmsg */
    }
    return 1;
}
/* Socket end-to-end: a memctx adapter on SOCK_EP plus a socket listener, the core
 * loop on its own thread, and a client that reads frames until a MEMCTX frame
 * arrives. The frame must carry an fd that maps read-only but not writable.
 *
 * Failure handling: the test bails out (after cleanup) when the loop thread cannot
 * be started, since joining a never-created thread is undefined; descriptors that
 * arrive on frames other than MEMCTX, or on a failed read, are closed instead of
 * being dropped. */
static void test_socket(void) {
    printf("test_socket\n");
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, SOCK_EP) >= 0, "add memctx");
    const char* SOCK = "@vmsig-memctx-e2e";
    CHECK(vmsig_socket_attach(core, SOCK, sock_policy, NULL) == 0, "socket attach");

    pthread_t th;
    int th_rc = pthread_create(&th, NULL, loop_main, core);
    CHECK(th_rc == 0, "loop thread started");
    if (th_rc != 0) { vmsig_core_free(core); vmsig_ctx_free(ctx); return; }

    int c = connect_abstract(SOCK);
    CHECK(c >= 0, "client connected");
    if (c < 0) { vmsig_core_stop(core); pthread_join(th, NULL); vmsig_core_free(core); vmsig_ctx_free(ctx); return; }

    /* Bound every read so a missing frame fails the test instead of hanging it. */
    struct timeval tv = { .tv_sec = 3, .tv_usec = 0 };
    CHECK(setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv) == 0, "receive timeout armed");

    int got_ctx = 0, ro_fd = -1, ro_ok = 0, rw_eacces = 0;
    vmsig_memctx pod; memset(&pod, 0, sizeof pod);
    for (int iter = 0; iter < 20 && !got_ctx; iter++) {
        vmsig_wire w; int wfd = -1;
        int r = recv_wire(c, &w, &wfd);
        if (r != 1) {
            if (wfd >= 0) close(wfd);
            break;
        }
        if (w.kind != VMSIG_EV_MEMCTX) {
            if (wfd >= 0) close(wfd); /* not the frame under test: drop its fd */
            continue;
        }
        got_ctx = 1; ro_fd = wfd;
        memcpy(&pod, w.inln, sizeof pod);
        CHECK(ro_fd >= 0, "MEMCTX frame carries an RO-fd in cmsg");
        CHECK(pod.flags & VMSIG_MEMCTX_RDONLY, "RDONLY flag is set");
        if (ro_fd >= 0 && pod.low) {
            void* ro = mmap(NULL, (size_t)pod.low, PROT_READ, MAP_SHARED, ro_fd, 0);
            if (ro != MAP_FAILED) { ro_ok = 1; munmap(ro, (size_t)pod.low); }
            void* rw = mmap(NULL, (size_t)pod.low, PROT_READ | PROT_WRITE, MAP_SHARED, ro_fd, 0);
            if (rw == MAP_FAILED) rw_eacces = 1; else munmap(rw, (size_t)pod.low);
        }
    }
    CHECK(got_ctx == 1, "MEMCTX frame arrived over the socket (wire framing)");
    CHECK(ro_ok, "mmap RO via the received fd");
    CHECK(rw_eacces, "write-mmap via the received fd fails (RO)");

    if (ro_fd >= 0) close(ro_fd);
    close(c);
    vmsig_core_stop(core);
    pthread_join(th, NULL);
    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
int main(void) {
test_multicast();
test_epoch();
test_retain();
test_multivm();
test_socket();
printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS");
return g_fail ? 1 : 0;
}
+227
View File
@@ -0,0 +1,227 @@
/* test_memwrite.c — write-signaled seam (MEMWRITE): atomic guest-memory write under an
* exclusive lease. Stub mode (no VM): proves the full path cap -> grant -> lease-gate ->
* route -> adapter -> ACT_ACK without actuation, plus the default-deny and fencing
* invariants. The adapter never sees a control name (SISC).
*
* 1) happy path: CAP_MEMWRITE + a MEMWRITE lease -> CMD_MEMWRITE -> ACT_ACK{ok=1};
* 2) extent default-deny: len > VMSIG_MEMWRITE_MAX and a missing SRC flag -> ACK{ok=0};
* 3) lease gate: CMD_MEMWRITE WITHOUT an acquired lease -> dropped at the gate (no ACK);
* 4) cap gate: a control WITHOUT CAP_MEMWRITE cannot acquire the lease (DENIED{NOCAP});
* 5) in-flight fence: A holds the lease, queues a write, B preempts SYNCHRONOUSLY -> A's
* queued write is dropped by the fence (no ACK for A's corr), B's write actuates.
* In-proc, under ASAN. */
#include "vmsig.h"
#include "memctx.h" /* VMSIG_MEMWRITE_MAX: the adapter's extent bound (private) */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
/* Sticky failure flag for the whole test binary: set by CHECK, read by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when cond is false, print msg and mark the run as failed;
 * the test keeps running. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* ---- in-proc control: records lease replies + MEMWRITE ACKs ---- */
/* Shared observation record for one test. All controls of a test point at the same
 * cstate (through a cref), so lease replies and ACKs from every control accumulate
 * here. ACKs beyond the 64 array slots are not recorded (see on_ev). */
typedef struct {
void* core; /* the vmsig_core to stop; stored untyped */
int granted, denied, last_deny_reason; /* LEASE_GRANTED / LEASE_DENIED counts, last deny reason */
int ack_ok[64]; /* ok flag per ACK in arrival order */
uint32_t ack_corr[64]; /* corr per ACK */
int nack; /* number of ACKs recorded so far */
int stop_replies, replies; /* stop the loop after N lease replies (0=off) */
int stop_acks; /* stop the loop after N acks (0=off) */
} cstate;
/* Heap-allocated indirection handed to a control as its user pointer; it only
 * carries the cstate the control reports into. */
typedef struct { cstate* s; } cref;

/* Registry of every cref handed out, so cref_free_all can release them. */
static cref* g_refs[16];
static int g_nrefs = 0;

/* Allocate a cref pointing at `s` and record it in the registry.
 * Never returns NULL: allocation failure and a full registry both abort the test
 * binary with a message, instead of dereferencing NULL or silently handing out a
 * cref that cref_free_all would never free. */
static cref* cref_new(cstate* s) {
    const int capacity = (int)(sizeof g_refs / sizeof g_refs[0]);
    if (g_nrefs >= capacity) {
        fprintf(stderr, "cref_new: registry full (%d refs)\n", capacity);
        abort();
    }
    cref* r = calloc(1, sizeof *r);
    if (r == NULL) {
        fprintf(stderr, "cref_new: out of memory\n");
        abort();
    }
    r->s = s;
    g_refs[g_nrefs++] = r;
    return r;
}

/* Free every cref created by cref_new and empty the registry. */
static void cref_free_all(void) {
    for (int i = 0; i < g_nrefs; i++) {
        free(g_refs[i]);
        g_refs[i] = NULL;
    }
    g_nrefs = 0;
}
static int on_ev(void* user, const vmsig_event* ev) {
cref* r = user; cstate* s = r->s;
switch (ev->kind) {
case VMSIG_EV_LEASE_GRANTED: s->granted++; s->replies++; break;
case VMSIG_EV_LEASE_DENIED:
s->denied++;
s->last_deny_reason = (int)((const vmsig_lease_req*)ev->inln)->reason;
s->replies++;
break;
case VMSIG_EV_ACT_ACK:
if (s->nack < 64) {
/* inln layout from mc_memwrite_ack: {int ok; uint32_t corr; uint32_t origin}. */
int ok; memcpy(&ok, ev->inln, sizeof ok);
s->ack_ok[s->nack] = ok;
s->ack_corr[s->nack] = ev->corr;
s->nack++;
}
break;
default: break;
}
if (s->stop_replies && s->replies >= s->stop_replies) vmsig_core_stop(s->core);
if (s->stop_acks && s->nack >= s->stop_acks) vmsig_core_stop(s->core);
return 0;
}
/* Create an in-proc control that reports into `s` and register it with `core`.
 * The control subscribes to all sources from BULK priority up; its grant covers
 * endpoint 0, all sources, `cap` plus CAP_OBSERVE, and arbitration priority
 * `arb_prio`. Returns the control handle used by the send helpers. */
static void* add_ctl(vmsig_core* core, cstate* s, uint32_t cap, uint32_t arb_prio) {
    cref* ref = cref_new(s);

    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user = ref;
    cfg.on_event = on_ev;
    cfg.sub.prio_min = VMSIG_PRIO_BULK;
    cfg.sub.source_mask = 0xFFFFFFFFu;
    void* control = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.arb_prio = arb_prio;
    grant.cap_mask = cap | VMSIG_CAP_OBSERVE;
    grant.source_mask = 0xFFFFFFFFu;
    grant.endpoint_mask = 1ull << 0;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), control, &grant);
    return control;
}
/* ---- DOWN send helpers ---- */
/* Send a CMD_ACQUIRE for the MEMWRITE lease class on endpoint 0 through control
 * `ctl`. Returns whatever vmsig_inproc_send returns. */
static int acquire_mw(void* ctl) {
    const vmsig_lease_req req = { VMSIG_LEASE_MEMWRITE, 0 };

    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir = VMSIG_DIR_DOWN;
    cmd.kind = VMSIG_EV_CMD_ACQUIRE;
    cmd.source = VMSIG_SRC_MEMCTX;
    cmd.prio = VMSIG_PRIO_HIGH;
    cmd.endpoint = 0;
    memcpy(cmd.inln, &req, sizeof req);

    return vmsig_inproc_send(ctl, &cmd);
}
/* Send a CMD_MEMWRITE on endpoint 0 through control `ctl`.
 *   gva/len/flags - copied into the vmsig_memwrite header at the start of inln
 *   src           - payload, appended after the header only when flags has
 *                   VMSIG_MW_SRC_INLINE, src is non-NULL and
 *                   len <= VMSIG_MEMWRITE_INLINE (src must then hold len bytes)
 *   corr          - correlation id, used by the tests to match ACKs
 * Returns whatever vmsig_inproc_send returns. */
static int send_write(void* ctl, uint64_t gva, uint32_t len, uint32_t flags,
                      const void* src, uint32_t corr) {
    const vmsig_memwrite hdr = { gva, len, flags };

    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir = VMSIG_DIR_DOWN;
    cmd.kind = VMSIG_EV_CMD_MEMWRITE;
    cmd.source = VMSIG_SRC_MEMCTX;
    cmd.prio = VMSIG_PRIO_HIGH;
    cmd.endpoint = 0;
    cmd.corr = corr;
    memcpy(cmd.inln, &hdr, sizeof hdr);

    const int wants_inline = (flags & VMSIG_MW_SRC_INLINE) != 0;
    if (wants_inline && src != NULL && len <= VMSIG_MEMWRITE_INLINE)
        memcpy(cmd.inln + sizeof hdr, src, len);

    return vmsig_inproc_send(ctl, &cmd);
}
/* Run the core loop until `s` has recorded at least `n` ACKs. The reply-count stop
 * is switched off for the run, and the ack-count stop is cleared again afterwards. */
static void run_until_acks(cstate* s, int n) {
    s->stop_replies = 0;
    s->stop_acks = n;
    vmsig_core_run((vmsig_core*)s->core);
    s->stop_acks = 0;
}
/* ---- 1+2+3: happy path, extent default-deny, lease gate -------------------- */
static void test_path_and_deny(void) {
printf("test_path_and_deny\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
cstate s; memset(&s, 0, sizeof s); s.core = core;
void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10);
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
/* 3) lease gate: without ACQUIRE the write is dropped at the gate (-1, no actuation). */
uint8_t pat[8] = { 0xDE, 0xAD, 0xBE, 0xEF, 1, 2, 3, 4 };
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 99) == -1,
"3: CMD_MEMWRITE without a lease is dropped by the gate");
/* acquire the MEMWRITE lease (synchronous intercept; UP reply paced by ctx). */
CHECK(acquire_mw(A) == 0, "acquire submitted");
/* 1) happy path: inline write -> queued -> ACT_ACK{ok=1}. Also drains the GRANTED reply. */
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 11) == 0,
"1: owner's CMD_MEMWRITE passes the gate");
/* 2) extent: len > MAX -> ACK{ok=0}, NOT actuated (queued ack on the loop thread). */
CHECK(send_write(A, 0x2000, VMSIG_MEMWRITE_MAX + 1, VMSIG_MW_SRC_INLINE, pat, 22) == 0,
"2: over-extent write is accepted by the gate (denied inside the adapter)");
/* 2b) missing SRC flag -> ACK{ok=0}. */
CHECK(send_write(A, 0x3000, 4, 0u, NULL, 33) == 0,
"2b: no-SRC-flag write is accepted by the gate (denied inside the adapter)");
/* expect 3 ACKs (corr 11/22/33) + the GRANTED reply. */
run_until_acks(&s, 3);
CHECK(s.granted == 1, "lease GRANTED once");
int saw11_ok = -1, saw22_ok = -1, saw33_ok = -1, saw99 = 0;
for (int i = 0; i < s.nack; i++) {
if (s.ack_corr[i] == 11) saw11_ok = s.ack_ok[i];
if (s.ack_corr[i] == 22) saw22_ok = s.ack_ok[i];
if (s.ack_corr[i] == 33) saw33_ok = s.ack_ok[i];
if (s.ack_corr[i] == 99) saw99 = 1;
}
CHECK(saw11_ok == 1, "1: happy-path write ACKs ok=1 (stub)");
CHECK(saw22_ok == 0, "2: over-extent write ACKs ok=0 (default-deny)");
CHECK(saw33_ok == 0, "2b: no-SRC-flag write ACKs ok=0 (default-deny)");
CHECK(!saw99, "3: the gate-dropped write produced no ACK");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 4: cap gate — no CAP_MEMWRITE cannot acquire the lease ----------------- */
static void test_cap_gate(void) {
printf("test_cap_gate\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
cstate s; memset(&s, 0, sizeof s); s.core = core;
void* NC = add_ctl(core, &s, 0u /* no MEMWRITE */, 10);
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
CHECK(acquire_mw(NC) == 0, "acquire submitted");
s.stop_replies = 1; vmsig_core_run(core); s.stop_replies = 0;
CHECK(s.denied == 1, "4: acquire without CAP_MEMWRITE -> DENIED");
CHECK(s.last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "4: reason=NOCAP");
CHECK(s.granted == 0, "4: not granted");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
/* ---- 5: in-flight fence — losing the lease before pump_down drops the write -- */
static void test_inflight_fence(void) {
printf("test_inflight_fence\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
cstate s; memset(&s, 0, sizeof s); s.core = core;
void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10);
void* B = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 100); /* higher prio: preempts */
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
uint8_t pat[4] = { 1, 2, 3, 4 };
CHECK(acquire_mw(A) == 0, "A acquires");
/* A queues a write (corr=55): passes the gate (A owns), lands in the DOWN queue. */
CHECK(send_write(A, 0x1000, 4, VMSIG_MW_SRC_INLINE, pat, 55) == 0, "A queues write 55");
/* B preempts SYNCHRONOUSLY (acquire does not go through ctx). */
CHECK(acquire_mw(B) == 0, "B preempts");
/* B's own write (corr=66) — should actuate. */
CHECK(send_write(B, 0x2000, 4, VMSIG_MW_SRC_INLINE, pat, 66) == 0, "B queues write 66");
run_until_acks(&s, 1); /* B's 66 acks; A's 55 must be fenced (no ack) */
int saw55 = 0, saw66 = 0;
for (int i = 0; i < s.nack; i++) {
if (s.ack_corr[i] == 55) saw55 = 1;
if (s.ack_corr[i] == 66) saw66 = 1;
}
CHECK(!saw55, "5: ex-owner A's in-flight write is dropped by the fence");
CHECK(saw66, "5: new owner B's write actuates after preemption");
CHECK(s.granted == 2, "5: A and B each got GRANTED");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
int main(void) {
printf("test_memwrite\n");
test_path_and_deny();
test_cap_gate();
test_inflight_fence();
cref_free_all();
printf("memwrite tests: %s\n", g_fail ? "FAIL" : "PASS");
return g_fail ? 1 : 0;
}
+62
View File
@@ -0,0 +1,62 @@
/* test_mvm.c — mode A (single core, multiple VMs): per-endpoint multiplexing and
* per-VM grant scoping on UP delivery. Two vmhost endpoints in one core (each stub
* ticks VM_LIFECYCLE per endpoint); a poller granted only VM0 must see only ep0
* lifecycle events, the VM1 poller — only ep1. */
#include "vmsig.h"
#include <stdio.h>
#include <string.h>
/* Sticky failure flag for the whole test binary: set by CHECK, read by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when cond is false, print msg and mark the run as failed;
 * the test keeps running. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* Shared test state: the core to stop, and a count of VM_LIFECYCLE events indexed
 * by receiving control and by the event's endpoint. */
typedef struct { vmsig_core* core; int seen[2][2]; } mvm; /* seen[control][endpoint] */
/* Per-control user pointer: the shared state plus this control's row in `seen`. */
typedef struct { mvm* d; int which; } ctlref;
/* Event callback shared by both controls: counts VM_LIFECYCLE events for endpoints
 * 0 and 1 in the receiving control's row, and stops the core once control 0 has
 * seen two events for endpoint 0 and control 1 two for endpoint 1. Always returns 0. */
static int on_ev(void* user, const vmsig_event* ev) {
    const ctlref* ref = user;
    mvm* st = ref->d;

    const int is_lifecycle = ev->kind == VMSIG_EV_VM_LIFECYCLE;
    if (is_lifecycle && ev->endpoint < 2)
        st->seen[ref->which][ev->endpoint] += 1;

    const int both_ready = st->seen[0][0] >= 2 && st->seen[1][1] >= 2;
    if (both_ready)
        vmsig_core_stop(st->core);
    return 0;
}
int main(void) {
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
mvm d; memset(&d, 0, sizeof d); d.core = core;
ctlref r0 = { &d, 0 }, r1 = { &d, 1 };
vmsig_inproc_cfg c0; memset(&c0, 0, sizeof c0); c0.on_event = on_ev; c0.user = &r0;
vmsig_inproc_cfg c1; memset(&c1, 0, sizeof c1); c1.on_event = on_ev; c1.user = &r1;
void* ctl0 = vmsig_inproc_control_new(&c0);
void* ctl1 = vmsig_inproc_control_new(&c1);
/* grants segregate the pollers per VM */
vmsig_grant g0; memset(&g0, 0, sizeof g0);
g0.endpoint_mask = 1ull << 0; g0.source_mask = 0xFFFFFFFFu; g0.cap_mask = VMSIG_CAP_OBSERVE;
vmsig_grant g1; memset(&g1, 0, sizeof g1);
g1.endpoint_mask = 1ull << 1; g1.source_mask = 0xFFFFFFFFu; g1.cap_mask = VMSIG_CAP_OBSERVE;
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl0, &g0);
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl1, &g1);
/* two VMs in one core: a vmhost adapter per endpoint (stub ticks VM_LIFECYCLE) */
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "VM0 adapter");
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 1) >= 0, "VM1 adapter");
int rc = vmsig_core_run(core);
printf("test_mvm rc=%d c0[ep0=%d ep1=%d] c1[ep0=%d ep1=%d]\n",
rc, d.seen[0][0], d.seen[0][1], d.seen[1][0], d.seen[1][1]);
CHECK(d.seen[0][0] >= 2, "control0 sees lifecycle of its own VM0");
CHECK(d.seen[0][1] == 0, "control0 does NOT see VM1 (grant scoping)");
CHECK(d.seen[1][1] >= 2, "control1 sees lifecycle of its own VM1");
CHECK(d.seen[1][0] == 0, "control1 does NOT see VM0");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
printf("multi-vm tests: %s\n", g_fail ? "FAIL" : "PASS");
return g_fail ? 1 : 0;
}
+121
View File
@@ -0,0 +1,121 @@
/* test_sec.c — security layer: grant enforcement on DOWN commands.
* Checks capability split (OBSERVE != INPUT != POWER != VM), source_mask
* on DOWN, destructive vs safe lifecycle/VM, foreign endpoint, default-deny.
* (Memory is no longer a DOWN command: the address-space context is multicast
* upward and gated by CAP_MEMCTX — see test_authz/test_memctx; here only DOWN
* actuation.) vmsig_inproc_send returns the result of core_emit_down (the grant of
* THIS specific control) — no need to run the loop. */
#include "vmsig.h"
#include <stdio.h>
#include <string.h>
#include <stdint.h>
/* Sticky failure flag for the whole test binary: set by CHECK, read by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when cond is false, print msg and mark the run as failed;
 * the test keeps running. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* Number of DOWN_DENIED audit records seen so far. */
static int g_denied = 0;

/* Audit sink: counts DOWN_DENIED records and ignores every other kind. */
static void audit_cb(void* ud, const vmsig_audit* a) {
    (void)ud;
    if (a->kind != VMSIG_AUDIT_DOWN_DENIED) return;
    g_denied += 1;
}
/* Send a bare DOWN command of `kind` to endpoint `ep` at NORMAL priority. The
 * source is derived from the kind: SRC_INPUT for CMD_INPUT and CMD_LIFECYCLE,
 * SRC_VMHOST for everything else. Returns whatever vmsig_inproc_send returns. */
static int send(void* ctl, vmsig_kind kind, uint32_t ep) {
    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir = VMSIG_DIR_DOWN;
    cmd.kind = kind;
    cmd.endpoint = ep;
    cmd.prio = VMSIG_PRIO_NORMAL;
    if (kind == VMSIG_EV_CMD_INPUT || kind == VMSIG_EV_CMD_LIFECYCLE)
        cmd.source = VMSIG_SRC_INPUT;
    else
        cmd.source = VMSIG_SRC_VMHOST;
    return vmsig_inproc_send(ctl, &cmd);
}
/* Send a CMD_LIFECYCLE (source SRC_INPUT, NORMAL priority) to endpoint `ep`, with
 * the operation code `op` stored in the first payload byte. Returns whatever
 * vmsig_inproc_send returns. */
static int send_life(void* ctl, int op, uint32_t ep) {
    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir = VMSIG_DIR_DOWN;
    cmd.kind = VMSIG_EV_CMD_LIFECYCLE;
    cmd.source = VMSIG_SRC_INPUT;
    cmd.prio = VMSIG_PRIO_NORMAL;
    cmd.endpoint = ep;
    cmd.inln[0] = (uint8_t)op;
    return vmsig_inproc_send(ctl, &cmd);
}
/* Send a CMD_ACQUIRE (source SRC_INPUT, HIGH priority) for lease class `cls` on
 * endpoint `ep`. Returns whatever vmsig_inproc_send returns. */
static int acq(void* ctl, uint32_t cls, uint32_t ep) {
    const vmsig_lease_req req = { cls, 0 };

    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir = VMSIG_DIR_DOWN;
    cmd.kind = VMSIG_EV_CMD_ACQUIRE;
    cmd.source = VMSIG_SRC_INPUT;
    cmd.prio = VMSIG_PRIO_HIGH;
    cmd.endpoint = ep;
    memcpy(cmd.inln, &req, sizeof req);
    return vmsig_inproc_send(ctl, &cmd);
}
/* Send a CMD_VM (source SRC_VMHOST, NORMAL priority) to endpoint `ep`, with `op`
 * wrapped in a vmsig_vm_cmd at the start of the payload. Returns whatever
 * vmsig_inproc_send returns. */
static int send_vm(void* ctl, int op, uint32_t ep) {
    const vmsig_vm_cmd vm_cmd = { (uint32_t)op };

    vmsig_event cmd;
    memset(&cmd, 0, sizeof cmd);
    cmd.dir = VMSIG_DIR_DOWN;
    cmd.kind = VMSIG_EV_CMD_VM;
    cmd.source = VMSIG_SRC_VMHOST;
    cmd.prio = VMSIG_PRIO_NORMAL;
    cmd.endpoint = ep;
    memcpy(cmd.inln, &vm_cmd, sizeof vm_cmd);
    return vmsig_inproc_send(ctl, &cmd);
}
/* Register an in-proc control with no callbacks and no subscription (this test
 * only sends DOWN commands). Its grant covers endpoint 0 with the given capability
 * and source masks. Returns the control handle. */
static void* add_ctl(vmsig_core* core, uint32_t cap, uint32_t source_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg); /* on_event=NULL, sub=0: no UP needed */
    void* control = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.cap_mask = cap;
    grant.source_mask = source_mask;
    grant.endpoint_mask = 1ull << 0;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), control, &grant);
    return control;
}
int main(void) {
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
vmsig_core_set_audit(core, audit_cb, NULL);
void* A = add_ctl(core, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu); /* screen observer */
void* B = add_ctl(core, VMSIG_CAP_INPUT | VMSIG_CAP_LIFECYCLE, 0xFFFFFFFFu);/* input + safe lifecycle */
void* P = add_ctl(core, VMSIG_CAP_POWER, 0xFFFFFFFFu); /* destructive power */
void* S = add_ctl(core, VMSIG_CAP_INPUT, 1u << VMSIG_SRC_FRAME); /* INPUT, but source=FRAME */
void* V = add_ctl(core, VMSIG_CAP_VM, 0xFFFFFFFFu); /* VM control (safe) */
void* C = vmsig_inproc_control_new(&(vmsig_inproc_cfg){0}); /* default-deny */
vmsig_core_add_control(core, vmsig_inproc_control_ops(), C, NULL);
printf("test_security\n");
/* A — screen observer: does NOT actuate input/lifecycle (split CAP) */
CHECK(send(A, VMSIG_EV_CMD_INPUT, 0) == -1, "OBSERVE != input"); /* deny 1 */
CHECK(send_life(A, VMSIG_LIFE_PAUSE, 0) == -1, "OBSERVE != lifecycle"); /* deny 2 */
/* B — input + SAFE lifecycle, but NOT destructive power. Destructive/input
* now passes ONLY while holding a class lease => ACQUIRE first. */
acq(B, VMSIG_LEASE_INPUT, 0);
CHECK(send(B, VMSIG_EV_CMD_INPUT, 0) == 0, "INPUT => input allowed");
CHECK(send_life(B, VMSIG_LIFE_PAUSE, 0) == 0, "LIFECYCLE => pause allowed");
CHECK(send_life(B, VMSIG_LIFE_POWERDOWN, 0) == -1,"powerdown requires CAP_POWER"); /* deny 3 */
/* P — destructive power (with a POWER class lease) */
acq(P, VMSIG_LEASE_POWER, 0);
CHECK(send_life(P, VMSIG_LIFE_POWERDOWN, 0) == 0, "POWER => powerdown allowed");
/* S — has INPUT, but source_mask lacks SRC_INPUT: DOWN input denied */
CHECK(send(S, VMSIG_EV_CMD_INPUT, 0) == -1, "source_mask on DOWN: SRC_INPUT denied"); /* deny 4 */
/* V — VM control: safe ops yes, destructive ones require CAP_POWER */
CHECK(send_vm(V, VMSIG_VMOP_CONT, 0) == 0, "CAP_VM => cont allowed");
CHECK(send_vm(V, VMSIG_VMOP_POWERDOWN, 0) == -1, "VM powerdown requires CAP_POWER"); /* deny 5 */
CHECK(send_vm(P, VMSIG_VMOP_POWERDOWN, 0) == 0, "CAP_POWER => VM powerdown allowed");
/* C — default-deny */
CHECK(send_vm(C, VMSIG_VMOP_QUERY, 0) == -1, "default-deny is deaf"); /* deny 6 */
/* audit recorded all 6 DOWN denials */
CHECK(g_denied == 6, "audit: all DOWN denials recorded");
vmsig_core_free(core); /* closes/frees all controls */
vmsig_ctx_free(ctx);
printf("security tests: %s\n", g_fail ? "FAIL" : "PASS");
return g_fail ? 1 : 0;
}
+154
View File
@@ -0,0 +1,154 @@
/* test_sock.c — out-of-process control: wire codec + authentication/admission.
* Bring up two listeners (one admitting, one rejecting) on abstract sockets, run
* the core in a separate thread, connect clients and check: policy invoked,
* valid poller admitted, unauthorized rejected (EOF), reap without a crash. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "vmsig_socket.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
#include <stdatomic.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stddef.h>
#include <time.h>
/* Sticky failure flag for the whole test binary: set by CHECK, read by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when cond is false, print msg and mark the run as failed;
 * the test keeps running. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* Counters bumped from the policy and audit callbacks and polled by main().
 * g_auth / g_deny: calls of pol_ok / pol_deny; g_admit / g_reject: ADMIT / REJECT
 * audit records. */
static atomic_int g_auth = 0;
static atomic_int g_deny = 0;
static atomic_int g_admit = 0;
static atomic_int g_reject = 0;

/* Audit sink: counts ADMIT and REJECT records, ignores every other kind. */
static void audit_cb(void* ud, const vmsig_audit* a) {
    (void)ud;
    switch (a->kind) {
    case VMSIG_AUDIT_ADMIT:
        atomic_fetch_add(&g_admit, 1);
        break;
    case VMSIG_AUDIT_REJECT:
        atomic_fetch_add(&g_reject, 1);
        break;
    default:
        break;
    }
}
/* Admitting policy: counts the call in g_auth and returns a grant for endpoint 0,
 * all sources and CAP_OBSERVE, with the peer's uid as principal. pid and ud are
 * unused. */
static vmsig_grant pol_ok(uint32_t uid, uint32_t pid, void* ud) {
    (void)pid;
    (void)ud;
    atomic_fetch_add(&g_auth, 1);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.cap_mask = VMSIG_CAP_OBSERVE;
    grant.source_mask = 0xFFFFFFFFu;
    grant.endpoint_mask = 1u << 0;
    grant.principal = uid;
    return grant;
}
/* Rejecting policy: counts the call in g_deny and returns an all-zero grant, which
 * this test relies on being treated as a rejection. All arguments are unused. */
static vmsig_grant pol_deny(uint32_t uid, uint32_t pid, void* ud) {
    (void)uid;
    (void)pid;
    (void)ud;
    atomic_fetch_add(&g_deny, 1);

    vmsig_grant none;
    memset(&none, 0, sizeof none); /* empty => reject */
    return none;
}
/* Connect a stream socket to an abstract-namespace unix socket.
 *
 *   name - listener name whose first character is a placeholder (the tests pass
 *          "@..."); it is replaced by the leading NUL byte of the abstract address,
 *          and the remaining characters form the address.
 *
 * Returns the connected fd, or -1 on any failure. A NULL or empty name, or one that
 * does not fit into sun_path, is rejected up front: an empty name would otherwise
 * make `n - 1` wrap around in the copy below, and a long one would overflow sun_path. */
static int connect_abstract(const char* name) {
    if (name == NULL) return -1;

    struct sockaddr_un a;
    size_t n = strlen(name);
    if (n < 1 || n > sizeof a.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0;                         /* abstract namespace marker */
    memcpy(a.sun_path + 1, name + 1, n - 1);   /* NUL + (n - 1) name bytes = n bytes */
    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);

    if (connect(fd, (struct sockaddr*)&a, alen) < 0) { close(fd); return -1; }
    return fd;
}
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
/* Poll *a until it is >= want, sleeping 1 ms between polls, for at most `ms` polls.
 * Returns silently on timeout; callers re-read the counter to learn the outcome. */
static void wait_atomic(atomic_int* a, int want, int ms) {
    const struct timespec one_ms = { .tv_sec = 0, .tv_nsec = 1000000 };
    int polls_left = ms;
    while (polls_left-- > 0) {
        if (atomic_load(a) >= want) break;
        nanosleep(&one_ms, NULL);
    }
}
/* Wire codec round trip: encode an event whose first 48 payload bytes hold
 * 0..47, decode it again, and compare header fields and payload. A frame whose
 * magic was zeroed must be rejected by the decoder.
 * NOTE(review): 48 is presumably the size of the inline payload - confirm against
 * the header. */
static void test_wire(void) {
    printf("test_wire\n");

    vmsig_event sent;
    memset(&sent, 0, sizeof sent);
    sent.dir = VMSIG_DIR_DOWN;
    sent.kind = VMSIG_EV_CMD_VM;
    sent.source = VMSIG_SRC_VMHOST;
    sent.prio = VMSIG_PRIO_HIGH;
    sent.endpoint = 0;
    sent.corr = 0xABCD;
    int i = 0;
    while (i < 48) {
        sent.inln[i] = (uint8_t)i;
        i++;
    }

    vmsig_wire frame;
    vmsig_wire_encode(&frame, &sent);

    vmsig_event back;
    CHECK(vmsig_wire_decode(&frame, &back) == 0, "decode ok");
    CHECK(back.kind == sent.kind && back.source == sent.source &&
          back.endpoint == sent.endpoint && back.corr == sent.corr, "frame fields match");
    CHECK(memcmp(back.inln, sent.inln, 48) == 0, "inln matches");

    vmsig_wire corrupt = frame;
    corrupt.magic = 0;
    vmsig_event ignored;
    CHECK(vmsig_wire_decode(&corrupt, &ignored) == -1, "bad magic rejected");
}
/* Socket transport test. After the codec round trip it attaches an admitting and a
 * rejecting listener, runs the core loop on a thread, and checks:
 *   - a client of the admitting listener triggers the policy;
 *   - a client of the rejecting listener triggers the deny policy and then reads EOF;
 *   - 70 connect/close cycles on the admitting listener are all admitted (more
 *     cycles than MAX_CONTROLS, per the note below);
 *   - the audit callback saw at least one admission and one rejection.
 * If the loop thread cannot be started the test fails right away: nothing would
 * serve the clients, and joining a never-created thread is undefined.
 * Exits with 1 if any CHECK failed, 0 otherwise. */
int main(void) {
    test_wire();
    printf("test_socket\n");
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    vmsig_core_set_audit(core, audit_cb, NULL);
    const char* OK = "@vmsig-sock-ok-test";
    const char* DENY = "@vmsig-sock-deny-test";
    CHECK(vmsig_socket_attach(core, OK, pol_ok, NULL) == 0, "attach ok listener");
    CHECK(vmsig_socket_attach(core, DENY, pol_deny, NULL) == 0, "attach deny listener");

    pthread_t th;
    int th_rc = pthread_create(&th, NULL, loop_main, core);
    CHECK(th_rc == 0, "loop thread started");
    if (th_rc != 0) {
        vmsig_core_free(core);
        vmsig_ctx_free(ctx);
        printf("socket tests: %s\n", g_fail ? "FAIL" : "PASS");
        return 1;
    }

    /* valid poller: connect -> policy -> admission */
    int c1 = connect_abstract(OK);
    CHECK(c1 >= 0, "client connected (ok)");
    wait_atomic(&g_auth, 1, 1000);
    CHECK(atomic_load(&g_auth) >= 1, "policy invoked — poller authenticated/admitted");
    if (c1 >= 0) close(c1); /* disconnect -> deferred reap (no crash) */

    /* unauthorized: connect -> server closes -> EOF on the client */
    int c2 = connect_abstract(DENY);
    CHECK(c2 >= 0, "client connected (deny)");
    wait_atomic(&g_deny, 1, 1000);
    CHECK(atomic_load(&g_deny) >= 1, "deny policy invoked");
    if (c2 >= 0) {
        /* bound the read so a server that keeps the connection open fails the check */
        struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
        setsockopt(c2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
        char b; ssize_t r = read(c2, &b, 1);
        CHECK(r == 0, "connection rejected by server (EOF)");
        close(c2);
    }

    /* slot reuse: churn > MAX_CONTROLS(64). Without returning slots the listener
     * would die after 64 cycles. Each cycle: connect(ok) -> wait auth++ -> close. */
    int base = atomic_load(&g_auth);
    const int churn = 70;
    for (int k = 0; k < churn; k++) {
        int fc = connect_abstract(OK);
        if (fc < 0) { CHECK(0, "churn connect"); break; }
        wait_atomic(&g_auth, base + k + 1, 1000);
        close(fc);
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 2 * 1000000 };
        nanosleep(&ts, NULL); /* let the loop reap before the next connection */
    }
    CHECK(atomic_load(&g_auth) >= base + churn,
          "slots reused: churn > MAX_CONTROLS admitted");

    /* audit recorded admissions and rejections */
    CHECK(atomic_load(&g_admit) >= 1, "audit: poller admission");
    CHECK(atomic_load(&g_reject) >= 1, "audit: rejection (deny listener)");

    struct timespec t = { .tv_sec = 0, .tv_nsec = 50 * 1000000 };
    nanosleep(&t, NULL); /* let the loop process the reaps */
    vmsig_core_stop(core);
    pthread_join(th, NULL);
    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
    printf("socket tests: %s\n", g_fail ? "FAIL" : "PASS");
    return g_fail ? 1 : 0;
}
+146
View File
@@ -0,0 +1,146 @@
/* test_vmhost.c — QEMU/QMP host-plane, armed path: fake QMP server (this test)
* <-> real QMP client vmhost. We verify: handshake (greeting -> qmp_capabilities
* -> return -> SEAM_UP), async events -> VM_LIFECYCLE (broadcast), CMD_VM{QUERY}
* -> command to server -> return -> addressed VM_LIFECYCLE to the initiator, EOF -> SEAM_DOWN. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "vmhost.h" /* private cfg (CMake provides the include path) */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
#include <stdatomic.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stddef.h>
#include <time.h>
#include <errno.h>
/* Sticky failure flag: set by CHECK, converted into the process exit status by main(). */
static int g_fail = 0;

/* Soft assertion: on a false `cond` print the message and mark the run as failed,
 * but keep executing so that a single run reports every broken expectation. */
#define CHECK(cond, msg)                      \
    do {                                      \
        if (!(cond)) {                        \
            printf(" FAIL: %s\n", (msg));     \
            g_fail = 1;                       \
        }                                     \
    } while (0)

/* Milestone flags: raised by the on_ev callback, polled and checked by main(). */
static atomic_int g_seamup = 0;         /* SEAM_UP from the vmhost source seen        */
static atomic_int g_seamdown = 0;       /* SEAM_DOWN from the vmhost source seen      */
static atomic_int g_paused = 0;         /* broadcast VM_LIFECYCLE with state PAUSED   */
static atomic_int g_running_bcast = 0;  /* broadcast VM_LIFECYCLE with state RUNNING  */
static atomic_int g_query_reply = 0;    /* addressed VM_LIFECYCLE with state RUNNING  */

/* In-process control handle; main() stores it before creating the loop thread,
 * and on_ev passes it to vmsig_inproc_send(). */
static void* g_ctl = NULL;
static int on_ev(void* user, const vmsig_event* ev) {
(void)user;
if (ev->kind == VMSIG_EV_SEAM_UP && ev->source == VMSIG_SRC_VMHOST) {
atomic_store(&g_seamup, 1);
vmsig_event d; memset(&d, 0, sizeof d); /* once ready — query status */
d.kind = VMSIG_EV_CMD_VM; d.source = VMSIG_SRC_VMHOST; d.dir = VMSIG_DIR_DOWN;
d.prio = VMSIG_PRIO_NORMAL; d.endpoint = 0; d.corr = 0x55;
vmsig_vm_cmd c = { VMSIG_VMOP_QUERY }; memcpy(d.inln, &c, sizeof c);
vmsig_inproc_send(g_ctl, &d);
} else if (ev->kind == VMSIG_EV_SEAM_DOWN && ev->source == VMSIG_SRC_VMHOST) {
atomic_store(&g_seamdown, 1);
} else if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
vmsig_vm_state vs; memcpy(&vs, ev->inln, sizeof vs);
if (ev->origin) { /* addressed reply to our QUERY */
if (vs.state == VMSIG_VM_RUNNING) atomic_store(&g_query_reply, 1);
} else { /* broadcast async event */
if (vs.state == VMSIG_VM_PAUSED) atomic_store(&g_paused, 1);
if (vs.state == VMSIG_VM_RUNNING) atomic_store(&g_running_bcast, 1);
}
}
return 0;
}
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
/* Create a listening AF_UNIX stream socket bound to an abstract-namespace address.
 *
 * `name` is written with a one-character placeholder prefix (the test uses '@');
 * that first character is replaced by a NUL byte in sun_path and the remaining
 * strlen(name) - 1 bytes follow it, so the address occupies exactly strlen(name)
 * bytes of sun_path.
 *
 * Returns the listening fd (backlog 4), or -1 when the name is NULL, empty, too
 * long for sun_path, or when socket()/bind()/listen() fails. */
static int srv_listen(const char* name) {
    struct sockaddr_un a;
    size_t n = name ? strlen(name) : 0;

    /* Reject before touching memory: n == 0 would make `n - 1` wrap around in the
     * memcpy below, and n > sizeof sun_path would overflow the address struct. */
    if (n == 0 || n > sizeof a.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0;                       /* leading NUL selects the abstract namespace */
    memcpy(a.sun_path + 1, name + 1, n - 1); /* skip the placeholder prefix character */

    /* Abstract addresses are length-delimited, so pass the exact length used. */
    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    if (bind(fd, (struct sockaddr*)&a, alen) < 0) { close(fd); return -1; }
    if (listen(fd, 4) < 0) { close(fd); return -1; }
    return fd;
}
/* Send the NUL-terminated string `s` (without the terminator) to `fd`.
 *
 * Loops until every byte has been written, so a short write or an EINTR does not
 * silently truncate a message. Delivery stays best-effort: on any other write
 * error the function gives up quietly, since it has no way to report failure. */
static void srv_send(int fd, const char* s) {
    size_t left = strlen(s);
    while (left > 0) {
        ssize_t w = write(fd, s, left);
        if (w < 0) {
            if (errno == EINTR) continue;   /* interrupted before any byte went out: retry */
            return;                         /* best-effort: nothing to report to (void) */
        }
        if (w == 0) return;                 /* no progress possible; avoid spinning */
        s += w;
        left -= (size_t)w;
    }
}
/* Read from `fd` until the accumulated text contains `needle`.
 *
 * Returns 1 as soon as `needle` is found, 0 on EOF or after 200 read attempts.
 * A failed read (e.g. a receive timeout configured by the caller) is followed by
 * a 10 ms sleep, so the total wait is about 2 s of sleeping plus whatever time
 * the reads themselves block.
 *
 * Data is accumulated in a 1 KiB window. When the window fills up, the last
 * strlen(needle) - 1 bytes are kept rather than discarding everything, so a
 * needle that straddles the wrap point is still detected. */
static int srv_expect(int fd, const char* needle) {
    char buf[1024];
    const size_t cap = sizeof buf - 1;      /* one byte is reserved for the NUL */
    const size_t nlen = strlen(needle);
    size_t len = 0;

    for (int attempt = 0; attempt < 200; attempt++) {
        ssize_t r = read(fd, buf + len, cap - len);
        if (r > 0) {
            len += (size_t)r;
            buf[len] = 0;                   /* keep the window a valid C string for strstr */
            if (strstr(buf, needle)) return 1;
        } else if (r == 0) {
            return 0;                       /* peer closed the connection */
        } else {
            struct timespec t = { 0, 10 * 1000000 };
            nanosleep(&t, NULL);
        }

        if (len >= cap) {
            /* Window full and no match: only a proper prefix of the needle can
             * still be pending at the end, i.e. at most nlen - 1 bytes. */
            size_t keep = nlen > 0 ? nlen - 1 : 0;
            if (keep >= cap) keep = 0;      /* needle cannot fit at all: just discard */
            memmove(buf, buf + len - keep, keep);
            len = keep;
        }
    }
    return 0;
}
/* Poll `*a` until it becomes non-zero, sleeping 1 ms between polls, for at most
 * `ms` polls. Returns nothing: callers re-read the flag afterwards to tell a
 * success from a timeout. A non-positive `ms` returns immediately. */
static void wait_atomic(atomic_int* a, int ms) {
    const struct timespec tick = { 0, 1000000 };   /* 1 ms */
    int remaining = ms;
    while (remaining-- > 0) {
        if (atomic_load(a) != 0) {
            return;
        }
        nanosleep(&tick, NULL);
    }
}
/* Test driver. Plays the QMP server side on an abstract unix socket while the
 * armed vmhost adapter, attached to a vmsig core running on its own thread, plays
 * the client. The on_ev callback records milestones in the g_* flags; this
 * function scripts the server side of the dialogue, waits for each flag, and
 * reports the result. Returns 0 when every CHECK passed, 1 otherwise (including
 * setup failures). */
int main(void) {
    const char* QMP = "@vmsig-qmp-fake-test";
    int srv = srv_listen(QMP);
    if (srv < 0) { printf("srv_listen failed\n"); return 1; }

    vmsig_ctx* ctx = vmsig_ctx_new();
    if (!ctx) {
        printf("vmsig_ctx_new failed\n");
        close(srv);
        return 1;
    }
    vmsig_core* core = vmsig_core_new(ctx);
    if (!core) {
        printf("vmsig_core_new failed\n");
        vmsig_ctx_free(ctx);
        close(srv);
        return 1;
    }

    /* In-process control: receives events through on_ev and sends the QUERY. */
    vmsig_inproc_cfg cc; memset(&cc, 0, sizeof cc); cc.on_event = on_ev;
    void* ctl = vmsig_inproc_control_new(&cc);
    if (!ctl) {
        printf("vmsig_inproc_control_new failed\n");
        vmsig_core_free(core);
        vmsig_ctx_free(ctx);
        close(srv);
        return 1;
    }
    g_ctl = ctl;   /* published before the loop thread is created */

    /* Grant: endpoint 0 only, every source, observe + VM capabilities. */
    vmsig_grant g; memset(&g, 0, sizeof g);
    g.endpoint_mask = 1ull << 0; g.source_mask = 0xFFFFFFFFu;
    g.cap_mask = VMSIG_CAP_OBSERVE | VMSIG_CAP_VM;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &g);

    /* armed vmhost: it will connect to our fake QMP */
    vmsig_vmhost_cfg vcfg; memset(&vcfg, 0, sizeof vcfg); vcfg.qmp_path = QMP;
    CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), &vcfg, 0) >= 0, "vmhost armed attach");

    /* Without a running loop thread nothing below can make progress, and joining
     * a thread that was never created is undefined: bail out instead. */
    pthread_t th;
    if (pthread_create(&th, NULL, loop_main, core) != 0) {
        printf("pthread_create failed\n");
        vmsig_core_free(core);
        vmsig_ctx_free(ctx);
        close(srv);
        return 1;
    }

    /* === QMP server role === */
    /* NOTE(review): accept() has no timeout here; if the adapter never connects
     * the test blocks — confirm whether a bound is wanted. */
    int c = accept(srv, NULL, NULL);
    CHECK(c >= 0, "server accepted vmhost connection");
    if (c >= 0) {
        /* 50 ms receive timeout so srv_expect's reads return periodically. */
        struct timeval tv = { 0, 50 * 1000 };
        setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);

        srv_send(c, "{\"QMP\": {\"version\": {}, \"capabilities\": []}}\r\n");
        CHECK(srv_expect(c, "qmp_capabilities"), "client sent qmp_capabilities");
        srv_send(c, "{\"return\": {}}\r\n");                 /* -> READY -> SEAM_UP */
        srv_send(c, "{\"event\": \"STOP\"}\r\n");            /* -> broadcast PAUSED */
        CHECK(srv_expect(c, "query-status"), "client sent query-status (from CMD_VM)");
        srv_send(c, "{\"return\": {\"status\": \"running\"}, \"id\": 1}\r\n"); /* -> addressed reply */
        srv_send(c, "{\"event\": \"RESUME\"}\r\n");          /* -> broadcast RUNNING */

        wait_atomic(&g_seamup, 1000);
        wait_atomic(&g_paused, 1000);
        wait_atomic(&g_query_reply, 1000);
        wait_atomic(&g_running_bcast, 1000);

        close(c);                                            /* EOF -> SEAM_DOWN */
        wait_atomic(&g_seamdown, 1000);
    }

    CHECK(atomic_load(&g_seamup), "handshake complete (SEAM_UP)");
    CHECK(atomic_load(&g_paused), "async STOP -> VM_LIFECYCLE PAUSED (broadcast)");
    CHECK(atomic_load(&g_query_reply), "CMD_VM QUERY -> addressed VM_LIFECYCLE RUNNING");
    CHECK(atomic_load(&g_running_bcast),"async RESUME -> VM_LIFECYCLE RUNNING (broadcast)");
    CHECK(atomic_load(&g_seamdown), "EOF QMP -> SEAM_DOWN");

    /* Stop the loop, wait for its thread, then tear down in reverse creation order. */
    vmsig_core_stop(core);
    pthread_join(th, NULL);
    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
    close(srv);

    printf("vmhost tests: %s\n", g_fail ? "FAIL" : "PASS");
    return g_fail ? 1 : 0;
}