mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-20 19:06:37 +03:00
vmsig: a neutral signaling layer between sensors/input and controls
An epoll-driven, neutral transfer-event bus that connects sensors and input actuators to one or more controls, bidirectionally. It owns the transfer context and events — delivery order, priority, protocol-level timing, and an interrupt-driven event model over fd sources (eventfd/timerfd/sockets) — and stays agnostic to both the sensor/input drivers and the control. What lives here: - memctx: a coherent address-space context per endpoint — the guest address-space root paired with a pre-opened read-only RAM-region fd, with per-endpoint epoch invalidation and retained replay to late subscribers. Perception lives in out-of-tree sensor libraries that consume this datum read-only. - exclusive-ownership leases for destructive resource classes (input, power, memory-write). - write-signaled memory writes (MEMWRITE): an atomic write to guest memory routed through the seam under an exclusive lease, never a writable mapping. - a host-management seam for VM lifecycle/status and a neutral input-injection command path. - multi-VM endpoints; capability-gated, audited control authorization over an in-process or unix-socket transport. Builds against headers only by default (a stub mode that exercises the seam without a VM); armed builds link the real sensor/input libraries behind flags. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,5 @@
|
|||||||
|
.*/
|
||||||
|
cmake-*/
|
||||||
|
compile*
|
||||||
|
Testing/
|
||||||
|
CLAUDE.md
|
||||||
+137
@@ -0,0 +1,137 @@
|
|||||||
|
# CMAKE_C_STANDARD 17 (set below) is only a valid value from CMake 3.21 on;
# older releases reject it at generate time, so 3.21 is the real minimum.
cmake_minimum_required(VERSION 3.21)
|
||||||
|
project(vmsig C)
|
||||||
|
|
||||||
|
set(CMAKE_C_STANDARD 17)
|
||||||
|
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||||
|
set(CMAKE_C_EXTENSIONS ON) # epoll/eventfd/timerfd/clock_gettime: gnu ext
|
||||||
|
option(VMSIG_LTO "Enable LTO" OFF)
|
||||||
|
|
||||||
|
# Link the real sibling libraries (their .a, built with -fPIC). By default the spine
|
||||||
|
# builds against headers only: the SI calls are hidden behind these flags, and the
|
||||||
|
# stub mode proves the seam without a real VM.
|
||||||
|
option(VMSIG_WITH_VMIE "Link real vmie (libvmie.a, PIC) for armed memctx" OFF)
|
||||||
|
option(VMSIG_WITH_VMCTL "Link real vmctl (libvmctl.a, PIC) for armed input" OFF)
|
||||||
|
|
||||||
|
# ---- Sibling library sources (set these to your local checkouts) ------------
|
||||||
|
# Only needed for the armed builds below; the default stub build needs neither.
|
||||||
|
set(LIBVMIE_PATH "" CACHE PATH "Path to the vmie library sources (for VMSIG_WITH_VMIE)")
|
||||||
|
set(LIBVMCTL_PATH "" CACHE PATH "Path to the vmctl library sources (for VMSIG_WITH_VMCTL)")
|
||||||
|
|
||||||
|
find_package(Threads REQUIRED)
|
||||||
|
|
||||||
|
# ---- signaling library ------------------------------------------------------
|
||||||
|
# Shared library target; sources are grouped by subsystem directory
# (core, transfer context, adapters, controls).
add_library(vmsig SHARED)
target_sources(vmsig PRIVATE
  # core
  src/core/core.c
  src/core/linux/loop.c
  # transfer context
  src/ctx/ctx.c
  # adapters
  src/adapter/linux/worker.c
  src/adapter/memctx/memctx.c
  src/adapter/input/input.c
  src/adapter/vmhost/vmhost.c
  # controls
  src/control/inproc.c
  src/control/socket.c
)
|
||||||
|
|
||||||
|
# Public API headers are exported to consumers; the per-subsystem include
# directories stay private to the library's own translation units.
target_include_directories(vmsig PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
foreach(vmsig_private_subdir IN ITEMS
    core
    ctx
    adapter
    adapter/memctx
    adapter/input
    adapter/vmhost)
  target_include_directories(vmsig PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/src/${vmsig_private_subdir}/include")
endforeach()
|
||||||
|
|
||||||
|
target_link_libraries(vmsig PRIVATE Threads::Threads)
|
||||||
|
|
||||||
|
# armed: the real vmie as a pre-built PIC .a (IMPORTED) — headers and symbols.
|
||||||
|
if(VMSIG_WITH_VMIE)
  # LIBVMIE_PATH defaults to "". Left empty, the paths below would resolve to
  # /.build/libvmie.a and /include and fail later with an unrelated-looking
  # error, so reject it up front with an actionable message.
  if("${LIBVMIE_PATH}" STREQUAL "")
    message(FATAL_ERROR
      "VMSIG_WITH_VMIE=ON requires LIBVMIE_PATH to be set to the vmie checkout "
      "(expects <path>/.build/libvmie.a and <path>/include)")
  endif()

  # Pre-built static archive wrapped as an IMPORTED target so that its include
  # directory travels with the link dependency. Paths are quoted so a value
  # containing ';' is not split into several arguments.
  add_library(vmie STATIC IMPORTED)
  set_target_properties(vmie PROPERTIES
    IMPORTED_LOCATION "${LIBVMIE_PATH}/.build/libvmie.a"
    INTERFACE_INCLUDE_DIRECTORIES "${LIBVMIE_PATH}/include")
  target_link_libraries(vmsig PRIVATE vmie)
  # Same name as the option: lets the sources test for the armed build.
  target_compile_definitions(vmsig PRIVATE VMSIG_WITH_VMIE)
endif()
|
||||||
|
|
||||||
|
# armed: the real vmctl as a pre-built PIC .a (IMPORTED).
|
||||||
|
if(VMSIG_WITH_VMCTL)
  # LIBVMCTL_PATH defaults to "". Left empty, the paths below would resolve to
  # /.build/libvmctl.a and /include and fail later with an unrelated-looking
  # error, so reject it up front with an actionable message.
  if("${LIBVMCTL_PATH}" STREQUAL "")
    message(FATAL_ERROR
      "VMSIG_WITH_VMCTL=ON requires LIBVMCTL_PATH to be set to the vmctl checkout "
      "(expects <path>/.build/libvmctl.a and <path>/include)")
  endif()

  # Pre-built static archive wrapped as an IMPORTED target so that its include
  # directory travels with the link dependency. Paths are quoted so a value
  # containing ';' is not split into several arguments.
  add_library(vmctl STATIC IMPORTED)
  set_target_properties(vmctl PROPERTIES
    IMPORTED_LOCATION "${LIBVMCTL_PATH}/.build/libvmctl.a"
    INTERFACE_INCLUDE_DIRECTORIES "${LIBVMCTL_PATH}/include")
  target_link_libraries(vmsig PRIVATE vmctl)
  # Same name as the option: lets the sources test for the armed build.
  target_compile_definitions(vmsig PRIVATE VMSIG_WITH_VMCTL)
endif()
|
||||||
|
|
||||||
|
# Warnings always. -O2 is applied to every configuration except Debug: target
# options are placed after the per-config flags, so an unconditional -O2 would
# override Debug's own optimisation level.
target_compile_options(vmsig PRIVATE
  -Wall
  -Wextra
  $<$<NOT:$<CONFIG:Debug>>:-O2>)

# Opt-in LTO through CMake's INTERPROCEDURAL_OPTIMIZATION property rather than
# a hand-written -flto: CMake picks the right compile/link flags for the
# toolchain, and the support check fails early with a clear message.
if(VMSIG_LTO)
  include(CheckIPOSupported)
  check_ipo_supported(RESULT vmsig_ipo_supported OUTPUT vmsig_ipo_output LANGUAGES C)
  if(NOT vmsig_ipo_supported)
    message(FATAL_ERROR
      "VMSIG_LTO=ON but the toolchain does not support IPO/LTO: ${vmsig_ipo_output}")
  endif()
  set_target_properties(vmsig PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
endif()
|
||||||
|
|
||||||
|
# ---- demonstrator on top of the library (like vmie_cli / vmctl) -------------
|
||||||
|
add_executable(vmsig_cli src/cli.c)
|
||||||
|
target_link_libraries(vmsig_cli PRIVATE vmsig)
|
||||||
|
target_compile_options(vmsig_cli PRIVATE -Wall -Wextra)
|
||||||
|
|
||||||
|
# ---- transfer-context tests (ctest) -----------------------------------------
|
||||||
|
enable_testing()

# vmsig_add_ctest(<test-name> <target> <source> [THREADS] [INCLUDES <dir>...])
#
# Builds <source> into executable <target>, links it against vmsig, enables
# -Wall -Wextra and registers it with ctest as <test-name>.
#   THREADS   additionally link Threads::Threads (for tests that link it directly).
#   INCLUDES  private include directories, for tests that reach into
#             library-internal headers.
function(vmsig_add_ctest test_name target source)
  cmake_parse_arguments(PARSE_ARGV 3 arg "THREADS" "" "INCLUDES")
  if(arg_UNPARSED_ARGUMENTS)
    message(FATAL_ERROR
      "vmsig_add_ctest(${test_name}): unknown arguments: ${arg_UNPARSED_ARGUMENTS}")
  endif()

  add_executable(${target} ${source})
  target_link_libraries(${target} PRIVATE vmsig)
  if(arg_THREADS)
    target_link_libraries(${target} PRIVATE Threads::Threads)
  endif()
  if(arg_INCLUDES)
    target_include_directories(${target} PRIVATE ${arg_INCLUDES})
  endif()
  target_compile_options(${target} PRIVATE -Wall -Wextra)
  add_test(NAME ${test_name} COMMAND ${target})
endfunction()

vmsig_add_ctest(ctx      vmsig_test         src/test/test_ctx.c)
vmsig_add_ctest(sec      vmsig_sectest      src/test/test_sec.c)
vmsig_add_ctest(sock     vmsig_socktest     src/test/test_sock.c THREADS)
vmsig_add_ctest(mvm      vmsig_mvmtest      src/test/test_mvm.c)
vmsig_add_ctest(authz    vmsig_authztest    src/test/test_authz.c)
vmsig_add_ctest(memctx   vmsig_memctxtest   src/test/test_memctx.c THREADS
  INCLUDES
    "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include")
vmsig_add_ctest(vmhost   vmsig_vmhosttest   src/test/test_vmhost.c THREADS
  INCLUDES
    "${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/vmhost/include")
vmsig_add_ctest(lease    vmsig_leasetest    src/test/test_lease.c THREADS
  INCLUDES
    "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/src/ctx/include")
vmsig_add_ctest(inputobs vmsig_inputobstest src/test/test_inputobs.c THREADS)
vmsig_add_ctest(memwrite vmsig_memwritetest src/test/test_memwrite.c THREADS
  INCLUDES
    "${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/memctx/include")
|
||||||
|
|
||||||
|
# the demonstrator doubles as an end-to-end seam test (self-terminates rc=0)
|
||||||
|
add_test(NAME cli COMMAND vmsig_cli)
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
#ifndef VMSIG_H
|
||||||
|
#define VMSIG_H
|
||||||
|
|
||||||
|
/* vmsig.h — umbrella header for the signaling layer of the SISC paradigm.
|
||||||
|
*
|
||||||
|
* vmsig binds the three SI repos (sensors vmie/vgpustream + input vmctl) to the control
|
||||||
|
* (algorithm OR human), bidirectionally translating transfer events. The layer itself
|
||||||
|
* contains NO sensing, actuation, or decision/behavioral-timing logic.
|
||||||
|
*
|
||||||
|
* Application wiring: create the context (vmsig_ctx_new) -> core (vmsig_core_new) ->
|
||||||
|
* register per-VM adapters (vmsig_core_add_adapter) and control
|
||||||
|
* (vmsig_core_add_control) -> vmsig_core_run. */
|
||||||
|
|
||||||
|
#include "vmsig_event.h" /* neutral transfer-event model + payload */
|
||||||
|
#include "vmsig_memctx.h" /* address-space context handoff contract (kcr3+locator) */
|
||||||
|
#include "vmsig_ctx.h" /* transfer context: priority/seq/protocol timing */
|
||||||
|
#include "vmsig_adapter.h" /* unified SI adapter interface + factories */
|
||||||
|
#include "vmsig_control.h" /* control-agnostic seam + reference in-proc */
|
||||||
|
#include "vmsig_core.h" /* epoll core */
|
||||||
|
|
||||||
|
#endif /* VMSIG_H */
|
||||||
@@ -0,0 +1,81 @@
|
|||||||
|
#ifndef VMSIG_ADAPTER_H
|
||||||
|
#define VMSIG_ADAPTER_H
|
||||||
|
#include "vmsig_event.h"
|
||||||
|
#include "vmsig_memctx.h" /* vmsig_memctx_reg — address-space context registration seam */
|
||||||
|
|
||||||
|
/* vmsig_adapter.h — unified SI adapter interface. One vtable, three readiness
|
||||||
|
* shapes behind it. The adapter is the ONLY place that includes neighbor headers
|
||||||
|
* (memmodel.h/win32.h/vgpu_stream.h/vmctl.h). It registers 0..N fds with the core;
|
||||||
|
* the core does not know whether it is a socket, eventfd or timerfd. SI specifics
|
||||||
|
* never leave these functions. */
|
||||||
|
|
||||||
|
typedef struct vmsig_adapter vmsig_adapter; /* opaque adapter instance */
|
||||||
|
|
||||||
|
/* How the adapter expresses readiness. The core treats all three as ordinary
|
||||||
|
* epoll fds; the enum is documentation + the choice of default epoll flags. */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_RDY_FD = 0, /* native pollable fd (socket) */
|
||||||
|
VMSIG_RDY_TIMERFD = 1, /* timerfd; adapter samples shared memory */
|
||||||
|
VMSIG_RDY_EVENTFD = 2 /* worker thread bridges a blocking API -> eventfd */
|
||||||
|
} vmsig_readiness;
|
||||||
|
|
||||||
|
/* Sink handed by the core to the adapter for emitting UP events without knowing
|
||||||
|
* the internals of the context. emit() is thread-safe (also called from worker
|
||||||
|
* threads); register_memctx/unregister_memctx are called ONLY on the loop thread.
|
||||||
|
* The registration hooks may be NULL (adapters/tests need not call them). */
|
||||||
|
typedef struct {
|
||||||
|
int (*emit)(void* token, vmsig_event* ev); /* UP (thread-safe) */
|
||||||
|
int (*register_memctx)(void* token, const vmsig_memctx_reg* reg); /* loop thread: AS context; 0/-1 */
|
||||||
|
void (*unregister_memctx)(void* token, uint32_t endpoint); /* loop thread: context gone */
|
||||||
|
void* token;
|
||||||
|
} vmsig_emit;
|
||||||
|
|
||||||
|
/* One fd contributed by the adapter, with epoll flags and a cookie for demux. */
|
||||||
|
typedef struct {
|
||||||
|
int fd;
|
||||||
|
uint32_t epoll_events; /* EPOLLIN / EPOLLOUT / ... */
|
||||||
|
vmsig_readiness shape;
|
||||||
|
uint32_t cookie; /* adapter-private fd discriminator */
|
||||||
|
} vmsig_fd_reg;
|
||||||
|
|
||||||
|
/* Adapter vtable. Each SI adapter implements this; SI specifics do not leak. */
|
||||||
|
typedef struct vmsig_adapter_ops {
|
||||||
|
const char* name; /* "memctx"/"input"/"vmhost" — diagnostics */
|
||||||
|
vmsig_source source; /* neutral seam role */
|
||||||
|
uint32_t codec; /* vmsig_codec owned by the adapter */
|
||||||
|
|
||||||
|
/* Create an instance from opaque cfg (adapter parses it; core passes as-is).
|
||||||
|
* Returns an instance or NULL. `endpoint` is the id of the VM it binds to. */
|
||||||
|
vmsig_adapter* (*open)(const void* cfg, uint32_t endpoint);
|
||||||
|
|
||||||
|
/* Attach: open the SI contract, bring up workers, hand fds into reg[]
|
||||||
|
* (<=cap), store `emit` for UP. Returns the number of registered fds (>=0) / -1. */
|
||||||
|
int (*attach)(vmsig_adapter* a, const vmsig_emit* emit,
|
||||||
|
vmsig_fd_reg* reg, int cap);
|
||||||
|
|
||||||
|
/* Readiness of one of the adapter's fds: `cookie` identifies the fd, `events`
|
||||||
|
* are the epoll flags. The adapter does NON-blocking work (reads the socket /
|
||||||
|
* drains the eventfd / reads the timerfd + samples counters) and calls emit on
|
||||||
|
* each UP. 0 — ok, -1 — fatal (the core detaches the adapter). */
|
||||||
|
int (*on_readiness)(vmsig_adapter* a, uint32_t cookie, uint32_t events);
|
||||||
|
|
||||||
|
/* Consume a DOWN event (a control decision): encode it into the contract
|
||||||
|
* (vmctl_batch / vmctl power; write the vgpu control block; read request to vmie).
|
||||||
|
* For blocking sinks it hands the work to a worker and returns immediately;
|
||||||
|
* completion arrives later as an UP VMSIG_EV_ACT_ACK (keyed by ev->corr).
|
||||||
|
* 0 — accepted, 1 — rejected (not for this seam), -1 — error. */
|
||||||
|
int (*submit)(vmsig_adapter* a, const vmsig_event* ev);
|
||||||
|
|
||||||
|
/* Detach + free: stop workers, close SI handles and fds. */
|
||||||
|
void (*close)(vmsig_adapter* a);
|
||||||
|
} vmsig_adapter_ops;
|
||||||
|
|
||||||
|
/* Factories (defined in each adapter's TU — the only symbol the build/cli layer
|
||||||
|
* needs; keeps neighbor headers out of the core's include-path). */
|
||||||
|
const vmsig_adapter_ops* vmsig_memctx_ops(void); /* vmie: address-space context (kcr3+locator) */
|
||||||
|
const vmsig_adapter_ops* vmsig_input_ops(void); /* vmctl */
|
||||||
|
const vmsig_adapter_ops* vmsig_vmhost_ops(void); /* QEMU/QMP (its own signaling) */
|
||||||
|
/* (vgpu frame sensor is no longer a signaling adapter: vgpu perception lives in an
|
||||||
|
* out-of-repo S-lib that consumes memctx; see vgpu-perception-handoff.) */
|
||||||
|
|
||||||
|
#endif /* VMSIG_ADAPTER_H */
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
#ifndef VMSIG_CONTROL_H
|
||||||
|
#define VMSIG_CONTROL_H
|
||||||
|
#include "vmsig_event.h"
|
||||||
|
|
||||||
|
/* vmsig_control.h — control-agnostic seam. Control (an algorithm OR a human)
|
||||||
|
* attaches via ONE neutral interface: a command queue (down) + an event
|
||||||
|
* subscription (up). In-process implements the vtable with direct callbacks
|
||||||
|
* (fd = -1); out-of-process is a socket whose fd is registered with the core like
|
||||||
|
* any source. The core treats both the same. Orchestration is NOT wired in here —
|
||||||
|
* only the seam. */
|
||||||
|
|
||||||
|
/* Subscription filter: which UP events the control WANTS. This is only a
|
||||||
|
* NARROWING; the real ceiling is set by the grant (effective = sub ∩ grant). */
|
||||||
|
typedef struct {
|
||||||
|
uint32_t source_mask; /* bit (1u<<vmsig_source) for the seam of interest */
|
||||||
|
vmsig_prio prio_min; /* drop UP below this priority */
|
||||||
|
uint64_t endpoint_mask; /* 0 = all VMs (NOTE: the opposite of vmsig_grant.endpoint_mask, where 0 = none); otherwise bit (1ull<<endpoint) */
|
||||||
|
} vmsig_sub;
|
||||||
|
|
||||||
|
/* ===== Security layer: a control's grant (capability set) =====
|
||||||
|
* Neutral ceiling of a poller's rights: which VMs, which UP sources, which classes
|
||||||
|
* of DOWN commands. The enforcement mechanism is in the core (admission/pump_up/
|
||||||
|
* emit_down); the policy (who gets what) is set by the embedding program/
|
||||||
|
* orchestrator. Default DENY: an empty grant => not a valid poller (receives and
|
||||||
|
* sends nothing). */
|
||||||
|
#define VMSIG_CAP_OBSERVE 0x1u /* UP of SEAM/generic coherent state (observation) */
|
||||||
|
#define VMSIG_CAP_INPUT 0x2u /* CMD_INPUT */
|
||||||
|
/* (0x4 is the freed bit of the removed CAP_STREAM; the future vgpu-control down-path
|
||||||
|
* returns via write-signaled/MEMWRITE. Do NOT reuse.) */
|
||||||
|
#define VMSIG_CAP_LIFECYCLE 0x8u /* CMD_LIFECYCLE safe ones (pause/resume/wakeup) */
|
||||||
|
/* (0x10 is the freed bit of the removed CAP_MEMREAD; do NOT reuse: a stale grant
|
||||||
|
* with this bit must not silently alias to the privileged memory cap.) */
|
||||||
|
#define VMSIG_CAP_POWER 0x20u /* destructive lifecycle/VM (powerdown/reset/quit) */
|
||||||
|
#define VMSIG_CAP_VM 0x40u /* CMD_VM safe ones (query/cont/stop), VMHOST seam */
|
||||||
|
#define VMSIG_CAP_MEMCTX 0x80u /* SUBSCRIPTION to a coherent AS context (UP MEMCTX*, re-share RO-fd).
|
||||||
|
* NOT an access broker (that is OS-DAC on the fd) — gates RECEIVING the datum. */
|
||||||
|
#define VMSIG_CAP_MEMWRITE 0x100u /* CMD_MEMWRITE: atomic write-signaled mutation of shared guest memory
|
||||||
|
* (separate from the freed CAP_MEMREAD bit — read != write; fresh bit
|
||||||
|
* avoids stale-grant aliasing to this privileged cap). */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
uint32_t principal; /* id for auditing (uid/token) */
|
||||||
|
uint64_t endpoint_mask; /* which VMs (bit 1ull<<endpoint, endpoint<64); 0=none */
|
||||||
|
uint32_t source_mask; /* which UP sources (bit 1u<<vmsig_source) */
|
||||||
|
uint32_t cap_mask; /* VMSIG_CAP_* */
|
||||||
|
uint32_t arb_prio; /* lease arbitration priority: higher=stronger; supervisor=max. */
|
||||||
|
/* Separate from vmsig_prio (on-wire ordering). The default */
|
||||||
|
/* arbitration policy compares it (STRICTLY higher preempts, ties=owner). */
|
||||||
|
} vmsig_grant;
|
||||||
|
|
||||||
|
/* ===== Lease arbitration policy (orchestrator; §5) =====
|
||||||
|
* Signaling owns the MECHANISM (exclusivity, preemption, fencing, finalization);
|
||||||
|
* the POLICY (preempt or deny on conflict) is set by the orchestrator via a
|
||||||
|
* pluggable callback. Default (cb==NULL) = arb_prio comparison: STRICTLY higher ->
|
||||||
|
* PREEMPT, otherwise DENY. */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_ARB_DENY = 0, /* deny the contender, the owner keeps it */
|
||||||
|
VMSIG_ARB_PREEMPT = 1 /* take it from the owner, give it to the contender (QUEUE — reserved) */
|
||||||
|
} vmsig_arb_decision;
|
||||||
|
|
||||||
|
/* Called ONLY when (endpoint,class) is held by a LIVE owner (incumbent) and an
|
||||||
|
* ACQUIRE arrives from another contender. incumbent/contender are the parties'
|
||||||
|
* grants (live, not copies); incumbent is NEVER NULL (a dead owner is treated as a
|
||||||
|
* free slot and policy is not called). Called on the loop thread. */
|
||||||
|
typedef vmsig_arb_decision (*vmsig_arb_policy)(void* ud, uint32_t endpoint, uint32_t cls,
|
||||||
|
const vmsig_grant* incumbent,
|
||||||
|
const vmsig_grant* contender);
|
||||||
|
|
||||||
|
/* Control endpoint vtable. The core calls deliver() for UP; control sends DOWN via
|
||||||
|
* the emit hook that the core installs in set_emit_down(). */
|
||||||
|
typedef struct vmsig_control_ops {
|
||||||
|
const char* name;
|
||||||
|
|
||||||
|
/* fd for an out-of-process control (socket). -1 => in-process, callbacks only
|
||||||
|
* (no registration in epoll). */
|
||||||
|
int (*fd)(void* ctl);
|
||||||
|
|
||||||
|
/* Declare interest (called once at attach). */
|
||||||
|
int (*subscribe)(void* ctl, vmsig_sub* out);
|
||||||
|
|
||||||
|
/* Core -> control: an UP event for the subscriber. For in-process, a direct
|
||||||
|
* call; for socket-control, serialization onto the wire. Borrowed: whatever
|
||||||
|
* must outlive the call must be copied. */
|
||||||
|
int (*deliver)(void* ctl, const vmsig_event* ev);
|
||||||
|
|
||||||
|
/* Core -> control (socket only): the control-fd is readable; the implementation
|
||||||
|
* parses the wire into DOWN events and calls the installed down-emit. */
|
||||||
|
int (*on_readable)(void* ctl);
|
||||||
|
|
||||||
|
/* The core installs the hook by which control sends DOWN commands; the core
|
||||||
|
* routes them into vmsig_ctx_submit(ctx, VMSIG_DIR_DOWN, ev). */
|
||||||
|
void (*set_emit_down)(void* ctl, int (*emit)(void* token, vmsig_event*),
|
||||||
|
void* token);
|
||||||
|
|
||||||
|
void (*close)(void* ctl);
|
||||||
|
|
||||||
|
/* Core -> control: deliver a coherent address-space context (UP MEMCTX) + RO-fd
|
||||||
|
* of the RAM region. Socket: a vmsig_wire frame (kind=MEMCTX, inln=vmsig_memctx) + fd in cmsg
|
||||||
|
* (SCM_RIGHTS); the segs payload does NOT go on the wire (the holder opens
|
||||||
|
* via `low`). In-proc: direct fd + event (segs in payload, decode with vmsig_memctx_segs).
|
||||||
|
* The fd is BORROWED for the duration of the call (the core closes it afterwards) — the holder
|
||||||
|
* dup's/mmap's it to keep it. Optional: NULL => control does not accept MEMCTX. 0/-1. */
|
||||||
|
int (*attach_memctx)(void* ctl, const vmsig_event* ev, int fd);
|
||||||
|
} vmsig_control_ops;
|
||||||
|
|
||||||
|
/* Reference in-process control: a thin shim turning a C callback into a vtable, for
|
||||||
|
* embedding an algorithm directly. */
|
||||||
|
typedef struct {
|
||||||
|
int (*on_event)(void* user, const vmsig_event* up); /* core -> algorithm */
|
||||||
|
void* user;
|
||||||
|
vmsig_sub sub; /* subscription filter */
|
||||||
|
/* Core -> algorithm: a coherent AS context (UP MEMCTX) + RO-fd as a direct int. The fd
|
||||||
|
* is borrowed (dup/mmap to keep it). NULL => does not accept. 0/-1. */
|
||||||
|
int (*on_memctx)(void* user, const vmsig_event* ev, int fd);
|
||||||
|
} vmsig_inproc_cfg;
|
||||||
|
|
||||||
|
/* Create a reference in-proc control over cfg (which is copied). Returns an opaque
|
||||||
|
* ctl for vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl). Freed via
|
||||||
|
* ops->close(ctl). NULL on OOM. */
|
||||||
|
/* Vtable of the reference in-proc control; pass it to vmsig_core_add_control()
 * together with a ctl obtained from vmsig_inproc_control_new(). */
const vmsig_control_ops* vmsig_inproc_control_ops(void);
|
||||||
|
/* Allocate an in-proc control from cfg (cfg is copied). Returns the opaque ctl,
 * or NULL on OOM; release it through the vtable's close(ctl). */
void* vmsig_inproc_control_new(const vmsig_inproc_cfg* cfg);
|
||||||
|
|
||||||
|
/* Send a DOWN command from an in-proc control (after attach). 0 — ok, -1 — error. */
|
||||||
|
int vmsig_inproc_send(void* ctl, vmsig_event* down);
|
||||||
|
|
||||||
|
#endif /* VMSIG_CONTROL_H */
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
#ifndef VMSIG_CORE_H
|
||||||
|
#define VMSIG_CORE_H
|
||||||
|
#include "vmsig_event.h"
|
||||||
|
#include "vmsig_ctx.h"
|
||||||
|
#include "vmsig_adapter.h"
|
||||||
|
#include "vmsig_control.h"
|
||||||
|
|
||||||
|
/* vmsig_core.h — non-blocking epoll core. It knows a single vocabulary: "here is
|
||||||
|
* an fd — call the neutral handler on readiness; the handler produces/consumes
|
||||||
|
* neutral events". All neighbor mechanisms are just different ways to spawn an
|
||||||
|
* fd. The core structurally cannot name a neighbor's type: neighbor headers are
|
||||||
|
* visible only from the adapter TUs. */
|
||||||
|
|
||||||
|
typedef struct vmsig_core vmsig_core;
|
||||||
|
|
||||||
|
/* Create the core over a transfer context (the core does NOT own ctx; ctx's
|
||||||
|
* lifetime must cover the core). NULL on error. */
|
||||||
|
vmsig_core* vmsig_core_new(vmsig_ctx* ctx);
|
||||||
|
|
||||||
|
/* Stop, detach all adapters/control, free. Safe on NULL. */
|
||||||
|
void vmsig_core_free(vmsig_core* c);
|
||||||
|
|
||||||
|
/* ===== Audit (observability of admissions/denials) ===== */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_AUDIT_ADMIT = 0, /* poller admitted (socket accept) */
|
||||||
|
VMSIG_AUDIT_REJECT = 1, /* poller rejected at accept (empty grant) */
|
||||||
|
VMSIG_AUDIT_DOWN_DENIED = 2, /* DOWN command denied by grant/cap */
|
||||||
|
/* --- lease arbitration --- */
|
||||||
|
VMSIG_AUDIT_LEASE_GRANTED = 3, /* lease granted/preempted */
|
||||||
|
VMSIG_AUDIT_LEASE_DENIED = 4, /* ACQUIRE denied OR destructive dropped by lease gate */
|
||||||
|
VMSIG_AUDIT_LEASE_REVOKED = 5, /* lease revoked by preemption */
|
||||||
|
VMSIG_AUDIT_LEASE_RECLAIMED = 6, /* lease reclaimed on owner death (reclaim) */
|
||||||
|
VMSIG_AUDIT_MEMCTX_GRANTED = 7 /* address-space context granted/replayed to holder */
|
||||||
|
} vmsig_audit_kind;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vmsig_audit_kind kind;
|
||||||
|
uint32_t principal; /* uid/token (grant.principal or peer uid) */
|
||||||
|
uint32_t endpoint;
|
||||||
|
uint32_t cmd; /* vmsig_kind for DOWN_DENIED */
|
||||||
|
uint32_t detail; /* extra (e.g. peer pid) */
|
||||||
|
} vmsig_audit;
|
||||||
|
|
||||||
|
/* Set the audit callback (NULL = off). Called on the loop thread. */
|
||||||
|
void vmsig_core_set_audit(vmsig_core* c,
|
||||||
|
void (*cb)(void* ud, const vmsig_audit* a), void* ud);
|
||||||
|
|
||||||
|
/* Set the lease arbitration policy (NULL => default: contender.arb_prio >
|
||||||
|
* incumbent.arb_prio ? PREEMPT : DENY). Called on the loop thread. */
|
||||||
|
void vmsig_core_set_arb_policy(vmsig_core* c, vmsig_arb_policy cb, void* ud);
|
||||||
|
|
||||||
|
/* Register an adapter for VM `endpoint`: open(cfg,endpoint) -> attach(...),
|
||||||
|
* enroll each yielded fd into epoll and into the dispatch table fd->(adapter,cookie).
|
||||||
|
* Returns the adapter id (>=0) or -1. */
|
||||||
|
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
|
||||||
|
const void* cfg, uint32_t endpoint);
|
||||||
|
|
||||||
|
/* Attach a control endpoint (in-process or socket) with a GRANT (capability set).
|
||||||
|
* grant == NULL => default-deny (poller inert). The core sees only the neutral
|
||||||
|
* vtable + grant + (opt.) fd. Returns the control id (>=0) or -1. */
|
||||||
|
int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ctl,
|
||||||
|
const vmsig_grant* grant);
|
||||||
|
|
||||||
|
/* Spin the loop until a stop is requested. 0 — clean, -1 — fatal. */
|
||||||
|
int vmsig_core_run(vmsig_core* c);
|
||||||
|
|
||||||
|
/* Asynchronous, signal-safe stop request: writes the wakeup eventfd. */
|
||||||
|
void vmsig_core_stop(vmsig_core* c);
|
||||||
|
|
||||||
|
#endif /* VMSIG_CORE_H */
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
#ifndef VMSIG_CTX_H
|
||||||
|
#define VMSIG_CTX_H
|
||||||
|
#include "vmsig_event.h"
|
||||||
|
|
||||||
|
/* vmsig_ctx.h — the "transfer context": the SISC-critical seam owning PRIORITY,
|
||||||
|
* SEQUENCING and PROTOCOL timing of delivery. Behavioral timing does NOT belong
|
||||||
|
* here — commands arrive already decided from control; the context merely
|
||||||
|
* orders and paces them on the "wire". */
|
||||||
|
|
||||||
|
typedef struct vmsig_ctx vmsig_ctx; /* opaque: queues, seq, timing */
|
||||||
|
|
||||||
|
/* Protocol (RS232-like) transmission timings — transport ONLY, not behavior.
|
||||||
|
* All zeros = pass-through (no pacing). */
|
||||||
|
typedef struct {
|
||||||
|
uint32_t min_gap_ns; /* min. gap between channel events (rate-cap) */
|
||||||
|
uint32_t coalesce_ns; /* collapse bursts of one kind within a window */
|
||||||
|
uint32_t max_inflight; /* backpressure depth on a channel before drop */
|
||||||
|
uint8_t drop_policy; /* VMSIG_DROP_* */
|
||||||
|
} vmsig_timing;
|
||||||
|
|
||||||
|
#define VMSIG_DROP_OLDEST 0
|
||||||
|
#define VMSIG_DROP_NEWEST 1
|
||||||
|
#define VMSIG_DROP_BLOCK 2
|
||||||
|
|
||||||
|
vmsig_ctx* vmsig_ctx_new(void);
|
||||||
|
void vmsig_ctx_free(vmsig_ctx* c);
|
||||||
|
|
||||||
|
/* Policy per (source,dir): default priority + protocol timing. They live
|
||||||
|
* here, NOT in adapters and NOT in control. */
|
||||||
|
int vmsig_ctx_set_policy(vmsig_ctx* c, vmsig_source src, vmsig_dir dir,
|
||||||
|
vmsig_prio default_prio, const vmsig_timing* t);
|
||||||
|
|
||||||
|
/* Enqueue an event into the `dir`-direction context (assigns seq, applies
|
||||||
|
* priority/timing/coalescing/backpressure). 0 — enqueued, 1 —
|
||||||
|
* coalesced/dropped by policy, -1 — error. On success takes ownership of
|
||||||
|
* ev->payload. Thread-safe (the UP side is called from worker threads). */
|
||||||
|
int vmsig_ctx_submit(vmsig_ctx* c, vmsig_dir dir, vmsig_event* ev);
|
||||||
|
|
||||||
|
/* Fetch the next event of direction `dir` ready for delivery, honoring
|
||||||
|
* priority + protocol timing. 1 — event written to out, 0 — nothing yet
|
||||||
|
* (caller arms timing_fd), -1 — error. */
|
||||||
|
int vmsig_ctx_next(vmsig_ctx* c, vmsig_dir dir, vmsig_event* out);
|
||||||
|
|
||||||
|
/* timerfd by which the context wakes the loop when a paced/coalesced event
|
||||||
|
* has matured. Registered in the core like any source. -1 if not needed. */
|
||||||
|
int vmsig_ctx_timing_fd(vmsig_ctx* c, vmsig_dir dir);
|
||||||
|
|
||||||
|
#endif /* VMSIG_CTX_H */
|
||||||
@@ -0,0 +1,280 @@
|
|||||||
|
#ifndef VMSIG_EVENT_H
|
||||||
|
#define VMSIG_EVENT_H
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
/* vmsig_event.h — neutral "transfer event" + "payload" model.
|
||||||
|
*
|
||||||
|
* This is the ONLY type that crosses the signaling core. The taxonomy names the
|
||||||
|
* transfer SEMANTICS, not neighbor types: a TU compiled against this header
|
||||||
|
* cannot name vmctl_batch, vgpu_producer_t, or vmie_mem. The SI data body lives
|
||||||
|
* in an opaque payload owned by the source adapter's codec; the core does NOT
|
||||||
|
* dereference it — it only routes the event and carries the payload. */
|
||||||
|
|
||||||
|
/* Transfer direction relative to control. */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_DIR_UP = 0, /* sensor/state -> control */
|
||||||
|
VMSIG_DIR_DOWN = 1 /* control decision -> actuation/SI */
|
||||||
|
} vmsig_dir;
|
||||||
|
|
||||||
|
/* Logical seam (SI role) the event crosses. NEUTRAL roles, not driver names:
|
||||||
|
* assigned at adapter registration, used only for routing, the priority default,
|
||||||
|
* and the subscription filter. */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_SRC_NONE = 0,
|
||||||
|
VMSIG_SRC_FRAME = 1, /* vgpu desktop sensor role; reserved: no signaling adapter,
|
||||||
|
* the future vgpu-perception shell-as-control carries it (CURSOR_STATE) */
|
||||||
|
VMSIG_SRC_INPUT = 2, /* input/actuation + lifecycle (vmctl role) */
|
||||||
|
VMSIG_SRC_CONTROL = 3, /* originated by a control endpoint */
|
||||||
|
VMSIG_SRC_CORE = 4, /* core-internal (shutdown/error/tick) */
|
||||||
|
VMSIG_SRC_VMHOST = 5, /* VM substrate / QEMU: lifecycle + events (own QMP) */
|
||||||
|
VMSIG_SRC_MEMCTX = 6, /* coherent guest address-space context (kcr3+locator) */
|
||||||
|
VMSIG_SRC_MAX
|
||||||
|
} vmsig_source;
|
||||||
|
|
||||||
|
/* Delivery priority class. Higher value — earlier delivery. This is NOT a
|
||||||
|
* behavioral timing but ordering on the "wire". The default is assigned per
|
||||||
|
* source at registration; the emitter may override it per event. */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_PRIO_BULK = 0, /* frames, large state deltas */
|
||||||
|
VMSIG_PRIO_NORMAL = 1, /* routine ack/samples */
|
||||||
|
VMSIG_PRIO_HIGH = 2, /* input commands (latency-sensitive) */
|
||||||
|
VMSIG_PRIO_URGENT = 3, /* lifecycle, seam-down, errors */
|
||||||
|
VMSIG_PRIO_MAX
|
||||||
|
} vmsig_prio;
|
||||||
|
|
||||||
|
/* NEUTRAL event taxonomy: each kind is a transfer MEANING that exactly one
|
||||||
|
* adapter codec decodes from / encodes into its contract. The core routes by
|
||||||
|
* kind + source + dir + prio and does not interpret the payload. */
|
||||||
|
typedef enum {
|
||||||
|
/* --- generic / lifecycle (any seam) --- */
|
||||||
|
VMSIG_EV_NONE = 0,
|
||||||
|
VMSIG_EV_SEAM_UP = 1, /* SI seam came up (attach/bootstrap ok) */
|
||||||
|
VMSIG_EV_SEAM_DOWN = 2, /* seam lost (heartbeat stale, socket closed) */
|
||||||
|
VMSIG_EV_ERROR = 3, /* adapter/core error, details in payload */
|
||||||
|
|
||||||
|
/* (16..19 — retired STATE_* of the MEMSTATE seam; do NOT reuse numbers: on a
|
||||||
|
* version skew an old STATE kind must not alias a new kind on the wire.) */
|
||||||
|
|
||||||
|
/* (32..36 — retired FRAME_READY/FRAME_STATE/BULK_ATTACHED/BULK_READY/BULK_DETACHED of
|
||||||
|
* the removed FRAME adapter + bulk data-plane (vgpu perception moved to an S-lib);
|
||||||
|
* do NOT reuse numbers — wire-skew safety.) */
|
||||||
|
|
||||||
|
/* --- UP: cursor (vgpu sensor; emitted by the vgpu-perception shell-as-control) --- */
|
||||||
|
VMSIG_EV_CURSOR_STATE = 37, /* cursor position/visibility; inln=vmsig_cursor; cap OBSERVE|INPUT */
|
||||||
|
|
||||||
|
/* --- UP: input/lifecycle ack (INPUT seam) --- */
|
||||||
|
VMSIG_EV_ACT_ACK = 48, /* down-command completed (ok/err) */
|
||||||
|
VMSIG_EV_VM_LIFECYCLE = 49, /* power/lifecycle state report */
|
||||||
|
|
||||||
|
/* --- UP: lease arbitration (all addressed, origin=initiator; source=CORE) --- */
|
||||||
|
VMSIG_EV_LEASE_GRANTED = 50, /* lease granted (CMD_ACQUIRE succeeded) */
|
||||||
|
VMSIG_EV_LEASE_DENIED = 51, /* lease denied (reason in vmsig_lease_req) */
|
||||||
|
VMSIG_EV_LEASE_RELEASED= 52, /* lease released by owner (CMD_RELEASE) */
|
||||||
|
VMSIG_EV_LEASE_REVOKED = 53, /* lease taken away by preemption/death */
|
||||||
|
VMSIG_EV_LEASE_STATUS = 54, /* response to CMD_LEASE_STATUS (vmsig_lease_status) */
|
||||||
|
|
||||||
|
/* --- UP: response to a held-input query (INPUT seam, addressed to initiator) --- */
|
||||||
|
VMSIG_EV_INPUT_HELD = 55, /* set of held KEY/BTN from the vmctl record; inln=vmsig_input_held */
|
||||||
|
|
||||||
|
/* --- DOWN: control decisions --- */
|
||||||
|
VMSIG_EV_CMD_INPUT = 64, /* input injection (abs/rel/btn/key/scroll) */
|
||||||
|
VMSIG_EV_CMD_LIFECYCLE = 65, /* powerdown/reset/wakeup/pause/resume */
|
||||||
|
/* (66 — retired CMD_STREAM of the removed FRAME adapter; the future vgpu-control
|
||||||
|
* down-path returns via write-signaled/MEMWRITE. 67..69 — retired
|
||||||
|
* CMD_QUERY/WATCH/UNWATCH; do NOT reuse numbers.) */
|
||||||
|
VMSIG_EV_CMD_VM = 70, /* base VM control (vmsig_vm_cmd; VMHOST seam) */
|
||||||
|
/* (71..72 — retired CMD_SUBSCRIBE_BULK/UNSUBSCRIBE_BULK of the bulk data-plane;
|
||||||
|
* do NOT reuse numbers.) */
|
||||||
|
|
||||||
|
/* --- DOWN: lease arbitration (intercepted by the core, not forwarded to the adapter) --- */
|
||||||
|
VMSIG_EV_CMD_ACQUIRE = 73, /* request an exclusive lease of a class: inln=vmsig_lease_req */
|
||||||
|
VMSIG_EV_CMD_RELEASE = 74, /* release your own lease of a class: inln=vmsig_lease_req */
|
||||||
|
VMSIG_EV_CMD_LEASE_STATUS = 75, /* query lease status of a class: inln=vmsig_lease_req */
|
||||||
|
VMSIG_EV_CMD_QUERY_INPUT = 76, /* query held KEY/BTN (from the vmctl record); reply UP INPUT_HELD; cap INPUT */
|
||||||
|
|
||||||
|
/* --- UP: address-space context (MEMCTX seam; coherent kcr3+locator datum) --- */
|
||||||
|
VMSIG_EV_MEMCTX = 77, /* context multicast/replay: inln=vmsig_memctx,
|
||||||
|
* payload=vmsig_memseg[] (owned), RO-fd alongside */
|
||||||
|
VMSIG_EV_MEMCTX_INVALIDATED = 78, /* epoch invalidation: inln=vmsig_memctx_inv (URGENT) */
|
||||||
|
|
||||||
|
/* --- DOWN: coherent memory write (write-signaled; MEMCTX seam) --- */
|
||||||
|
VMSIG_EV_CMD_MEMWRITE = 79, /* atomic gva_write under the held lease; inln=vmsig_memwrite (+tail/payload bytes);
|
||||||
|
* cap MEMWRITE + lease MEMWRITE + extent. ACK via ACT_ACK{ok,corr}. */
|
||||||
|
VMSIG_EV_KIND_MAX
|
||||||
|
} vmsig_kind;
|
||||||
|
|
||||||
|
/* ===== Lease arbitration (exclusive-ownership layer for destructive resources) =====
|
||||||
|
* A destructive VM resource is owned by EXACTLY one control (per endpoint+class pair).
|
||||||
|
* The class is generic; INPUT, POWER and MEMWRITE are active. MEMWRITE is the
|
||||||
|
* write-signaled atomic guest-memory write on the MEMCTX seam. */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_LEASE_INPUT = 0, /* exclusive grab of input (CMD_INPUT) */
|
||||||
|
VMSIG_LEASE_POWER = 1, /* exclusive destructive power (lifecycle/VM) */
|
||||||
|
VMSIG_LEASE_MEMWRITE = 2, /* exclusive atomic guest-memory write (gva_write); NO finalization */
|
||||||
|
VMSIG_LEASE_CLASS_MAX
|
||||||
|
} vmsig_lease_class;
|
||||||
|
|
||||||
|
/* Lease denial reason (vmsig_lease_req.reason in UP LEASE_DENIED). */
|
||||||
|
enum {
|
||||||
|
VMSIG_LEASE_DENY_HELD = 0, /* held by an equal/higher; the owner holds it */
|
||||||
|
VMSIG_LEASE_DENY_NOCAP = 1, /* no cap for the class (CAP_INPUT/CAP_POWER; presumably CAP_MEMWRITE for the MEMWRITE class — confirm) */
|
||||||
|
VMSIG_LEASE_DENY_NOGRANT = 2, /* endpoint outside the grant (endpoint_mask) */
|
||||||
|
VMSIG_LEASE_DENY_BADCLASS = 3, /* class out of range */
|
||||||
|
VMSIG_LEASE_DENY_LOWER_PRIO = 4 /* contender priority not above the owner's */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Lease request/response (DOWN CMD_ACQUIRE/RELEASE/LEASE_STATUS and UP LEASE_*, in inln). */
|
||||||
|
typedef struct {
|
||||||
|
uint32_t cls; /* vmsig_lease_class */
|
||||||
|
uint32_t reason; /* DOWN: 0; UP LEASE_DENIED: VMSIG_LEASE_DENY_* */
|
||||||
|
} vmsig_lease_req;
|
||||||
|
|
||||||
|
/* Response to CMD_LEASE_STATUS (UP LEASE_STATUS, in inln). */
|
||||||
|
typedef struct {
|
||||||
|
uint32_t cls; /* requested class */
|
||||||
|
uint32_t busy; /* 1=held by a live owner, 0=free */
|
||||||
|
uint32_t owner_principal; /* owner principal (for audit/UI); 0 if free */
|
||||||
|
} vmsig_lease_status;
|
||||||
|
|
||||||
|
/* Lifecycle operations for CMD_LIFECYCLE (code in inln[0]). Destructive ones
|
||||||
|
* (POWERDOWN/RESET) require CAP_POWER; safe ones — CAP_LIFECYCLE. */
|
||||||
|
enum {
|
||||||
|
VMSIG_LIFE_POWERDOWN = 0,
|
||||||
|
VMSIG_LIFE_RESET = 1,
|
||||||
|
VMSIG_LIFE_WAKEUP = 2,
|
||||||
|
VMSIG_LIFE_PAUSE = 3,
|
||||||
|
VMSIG_LIFE_RESUME = 4
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ===== Input (DOWN VMSIG_EV_CMD_INPUT, in inln) — NEUTRAL =====
|
||||||
|
* control describes input abstractly (axis/button/key/scroll), WITHOUT knowing the driver
|
||||||
|
* (uinput/QMP): the input adapter translates it into its contract. Requires CAP_INPUT. This
|
||||||
|
* is the ONLY public input-encoding contract — an external control encodes vmsig_input into
|
||||||
|
* vmsig_event.inln. */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_INPUT_ABS = 0, /* absolute axis: code=axis, value=coordinate */
|
||||||
|
VMSIG_INPUT_REL = 1, /* relative axis: code=axis, value=delta */
|
||||||
|
VMSIG_INPUT_BTN = 2, /* button: code=button, value=pressed(1)/released(0) */
|
||||||
|
VMSIG_INPUT_KEY = 3, /* key: code=evdev code, value=pressed/released */
|
||||||
|
VMSIG_INPUT_SCROLL = 4 /* scroll: code=axis, scroll=magnitude */
|
||||||
|
} vmsig_input_kind;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
uint32_t kind; /* vmsig_input_kind */
|
||||||
|
int32_t code; /* axis / button / evdev code (neutral event code) */
|
||||||
|
int32_t value; /* abs coordinate / rel delta / pressed(1)|released(0) */
|
||||||
|
double scroll; /* scroll magnitude (VMSIG_INPUT_SCROLL only) */
|
||||||
|
} vmsig_input; /* fits in vmsig_event.inln[48] */
|
||||||
|
|
||||||
|
/* ===== Memory write (DOWN VMSIG_EV_CMD_MEMWRITE) — NEUTRAL, write-signaled =====
|
||||||
|
* control describes an ATOMIC write into guest memory abstractly (guest VA + length),
|
||||||
|
* WITHOUT knowing vmie/cr3: the memctx adapter resolves it under the held kcr3 and does
|
||||||
|
* ONE gva_write. Requires CAP_MEMWRITE + an exclusive MEMWRITE lease + an extent check.
|
||||||
|
* SRC bytes: inline (<= VMSIG_MEMWRITE_INLINE) ride in the inln tail right after this header
|
||||||
|
* (flags & INLINE); larger in-proc writes ride in the borrowed payload (flags & PAYLOAD). */
|
||||||
|
#define VMSIG_MEMWRITE_INLINE 32u /* inln tail capacity for SRC (48 - 16 header) */
|
||||||
|
#define VMSIG_MW_SRC_INLINE 0x1u /* SRC bytes are in inln tail (len<=INLINE) */
|
||||||
|
#define VMSIG_MW_SRC_PAYLOAD 0x2u /* SRC bytes are in ev->payload.data (in-proc) */
|
||||||
|
typedef struct {
|
||||||
|
uint64_t gva; /* guest virtual address to write (resolved under the adapter's kcr3) */
|
||||||
|
uint32_t len; /* number of bytes to write (1..VMSIG_MEMWRITE_MAX) */
|
||||||
|
uint32_t flags; /* VMSIG_MW_SRC_INLINE | VMSIG_MW_SRC_PAYLOAD */
|
||||||
|
/* inline SRC tail (when VMSIG_MW_SRC_INLINE): up to VMSIG_MEMWRITE_INLINE bytes follow */
|
||||||
|
} vmsig_memwrite; /* header = 8+4+4 = 16 bytes; +32 tail = 48 (exactly inln[48]) */
|
||||||
|
|
||||||
|
/* ===== Cursor (UP VMSIG_EV_CURSOR_STATE, in inln) — NEUTRAL =====
|
||||||
|
* Cursor position from the SCREEN sensor (vgpu). NEUTRAL payload format only: emitted by the
|
||||||
|
* out-of-repo vgpu-perception shell-as-control (source VMSIG_SRC_FRAME), not by a signaling
|
||||||
|
* adapter — signaling just fans it out. x,y signed (multi-monitor -> negative). cap OBSERVE|INPUT. */
|
||||||
|
typedef struct {
|
||||||
|
int32_t x; /* screen coordinate X (signed) */
|
||||||
|
int32_t y; /* screen coordinate Y (signed) */
|
||||||
|
uint32_t visible; /* 1=shown, 0=hidden */
|
||||||
|
uint32_t seq; /* monotonic cursor-publication counter (vgpu) */
|
||||||
|
} vmsig_cursor;
|
||||||
|
|
||||||
|
/* ===== Held input (UP VMSIG_EV_INPUT_HELD, in inln) — response to CMD_QUERY_INPUT =====
|
||||||
|
* Set of held KEY/BTN from the ACTUATOR record (vmctl): signaling only returns it on request,
|
||||||
|
* does NOT track it itself and does NOT decide release (that is control). flags & TRUNC => more
|
||||||
|
* held than ent. */
|
||||||
|
#define VMSIG_INPUT_HELD_TRUNC 0x1u
|
||||||
|
typedef struct {
|
||||||
|
uint32_t count; /* number of valid entries in ent[] */
|
||||||
|
uint32_t flags; /* VMSIG_INPUT_HELD_TRUNC if more held than capacity */
|
||||||
|
struct { uint16_t kind; uint16_t code; } ent[10]; /* kind=VMSIG_INPUT_KEY/BTN; code */
|
||||||
|
} vmsig_input_held; /* 4+4+10*4 = 48 (exactly inln[48]) */
|
||||||
|
|
||||||
|
/* ===== QEMU/QMP host-plane (VMHOST seam) — VM-substrate control =====
|
||||||
|
* VM state (UP VMSIG_EV_VM_LIFECYCLE, in inln). */
|
||||||
|
enum {
|
||||||
|
VMSIG_VM_RUNNING = 0, VMSIG_VM_PAUSED, VMSIG_VM_SHUTDOWN,
|
||||||
|
VMSIG_VM_RESET, VMSIG_VM_POWERDOWN, VMSIG_VM_CRASHED, VMSIG_VM_UNKNOWN
|
||||||
|
};
|
||||||
|
typedef struct { uint32_t state; uint32_t detail; } vmsig_vm_state;
|
||||||
|
|
||||||
|
/* VM control operations (DOWN VMSIG_EV_CMD_VM, in inln). Destructive ones
|
||||||
|
* (RESET/POWERDOWN/QUIT) require CAP_POWER; safe ones — CAP_VM. */
|
||||||
|
enum {
|
||||||
|
VMSIG_VMOP_QUERY = 0, /* query-status */
|
||||||
|
VMSIG_VMOP_CONT, /* cont (resume) */
|
||||||
|
VMSIG_VMOP_STOP, /* stop (pause) */
|
||||||
|
VMSIG_VMOP_RESET, /* system_reset (destructive) */
|
||||||
|
VMSIG_VMOP_POWERDOWN, /* system_powerdown (destructive) */
|
||||||
|
VMSIG_VMOP_QUIT /* quit (destructive) */
|
||||||
|
};
|
||||||
|
typedef struct { uint32_t op; } vmsig_vm_cmd;
|
||||||
|
|
||||||
|
/* Codec tags: which adapter owns the payload body (for release/diagnostics). */
|
||||||
|
typedef enum {
|
||||||
|
VMSIG_CODEC_NONE = 0,
|
||||||
|
VMSIG_CODEC_INPUT = 1,
|
||||||
|
VMSIG_CODEC_VMHOST = 2,
|
||||||
|
VMSIG_CODEC_MEMCTX = 3 /* owned-payload locator (vmsig_memseg[]) of the MEMCTX seam */
|
||||||
|
} vmsig_codec;
|
||||||
|
|
||||||
|
/* Payload ownership flags. */
|
||||||
|
#define VMSIG_PL_OWNED 0x1u /* core frees it via release() on drop */
|
||||||
|
#define VMSIG_PL_BORROWED 0x2u /* borrowed (e.g. a seqlock frame): copy */
|
||||||
|
/* or revalidate before release() */
|
||||||
|
#define VMSIG_PL_INLINE 0x4u /* small body lives in vmsig_event.inln */
|
||||||
|
|
||||||
|
/* Opaque, releasable payload. The body is owned by the emitting adapter's codec
|
||||||
|
* (mmap'd frame slot, vmie heap diff, ...). The core carries the bearer and calls
|
||||||
|
* release() EXACTLY once on consumption/drop. The core never dereferences data. */
|
||||||
|
typedef struct vmsig_payload {
|
||||||
|
void* data; /* opaque body, codec-defined */
|
||||||
|
size_t len; /* bytes in data (0 if borrowed) */
|
||||||
|
uint32_t codec; /* vmsig_codec: whose payload it is */
|
||||||
|
uint32_t flags; /* VMSIG_PL_* */
|
||||||
|
void (*release)(struct vmsig_payload*); /* idempotent; may be NULL */
|
||||||
|
void* owner; /* codec context for release() */
|
||||||
|
} vmsig_payload;
|
||||||
|
|
||||||
|
/* TRANSFER EVENT. Fixed-size header + a small inline zone; large bodies hang off
|
||||||
|
* the payload. */
|
||||||
|
typedef struct vmsig_event {
|
||||||
|
vmsig_kind kind;
|
||||||
|
vmsig_source source; /* source seam */
|
||||||
|
vmsig_dir dir;
|
||||||
|
vmsig_prio prio;
|
||||||
|
uint32_t endpoint; /* VM/endpoint id — multi-VM-ready */
|
||||||
|
uint32_t seq; /* monotonic sequence (set by the context) */
|
||||||
|
uint32_t corr; /* correlation: links an ACK to its CMD */
|
||||||
|
uint32_t origin; /* INTERNAL: id+1 of the control that initiated DOWN (0=none/broadcast). */
|
||||||
|
/* Set by the core in emit_down; NOT serialized onto the wire */
|
||||||
|
/* (a poller cannot forge it). Addressed reply delivery. */
|
||||||
|
uint64_t ts_ns; /* CLOCK_MONOTONIC at emit time */
|
||||||
|
vmsig_payload payload; /* opaque body (may be empty) */
|
||||||
|
uint8_t inln[48]; /* inline zone for small events (VMSIG_PL_INLINE) */
|
||||||
|
} vmsig_event;
|
||||||
|
|
||||||
|
/* Release the event's payload (if it has release and is not yet freed). Idempotent. */
|
||||||
|
static inline void vmsig_payload_release(vmsig_event* ev) {
|
||||||
|
if (ev && ev->payload.release) {
|
||||||
|
ev->payload.release(&ev->payload);
|
||||||
|
ev->payload.release = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* VMSIG_EVENT_H */
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
#ifndef VMSIG_MEMCTX_H
|
||||||
|
#define VMSIG_MEMCTX_H
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include "vmsig_event.h"
|
||||||
|
|
||||||
|
/* vmsig_memctx.h — NEUTRAL handoff contract for the guest address-space context.
|
||||||
|
*
|
||||||
|
* signaling is a COHERENCE layer for shared state, not perception. Over memory it
|
||||||
|
* vends ONE coherent datum: the root of the guest address space (the permanent System
|
||||||
|
* DirectoryTableBase, `kcr3`) PAIRED with a RAM-region locator — a pre-opened `O_RDONLY`
|
||||||
|
* fd. The holder (an S library / any control, including a human operator via their shim)
|
||||||
|
* subscribes to this datum, opens ITS OWN read-only context FROM the received fd (keyed on
|
||||||
|
* `kcr3`), and does proc_list/gva_read/scan/pmap itself. Perception and semantics are NOT here.
|
||||||
|
*
|
||||||
|
* Holder invariants:
|
||||||
|
* - The locator is valid ONLY against the received `O_RDONLY` fd. From it the holder opens
|
||||||
|
* its own read-only context, keyed on the vended `kcr3`:
|
||||||
|
* * raw reads under a cr3 it already holds — vmie_mem_from_ro_fd(fd, low) (nseg==0) or
|
||||||
|
* vmie_mem_from_ro_fd_segs(fd, segs, nseg) (nseg>0); gva_read keyed on (mem, kcr3);
|
||||||
|
* * FULL read context WITH process/module discovery — vmie_win32_open_ro_fd(fd, low,
|
||||||
|
* kcr3): builds the offset profile read-only from the image (no beacon/ACK) and
|
||||||
|
* enables proc_list/proc_modules plus the section/import/export/scan surfaces. A
|
||||||
|
* sensor that must FIND a process (then read its private AS) needs this one — kcr3
|
||||||
|
* alone gives reads-under-a-known-cr3, not discovery.
|
||||||
|
* Both map PROT_READ (gva_write -> -1). `kcr3` is valid ONLY within its `epoch`.
|
||||||
|
* - On UP MEMCTX_INVALIDATED{endpoint,epoch} (epoch = the NEW epoch) the holder closes its
|
||||||
|
* context/fd-mmap and waits for the next MEMCTX stamped with that new epoch (re-multicast with a new kcr3 and a fresh fd).
|
||||||
|
* - The fd is always `O_RDONLY` (VMSIG_MEMCTX_RDONLY set by this layer): mmap(PROT_WRITE)
|
||||||
|
* through it -> EACCES. Writing into the guest is structurally impossible on the holder
|
||||||
|
* side — it goes through the write-signaled MEMWRITE command (CMD_MEMWRITE under the
|
||||||
|
* MEMWRITE lease), never this RO mapping. */
|
||||||
|
|
||||||
|
/* Locator-POD flag: the region is vended read-only (always set by signaling). */
|
||||||
|
#define VMSIG_MEMCTX_RDONLY 0x1u
|
||||||
|
|
||||||
|
/* Address-space context locator-POD (rides in vmsig_event.inln; <=48 bytes).
|
||||||
|
* Flat self-describing encoding: nseg explicit, no offset magic. */
|
||||||
|
typedef struct {
|
||||||
|
uint64_t kcr3; /* permanent System DirectoryTableBase (guest AS root) */
|
||||||
|
uint64_t low; /* below-4G RAM size (PCI-hole split point; single-low open) */
|
||||||
|
uint32_t epoch; /* VM-session epoch; kcr3 valid ONLY within it */
|
||||||
|
uint32_t nseg; /* number of segments in the owned-payload (0 => single-low by `low`) */
|
||||||
|
uint32_t flags; /* VMSIG_MEMCTX_RDONLY */
|
||||||
|
uint32_t _pad;
|
||||||
|
} vmsig_memctx; /* 8+8+4+4+4+4 = 32 bytes */
|
||||||
|
|
||||||
|
/* One GPA->file segment (mirrors the neighbor's gpa_seg from memmodel.h, but self-contained:
|
||||||
|
* this header does NOT pull in the neighbor's contract). Rides in the owned-payload of the
|
||||||
|
* MEMCTX event when nseg>0. For a single-low image nseg==0 and the holder opens by `low`. */
|
||||||
|
typedef struct {
|
||||||
|
uint64_t gpa; /* GPA of the window */
|
||||||
|
uint64_t len; /* window length in bytes */
|
||||||
|
uint64_t file_off; /* offset into the RAM-backing file */
|
||||||
|
} vmsig_memseg;
|
||||||
|
|
||||||
|
/* Epoch invalidation (UP VMSIG_EV_MEMCTX_INVALIDATED, in inln). */
|
||||||
|
typedef struct {
|
||||||
|
uint32_t endpoint;
|
||||||
|
uint32_t epoch; /* new epoch; the previous one's context is invalid */
|
||||||
|
} vmsig_memctx_inv;
|
||||||
|
|
||||||
|
/* Decode the MEMCTX event's owned-payload into segs[] (pointer + nseg). A pure function over
|
||||||
|
* the event: no ownership, no allocations. Returns a pointer to the segments (or NULL, setting
|
||||||
|
* *out_nseg=0, if there are none — e.g. a single-low image OR socket delivery, where the
|
||||||
|
* payload does not cross the wire and the holder opens by `low`). */
|
||||||
|
static inline const vmsig_memseg* vmsig_memctx_segs(const vmsig_event* ev,
|
||||||
|
uint32_t* out_nseg) {
|
||||||
|
const vmsig_memctx* m = (const vmsig_memctx*)ev->inln;
|
||||||
|
uint32_t n = m->nseg;
|
||||||
|
if (!n || !ev->payload.data ||
|
||||||
|
ev->payload.len < (size_t)n * sizeof(vmsig_memseg)) {
|
||||||
|
if (out_nseg) *out_nseg = 0;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (out_nseg) *out_nseg = n;
|
||||||
|
return (const vmsig_memseg*)ev->payload.data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== Registration seam adapter -> core =====
|
||||||
|
*
|
||||||
|
* The memctx adapter registers THIS in the core via vmsig_emit.register_memctx. The core
|
||||||
|
* keeps the registration per-endpoint (retained-context) and does NOT store a copy of the
|
||||||
|
* locator: on delivery/replay it calls describe() (current locator snapshot) + share_fd()
|
||||||
|
* (fresh O_RDONLY fd). The epoch is stamped by the CORE (single source of truth); describe
|
||||||
|
* does NOT fill it. invalidate() — the core asks the adapter to re-bootstrap on an epoch
|
||||||
|
* change (the adapter re-emits MEMCTX once ready). All callbacks are called on the loop
|
||||||
|
* thread. ctx — the adapter's private context. */
|
||||||
|
typedef struct vmsig_memctx_reg {
|
||||||
|
uint32_t endpoint;
|
||||||
|
uint32_t source; /* VMSIG_SRC_MEMCTX */
|
||||||
|
void* ctx; /* adapter's private context */
|
||||||
|
/* Current locator snapshot: kcr3/low/nseg/flags + segs (borrowed, owned by the
|
||||||
|
* adapter; lives across epochs). The core overwrites epoch with its own value. */
|
||||||
|
void (*describe)(void* ctx, vmsig_memctx* out_pod,
|
||||||
|
const vmsig_memseg** out_segs, uint32_t* out_nseg);
|
||||||
|
int (*share_fd)(void* ctx); /* fresh O_RDONLY fd of the RAM region (caller closes) */
|
||||||
|
void (*invalidate)(void* ctx, uint32_t epoch); /* re-bootstrap for the new epoch */
|
||||||
|
} vmsig_memctx_reg;
|
||||||
|
|
||||||
|
#endif /* VMSIG_MEMCTX_H */
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
#ifndef VMSIG_SOCKET_H
|
||||||
|
#define VMSIG_SOCKET_H
|
||||||
|
#include "vmsig_event.h"
|
||||||
|
#include "vmsig_control.h" /* vmsig_grant */
|
||||||
|
#include "vmsig_core.h" /* vmsig_core */
|
||||||
|
|
||||||
|
/* vmsig_socket.h — out-of-process control over a unix socket (human/service poller).
|
||||||
|
* signaling LISTENS; each accepted connection is authenticated (SO_PEERCRED) and,
|
||||||
|
* per policy, receives a grant -> becomes a distinct control behind the same seam. */
|
||||||
|
|
||||||
|
/* Wire format: fixed-size, pointer-free — the same contract on the external
|
||||||
|
* poller. Single host (unix socket) => native byte order. Only the event's
|
||||||
|
* inline part is serialized (payload pointers do not go on the wire). */
|
||||||
|
#define VMSIG_WIRE_MAGIC 0x47495356u /* bytes 'V','S','I','G' in memory on a little-endian host (native byte order) */
|
||||||
|
#define VMSIG_WIRE_VERSION 1u
|
||||||
|
typedef struct {
|
||||||
|
uint32_t magic;
|
||||||
|
uint32_t version;
|
||||||
|
uint32_t kind; /* vmsig_kind */
|
||||||
|
uint32_t source; /* vmsig_source */
|
||||||
|
uint32_t dir; /* vmsig_dir */
|
||||||
|
uint32_t prio; /* vmsig_prio */
|
||||||
|
uint32_t endpoint;
|
||||||
|
uint32_t corr;
|
||||||
|
uint8_t inln[48]; /* inline event payload */
|
||||||
|
} vmsig_wire;
|
||||||
|
|
||||||
|
/* Frame <-> event codec (for external clients too). */
|
||||||
|
void vmsig_wire_encode(vmsig_wire* w, const vmsig_event* ev);
|
||||||
|
int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev); /* 0 ok, -1 bad magic/ver */
|
||||||
|
|
||||||
|
/* Admission policy: given the authenticated peer (SO_PEERCRED), return a grant.
|
||||||
|
* An empty grant (cap_mask==0 || endpoint_mask==0) => connection is rejected. */
|
||||||
|
typedef vmsig_grant (*vmsig_socket_policy)(uint32_t uid, uint32_t pid, void* ud);
|
||||||
|
|
||||||
|
/* Bring up a unix-socket control listener on `path` (prefix '@' => abstract socket).
|
||||||
|
* Driven by the epoll core: accept -> SO_PEERCRED -> policy -> grant -> per-conn
|
||||||
|
* control. Returns 0/-1. */
|
||||||
|
int vmsig_socket_attach(vmsig_core* core, const char* path,
|
||||||
|
vmsig_socket_policy policy, void* ud);
|
||||||
|
|
||||||
|
#endif /* VMSIG_SOCKET_H */
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
#ifndef VMSIG_ADAPTER_UTIL_H
|
||||||
|
#define VMSIG_ADAPTER_UTIL_H
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
/* adapter_util.h — shared primitive "blocking API -> completion eventfd".
|
||||||
|
*
|
||||||
|
* A bridge turning a synchronous CPU-bound / blocking neighbor call (vmie,
|
||||||
|
* vmctl) into a readiness source for the epoll core: the loop thread posts a request, a
|
||||||
|
* separate worker thread runs the blocking work and signals a completion eventfd; on it
|
||||||
|
* the loop wakes and collects the result in on_readiness. Reused by the memctx
|
||||||
|
* (off-loop bootstrap) and input adapters. */
|
||||||
|
|
||||||
|
typedef struct vmsig_worker vmsig_worker;
|
||||||
|
|
||||||
|
#define VMSIG_WORK_SLOT 256 /* req/res slot size (POD, copied) */
|
||||||
|
|
||||||
|
/* Callback run IN the worker thread: req -> res (both POD <= VMSIG_WORK_SLOT).
|
||||||
|
* Returns 0/-1 (the code is stored alongside, see vmsig_worker_poll). Must not touch
|
||||||
|
* core structures — only compute res from req. */
|
||||||
|
typedef int (*vmsig_work_fn)(void* user, const void* req, void* res);
|
||||||
|
|
||||||
|
/* Create a worker pool of nthreads threads over a shared queue (nthreads>=1). vmie
|
||||||
|
* allows parallel read-only readers; for a serial channel (QMP) use 1. max_depth — the
|
||||||
|
* request-queue depth ceiling (<=0 => default): submit beyond it is rejected (-1) so an
|
||||||
|
* untrusted flood does not grow into OOM. NULL on error. */
|
||||||
|
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth);
|
||||||
|
|
||||||
|
/* Stop the threads (join) and free. Safe on NULL. */
|
||||||
|
void vmsig_worker_free(vmsig_worker* w);
|
||||||
|
|
||||||
|
/* completion eventfd: the adapter registers it as a VMSIG_RDY_EVENTFD source. */
|
||||||
|
int vmsig_worker_evfd(const vmsig_worker* w);
|
||||||
|
|
||||||
|
/* loop thread: post a request (copied, len <= VMSIG_WORK_SLOT). 0/-1. */
|
||||||
|
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len);
|
||||||
|
|
||||||
|
/* loop thread (in on_readiness): drain the completion eventfd. */
|
||||||
|
void vmsig_worker_ack(vmsig_worker* w);
|
||||||
|
|
||||||
|
/* loop thread: collect a ready result. 1 — written to res (+ *rc = fn code),
|
||||||
|
* 0 — empty, -1 — error. Drain in a loop until 0. */
|
||||||
|
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc);
|
||||||
|
|
||||||
|
#endif /* VMSIG_ADAPTER_UTIL_H */
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
#ifndef VMSIG_INPUT_H
|
||||||
|
#define VMSIG_INPUT_H
|
||||||
|
|
||||||
|
/* Private config of the input adapter (vmctl). cfg==NULL => stub mode. Armed mode
|
||||||
|
* (VMSIG_WITH_VMCTL) opens vmctl_open() and actuates for real. driver is an int so
|
||||||
|
* as not to pull vmctl.h into this header (values match VMCTL_DRIVER_*). */
|
||||||
|
typedef struct {
|
||||||
|
int stub;
|
||||||
|
int driver; /* 0=QMP, 1=UINPUT (see VMCTL_DRIVER_*) */
|
||||||
|
const char* qmp_path;
|
||||||
|
const char* input_bus;
|
||||||
|
int ptr_mode;
|
||||||
|
} vmsig_input_cfg;
|
||||||
|
|
||||||
|
/* Input event codes/contract are PUBLIC: vmsig_input / vmsig_input_kind in
|
||||||
|
* include/vmsig_event.h (external control encodes them into inln). No private duplicate. */
|
||||||
|
|
||||||
|
#endif /* VMSIG_INPUT_H */
|
||||||
@@ -0,0 +1,230 @@
|
|||||||
|
/* input.c — input/actuator adapter for vmctl (input + power/lifecycle).
|
||||||
|
*
|
||||||
|
* Mechanism (recommended): vmctl is a blocking QMP round-trip; we run it on a
|
||||||
|
* worker thread, completion ack via a completion-eventfd. The uinput path is a
|
||||||
|
* local instantaneous write; when armed it would be done inline (see comment in submit).
|
||||||
|
* Real actuation is under VMSIG_WITH_VMCTL; otherwise the stub acks (spine without a VM). */
|
||||||
|
#include "vmsig_adapter.h"
|
||||||
|
#include "adapter_util.h"
|
||||||
|
#include "input.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <sys/epoll.h>
|
||||||
|
|
||||||
|
#ifdef VMSIG_WITH_VMCTL
|
||||||
|
#include "vmctl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* POD request/result of the worker. */
|
||||||
|
typedef struct {
|
||||||
|
int cmd; /* 0 = input event, 1 = lifecycle */
|
||||||
|
uint32_t corr;
|
||||||
|
uint32_t origin; /* initiator (addressed ACK) */
|
||||||
|
int kind; /* vmsig_input_kind (for cmd==0) */
|
||||||
|
int code; /* axis/btn/evdev-code */
|
||||||
|
int value; /* abs/rel/down */
|
||||||
|
double scroll;
|
||||||
|
int life_op; /* VMSIG_LIFE_* (powerdown/reset/wakeup/pause/resume) */
|
||||||
|
} input_req;
|
||||||
|
typedef struct { int ok; uint32_t corr; uint32_t origin; } input_res;
|
||||||
|
|
||||||
|
/* signaling does NOT track held state: the record of what is pressed lives in the
|
||||||
|
* ACTUATOR (vmctl); we hand it to control on request (CMD_QUERY_INPUT), release is control's decision. */
|
||||||
|
struct vmsig_adapter {
|
||||||
|
uint32_t endpoint;
|
||||||
|
int stub;
|
||||||
|
vmsig_emit emit;
|
||||||
|
vmsig_worker* worker;
|
||||||
|
int driver; /* 0=QMP, 1=UINPUT (VMCTL_DRIVER_*); carried open->attach */
|
||||||
|
const char* qmp_path; /* borrowed from cfg (valid through attach) */
|
||||||
|
const char* input_bus;
|
||||||
|
int ptr_mode;
|
||||||
|
#ifdef VMSIG_WITH_VMCTL
|
||||||
|
vmctl_t* vmctl;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
static int input_job(void* user, const void* reqp, void* resp) {
|
||||||
|
struct vmsig_adapter* a = user;
|
||||||
|
const input_req* rq = reqp;
|
||||||
|
input_res* rs = resp;
|
||||||
|
memset(rs, 0, sizeof *rs);
|
||||||
|
rs->corr = rq->corr;
|
||||||
|
rs->origin = rq->origin;
|
||||||
|
#ifdef VMSIG_WITH_VMCTL
|
||||||
|
if (a->vmctl) {
|
||||||
|
int r = -1;
|
||||||
|
if (rq->cmd == 0) {
|
||||||
|
vmctl_batch b; vmctl_batch_init(&b);
|
||||||
|
switch (rq->kind) {
|
||||||
|
case VMSIG_INPUT_ABS: vmctl_batch_abs(&b, rq->code, rq->value); break;
|
||||||
|
case VMSIG_INPUT_REL: vmctl_batch_rel(&b, rq->code, rq->value); break;
|
||||||
|
case VMSIG_INPUT_BTN: vmctl_batch_btn(&b, rq->code, rq->value); break;
|
||||||
|
case VMSIG_INPUT_KEY: vmctl_batch_key(&b, rq->code, rq->value); break;
|
||||||
|
case VMSIG_INPUT_SCROLL: vmctl_batch_scroll(&b, rq->code, rq->scroll); break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
r = vmctl_batch_send(a->vmctl, &b);
|
||||||
|
} else {
|
||||||
|
switch (rq->life_op) {
|
||||||
|
case 0: r = vmctl_powerdown(a->vmctl); break;
|
||||||
|
case 1: r = vmctl_reset(a->vmctl); break;
|
||||||
|
case 2: r = vmctl_wakeup(a->vmctl); break;
|
||||||
|
case 3: r = vmctl_pause(a->vmctl); break;
|
||||||
|
case 4: r = vmctl_resume(a->vmctl); break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rs->ok = (r == 0);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
(void)a;
|
||||||
|
rs->ok = 1; /* stub: ack without actuation */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static vmsig_adapter* in_open(const void* cfg, uint32_t endpoint) {
|
||||||
|
const vmsig_input_cfg* c = cfg;
|
||||||
|
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||||
|
if (!a) return NULL;
|
||||||
|
a->endpoint = endpoint;
|
||||||
|
a->stub = c ? c->stub : 1;
|
||||||
|
if (c) { /* carry the driver selection to attach (cfg not passed there) */
|
||||||
|
a->driver = c->driver;
|
||||||
|
a->qmp_path = c->qmp_path;
|
||||||
|
a->input_bus = c->input_bus;
|
||||||
|
a->ptr_mode = c->ptr_mode;
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Attach the input seam to the loop: start the worker, open the actuator when armed,
 * describe the worker's completion eventfd in reg[0], and announce SEAM_UP.
 * Returns the number of fd registrations filled in (1), or -1 on failure (cap too small,
 * worker creation failed, or the actuator could not be opened). */
static int in_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1)
        return -1;
    a->emit = *emit;

    /* QMP is a serial channel: a single worker thread, request queue capped at 64. */
    a->worker = vmsig_worker_new(input_job, a, 1, 64);
    if (a->worker == NULL)
        return -1;

#ifdef VMSIG_WITH_VMCTL
    if (!a->stub) {
        /* armed: build vmctl_config from the carried cfg and open the actuator. UINPUT
         * (host uinput + optional virtio-input-host-pci passthrough via QMP) is the primary
         * input driver; QMP input-send-event is the fallback. */
        vmctl_config vcfg;
        memset(&vcfg, 0, sizeof vcfg);
        if (a->driver == 1)
            vcfg.driver = VMCTL_DRIVER_UINPUT;
        else
            vcfg.driver = VMCTL_DRIVER_QMP;
        vcfg.qmp_path  = a->qmp_path;
        vcfg.input_bus = a->input_bus;
        vcfg.ptr_mode  = a->ptr_mode;
        vcfg.uinput_id = NULL;              /* built-in HID identity defaults */

        a->vmctl = vmctl_open(&vcfg);
        if (a->vmctl == NULL) {
            vmsig_worker_free(a->worker);
            a->worker = NULL;
            return -1;
        }
    }
#endif

    /* The only readiness source is the worker's completion eventfd. */
    vmsig_fd_reg* slot = &reg[0];
    slot->fd           = vmsig_worker_evfd(a->worker);
    slot->epoll_events = EPOLLIN;
    slot->shape        = VMSIG_RDY_EVENTFD;
    slot->cookie       = 0;

    vmsig_event hello;
    memset(&hello, 0, sizeof hello);
    hello.kind     = VMSIG_EV_SEAM_UP;
    hello.source   = VMSIG_SRC_INPUT;
    hello.dir      = VMSIG_DIR_UP;
    hello.prio     = VMSIG_PRIO_NORMAL;
    hello.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &hello);
    return 1;
}
|
||||||
|
|
||||||
|
/* Readiness callback for the worker's completion eventfd: drains the eventfd counter, then
 * turns every finished job into an addressed ACT_ACK whose inline payload is the
 * input_res ({ok, corr, origin}). cookie/events are unused. Always returns 0. */
static int in_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie;
    (void)events;
    vmsig_worker_ack(a->worker);

    for (;;) {
        input_res done;
        int job_rc;                 /* fetched but unused: done.ok carries the outcome */
        if (vmsig_worker_poll(a->worker, &done, sizeof done, &job_rc) != 1)
            break;

        vmsig_event ack;
        memset(&ack, 0, sizeof ack);
        ack.kind          = VMSIG_EV_ACT_ACK;
        ack.source        = VMSIG_SRC_INPUT;
        ack.dir           = VMSIG_DIR_UP;
        ack.prio          = VMSIG_PRIO_NORMAL;
        ack.endpoint      = a->endpoint;
        ack.corr          = done.corr;
        ack.origin        = done.origin;
        ack.payload.flags = VMSIG_PL_INLINE;

        /* never copy more than the inline area can hold */
        size_t ncopy = sizeof done;
        if (sizeof ack.inln < ncopy)
            ncopy = sizeof ack.inln;
        memcpy(ack.inln, &done, ncopy);

        a->emit.emit(a->emit.token, &ack);
    }
    return 0;
}
|
||||||
|
|
||||||
|
/* Loop-thread entry for DOWN commands addressed to the input seam.
 *
 *   VMSIG_EV_CMD_QUERY_INPUT  answered synchronously with VMSIG_EV_INPUT_HELD (inline
 *                             vmsig_input_held), addressed to the initiator and carrying the
 *                             request's correlation id.
 *   VMSIG_EV_CMD_INPUT        decoded from ev->inln (vmsig_input) and queued to the worker.
 *   VMSIG_EV_CMD_LIFECYCLE    op taken from ev->inln[0] and queued to the worker.
 *
 * Returns 0 when the event was consumed, 1 when it is not for this seam, and -1 when the
 * worker did not accept the request (vmsig_worker_submit failed). */
static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_CMD_QUERY_INPUT) {
        /* Return what is PRESSED from the vmctl ACTUATOR's record (signaling does NOT track
         * held itself). The read is read-only (no QMP round-trip) => on the loop thread;
         * addressed reply to the initiator. stub without vmctl => empty set (nothing to
         * actuate - nothing to hold). */
        vmsig_input_held h;
        memset(&h, 0, sizeof h);
#ifdef VMSIG_WITH_VMCTL
        if (a->vmctl) {
            const uint32_t capn = (uint32_t)(sizeof h.ent / sizeof h.ent[0]);
            unsigned char bits[VMCTL_KEYS_SNAPSHOT_BYTES];
            int n = vmctl_keys_snapshot(a->vmctl, bits, sizeof bits);
            /* The scan is skipped entirely when n <= 0, so bits is never read in that case. */
            for (int code = 0; n > 0 && code <= VMCTL_KEY_CODE_MAX; code++) {
                if (!(bits[code >> 3] & (1u << (code & 7))))
                    continue;
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_KEY;
                    h.ent[h.count].code = (uint16_t)code;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC;  /* more held than fits: flag it */
                }
            }
            unsigned bm = vmctl_btns_snapshot(a->vmctl);
            for (int b = 0; b < 8; b++) {
                if (!(bm & (1u << b)))
                    continue;
                if (h.count < capn) {
                    h.ent[h.count].kind = VMSIG_INPUT_BTN;
                    h.ent[h.count].code = (uint16_t)b;
                    h.count++;
                } else {
                    h.flags |= VMSIG_INPUT_HELD_TRUNC;
                }
            }
        }
#endif
        vmsig_event up;
        memset(&up, 0, sizeof up);
        up.kind = VMSIG_EV_INPUT_HELD; up.source = VMSIG_SRC_INPUT; up.dir = VMSIG_DIR_UP;
        up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
        /* Echo corr as ACT_ACK does, so the initiator can match the reply to its query. */
        up.corr = ev->corr; up.origin = ev->origin;
        up.payload.flags = VMSIG_PL_INLINE;
        memcpy(up.inln, &h, sizeof up.inln < sizeof h ? sizeof up.inln : sizeof h);
        a->emit.emit(a->emit.token, &up);
        return 0;
    }

    input_req rq;
    memset(&rq, 0, sizeof rq);
    rq.corr = ev->corr; rq.origin = ev->origin;

    if (ev->kind == VMSIG_EV_CMD_INPUT) {
        rq.cmd = 0;
        /* Decode the NEUTRAL public input contract from inln (vmsig_input). We do NOT track
         * held - that is the vmctl actuator's record (returned via CMD_QUERY_INPUT). */
        vmsig_input in;
        /* Zero first: the copy below is clamped to the inline area, so any part of `in`
         * it does not cover must still hold defined values before the fields are read. */
        memset(&in, 0, sizeof in);
        memcpy(&in, ev->inln, sizeof in <= sizeof ev->inln ? sizeof in : sizeof ev->inln);
        rq.kind   = (int)in.kind;
        rq.code   = (int)in.code;
        rq.value  = (int)in.value;
        rq.scroll = in.scroll;
    } else if (ev->kind == VMSIG_EV_CMD_LIFECYCLE) {
        rq.cmd = 1;
        rq.life_op = (int)(unsigned char)ev->inln[0];
    } else {
        return 1;                                  /* not for this seam */
    }
    return vmsig_worker_submit(a->worker, &rq, sizeof rq) == 0 ? 0 : -1;
}
|
||||||
|
|
||||||
|
/* Tear down the input adapter (NULL is accepted). The worker is stopped FIRST, so no job
 * can still be using the actuator when it is closed afterwards. */
static void in_close(vmsig_adapter* a) {
    if (a != NULL) {
        vmsig_worker_free(a->worker);
#ifdef VMSIG_WITH_VMCTL
        if (a->vmctl != NULL)
            vmctl_close(a->vmctl);
#endif
        free(a);
    }
}
|
||||||
|
|
||||||
|
static const vmsig_adapter_ops IN_OPS = {
|
||||||
|
.name = "input", .source = VMSIG_SRC_INPUT, .codec = VMSIG_CODEC_INPUT,
|
||||||
|
.open = in_open, .attach = in_attach, .on_readiness = in_on_ready,
|
||||||
|
.submit = in_submit, .close = in_close
|
||||||
|
};
|
||||||
|
|
||||||
|
const vmsig_adapter_ops* vmsig_input_ops(void) { return &IN_OPS; }
|
||||||
@@ -0,0 +1,162 @@
|
|||||||
|
/* worker.c — bridge "blocking API -> completion eventfd" (pool of N threads).
|
||||||
|
* MPSC request/result queues under a mutex + condvar; result readiness is
|
||||||
|
* signaled via eventfd, on which the core's epoll loop wakes. N threads share one
|
||||||
|
* request queue (for vmie — parallel read-only readers; for QMP — N=1). */
|
||||||
|
#include "adapter_util.h"
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <sys/eventfd.h>
|
||||||
|
|
||||||
|
typedef struct work_node {
|
||||||
|
struct work_node* next;
|
||||||
|
int rc; /* fn return code (for results) */
|
||||||
|
size_t len;
|
||||||
|
unsigned char buf[VMSIG_WORK_SLOT];
|
||||||
|
} work_node;
|
||||||
|
|
||||||
|
typedef struct { work_node* head; work_node* tail; } work_q;
|
||||||
|
|
||||||
|
struct vmsig_worker {
|
||||||
|
pthread_t* threads;
|
||||||
|
int nthreads;
|
||||||
|
pthread_mutex_t lock;
|
||||||
|
pthread_cond_t cv;
|
||||||
|
work_q req; /* loop -> workers */
|
||||||
|
work_q res; /* workers -> loop */
|
||||||
|
int evfd;
|
||||||
|
int stop;
|
||||||
|
int max_depth; /* cap on req-queue depth */
|
||||||
|
int req_count; /* current req-queue depth */
|
||||||
|
vmsig_work_fn fn;
|
||||||
|
void* user;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Append n at the tail of q. Callers hold the worker lock. */
static void q_push(work_q* q, work_node* n) {
    /* the slot that must point at the new node: the old tail's link, or the head */
    work_node** link = q->tail ? &q->tail->next : &q->head;
    n->next = NULL;
    *link = n;
    q->tail = n;
}
|
||||||
|
/* Detach and return the head of q, or NULL when q is empty. */
static work_node* q_pop(work_q* q) {
    work_node* first = q->head;
    if (first != NULL) {
        q->head = first->next;
        if (q->head == NULL)
            q->tail = NULL;         /* queue became empty */
    }
    return first;
}
|
||||||
|
/* Free every node still linked in q and leave q empty. */
static void q_drain(work_q* q) {
    for (work_node* cur = q->head; cur != NULL; ) {
        work_node* after = cur->next;   /* read the link before the node is freed */
        free(cur);
        cur = after;
    }
    q->tail = NULL;
    q->head = NULL;
}
|
||||||
|
|
||||||
|
static void* worker_main(void* arg) {
|
||||||
|
vmsig_worker* w = arg;
|
||||||
|
for (;;) {
|
||||||
|
pthread_mutex_lock(&w->lock);
|
||||||
|
while (!w->stop && !w->req.head) pthread_cond_wait(&w->cv, &w->lock);
|
||||||
|
/* On stop we DRAIN the queue: run the remaining requests so that submitted
|
||||||
|
* work is not silently lost (matters for jobs carrying resource ownership).
|
||||||
|
* We exit only when stop AND the queue is empty. */
|
||||||
|
if (w->stop && !w->req.head) { pthread_mutex_unlock(&w->lock); break; }
|
||||||
|
work_node* rq = q_pop(&w->req);
|
||||||
|
if (rq) w->req_count--;
|
||||||
|
pthread_mutex_unlock(&w->lock);
|
||||||
|
if (!rq) continue;
|
||||||
|
|
||||||
|
work_node* rs = calloc(1, sizeof *rs);
|
||||||
|
if (rs) {
|
||||||
|
rs->rc = w->fn ? w->fn(w->user, rq->buf, rs->buf) : -1;
|
||||||
|
rs->len = VMSIG_WORK_SLOT;
|
||||||
|
pthread_mutex_lock(&w->lock);
|
||||||
|
q_push(&w->res, rs);
|
||||||
|
pthread_mutex_unlock(&w->lock);
|
||||||
|
uint64_t one = 1;
|
||||||
|
ssize_t r = write(w->evfd, &one, sizeof one);
|
||||||
|
(void)r;
|
||||||
|
}
|
||||||
|
free(rq);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create a worker pool.
 *
 *   fn        - job function run on the pool threads
 *   user      - opaque pointer handed to fn
 *   nthreads  - desired thread count; values below 1 are treated as 1
 *   max_depth - cap on queued requests; values <= 0 select the default of 512
 *
 * Returns the pool, or NULL when any resource could not be created or not a single thread
 * could be started. If only some threads start, the pool runs with those. */
vmsig_worker* vmsig_worker_new(vmsig_work_fn fn, void* user, int nthreads, int max_depth) {
    const int want = nthreads < 1 ? 1 : nthreads;

    vmsig_worker* w = calloc(1, sizeof *w);
    if (w == NULL)
        return NULL;
    w->fn = fn;
    w->user = user;
    w->evfd = -1;
    w->max_depth = max_depth > 0 ? max_depth : 512;

    w->threads = calloc((size_t)want, sizeof *w->threads);
    if (w->threads == NULL)
        goto fail_self;

    w->evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (w->evfd < 0)
        goto fail_threads;

    if (pthread_mutex_init(&w->lock, NULL) != 0)
        goto fail_evfd;
    if (pthread_cond_init(&w->cv, NULL) != 0)
        goto fail_lock;

    /* start threads until the wanted count is reached or one fails to start */
    while (w->nthreads < want &&
           pthread_create(&w->threads[w->nthreads], NULL, worker_main, w) == 0)
        w->nthreads++;
    if (w->nthreads > 0)
        return w;

    /* no thread at all: unwind everything in reverse order of creation */
    pthread_cond_destroy(&w->cv);
fail_lock:
    pthread_mutex_destroy(&w->lock);
fail_evfd:
    close(w->evfd);
fail_threads:
    free(w->threads);
fail_self:
    free(w);
    return NULL;
}
|
||||||
|
|
||||||
|
/* Stop and destroy a pool (NULL is accepted). Sets the stop flag, wakes all threads and
 * joins them, then frees whatever is still queued and releases every resource. */
void vmsig_worker_free(vmsig_worker* w) {
    if (w == NULL)
        return;

    pthread_mutex_lock(&w->lock);
    w->stop = 1;
    pthread_cond_broadcast(&w->cv);
    pthread_mutex_unlock(&w->lock);

    int idx = 0;
    while (idx < w->nthreads)
        pthread_join(w->threads[idx++], NULL);

    /* the threads are gone: leftover nodes can be released without the lock */
    q_drain(&w->req);
    q_drain(&w->res);

    pthread_cond_destroy(&w->cv);
    pthread_mutex_destroy(&w->lock);
    if (w->evfd >= 0)
        close(w->evfd);
    free(w->threads);
    free(w);
}
|
||||||
|
|
||||||
|
/* Completion eventfd of the pool, or -1 when w is NULL. */
int vmsig_worker_evfd(const vmsig_worker* w) {
    if (w == NULL)
        return -1;
    return w->evfd;
}
|
||||||
|
|
||||||
|
/* Queue one request: the first len bytes of req are copied into a fresh node.
 * Returns 0 when queued; -1 when w is NULL, len exceeds VMSIG_WORK_SLOT, the queue is at
 * its depth cap, or the node could not be allocated. */
int vmsig_worker_submit(vmsig_worker* w, const void* req, size_t len) {
    if (w == NULL || len > VMSIG_WORK_SLOT)
        return -1;

    int status = -1;
    pthread_mutex_lock(&w->lock);
    if (w->req_count < w->max_depth) {          /* queue cap: reject flooding */
        work_node* item = calloc(1, sizeof *item);
        if (item != NULL) {
            if (req != NULL && len != 0)
                memcpy(item->buf, req, len);
            item->len = len;
            q_push(&w->req, item);
            w->req_count++;
            pthread_cond_signal(&w->cv);        /* wake one waiting thread */
            status = 0;
        }
    }
    pthread_mutex_unlock(&w->lock);
    return status;
}
|
||||||
|
|
||||||
|
/* Drain the completion eventfd: keep reading until a read no longer returns a full
 * 8-byte counter value. NULL is accepted. */
void vmsig_worker_ack(vmsig_worker* w) {
    if (w == NULL)
        return;

    uint64_t sink;
    ssize_t got;
    do {
        got = read(w->evfd, &sink, sizeof sink);
    } while (got == (ssize_t)sizeof sink);
}
|
||||||
|
|
||||||
|
/* Fetch one finished result, if any.
 *   res/cap - destination buffer; at most cap bytes (and at most the node's length) are
 *             copied; skipped when res is NULL or cap is 0
 *   rc      - optional; receives the job function's return code
 * Returns 1 when a result was delivered, 0 when none is pending, -1 when w is NULL. */
int vmsig_worker_poll(vmsig_worker* w, void* res, size_t cap, int* rc) {
    if (w == NULL)
        return -1;

    pthread_mutex_lock(&w->lock);
    work_node* done = q_pop(&w->res);
    pthread_mutex_unlock(&w->lock);
    if (done == NULL)
        return 0;

    if (res != NULL && cap != 0) {
        size_t ncopy = done->len;
        if (cap < ncopy)
            ncopy = cap;
        memcpy(res, done->buf, ncopy);
    }
    if (rc != NULL)
        *rc = done->rc;
    free(done);
    return 1;
}
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
#ifndef VMSIG_MEMCTX_CFG_H
|
||||||
|
#define VMSIG_MEMCTX_CFG_H
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* Private config of the memctx adapter (vmie). Passed as opaque to open(); NOT
 * public (layout per reference: src/<module>/include/). cfg==NULL => stub. */
typedef struct {
    int         stub;       /* 1 => synthetic kcr3/RO-fd (spine without a VM) */
    const char* ram_path;   /* armed: path to guest RAM backing (NOT published outward) */
    uint64_t    low;        /* below-4G split (vmie_win32_open / locator.low) */
    int         ro_fd;      /* >=0 => infra supplied a pre-sealed RO-fd (policy);
                             * <0  => default: open(ram_path, O_RDONLY) / stub-memfd */
} vmsig_memctx_cfg;
|
||||||
|
|
||||||
|
/* Max SRC bytes per atomic gva_write (bounds the worker POD slot; mc_req header + src
|
||||||
|
* must stay <= VMSIG_WORK_SLOT). Private to the adapter (an executor bound), NOT part of
|
||||||
|
* the neutral control contract — control only needs VMSIG_MEMWRITE_INLINE for inline SRC. */
|
||||||
|
#define VMSIG_MEMWRITE_MAX 192u
|
||||||
|
|
||||||
|
#endif /* VMSIG_MEMCTX_CFG_H */
|
||||||
@@ -0,0 +1,407 @@
|
|||||||
|
/* memctx.c — vmie sensor adapter: vends ONE coherent guest address-space context —
|
||||||
|
* the permanent System DirectoryTableBase (`kcr3`) PAIRED with a RAM-region locator
|
||||||
|
* and a pre-opened O_RDONLY fd. This is NOT perception and NOT semantics: signaling
|
||||||
|
* multicasts the datum + RO-fd, while the holder (an S-lib / any control) opens ITS OWN
|
||||||
|
* read-only vmie_mem from the fd and does gva_read/scan/pmap itself.
|
||||||
|
*
|
||||||
|
* Cold bring-up (host_bootstrap) is CPU-bound and blocking, so it runs on an off-loop
|
||||||
|
* worker; the loop thread only assembles the locator on the completion-eventfd and emits
|
||||||
|
* the MEMCTX trigger. The epoch is stamped by the CORE (retained-context); on an epoch
|
||||||
|
* change the core calls reg.invalidate, the adapter re-bootstraps and re-emits MEMCTX.
|
||||||
|
*
|
||||||
|
* RO outward is physical: O_RDONLY fd => mmap(PROT_WRITE) -> EACCES, so a write into the
|
||||||
|
* guest on the holder side is structurally impossible. stub mode (without VMSIG_WITH_VMIE
|
||||||
|
* or ram_path==NULL) synthesizes a kcr3 and a genuinely RO-mappable fd (memfd + seal) —
|
||||||
|
* the seam is provable without a VM. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig_adapter.h"
|
||||||
|
#include "memctx.h"
|
||||||
|
#include "adapter_util.h" /* vmsig_worker (off-loop bootstrap) */
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/epoll.h>
|
||||||
|
|
||||||
|
#ifdef VMSIG_WITH_VMIE
|
||||||
|
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* memfd_create / seal — ABI fallbacks for old glibc/kernel (stub RO-fd backing). */
|
||||||
|
#ifndef MFD_CLOEXEC
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <linux/memfd.h>
|
||||||
|
/* Fallback wrapper: issue the memfd_create system call directly. */
static int memfd_create(const char* name, unsigned int flags) {
    long rc = syscall(SYS_memfd_create, name, flags);
    return (int)rc;
}
|
||||||
|
#endif
|
||||||
|
#ifndef MFD_ALLOW_SEALING
|
||||||
|
#define MFD_ALLOW_SEALING 0x0002U
|
||||||
|
#endif
|
||||||
|
#ifndef F_ADD_SEALS
|
||||||
|
#define F_ADD_SEALS (1024 + 9)
|
||||||
|
#define F_SEAL_SHRINK 0x0002
|
||||||
|
#define F_SEAL_GROW 0x0004
|
||||||
|
#endif
|
||||||
|
#ifndef F_SEAL_FUTURE_WRITE
|
||||||
|
#define F_SEAL_FUTURE_WRITE 0x0010 /* kernel 5.1+: forbid future writable mappings */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MC_STUB_SIZE 0x10000u /* 64 KB of synthetic RAM image (stub) */
|
||||||
|
#define MC_MAX_SEG 8
|
||||||
|
#define MC_WORKER_DEPTH 16 /* one off-loop thread: rare bootstrap + writes */
|
||||||
|
|
||||||
|
enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
|
||||||
|
|
||||||
|
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
|
||||||
|
* bootstrap and the atomic writes (FIFO serializes a write against the close-on-rebootstrap).
|
||||||
|
* boot_count drives the stub kcr3 (changes per epoch); the real guest kcr3 does NOT depend
|
||||||
|
* on it (armed reads the System DTB). MC_JOB_WRITE copies SRC off-loop into req.src. */
|
||||||
|
typedef struct {
|
||||||
|
uint32_t op; /* MC_JOB_* */
|
||||||
|
uint32_t boot_count; /* MC_JOB_BOOTSTRAP */
|
||||||
|
/* --- MC_JOB_WRITE --- */
|
||||||
|
uint64_t gva;
|
||||||
|
uint32_t len;
|
||||||
|
uint32_t corr;
|
||||||
|
uint32_t origin;
|
||||||
|
uint8_t src[VMSIG_MEMWRITE_MAX]; /* SRC bytes copied off-loop (gva_write reads this) */
|
||||||
|
} mc_req;
|
||||||
|
typedef struct {
|
||||||
|
uint32_t op; /* echoes the job type so on_ready demuxes */
|
||||||
|
int ok; /* MC_JOB_WRITE result */
|
||||||
|
uint32_t corr;
|
||||||
|
uint32_t origin;
|
||||||
|
uint64_t kcr3; /* MC_JOB_BOOTSTRAP result */
|
||||||
|
} mc_res;
|
||||||
|
|
||||||
|
struct vmsig_adapter {
|
||||||
|
uint32_t endpoint;
|
||||||
|
int stub;
|
||||||
|
const char* ram_path; /* armed: RAM-backing path (NOT published outward) */
|
||||||
|
uint64_t low;
|
||||||
|
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (policy); <0 => default */
|
||||||
|
vmsig_emit emit;
|
||||||
|
int registered; /* register_memctx already called */
|
||||||
|
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
|
||||||
|
uint32_t boot_count; /* incremented on each (re-)bootstrap */
|
||||||
|
|
||||||
|
#ifdef VMSIG_WITH_VMIE
|
||||||
|
vmie_win32* win; /* held RW handle across the epoch (kcr3 source + gva_write target) */
|
||||||
|
vmie_mem* mem; /* vmie_win32_mem(win); borrowed, valid until vmie_win32_close */
|
||||||
|
#endif
|
||||||
|
uint64_t kcr3; /* current System DTB (also published in cur_pod.kcr3) */
|
||||||
|
|
||||||
|
/* persistent locator: owned by the loop thread; worker only yields kcr3 into scratch. */
|
||||||
|
int have_ctx;
|
||||||
|
vmsig_memctx cur_pod; /* kcr3/low/nseg/flags (epoch stamped by the core) */
|
||||||
|
vmsig_memseg cur_segs[MC_MAX_SEG];
|
||||||
|
uint32_t cur_nseg;
|
||||||
|
|
||||||
|
int stub_fd; /* stub: memfd of synth RAM (+seal); share_fd reopens it */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* fwd: MEMWRITE completion ACK (defined below mc_submit; used in mc_on_ready demux). */
|
||||||
|
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin);
|
||||||
|
|
||||||
|
/* ---- stub RO-fd: memfd + deterministic contents + seal of future writes ----
 * Creates a memfd of `size` bytes whose byte at offset i is (i & 0xFF), then seals it.
 * Returns the fd, or -1 when the memfd cannot be created or sized. Filling and sealing are
 * best-effort: a failure there still returns the fd. */
static int mc_make_stub_fd(uint32_t size) {
    static const char tag[] = "vmsig_memctx";

    int fd = memfd_create(tag, MFD_CLOEXEC | MFD_ALLOW_SEALING);
    if (fd < 0)
        fd = memfd_create(tag, MFD_CLOEXEC);        /* retry without sealing support */
    if (fd < 0)
        return -1;
    if (ftruncate(fd, (off_t)size) != 0) {
        close(fd);
        return -1;
    }

    /* deterministic contents via a temporary RW mapping BEFORE the seal */
    uint8_t* img = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (img != MAP_FAILED) {
        for (uint32_t off = 0; off != size; ++off)
            img[off] = (uint8_t)off;                /* low 8 bits of the offset */
        munmap(img, size);
    }

    /* FUTURE_WRITE: even if the holder reopens the fd as O_RDWR, it gets no writable mapping.
     * best-effort (kernel 5.1+); on older kernels only the O_RDONLY fd protects. */
    const int size_seals = F_SEAL_SHRINK | F_SEAL_GROW;
    if (fcntl(fd, F_ADD_SEALS, size_seals | F_SEAL_FUTURE_WRITE) != 0)
        (void)fcntl(fd, F_ADD_SEALS, size_seals);
    return fd;
}
|
||||||
|
|
||||||
|
#ifdef VMSIG_WITH_VMIE
|
||||||
|
/* armed bring-up: open RAM (RW is vmie's internal concern), host_bootstrap, extract the
|
||||||
|
* permanent System DTB as the System process cr3 (kcr3 — the root of the guest AS). The RW
|
||||||
|
* handle is HELD across the epoch (kcr3 source + gva_write target); ONLY the RO-fd (share_fd)
|
||||||
|
* leaves outward — write goes through this command plane, never a writable mmap. Runs on the
|
||||||
|
* off-loop worker; a stale handle from a prior epoch is dropped first (serialized FIFO with
|
||||||
|
* in-flight writes). */
|
||||||
|
static int mc_bootstrap_armed(struct vmsig_adapter* a, uint64_t* out_kcr3) {
|
||||||
|
if (a->win) { vmie_win32_close(a->win); a->win = NULL; a->mem = NULL; } /* drop stale epoch handle */
|
||||||
|
vmie_win32* v = vmie_win32_open(a->ram_path, a->low);
|
||||||
|
if (!v) return -1;
|
||||||
|
if (host_bootstrap(v) != 0) { vmie_win32_close(v); return -1; }
|
||||||
|
process procs[16];
|
||||||
|
int n = proc_list(v, 0, procs, 16);
|
||||||
|
uint64_t kcr3 = 0;
|
||||||
|
for (int i = 0; i < n && i < 16; i++)
|
||||||
|
if (!strcmp(procs[i].name, "System")) { kcr3 = procs[i].cr3; break; }
|
||||||
|
if (!kcr3) { vmie_win32_close(v); return -1; }
|
||||||
|
a->win = v; /* HOLD: RW handle lives across the epoch */
|
||||||
|
a->mem = vmie_win32_mem(v); /* borrowed; valid until vmie_win32_close(v) */
|
||||||
|
a->kcr3 = kcr3;
|
||||||
|
*out_kcr3 = kcr3;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ---- worker job: cold bring-up OR atomic write, off-loop ----------------- *
|
||||||
|
* Demultiplexed by rq->op. BOTH run on the SAME single worker thread, so a write on the
|
||||||
|
* held handle never races the close-on-rebootstrap (FIFO). The job MUST NOT touch core
|
||||||
|
* structures — it only reads a->mem/a->kcr3 (stable between re-bootstraps on this thread). */
|
||||||
|
static int mc_job(void* user, const void* req, void* res) {
|
||||||
|
struct vmsig_adapter* a = user;
|
||||||
|
const mc_req* rq = req;
|
||||||
|
mc_res* rs = res;
|
||||||
|
memset(rs, 0, sizeof *rs);
|
||||||
|
rs->op = rq->op;
|
||||||
|
|
||||||
|
if (rq->op == MC_JOB_WRITE) {
|
||||||
|
rs->corr = rq->corr; rs->origin = rq->origin;
|
||||||
|
if (a->stub) { rs->ok = 1; return 0; } /* stub: ack without actuation */
|
||||||
|
#ifdef VMSIG_WITH_VMIE
|
||||||
|
/* a->mem is NULL until a bootstrap has succeeded (or after one failed and cleared it):
|
||||||
|
* the guard turns that into an ok=0 ACK (observable to the initiator), not a crash. */
|
||||||
|
rs->ok = (a->mem && gva_write(a->mem, (uintptr_t)a->kcr3, (uintptr_t)rq->gva,
|
||||||
|
rq->src, rq->len) == 0);
|
||||||
|
return rs->ok ? 0 : -1;
|
||||||
|
#else
|
||||||
|
rs->ok = 0;
|
||||||
|
return -1; /* armed without the build flag: write impossible */
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* MC_JOB_BOOTSTRAP */
|
||||||
|
if (a->stub) {
|
||||||
|
rs->kcr3 = 0xC0DE0000ull + (uint64_t)rq->boot_count * 0x1000ull; /* changes per epoch */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#ifdef VMSIG_WITH_VMIE
|
||||||
|
uint64_t kcr3 = 0;
|
||||||
|
if (mc_bootstrap_armed(a, &kcr3) != 0) return -1;
|
||||||
|
rs->kcr3 = kcr3;
|
||||||
|
return 0;
|
||||||
|
#else
|
||||||
|
return -1; /* armed without the build flag: bootstrap impossible -> ERROR */
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mc_kick_bootstrap(struct vmsig_adapter* a) {
|
||||||
|
a->boot_count++;
|
||||||
|
mc_req rq;
|
||||||
|
memset(&rq, 0, sizeof rq);
|
||||||
|
rq.op = MC_JOB_BOOTSTRAP; rq.boot_count = a->boot_count;
|
||||||
|
(void)vmsig_worker_submit(a->worker, &rq, sizeof rq); /* full => drop (rare) */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- reg hooks (vmsig_memctx_reg.ctx = a; called by the core on the loop thread) ---- */
|
||||||
|
static void mc_reg_describe(void* ctx, vmsig_memctx* out_pod,
|
||||||
|
const vmsig_memseg** out_segs, uint32_t* out_nseg) {
|
||||||
|
struct vmsig_adapter* a = ctx;
|
||||||
|
*out_pod = a->cur_pod; /* kcr3/low/nseg/flags; the core overwrites the epoch */
|
||||||
|
*out_segs = a->cur_segs;
|
||||||
|
*out_nseg = a->cur_nseg;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int mc_reg_share_fd(void* ctx) {
|
||||||
|
struct vmsig_adapter* a = ctx;
|
||||||
|
if (a->cfg_ro_fd >= 0)
|
||||||
|
return fcntl(a->cfg_ro_fd, F_DUPFD_CLOEXEC, 0); /* infra-sealed RO-fd: dup */
|
||||||
|
if (a->stub) {
|
||||||
|
if (a->stub_fd < 0) return -1;
|
||||||
|
char path[64];
|
||||||
|
snprintf(path, sizeof path, "/proc/self/fd/%d", a->stub_fd);
|
||||||
|
return open(path, O_RDONLY | O_CLOEXEC); /* fresh O_RDONLY on the backing */
|
||||||
|
}
|
||||||
|
if (!a->ram_path) return -1;
|
||||||
|
return open(a->ram_path, O_RDONLY | O_CLOEXEC); /* armed default */
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
|
||||||
|
struct vmsig_adapter* a = ctx;
|
||||||
|
(void)epoch; /* the core owns the epoch; the adapter must re-bootstrap */
|
||||||
|
a->have_ctx = 0; /* the previous context is invalid */
|
||||||
|
mc_kick_bootstrap(a); /* off-loop; on_ready re-emits MEMCTX (new epoch) */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- vtable ---- */
|
||||||
|
static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
|
||||||
|
const vmsig_memctx_cfg* c = cfg;
|
||||||
|
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||||
|
if (!a) return NULL;
|
||||||
|
a->endpoint = endpoint;
|
||||||
|
a->stub = c ? c->stub : 1;
|
||||||
|
a->ram_path = c ? c->ram_path : NULL;
|
||||||
|
a->low = c ? c->low : 0;
|
||||||
|
a->cfg_ro_fd = (c && c->ro_fd >= 0) ? c->ro_fd : -1;
|
||||||
|
if (!a->ram_path && a->cfg_ro_fd < 0) a->stub = 1; /* no path/fd => stub */
|
||||||
|
a->stub_fd = -1;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Attach the memctx seam to the loop: start the single off-loop worker, create the stub
 * backing when needed, describe the completion eventfd in reg[0], register the memctx
 * hooks with the core, announce SEAM_UP and queue the first bootstrap.
 * Returns the number of fd registrations filled in (1), or -1 on failure. */
static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    if (cap < 1)
        return -1;
    a->emit = *emit;

    a->worker = vmsig_worker_new(mc_job, a, 1, MC_WORKER_DEPTH);
    if (a->worker == NULL)
        return -1;

    /* stub without an infra-supplied RO-fd needs its own synthetic RAM image */
    if (a->stub && a->cfg_ro_fd < 0) {
        a->stub_fd = mc_make_stub_fd(MC_STUB_SIZE);
        if (a->stub_fd < 0) {
            vmsig_worker_free(a->worker);
            a->worker = NULL;
            return -1;
        }
    }

    /* worker completion-eventfd as the readiness source (cookie=0). */
    vmsig_fd_reg* slot = &reg[0];
    slot->fd           = vmsig_worker_evfd(a->worker);
    slot->epoll_events = EPOLLIN;
    slot->shape        = VMSIG_RDY_EVENTFD;
    slot->cookie       = 0;

    /* register the reg BEFORE the first bootstrap: the core slot gets the hooks. describe
     * is not called until the slot is valid (which only happens after the first MEMCTX). */
    if (a->emit.register_memctx) {
        vmsig_memctx_reg hooks;
        memset(&hooks, 0, sizeof hooks);
        hooks.endpoint   = a->endpoint;
        hooks.source     = VMSIG_SRC_MEMCTX;
        hooks.ctx        = a;
        hooks.describe   = mc_reg_describe;
        hooks.share_fd   = mc_reg_share_fd;
        hooks.invalidate = mc_reg_invalidate;
        if (a->emit.register_memctx(a->emit.token, &hooks) == 0)
            a->registered = 1;
    }

    vmsig_event hello;
    memset(&hello, 0, sizeof hello);
    hello.kind     = VMSIG_EV_SEAM_UP;
    hello.source   = VMSIG_SRC_MEMCTX;
    hello.dir      = VMSIG_DIR_UP;
    hello.prio     = VMSIG_PRIO_NORMAL;
    hello.endpoint = a->endpoint;
    a->emit.emit(a->emit.token, &hello);

    mc_kick_bootstrap(a);   /* first bootstrap off-loop; assemble the locator on completion */
    return 1;
}
|
||||||
|
|
||||||
|
/* Readiness callback for the worker's completion eventfd. Drains the eventfd counter and
 * then handles every finished job:
 *   - MC_JOB_WRITE: addressed ACT_ACK to the initiator (ok only if the job reported ok
 *     AND returned 0);
 *   - failed bootstrap: urgent ERROR event, nothing is published;
 *   - successful bootstrap: rebuild the locator (single segment gpa 0 .. low) and emit
 *     the MEMCTX trigger.
 * cookie/events are unused. Always returns 0. */
static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie; (void)events;
    vmsig_worker_ack(a->worker);
    mc_res rs;
    int rc;
    while (vmsig_worker_poll(a->worker, &rs, sizeof rs, &rc) == 1) {
        if (rs.op == MC_JOB_WRITE) {
            /* atomic write completed: addressed ACT_ACK to the initiator. */
            mc_memwrite_ack(a, rs.ok && rc == 0, rs.corr, rs.origin);
            continue;
        }
        if (rc != 0) {
            /* bootstrap failed: ERROR (source MEMCTX); do NOT publish an invalid kcr3. */
            vmsig_event er;
            memset(&er, 0, sizeof er);
            er.kind = VMSIG_EV_ERROR; er.source = VMSIG_SRC_MEMCTX; er.dir = VMSIG_DIR_UP;
            er.prio = VMSIG_PRIO_URGENT; er.endpoint = a->endpoint;
            a->emit.emit(a->emit.token, &er);
            continue;
        }
        /* assemble the locator on the loop thread from rs.kcr3. a->kcr3 is the gva_write
         * TARGET and is owned SOLELY by the worker thread (set in mc_bootstrap_armed, read by
         * MC_JOB_WRITE - same thread, FIFO happens-before); the loop must NOT also write it, or
         * an in-flight write in mc_job would race it. cur_pod.kcr3 is loop-only (delivery). */
        memset(&a->cur_pod, 0, sizeof a->cur_pod);
        a->cur_pod.kcr3 = rs.kcr3;
        a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE;
        a->cur_pod.flags = VMSIG_MEMCTX_RDONLY;
        a->cur_nseg = 1;                       /* single-low identity (gpa 0 .. low) */
        a->cur_segs[0].gpa = 0;
        a->cur_segs[0].len = a->cur_pod.low;
        a->cur_segs[0].file_off = 0;
        a->cur_pod.nseg = a->cur_nseg;
        a->have_ctx = 1;

        /* emit the MEMCTX trigger: the core authoritatively re-describes + stamps the epoch. */
        vmsig_event up;
        memset(&up, 0, sizeof up);
        up.kind = VMSIG_EV_MEMCTX; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
        up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
        /* Clamp to the inline area, like every other inline copy in the adapters, so a
         * grown vmsig_memctx can never write past up.inln. */
        memcpy(up.inln, &a->cur_pod,
               sizeof up.inln < sizeof a->cur_pod ? sizeof up.inln : sizeof a->cur_pod);
        a->emit.emit(a->emit.token, &up);
    }
    return 0;
}
|
||||||
|
|
||||||
|
/* Emit an addressed ACT_ACK for a MEMWRITE (source MEMCTX, to the initiator). inln carries
|
||||||
|
* {ok,corr,origin} (same shape as the input adapter's ACK), so control reads ok at offset 0.
|
||||||
|
* ok=0 covers extent-deny / no-SRC / queue-full / write failure (default-deny, observable). */
|
||||||
|
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin) {
|
||||||
|
struct { int ok; uint32_t corr; uint32_t origin; } body = { ok, corr, origin };
|
||||||
|
vmsig_event up;
|
||||||
|
memset(&up, 0, sizeof up);
|
||||||
|
up.kind = VMSIG_EV_ACT_ACK; up.source = VMSIG_SRC_MEMCTX; up.dir = VMSIG_DIR_UP;
|
||||||
|
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint;
|
||||||
|
up.corr = corr; up.origin = origin;
|
||||||
|
up.payload.flags = VMSIG_PL_INLINE;
|
||||||
|
memcpy(up.inln, &body, sizeof body);
|
||||||
|
a->emit.emit(a->emit.token, &up);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* DOWN MEMWRITE handler: validate extent, copy SRC off-loop, submit the atomic gva_write to
|
||||||
|
* the worker. Default-deny: any invalid path (no SRC flag, len out of bounds, short payload,
|
||||||
|
* queue full) ACKs ok=0 and does NOT actuate. The completion ACK for a queued write arrives
|
||||||
|
* via mc_on_ready. Returns 0 when the event is consumed by this seam, 1 when it is not ours. */
|
||||||
|
static int mc_submit(vmsig_adapter* a, const vmsig_event* ev) {
|
||||||
|
if (ev->kind != VMSIG_EV_CMD_MEMWRITE) return 1; /* not for this seam */
|
||||||
|
|
||||||
|
const vmsig_memwrite* mw = (const vmsig_memwrite*)ev->inln;
|
||||||
|
uint32_t len = mw->len;
|
||||||
|
if (len == 0 || len > VMSIG_MEMWRITE_MAX) { /* extent: bounded */
|
||||||
|
mc_memwrite_ack(a, 0, ev->corr, ev->origin);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
mc_req rq; memset(&rq, 0, sizeof rq);
|
||||||
|
rq.op = MC_JOB_WRITE; rq.gva = mw->gva; rq.len = len;
|
||||||
|
rq.corr = ev->corr; rq.origin = ev->origin;
|
||||||
|
|
||||||
|
/* copy SRC into the worker req (off-loop gva_write reads from rq.src). */
|
||||||
|
if (mw->flags & VMSIG_MW_SRC_INLINE) {
|
||||||
|
if (len > VMSIG_MEMWRITE_INLINE) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
|
||||||
|
memcpy(rq.src, ev->inln + sizeof *mw, len); /* inln tail after the 16-byte header */
|
||||||
|
} else if (mw->flags & VMSIG_MW_SRC_PAYLOAD) {
|
||||||
|
if (!ev->payload.data || ev->payload.len < len) { mc_memwrite_ack(a, 0, ev->corr, ev->origin); return 0; }
|
||||||
|
memcpy(rq.src, ev->payload.data, len); /* in-proc borrowed payload */
|
||||||
|
} else {
|
||||||
|
mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* no SRC flag */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vmsig_worker_submit(a->worker, &rq, sizeof rq) != 0) {
|
||||||
|
mc_memwrite_ack(a, 0, ev->corr, ev->origin); /* queue full -> ACK err */
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0; /* completion ACK arrives via mc_on_ready */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tear down a memctx adapter; a NULL adapter is accepted and ignored.
 *
 * Order matters: the endpoint is unregistered first, then the worker is freed (which
 * joins it, so bootstrap and write jobs are finished), and only then is the VMIE handle
 * closed, so no gva_write can still be in flight. cfg_ro_fd is owned by whoever opened
 * the adapter and is deliberately left open. */
static void mc_close(vmsig_adapter* a) {
    if (a == NULL)
        return;

    if (a->registered) {
        if (a->emit.unregister_memctx)
            a->emit.unregister_memctx(a->emit.token, a->endpoint);
    }

    if (a->worker) {
        vmsig_worker_free(a->worker);   /* join: bootstrap + write jobs finished */
    }

#ifdef VMSIG_WITH_VMIE
    if (a->win) {
        vmie_win32_close(a->win);       /* AFTER worker join: no in-flight gva_write */
    }
#endif

    if (a->stub_fd >= 0) {
        close(a->stub_fd);
    }

    /* cfg_ro_fd belongs to the infrastructure (the open caller) - do NOT close it. */
    free(a);
}
|
||||||
|
|
||||||
|
static const vmsig_adapter_ops MC_OPS = {
|
||||||
|
.name = "memctx", .source = VMSIG_SRC_MEMCTX, .codec = VMSIG_CODEC_MEMCTX,
|
||||||
|
.open = mc_open, .attach = mc_attach, .on_readiness = mc_on_ready,
|
||||||
|
.submit = mc_submit, .close = mc_close
|
||||||
|
};
|
||||||
|
|
||||||
|
const vmsig_adapter_ops* vmsig_memctx_ops(void) { return &MC_OPS; }
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
#ifndef VMSIG_VMHOST_H
#define VMSIG_VMHOST_H

/* Private configuration of the vmhost adapter (signaling's own QMP client).
 *
 *   cfg == NULL or no qmp_path  -> stub mode: synthetic events, no QEMU involved.
 *   qmp_path given              -> armed: connect to QEMU's QMP unix socket; a leading
 *                                  '@' selects an abstract-namespace socket.
 *
 * The client needs only POSIX and its own code, so no build flag is required. */
typedef struct {
    /* NOTE(review): vh_open in vmhost.c decides stub mode from qmp_path alone and does
     * not read this field - confirm whether it is still meant to be honoured. */
    int stub;
    /* QMP socket path. vh_open stores this pointer without copying the string, so the
     * string has to outlive the adapter. */
    const char* qmp_path;
} vmsig_vmhost_cfg;

#endif /* VMSIG_VMHOST_H */
|
||||||
@@ -0,0 +1,313 @@
|
|||||||
|
/* vmhost.c — QEMU/QMP host-plane: signaling's OWN layer for observing the VM
|
||||||
|
* and its basic control. Not a wrapper over a neighbor repo — an own QMP client;
|
||||||
|
* depends only on POSIX, so it is always functional (no build flag).
|
||||||
|
*
|
||||||
|
* This is the first truly epoll-native source: the QMP socket (VMSIG_RDY_FD) lives
|
||||||
|
* directly in the loop, non-blocking, async events. Up: QMP events -> VM_LIFECYCLE
|
||||||
|
* (broadcast), EOF -> SEAM_DOWN. Down: CMD_VM -> QMP command with id correlation,
|
||||||
|
* reply addressed to the initiator. stub mode (no QEMU) synthesizes events/replies. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig_adapter.h"
|
||||||
|
#include "vmhost.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <sys/timerfd.h>
|
||||||
|
#include <sys/epoll.h>
|
||||||
|
|
||||||
|
#define VMHOST_BUF 4096
|
||||||
|
#define VMHOST_STUB_MS 200
|
||||||
|
#define VMHOST_MAX_PENDING 64
|
||||||
|
|
||||||
|
enum { ST_STUB = 0, ST_CONNECTING, ST_NEGOTIATING, ST_READY, ST_DEAD };
|
||||||
|
|
||||||
|
typedef struct { uint32_t id, origin, corr; uint8_t op; int used; } pend_ent;
|
||||||
|
|
||||||
|
struct vmsig_adapter {
|
||||||
|
uint32_t endpoint;
|
||||||
|
int stub;
|
||||||
|
const char* qmp_path;
|
||||||
|
vmsig_emit emit;
|
||||||
|
int fd; /* QMP socket (armed) or timerfd (stub) */
|
||||||
|
int st;
|
||||||
|
uint32_t cur; /* current synthetic state (stub) */
|
||||||
|
char buf[VMHOST_BUF];
|
||||||
|
size_t buflen;
|
||||||
|
uint32_t next_id;
|
||||||
|
pend_ent pend[VMHOST_MAX_PENDING];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* ---- minimal QMP line parse (top-level keys only; full JSON — deferred) ---- */
|
||||||
|
/* Extract the string value that follows `key` in a single QMP line.
 *
 * This is a deliberately minimal scanner, not a JSON parser: it looks for the first
 * occurrence of `key` (callers pass the key with its quotes, e.g. "\"event\""), skips
 * any blanks, tabs and colons, and expects an opening double quote. The characters up
 * to the closing quote are copied into `out`, truncated to cap-1 bytes and always
 * NUL-terminated. Escape sequences are not interpreted.
 *
 *   line  NUL-terminated text to scan
 *   key   text to search for
 *   out   destination buffer
 *   cap   size of `out` in bytes
 *
 * Returns 1 when a terminated string value was found (possibly truncated), 0 otherwise.
 * A zero-capacity buffer or a NULL argument yields 0 without touching `out`; a value
 * whose closing quote is missing (a cut-off line) yields 0 with `out` set to "". */
static int jstr(const char* line, const char* key, char* out, size_t cap) {
    if (!line || !key || !out || cap == 0) return 0;   /* nothing can be stored safely */

    const char* p = strstr(line, key);
    if (!p) return 0;
    p += strlen(key);
    p += strspn(p, " \t:");
    if (*p != '"') return 0;
    p++;

    size_t i = 0;
    for (; *p && *p != '"'; p++) {
        if (i + 1 < cap) out[i++] = *p;                /* keep scanning past the cap to find the quote */
    }
    if (*p != '"') {                                   /* ran off the end: value is incomplete */
        out[0] = 0;
        return 0;
    }
    out[i] = 0;
    return 1;
}
|
||||||
|
/* Read the non-negative decimal number that follows `key` in a QMP line.
 *
 * Finds the first occurrence of `key`, skips blanks, tabs and colons after it, and
 * converts the digits found there with strtol (base 10). Returns -1 when the key is
 * absent or the next character is not a decimal digit (so a leading '-' is rejected). */
static long jnum(const char* line, const char* key) {
    const char* hit = strstr(line, key);
    if (hit == NULL)
        return -1;

    const char* digits = hit + strlen(key);
    digits += strspn(digits, " \t:");

    if (!(*digits >= '0' && *digits <= '9'))
        return -1;

    return strtol(digits, NULL, 10);
}
|
||||||
|
static uint32_t ev_state(const char* n) {
|
||||||
|
if (!strcmp(n, "RESUME")) return VMSIG_VM_RUNNING;
|
||||||
|
if (!strcmp(n, "STOP")) return VMSIG_VM_PAUSED;
|
||||||
|
if (!strcmp(n, "SHUTDOWN")) return VMSIG_VM_SHUTDOWN;
|
||||||
|
if (!strcmp(n, "RESET")) return VMSIG_VM_RESET;
|
||||||
|
if (!strcmp(n, "POWERDOWN")) return VMSIG_VM_POWERDOWN;
|
||||||
|
if (!strcmp(n, "GUEST_PANICKED")) return VMSIG_VM_CRASHED;
|
||||||
|
return VMSIG_VM_UNKNOWN;
|
||||||
|
}
|
||||||
|
static uint32_t status_state(const char* s) {
|
||||||
|
if (!strcmp(s, "running")) return VMSIG_VM_RUNNING;
|
||||||
|
if (!strcmp(s, "paused")) return VMSIG_VM_PAUSED;
|
||||||
|
if (!strcmp(s, "shutdown")) return VMSIG_VM_SHUTDOWN;
|
||||||
|
return VMSIG_VM_UNKNOWN;
|
||||||
|
}
|
||||||
|
static const char* op_qmp(uint32_t op) {
|
||||||
|
switch (op) {
|
||||||
|
case VMSIG_VMOP_QUERY: return "query-status";
|
||||||
|
case VMSIG_VMOP_CONT: return "cont";
|
||||||
|
case VMSIG_VMOP_STOP: return "stop";
|
||||||
|
case VMSIG_VMOP_RESET: return "system_reset";
|
||||||
|
case VMSIG_VMOP_POWERDOWN: return "system_powerdown";
|
||||||
|
case VMSIG_VMOP_QUIT: return "quit";
|
||||||
|
default: return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static pend_ent* pend_alloc(struct vmsig_adapter* a) {
|
||||||
|
for (int i = 0; i < VMHOST_MAX_PENDING; i++) if (!a->pend[i].used) return &a->pend[i];
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
static pend_ent* pend_find(struct vmsig_adapter* a, uint32_t id) {
|
||||||
|
for (int i = 0; i < VMHOST_MAX_PENDING; i++)
|
||||||
|
if (a->pend[i].used && a->pend[i].id == id) return &a->pend[i];
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- emission of neutral UP events ---- */
|
||||||
|
static void emit_vm(struct vmsig_adapter* a, uint32_t state, uint32_t origin, uint32_t corr) {
|
||||||
|
vmsig_vm_state vs = { state, 0 };
|
||||||
|
vmsig_event up;
|
||||||
|
memset(&up, 0, sizeof up);
|
||||||
|
up.kind = VMSIG_EV_VM_LIFECYCLE; up.source = VMSIG_SRC_VMHOST; up.dir = VMSIG_DIR_UP;
|
||||||
|
up.prio = (state == VMSIG_VM_RUNNING || state == VMSIG_VM_PAUSED)
|
||||||
|
? VMSIG_PRIO_NORMAL : VMSIG_PRIO_URGENT;
|
||||||
|
up.endpoint = a->endpoint; up.origin = origin; up.corr = corr;
|
||||||
|
up.payload.flags = VMSIG_PL_INLINE;
|
||||||
|
memcpy(up.inln, &vs, sizeof vs);
|
||||||
|
a->emit.emit(a->emit.token, &up);
|
||||||
|
}
|
||||||
|
/* Emit a body-less, urgent seam event of kind `k` (callers pass SEAM_UP or SEAM_DOWN)
 * from source VMHOST for this adapter's endpoint. */
static void emit_seam(struct vmsig_adapter* a, vmsig_kind k) {
    vmsig_event ev;

    memset(&ev, 0, sizeof ev);
    ev.endpoint = a->endpoint;
    ev.prio     = VMSIG_PRIO_URGENT;
    ev.dir      = VMSIG_DIR_UP;
    ev.source   = VMSIG_SRC_VMHOST;
    ev.kind     = k;

    a->emit.emit(a->emit.token, &ev);
}
|
||||||
|
static void emit_ack(struct vmsig_adapter* a, uint32_t origin, uint32_t corr, int ok) {
|
||||||
|
vmsig_event up;
|
||||||
|
memset(&up, 0, sizeof up);
|
||||||
|
up.kind = VMSIG_EV_ACT_ACK; up.source = VMSIG_SRC_VMHOST; up.dir = VMSIG_DIR_UP;
|
||||||
|
up.prio = VMSIG_PRIO_NORMAL; up.endpoint = a->endpoint; up.origin = origin; up.corr = corr;
|
||||||
|
up.payload.flags = VMSIG_PL_INLINE;
|
||||||
|
up.inln[0] = (uint8_t)(ok ? 1 : 0);
|
||||||
|
a->emit.emit(a->emit.token, &up);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- armed: handle one QMP line ---- */
|
||||||
|
static void handle_line(struct vmsig_adapter* a, const char* line) {
|
||||||
|
switch (a->st) {
|
||||||
|
case ST_CONNECTING:
|
||||||
|
if (strstr(line, "\"QMP\"")) { /* greeting -> negotiate capabilities */
|
||||||
|
static const char cap[] = "{\"execute\":\"qmp_capabilities\"}\r\n";
|
||||||
|
ssize_t r = write(a->fd, cap, sizeof cap - 1); (void)r;
|
||||||
|
a->st = ST_NEGOTIATING;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case ST_NEGOTIATING:
|
||||||
|
if (strstr(line, "\"return\"")) { a->st = ST_READY; emit_seam(a, VMSIG_EV_SEAM_UP); }
|
||||||
|
break;
|
||||||
|
case ST_READY:
|
||||||
|
if (strstr(line, "\"event\"")) {
|
||||||
|
char name[64];
|
||||||
|
if (jstr(line, "\"event\"", name, sizeof name)) {
|
||||||
|
uint32_t s = ev_state(name);
|
||||||
|
if (s != VMSIG_VM_UNKNOWN) emit_vm(a, s, 0, 0); /* broadcast */
|
||||||
|
}
|
||||||
|
} else if (strstr(line, "\"return\"") || strstr(line, "\"error\"")) {
|
||||||
|
long id = jnum(line, "\"id\"");
|
||||||
|
pend_ent* p = id >= 0 ? pend_find(a, (uint32_t)id) : NULL;
|
||||||
|
if (p) {
|
||||||
|
if (p->op == VMSIG_VMOP_QUERY && strstr(line, "\"return\"")) {
|
||||||
|
char stbuf[32]; uint32_t s = VMSIG_VM_UNKNOWN;
|
||||||
|
if (jstr(line, "\"status\"", stbuf, sizeof stbuf)) s = status_state(stbuf);
|
||||||
|
emit_vm(a, s, p->origin, p->corr); /* addressed reply */
|
||||||
|
} else {
|
||||||
|
emit_ack(a, p->origin, p->corr, strstr(line, "\"return\"") != NULL);
|
||||||
|
}
|
||||||
|
p->used = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void armed_dead(struct vmsig_adapter* a) {
|
||||||
|
emit_seam(a, VMSIG_EV_SEAM_DOWN); /* VM transport died */
|
||||||
|
if (a->fd >= 0) { close(a->fd); a->fd = -1; } /* close removes the fd from epoll */
|
||||||
|
a->st = ST_DEAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- vtable ---- */
|
||||||
|
static vmsig_adapter* vh_open(const void* cfg, uint32_t endpoint) {
|
||||||
|
const vmsig_vmhost_cfg* c = cfg;
|
||||||
|
struct vmsig_adapter* a = calloc(1, sizeof *a);
|
||||||
|
if (!a) return NULL;
|
||||||
|
a->endpoint = endpoint;
|
||||||
|
a->qmp_path = (c && c->qmp_path && c->qmp_path[0]) ? c->qmp_path : NULL;
|
||||||
|
a->stub = (a->qmp_path == NULL); /* path given => armed, otherwise stub */
|
||||||
|
a->fd = -1;
|
||||||
|
a->cur = VMSIG_VM_RUNNING;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Attach the adapter to the loop: create its single readiness fd and describe it in
 * reg[0].
 *
 * Stub mode arms a periodic timerfd (period VMHOST_STUB_MS) and announces SEAM_UP at
 * once. Armed mode starts a non-blocking connect to the QMP unix socket; a leading '@'
 * in the path selects the abstract namespace (the '@' is replaced by a NUL byte and the
 * address length covers exactly the name). In armed mode SEAM_UP is emitted only later,
 * once the QMP handshake has reached READY.
 *
 * Returns the number of registrations filled in (always 1) or -1 on failure: cap < 1,
 * descriptor creation failed, the path does not fit sun_path, or connect failed with
 * anything other than EINPROGRESS. */
static int vh_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
    struct sockaddr_un sa;
    socklen_t sa_len;
    size_t path_len;
    int sock;

    if (cap < 1)
        return -1;
    a->emit = *emit;

    if (a->stub) {
        struct itimerspec period;

        a->fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
        if (a->fd < 0)
            return -1;

        memset(&period, 0, sizeof period);
        period.it_interval.tv_sec  = VMHOST_STUB_MS / 1000u;
        period.it_interval.tv_nsec = (long)(VMHOST_STUB_MS % 1000u) * 1000000L;
        period.it_value            = period.it_interval;   /* first tick after one period */
        if (timerfd_settime(a->fd, 0, &period, NULL) < 0) {
            close(a->fd);
            a->fd = -1;
            return -1;
        }

        a->st = ST_STUB;
        reg[0].fd           = a->fd;
        reg[0].epoll_events = EPOLLIN;
        reg[0].shape        = VMSIG_RDY_TIMERFD;
        reg[0].cookie       = 0;
        emit_seam(a, VMSIG_EV_SEAM_UP);
        return 1;
    }

    /* armed: connect to QEMU's QMP socket */
    sock = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
    if (sock < 0)
        return -1;

    memset(&sa, 0, sizeof sa);
    sa.sun_family = AF_UNIX;
    path_len = strlen(a->qmp_path);

    if (a->qmp_path[0] != '@') {
        /* filesystem path: must leave room for the terminating NUL */
        if (path_len >= sizeof sa.sun_path) {
            close(sock);
            return -1;
        }
        memcpy(sa.sun_path, a->qmp_path, path_len);
        sa_len = (socklen_t)sizeof sa;
    } else {
        /* abstract name: leading NUL + name, no terminator needed */
        if (path_len > sizeof sa.sun_path) {
            close(sock);
            return -1;
        }
        sa.sun_path[0] = 0;
        memcpy(sa.sun_path + 1, a->qmp_path + 1, path_len - 1);
        sa_len = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + path_len);
    }

    if (connect(sock, (struct sockaddr*)&sa, sa_len) < 0) {
        if (errno != EINPROGRESS) {
            close(sock);
            return -1;
        }
    }

    a->fd = sock;
    a->st = ST_CONNECTING;
    reg[0].fd           = sock;
    reg[0].epoll_events = EPOLLIN;
    reg[0].shape        = VMSIG_RDY_FD;
    reg[0].cookie       = 0;
    /* SEAM_UP is emitted upon reaching READY (after qmp_capabilities) */
    return 1;
}
|
||||||
|
|
||||||
|
/* Readiness callback for the adapter's fd; `cookie` and `events` are not used.
 *
 * Stub mode: drain the timerfd, toggle the synthetic state between RUNNING and PAUSED
 * and broadcast it.
 *
 * Armed mode: read until the socket would block, feeding every complete '\n'-terminated
 * line to handle_line and keeping an unfinished tail at the start of the buffer. A
 * buffer that fills up without containing a newline is discarded. EOF or a read error
 * other than EAGAIN/EWOULDBLOCK marks the transport dead. Always returns 0. */
static int vh_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
    (void)cookie;
    (void)events;

    if (a->stub) {
        uint64_t ticks;
        ssize_t got;

        do {                                            /* drain */
            got = read(a->fd, &ticks, sizeof ticks);
        } while (got == (ssize_t)sizeof ticks);

        if (a->cur == VMSIG_VM_RUNNING)
            a->cur = VMSIG_VM_PAUSED;
        else
            a->cur = VMSIG_VM_RUNNING;
        emit_vm(a, a->cur, 0, 0);                       /* broadcast */
        return 0;
    }

    if (a->st == ST_DEAD)
        return 0;

    for (;;) {
        char* cursor;
        char* limit;
        char* nl;
        size_t consumed;
        ssize_t got;

        if (a->buflen >= sizeof a->buf)
            a->buflen = 0;                              /* line overflow -> reset */

        got = read(a->fd, a->buf + a->buflen, sizeof a->buf - a->buflen);
        if (got == 0) {
            armed_dead(a);
            return 0;
        }
        if (got < 0) {
            if (errno == EAGAIN || errno == EWOULDBLOCK)
                break;
            armed_dead(a);
            return 0;
        }
        a->buflen += (size_t)got;

        /* hand over every complete line, terminating each one in place */
        cursor = a->buf;
        limit  = a->buf + a->buflen;
        while ((nl = memchr(cursor, '\n', (size_t)(limit - cursor))) != NULL) {
            *nl = 0;
            handle_line(a, cursor);
            cursor = nl + 1;
        }

        /* move the unfinished tail (if any lines were consumed) to the front */
        consumed = (size_t)(cursor - a->buf);
        if (consumed > 0) {
            memmove(a->buf, cursor, a->buflen - consumed);
            a->buflen -= consumed;
        }
    }
    return 0;
}
|
||||||
|
|
||||||
|
/* DOWN-path handler for CMD_VM.
 *
 * Stub mode answers immediately with a VM_LIFECYCLE addressed to the initiator: QUERY
 * reports the current synthetic state, CONT/STOP also update it, RESET/POWERDOWN/QUIT
 * only report the corresponding state.
 *
 * Armed mode translates the operation into a QMP command tagged with a fresh id,
 * records {id, origin, corr, op} in the pending table so that the reply can be routed
 * back, and sends the command. send(MSG_NOSIGNAL) is used instead of write(): a write
 * to a socket whose peer has exited would raise SIGPIPE and terminate the process,
 * whereas here it surfaces as an ordinary error.
 *
 * Returns 1 when the event or operation is not handled by this seam, 0 when the command
 * was answered (stub) or sent (armed), and -1 when the adapter is not READY, the
 * pending table is full, or the command could not be formatted or sent in full (the
 * pending slot is released in that case). */
static int vh_submit(vmsig_adapter* a, const vmsig_event* ev) {
    if (ev->kind != VMSIG_EV_CMD_VM) return 1;                /* not for this seam */
    vmsig_vm_cmd cmd;
    memcpy(&cmd, ev->inln, sizeof cmd);

    if (a->stub) {
        uint32_t s;
        switch (cmd.op) {
        case VMSIG_VMOP_QUERY:     s = a->cur; break;
        case VMSIG_VMOP_CONT:      s = a->cur = VMSIG_VM_RUNNING; break;
        case VMSIG_VMOP_STOP:      s = a->cur = VMSIG_VM_PAUSED; break;
        case VMSIG_VMOP_RESET:     s = VMSIG_VM_RESET; break;
        case VMSIG_VMOP_POWERDOWN: s = VMSIG_VM_POWERDOWN; break;
        case VMSIG_VMOP_QUIT:      s = VMSIG_VM_SHUTDOWN; break;
        default: return 1;
        }
        emit_vm(a, s, ev->origin, ev->corr);                  /* reply addressed to the initiator */
        return 0;
    }

    if (a->st != ST_READY) return -1;
    const char* q = op_qmp(cmd.op);
    if (!q) return 1;
    pend_ent* p = pend_alloc(a);
    if (!p) return -1;                                        /* backpressure: pending table is full */
    uint32_t id = ++a->next_id;
    p->used = 1; p->id = id; p->origin = ev->origin; p->corr = ev->corr; p->op = (uint8_t)cmd.op;

    char line[160];
    int len = snprintf(line, sizeof line, "{\"execute\":\"%s\",\"id\":%u}\r\n", q, (unsigned)id);
    if (len < 0 || (size_t)len >= sizeof line) {              /* never send a truncated command */
        p->used = 0;
        return -1;
    }
    ssize_t r = send(a->fd, line, (size_t)len, MSG_NOSIGNAL);
    if (r != (ssize_t)len) { p->used = 0; return -1; }
    return 0;
}
|
||||||
|
|
||||||
|
/* Release a vmhost adapter: close its fd if one is open, then free the object.
 * A NULL adapter is ignored. */
static void vh_close(vmsig_adapter* a) {
    if (a != NULL) {
        if (a->fd >= 0) {
            close(a->fd);
        }
        free(a);
    }
}
|
||||||
|
|
||||||
|
static const vmsig_adapter_ops VH_OPS = {
|
||||||
|
.name = "vmhost", .source = VMSIG_SRC_VMHOST, .codec = VMSIG_CODEC_VMHOST,
|
||||||
|
.open = vh_open, .attach = vh_attach, .on_readiness = vh_on_ready,
|
||||||
|
.submit = vh_submit, .close = vh_close
|
||||||
|
};
|
||||||
|
|
||||||
|
const vmsig_adapter_ops* vmsig_vmhost_ops(void) { return &VH_OPS; }
|
||||||
@@ -0,0 +1,182 @@
|
|||||||
|
/* cli.c — vmsig spine demonstrator (no real VM).
|
||||||
|
*
|
||||||
|
* Brings up the context + epoll core, attaches an in-proc control and a set of stub
|
||||||
|
* adapters (input/vmhost/memctx) on a single endpoint (VM 0). Proves the bidirectional seam:
|
||||||
|
* UP: SEAM_UP, VM_LIFECYCLE (vmhost stub tick), MEMCTX (kcr3+locator + RO-fd);
|
||||||
|
* DOWN: CMD_ACQUIRE+CMD_INPUT -> input adapter -> ACT_ACK (correlation);
|
||||||
|
* CMD_VM QUERY -> vmhost -> VM_LIFECYCLE (addressed reply).
|
||||||
|
* The address-space context arrives via MULTICAST: control receives kcr3 and a
|
||||||
|
* pre-opened O_RDONLY fd of the RAM region (control does NOT see ram_path; it mmaps
|
||||||
|
* the fd itself, write -> EACCES). (vgpu frame perception now lives in an out-of-repo
|
||||||
|
* S-lib that consumes this MEMCTX seam — not in signaling.)
|
||||||
|
* Shutdown: on SIGINT or automatically, once all paths are proven. */
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
/* Core to stop on SIGINT; set by main before the handler is installed. */
static vmsig_core* g_core;

/* SIGINT handler: asks the running core, if any, to stop.
 * NOTE(review): whether vmsig_core_stop is async-signal-safe is not visible here - confirm. */
static void on_sigint(int s) {
    vmsig_core* core = g_core;

    (void)s;
    if (core != NULL)
        vmsig_core_stop(core);
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vmsig_core* core;
|
||||||
|
void* ctl;
|
||||||
|
int total, lifecycles, acks, seams, memctx;
|
||||||
|
uint64_t last_kcr3;
|
||||||
|
uint32_t last_epoch;
|
||||||
|
int sent_first; /* sent acquire+input+vm on the first lifecycle tick */
|
||||||
|
} demo;
|
||||||
|
|
||||||
|
/* Printable name of an event kind for the demo log; kinds not listed print as "?". */
static const char* kind_name(vmsig_kind k) {
    const char* label = "?";

    switch (k) {
    case VMSIG_EV_SEAM_UP:      label = "SEAM_UP";      break;
    case VMSIG_EV_SEAM_DOWN:    label = "SEAM_DOWN";    break;
    case VMSIG_EV_VM_LIFECYCLE: label = "VM_LIFECYCLE"; break;
    case VMSIG_EV_ACT_ACK:      label = "ACT_ACK";      break;
    case VMSIG_EV_MEMCTX:       label = "MEMCTX";       break;
    default:                                            break;
    }
    return label;
}
|
||||||
|
|
||||||
|
/* Core -> control: address-space context + pre-opened O_RDONLY fd of the RAM region.
|
||||||
|
* Demonstrate RO: mmap(PROT_READ) ok, mmap(PROT_WRITE) -> EACCES. The fd is borrowed
|
||||||
|
* (closed by the core after the call) — here we mmap and immediately unmap. */
|
||||||
|
static int on_memctx(void* user, const vmsig_event* ev, int fd) {
|
||||||
|
demo* d = user;
|
||||||
|
const vmsig_memctx* m = (const vmsig_memctx*)ev->inln;
|
||||||
|
d->memctx++;
|
||||||
|
d->last_kcr3 = m->kcr3; d->last_epoch = m->epoch;
|
||||||
|
uint32_t nseg = 0;
|
||||||
|
const vmsig_memseg* segs = vmsig_memctx_segs(ev, &nseg);
|
||||||
|
printf(" UP MEMCTX ep=%u kcr3=%#llx low=%#llx epoch=%u nseg=%u rdonly=%d\n",
|
||||||
|
(unsigned)ev->endpoint, (unsigned long long)m->kcr3,
|
||||||
|
(unsigned long long)m->low, (unsigned)m->epoch, (unsigned)nseg,
|
||||||
|
(m->flags & VMSIG_MEMCTX_RDONLY) ? 1 : 0);
|
||||||
|
if (fd >= 0 && m->low) {
|
||||||
|
void* ro = mmap(NULL, (size_t)m->low, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
|
if (ro != MAP_FAILED) {
|
||||||
|
void* rw = mmap(NULL, (size_t)m->low, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||||
|
printf(" RO-fd: mmap(PROT_READ) ok, mmap(PROT_WRITE) %s\n",
|
||||||
|
rw == MAP_FAILED ? "EACCES (RO enforced)" : "UNEXPECTEDLY succeeded!");
|
||||||
|
if (rw != MAP_FAILED) munmap(rw, (size_t)m->low);
|
||||||
|
munmap(ro, (size_t)m->low);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(void)segs;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int on_event(void* user, const vmsig_event* ev) {
|
||||||
|
demo* d = user;
|
||||||
|
d->total++;
|
||||||
|
switch (ev->kind) {
|
||||||
|
case VMSIG_EV_SEAM_UP: d->seams++; break;
|
||||||
|
case VMSIG_EV_ACT_ACK: d->acks++; break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
|
||||||
|
d->lifecycles++;
|
||||||
|
vmsig_vm_state vs; memcpy(&vs, ev->inln, sizeof vs);
|
||||||
|
printf(" UP VM_LIFECYCLE ep=%u state=%u%s\n",
|
||||||
|
(unsigned)ev->endpoint, (unsigned)vs.state, ev->origin ? " (reply)" : "");
|
||||||
|
} else if (ev->kind != VMSIG_EV_MEMCTX) { /* MEMCTX is printed in on_memctx */
|
||||||
|
printf(" UP %-12s src=%u ep=%u seq=%u prio=%u\n",
|
||||||
|
kind_name(ev->kind), (unsigned)ev->source, (unsigned)ev->endpoint,
|
||||||
|
(unsigned)ev->seq, (unsigned)ev->prio);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* On the first lifecycle tick: acquire the INPUT lease, send input, and query VM status. */
|
||||||
|
if (ev->kind == VMSIG_EV_VM_LIFECYCLE && !ev->origin && !d->sent_first) {
|
||||||
|
d->sent_first = 1;
|
||||||
|
/* Input is a destructive class: first acquire the exclusive INPUT lease. */
|
||||||
|
vmsig_event acq;
|
||||||
|
memset(&acq, 0, sizeof acq);
|
||||||
|
acq.kind = VMSIG_EV_CMD_ACQUIRE; acq.source = VMSIG_SRC_INPUT; acq.dir = VMSIG_DIR_DOWN;
|
||||||
|
acq.prio = VMSIG_PRIO_HIGH; acq.endpoint = 0;
|
||||||
|
((vmsig_lease_req*)acq.inln)->cls = VMSIG_LEASE_INPUT;
|
||||||
|
printf(" DOWN CMD_ACQUIRE INPUT@ep0\n");
|
||||||
|
vmsig_inproc_send(d->ctl, &acq);
|
||||||
|
|
||||||
|
vmsig_event in;
|
||||||
|
memset(&in, 0, sizeof in);
|
||||||
|
in.kind = VMSIG_EV_CMD_INPUT; in.source = VMSIG_SRC_INPUT; in.dir = VMSIG_DIR_DOWN;
|
||||||
|
in.prio = VMSIG_PRIO_HIGH; in.endpoint = 0; in.corr = 0xC0FFEEu;
|
||||||
|
in.payload.flags = VMSIG_PL_INLINE;
|
||||||
|
vmsig_input act; memset(&act, 0, sizeof act); /* neutral public input contract */
|
||||||
|
act.kind = VMSIG_INPUT_ABS; act.code = 0; act.value = 100; /* demo: abs axis X = 100 */
|
||||||
|
memcpy(in.inln, &act, sizeof act);
|
||||||
|
printf(" DOWN CMD_INPUT ABS axis=0 val=100 corr=0x%X\n", (unsigned)in.corr);
|
||||||
|
vmsig_inproc_send(d->ctl, &in);
|
||||||
|
|
||||||
|
vmsig_event vm;
|
||||||
|
memset(&vm, 0, sizeof vm);
|
||||||
|
vm.kind = VMSIG_EV_CMD_VM; vm.source = VMSIG_SRC_VMHOST; vm.dir = VMSIG_DIR_DOWN;
|
||||||
|
vm.prio = VMSIG_PRIO_NORMAL; vm.endpoint = 0; vm.corr = 0x5Au;
|
||||||
|
vmsig_vm_cmd vc = { VMSIG_VMOP_QUERY };
|
||||||
|
memcpy(vm.inln, &vc, sizeof vc);
|
||||||
|
printf(" DOWN CMD_VM QUERY\n");
|
||||||
|
vmsig_inproc_send(d->ctl, &vm);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* All paths proven — stop (for automated verification). */
|
||||||
|
if (d->memctx >= 1 && d->acks >= 1 && d->lifecycles >= 2) vmsig_core_stop(d->core);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
if (!ctx) { fprintf(stderr, "ctx_new failed\n"); return 1; }
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
if (!core) { fprintf(stderr, "core_new failed\n"); vmsig_ctx_free(ctx); return 1; }
|
||||||
|
g_core = core;
|
||||||
|
signal(SIGINT, on_sigint);
|
||||||
|
|
||||||
|
demo d;
|
||||||
|
memset(&d, 0, sizeof d);
|
||||||
|
d.core = core;
|
||||||
|
|
||||||
|
vmsig_inproc_cfg ccfg;
|
||||||
|
memset(&ccfg, 0, sizeof ccfg);
|
||||||
|
ccfg.on_event = on_event;
|
||||||
|
ccfg.on_memctx = on_memctx;
|
||||||
|
ccfg.user = &d;
|
||||||
|
ccfg.sub.source_mask = 0; /* all sources */
|
||||||
|
ccfg.sub.prio_min = VMSIG_PRIO_BULK;
|
||||||
|
ccfg.sub.endpoint_mask = 0; /* all VMs */
|
||||||
|
|
||||||
|
void* ctl = vmsig_inproc_control_new(&ccfg);
|
||||||
|
if (!ctl) { fprintf(stderr, "control_new failed\n"); vmsig_core_free(core); vmsig_ctx_free(ctx); return 1; }
|
||||||
|
d.ctl = ctl;
|
||||||
|
|
||||||
|
/* Trusted in-proc control: full grant on VM 0 (the policy is set by the embedding
|
||||||
|
* program; for an out-of-process poller the grant would be issued upon authentication). */
|
||||||
|
vmsig_grant grant;
|
||||||
|
memset(&grant, 0, sizeof grant);
|
||||||
|
grant.principal = 1;
|
||||||
|
grant.endpoint_mask = 1u << 0;
|
||||||
|
grant.source_mask = 0xFFFFFFFFu;
|
||||||
|
grant.cap_mask = VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT | VMSIG_CAP_LIFECYCLE |
|
||||||
|
VMSIG_CAP_MEMCTX | VMSIG_CAP_POWER | VMSIG_CAP_VM;
|
||||||
|
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
|
||||||
|
|
||||||
|
/* Single endpoint (VM 0), stub adapters (cfg = NULL). */
|
||||||
|
if (vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0) < 0 ||
|
||||||
|
vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) < 0 || /* stub QEMU plane */
|
||||||
|
vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) < 0) { /* stub AS context */
|
||||||
|
fprintf(stderr, "add_adapter failed\n");
|
||||||
|
vmsig_core_free(core); vmsig_ctx_free(ctx); return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("vmsig_cli: loop started (Ctrl-C to stop)\n");
|
||||||
|
int rc = vmsig_core_run(core);
|
||||||
|
printf("vmsig_cli: loop finished rc=%d (events=%d seams=%d lifecycles=%d acks=%d memctx=%d kcr3=%#llx epoch=%u)\n",
|
||||||
|
rc, d.total, d.seams, d.lifecycles, d.acks, d.memctx,
|
||||||
|
(unsigned long long)d.last_kcr3, (unsigned)d.last_epoch);
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
/* inproc.c — reference in-process control: a thin shim turning a C callback into
|
||||||
|
* the neutral control vtable. fd = -1 (no epoll registration); UP arrives via a
|
||||||
|
* direct on_event call, DOWN leaves through the emit hook installed by the core. */
|
||||||
|
#include "vmsig_control.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vmsig_inproc_cfg cfg;
|
||||||
|
int (*emit_down)(void* token, vmsig_event*);
|
||||||
|
void* token;
|
||||||
|
} inproc_ctl;
|
||||||
|
|
||||||
|
/* An in-proc control has no descriptor to poll: always reports -1. */
static int ip_fd(void* ctl) {
    (void)ctl;
    return -1;
}
|
||||||
|
/* Report the subscription filter that was supplied in the control's config. Returns 0. */
static int ip_subscribe(void* ctl, vmsig_sub* out) {
    const inproc_ctl* self = ctl;

    *out = self->cfg.sub;
    return 0;
}
|
||||||
|
static int ip_deliver(void* ctl, const vmsig_event* ev) {
|
||||||
|
inproc_ctl* c = ctl;
|
||||||
|
return c->cfg.on_event ? c->cfg.on_event(c->cfg.user, ev) : 0;
|
||||||
|
}
|
||||||
|
/* Store the DOWN emit hook and its token; vmsig_inproc_send forwards through them. */
static void ip_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*), void* token) {
    inproc_ctl* self = ctl;

    self->token     = token;
    self->emit_down = emit;
}
|
||||||
|
/* Destroy the control: the object is a single heap allocation, so free() is enough. */
static void ip_close(void* ctl) {
    free(ctl);
}
|
||||||
|
|
||||||
|
/* Core -> in-proc algorithm: address-space context (MEMCTX) + RO-fd as a direct int.
|
||||||
|
* The fd is borrowed (dup/mmap to retain it); the core closes it after the call. */
|
||||||
|
static int ip_attach_memctx(void* ctl, const vmsig_event* ev, int fd) {
|
||||||
|
inproc_ctl* c = ctl;
|
||||||
|
if (!c->cfg.on_memctx) return -1;
|
||||||
|
return c->cfg.on_memctx(c->cfg.user, ev, fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const vmsig_control_ops IP_OPS = {
|
||||||
|
.name = "inproc",
|
||||||
|
.fd = ip_fd,
|
||||||
|
.subscribe = ip_subscribe,
|
||||||
|
.deliver = ip_deliver,
|
||||||
|
.on_readable = NULL, /* no fd — nothing to read */
|
||||||
|
.set_emit_down = ip_set_emit_down,
|
||||||
|
.close = ip_close,
|
||||||
|
.attach_memctx = ip_attach_memctx
|
||||||
|
};
|
||||||
|
|
||||||
|
const vmsig_control_ops* vmsig_inproc_control_ops(void) { return &IP_OPS; }
|
||||||
|
|
||||||
|
void* vmsig_inproc_control_new(const vmsig_inproc_cfg* cfg) {
|
||||||
|
inproc_ctl* c = calloc(1, sizeof *c);
|
||||||
|
if (!c) return NULL;
|
||||||
|
if (cfg) c->cfg = *cfg;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Send a DOWN event through the emit hook that was installed on this control.
 * Returns -1 when `ctl` is NULL or no hook has been installed yet, otherwise the
 * hook's result. */
int vmsig_inproc_send(void* ctl, vmsig_event* down) {
    inproc_ctl* self = ctl;

    if (self == NULL)
        return -1;
    if (self->emit_down == NULL)
        return -1;
    return self->emit_down(self->token, down);
}
|
||||||
@@ -0,0 +1,318 @@
|
|||||||
|
/* socket.c — out-of-process control over a unix socket.
|
||||||
|
*
|
||||||
|
* The listener registers in the core as a SLOT_SOURCE (listen-fd). On accept the
|
||||||
|
* peer is authenticated via SO_PEERCRED, the policy issues a neutral grant; an empty
|
||||||
|
* grant => the connection is closed (not a valid poller). Otherwise a per-conn
|
||||||
|
* control is created: its fd is driven by the epoll core, DOWN frames are parsed and
|
||||||
|
* dispatched through emit_down (enforced by the grant), UP events are serialized into
|
||||||
|
* a frame. On EOF — deferred reap.
|
||||||
|
*
|
||||||
|
* DoS protection: per-uid limit of concurrent connections (against eviction of
|
||||||
|
* legitimate ones); a janitor timerfd detaches "stuck" partial frames (slowloris).
|
||||||
|
* The global ceiling and slot reuse live in the core. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig_socket.h"
|
||||||
|
#include "core_internal.h" /* core_add_source, core_request_drop, add_control */
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/uio.h>
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <sys/timerfd.h>
|
||||||
|
#include <sys/stat.h> /* umask */
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#define VMSIG_SOCK_PER_UID_MAX 8 /* concurrent connections per uid */
|
||||||
|
#define VMSIG_SOCK_IDLE_NS (10ull * 1000000000ull) /* timeout for a stuck partial frame */
|
||||||
|
#define VMSIG_SOCK_JANITOR_S 5 /* sweep period */
|
||||||
|
|
||||||
|
typedef struct sock_listener sock_listener;
|
||||||
|
|
||||||
|
static uint64_t now_ns(void) {
|
||||||
|
struct timespec ts;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== wire codec (public — also for external clients) ===== */
|
||||||
|
/* Pack an event into a wire frame. The frame is zeroed first, so any byte not
 * assigned below stays 0; only the header fields and the inline area are copied. */
void vmsig_wire_encode(vmsig_wire* w, const vmsig_event* ev) {
    memset(w, 0, sizeof *w);

    /* framing header */
    w->magic   = VMSIG_WIRE_MAGIC;
    w->version = VMSIG_WIRE_VERSION;

    /* routing / classification fields, copied one-to-one */
    w->kind     = ev->kind;
    w->source   = ev->source;
    w->dir      = ev->dir;
    w->prio     = ev->prio;
    w->endpoint = ev->endpoint;
    w->corr     = ev->corr;

    /* inline data; the copy length is the frame's inline size */
    memcpy(w->inln, ev->inln, sizeof w->inln);
}
|
||||||
|
/* Unpack a wire frame into an event.
 * Returns -1 (leaving *ev untouched) when magic or version do not match, 0 otherwise.
 * The event is zeroed before being filled and its payload is flagged VMSIG_PL_INLINE. */
int vmsig_wire_decode(const vmsig_wire* w, vmsig_event* ev) {
    if (w->magic != VMSIG_WIRE_MAGIC) return -1;
    if (w->version != VMSIG_WIRE_VERSION) return -1;

    memset(ev, 0, sizeof *ev);

    ev->kind     = w->kind;
    ev->source   = w->source;
    ev->dir      = w->dir;
    ev->prio     = w->prio;
    ev->endpoint = w->endpoint;
    ev->corr     = w->corr;

    ev->payload.flags = VMSIG_PL_INLINE;
    memcpy(ev->inln, w->inln, sizeof ev->inln);
    return 0;
}
|
||||||
|
|
||||||
|
/* ===== per-conn control ===== */
|
||||||
|
typedef struct sock_conn {
|
||||||
|
int fd;
|
||||||
|
vmsig_core* core;
|
||||||
|
int id;
|
||||||
|
uint32_t uid;
|
||||||
|
uint64_t last_ns; /* activity for the janitor */
|
||||||
|
sock_listener* L;
|
||||||
|
struct sock_conn* lnext; /* listener's connection list */
|
||||||
|
int (*emit_down)(void* token, vmsig_event*);
|
||||||
|
void* token;
|
||||||
|
uint8_t buf[sizeof(vmsig_wire)];
|
||||||
|
size_t buflen;
|
||||||
|
} sock_conn;
|
||||||
|
|
||||||
|
static int conn_fd(void* ctl) { return ((sock_conn*)ctl)->fd; }
|
||||||
|
|
||||||
|
/* vmsig_control_ops.subscribe: hand back an all-zero subscription, which this file
 * treats as "everything" -- what the peer may actually see is gated by its grant. */
static int conn_subscribe(void* ctl, vmsig_sub* out) {
    (void)ctl;
    memset(out, 0, sizeof *out);
    return 0;
}
|
||||||
|
|
||||||
|
static int conn_deliver(void* ctl, const vmsig_event* ev) {
|
||||||
|
sock_conn* c = ctl;
|
||||||
|
vmsig_wire w;
|
||||||
|
vmsig_wire_encode(&w, ev);
|
||||||
|
ssize_t r = write(c->fd, &w, sizeof w); /* best-effort; EAGAIN => frame dropped */
|
||||||
|
(void)r;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* vmsig_control_ops.set_emit_down: remember the DOWN hook and its token for later use
 * by conn_on_readable. */
static void conn_set_emit_down(void* ctl, int (*emit)(void* token, vmsig_event*), void* token) {
    sock_conn* conn = ctl;
    conn->token     = token;
    conn->emit_down = emit;
}
|
||||||
|
|
||||||
|
static int conn_on_readable(void* ctl) {
|
||||||
|
sock_conn* c = ctl;
|
||||||
|
for (;;) {
|
||||||
|
ssize_t n = read(c->fd, c->buf + c->buflen, sizeof c->buf - c->buflen);
|
||||||
|
if (n == 0) { core_request_drop(c->core, c->id); return 0; } /* EOF */
|
||||||
|
if (n < 0) {
|
||||||
|
if (errno == EAGAIN || errno == EWOULDBLOCK) break;
|
||||||
|
core_request_drop(c->core, c->id);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
c->last_ns = now_ns();
|
||||||
|
c->buflen += (size_t)n;
|
||||||
|
if (c->buflen == sizeof c->buf) {
|
||||||
|
vmsig_event ev;
|
||||||
|
if (vmsig_wire_decode((const vmsig_wire*)c->buf, &ev) == 0) {
|
||||||
|
ev.dir = VMSIG_DIR_DOWN; /* from a poller — DOWN only */
|
||||||
|
if (c->emit_down) c->emit_down(c->token, &ev); /* enforced by the grant */
|
||||||
|
}
|
||||||
|
c->buflen = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== listener ===== */
|
||||||
|
struct sock_listener {
|
||||||
|
int listen_fd;
|
||||||
|
int janitor_fd;
|
||||||
|
vmsig_core* core;
|
||||||
|
vmsig_socket_policy policy;
|
||||||
|
void* ud;
|
||||||
|
sock_conn* conns; /* singly-linked list of active connections */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Remove `c` from the listener's connection list; silently does nothing if absent. */
static void listener_unlink(sock_listener* L, sock_conn* c) {
    for (sock_conn** link = &L->conns; *link != NULL; link = &(*link)->lnext) {
        if (*link != c) continue;
        *link = c->lnext;
        return;
    }
}
|
||||||
|
|
||||||
|
/* Number of connections currently on the listener's list whose peer uid equals `uid`. */
static int listener_uid_count(sock_listener* L, uint32_t uid) {
    int matches = 0;
    sock_conn* cur = L->conns;
    while (cur) {
        if (cur->uid == uid) matches++;
        cur = cur->lnext;
    }
    return matches;
}
|
||||||
|
|
||||||
|
static void conn_close(void* ctl) {
|
||||||
|
sock_conn* c = ctl;
|
||||||
|
if (c->L) listener_unlink(c->L, c);
|
||||||
|
if (c->fd >= 0) close(c->fd);
|
||||||
|
free(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Send a SINGLE 80-byte vmsig_wire frame + ONE RO-fd in a cmsg (SCM_RIGHTS). This keeps
|
||||||
|
* the control-socket stream fixed-framed at sizeof(vmsig_wire): the client reads one
|
||||||
|
* frame via recvmsg and extracts the fd only on an fd-carrying frame. Partial cmsg
|
||||||
|
* transfer is not allowed (the fd is all-or-nothing): a short sendmsg -> -1. Shared
|
||||||
|
* primitive for the memctx handoff (one SCM_RIGHTS mechanism). */
|
||||||
|
static int conn_send_fd_frame(sock_conn* c, const vmsig_wire* w, int fd) {
|
||||||
|
struct iovec iov;
|
||||||
|
iov.iov_base = (void*)w;
|
||||||
|
iov.iov_len = sizeof *w;
|
||||||
|
|
||||||
|
union {
|
||||||
|
char buf[CMSG_SPACE(sizeof(int))];
|
||||||
|
struct cmsghdr align;
|
||||||
|
} cm;
|
||||||
|
memset(&cm, 0, sizeof cm);
|
||||||
|
|
||||||
|
struct msghdr mh;
|
||||||
|
memset(&mh, 0, sizeof mh);
|
||||||
|
mh.msg_iov = &iov;
|
||||||
|
mh.msg_iovlen = 1;
|
||||||
|
mh.msg_control = cm.buf;
|
||||||
|
mh.msg_controllen = sizeof cm.buf;
|
||||||
|
|
||||||
|
struct cmsghdr* cmsg = CMSG_FIRSTHDR(&mh);
|
||||||
|
cmsg->cmsg_level = SOL_SOCKET;
|
||||||
|
cmsg->cmsg_type = SCM_RIGHTS;
|
||||||
|
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
|
||||||
|
memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
ssize_t n = sendmsg(c->fd, &mh, MSG_NOSIGNAL);
|
||||||
|
if (n < 0) {
|
||||||
|
if (errno == EINTR) continue;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return ((size_t)n == sizeof *w) ? 0 : -1; /* partial frame -> failure */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Core -> socket-control: handoff of an address-space context (kind=MEMCTX, inln=vmsig_memctx
|
||||||
|
* POD) + RO-fd of the RAM region in a cmsg. The segs payload does NOT go on the wire (the
|
||||||
|
* fixed-framed vmsig_wire carries only inln); the holder opens it at `low`. */
|
||||||
|
static int conn_attach_memctx(void* ctl, const vmsig_event* ev, int fd) {
|
||||||
|
sock_conn* c = ctl;
|
||||||
|
if (fd < 0 || !ev) return -1;
|
||||||
|
vmsig_wire w;
|
||||||
|
vmsig_wire_encode(&w, ev); /* kind=MEMCTX, inln=vmsig_memctx; payload is not serialized */
|
||||||
|
return conn_send_fd_frame(c, &w, fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const vmsig_control_ops CONN_OPS = {
|
||||||
|
.name = "socket",
|
||||||
|
.fd = conn_fd, .subscribe = conn_subscribe, .deliver = conn_deliver,
|
||||||
|
.on_readable = conn_on_readable, .set_emit_down = conn_set_emit_down, .close = conn_close,
|
||||||
|
.attach_memctx = conn_attach_memctx
|
||||||
|
};
|
||||||
|
|
||||||
|
static void on_accept(void* user, uint32_t events) {
|
||||||
|
(void)events;
|
||||||
|
sock_listener* L = user;
|
||||||
|
for (;;) {
|
||||||
|
int fd = accept4(L->listen_fd, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
|
||||||
|
if (fd < 0) break; /* EAGAIN / other — done */
|
||||||
|
|
||||||
|
uint32_t uid = (uint32_t)-1, pid = 0;
|
||||||
|
struct ucred uc; socklen_t ul = sizeof uc;
|
||||||
|
if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &uc, &ul) == 0) {
|
||||||
|
uid = (uint32_t)uc.uid; pid = (uint32_t)uc.pid;
|
||||||
|
}
|
||||||
|
vmsig_grant g;
|
||||||
|
if (L->policy) g = L->policy(uid, pid, L->ud);
|
||||||
|
else memset(&g, 0, sizeof g);
|
||||||
|
|
||||||
|
if (g.cap_mask == 0 || g.endpoint_mask == 0) { /* not a valid poller */
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
|
||||||
|
core_audit(L->core, &a);
|
||||||
|
close(fd);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (listener_uid_count(L, uid) >= VMSIG_SOCK_PER_UID_MAX) { /* anti-eviction */
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
|
||||||
|
core_audit(L->core, &a);
|
||||||
|
close(fd);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
sock_conn* conn = calloc(1, sizeof *conn);
|
||||||
|
if (!conn) { close(fd); continue; }
|
||||||
|
conn->fd = fd; conn->core = L->core; conn->id = -1;
|
||||||
|
conn->uid = uid; conn->last_ns = now_ns(); conn->L = L;
|
||||||
|
conn->lnext = L->conns; L->conns = conn;
|
||||||
|
int id = vmsig_core_add_control(L->core, &CONN_OPS, conn, &g);
|
||||||
|
if (id < 0) { /* no slot — reject */
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_REJECT, uid, 0, 0, pid };
|
||||||
|
core_audit(L->core, &a);
|
||||||
|
listener_unlink(L, conn); close(fd); free(conn); continue;
|
||||||
|
}
|
||||||
|
conn->id = id;
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_ADMIT, g.principal, 0, 0, pid };
|
||||||
|
core_audit(L->core, &a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* janitor: detach connections with a stuck partial frame (slowloris) */
|
||||||
|
static void on_janitor(void* user, uint32_t events) {
|
||||||
|
(void)events;
|
||||||
|
sock_listener* L = user;
|
||||||
|
uint64_t v;
|
||||||
|
while (read(L->janitor_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain */ }
|
||||||
|
uint64_t now = now_ns();
|
||||||
|
for (sock_conn* c = L->conns; c; c = c->lnext)
|
||||||
|
if (c->buflen > 0 && now - c->last_ns > VMSIG_SOCK_IDLE_NS)
|
||||||
|
core_request_drop(c->core, c->id);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* listener cleanup on core_free (owner = the core, via on_free of the first source) */
|
||||||
|
static void listener_free(void* user) {
|
||||||
|
sock_listener* L = user;
|
||||||
|
if (L->janitor_fd >= 0) close(L->janitor_fd);
|
||||||
|
if (L->listen_fd >= 0) close(L->listen_fd);
|
||||||
|
free(L);
|
||||||
|
}
|
||||||
|
|
||||||
|
int vmsig_socket_attach(vmsig_core* core, const char* path,
|
||||||
|
vmsig_socket_policy policy, void* ud) {
|
||||||
|
if (!core || !path || !*path) return -1;
|
||||||
|
int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
|
||||||
|
if (fd < 0) return -1;
|
||||||
|
|
||||||
|
struct sockaddr_un addr;
|
||||||
|
memset(&addr, 0, sizeof addr);
|
||||||
|
addr.sun_family = AF_UNIX;
|
||||||
|
socklen_t alen;
|
||||||
|
size_t n = strlen(path);
|
||||||
|
if (path[0] == '@') { /* abstract namespace */
|
||||||
|
if (n > sizeof addr.sun_path) { close(fd); return -1; }
|
||||||
|
addr.sun_path[0] = 0;
|
||||||
|
memcpy(addr.sun_path + 1, path + 1, n - 1);
|
||||||
|
alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
|
||||||
|
} else { /* filesystem path */
|
||||||
|
if (n >= sizeof addr.sun_path) { close(fd); return -1; }
|
||||||
|
unlink(path);
|
||||||
|
memcpy(addr.sun_path, path, n);
|
||||||
|
alen = (socklen_t)sizeof addr;
|
||||||
|
}
|
||||||
|
/* Create the filesystem socket with restrictive perms (0600): the path must not be
|
||||||
|
* the only gate — connect requires write, so we open it to the owner only.
|
||||||
|
* (An abstract socket has no FS perms; its access is bounded by the net namespace.) */
|
||||||
|
mode_t old_um = 0;
|
||||||
|
int restrict_perm = (path[0] != '@');
|
||||||
|
if (restrict_perm) old_um = umask(0177);
|
||||||
|
int br = bind(fd, (struct sockaddr*)&addr, alen);
|
||||||
|
if (restrict_perm) umask(old_um);
|
||||||
|
if (br < 0) { close(fd); return -1; }
|
||||||
|
if (listen(fd, 64) < 0) { close(fd); return -1; }
|
||||||
|
|
||||||
|
int jfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
|
||||||
|
if (jfd < 0) { close(fd); return -1; }
|
||||||
|
struct itimerspec its;
|
||||||
|
memset(&its, 0, sizeof its);
|
||||||
|
its.it_interval.tv_sec = VMSIG_SOCK_JANITOR_S;
|
||||||
|
its.it_value = its.it_interval;
|
||||||
|
if (timerfd_settime(jfd, 0, &its, NULL) < 0) { close(jfd); close(fd); return -1; }
|
||||||
|
|
||||||
|
sock_listener* L = calloc(1, sizeof *L);
|
||||||
|
if (!L) { close(jfd); close(fd); return -1; }
|
||||||
|
L->listen_fd = fd; L->janitor_fd = jfd; L->core = core; L->policy = policy; L->ud = ud;
|
||||||
|
/* the listen source owns the listener (on_free=listener_free closes both fds + free) */
|
||||||
|
if (core_add_source(core, fd, on_accept, L, listener_free) < 0) {
|
||||||
|
close(jfd); close(fd); free(L); return -1;
|
||||||
|
}
|
||||||
|
/* janitor without on_free (L already belongs to the core); on error core_free releases it */
|
||||||
|
if (core_add_source(core, jfd, on_janitor, L, NULL) < 0) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
+224
@@ -0,0 +1,224 @@
|
|||||||
|
/* core.c — core lifecycle and registration of adapters/controls.
|
||||||
|
* The loop and pumps live in loop.c. */
|
||||||
|
#include "core_internal.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sys/epoll.h>
|
||||||
|
#include <sys/eventfd.h>
|
||||||
|
|
||||||
|
/* Bind `fd` to a slot and add it to the epoll set with `epoll_events`.
 *
 * A SLOT_DEAD slot is recycled when one exists, so the slot table does not grow with
 * every connection; otherwise a new slot is appended (the table doubles, starting at
 * 16). The slot is zeroed, then given `role` and `fd`, and its address is stored in
 * epoll_event.data.ptr. Returns the slot, or NULL on allocation or epoll_ctl failure
 * (in the latter case the slot is left marked SLOT_DEAD for reuse). */
core_slot* core_register_fd(vmsig_core* c, int fd, uint32_t epoll_events, slot_role role) {
    core_slot* slot = NULL;

    /* first dead slot, if any */
    int idx = 0;
    while (idx < c->nslots && c->slots[idx]->role != SLOT_DEAD) idx++;

    if (idx < c->nslots) {
        slot = c->slots[idx];
    } else {
        if (c->nslots == c->cap_slots) {
            int grown = (c->cap_slots == 0) ? 16 : c->cap_slots * 2;
            core_slot** bigger = realloc(c->slots, (size_t)grown * sizeof *bigger);
            if (!bigger) return NULL;
            c->slots     = bigger;
            c->cap_slots = grown;
        }
        slot = calloc(1, sizeof *slot);
        if (!slot) return NULL;
        c->slots[c->nslots] = slot;
        c->nslots++;
    }

    memset(slot, 0, sizeof *slot);
    slot->fd   = fd;
    slot->role = role;

    struct epoll_event ee;
    memset(&ee, 0, sizeof ee);
    ee.data.ptr = slot;
    ee.events   = epoll_events;
    if (epoll_ctl(c->epfd, EPOLL_CTL_ADD, fd, &ee) < 0) {
        slot->role = SLOT_DEAD;
        return NULL;
    }
    return slot;
}
|
||||||
|
|
||||||
|
/* Allocate a core bound to `ctx` (not owned: the caller frees ctx).
 *
 * Creates the epoll instance and the wake eventfd, registers the wake fd, then
 * registers the context's pacing timer fds for both directions when the context
 * exposes them. Returns NULL on NULL ctx or on any failure up to and including the
 * wake-fd registration, releasing everything allocated so far. A failure to register
 * a pacing fd is ignored and the core is still returned. */
vmsig_core* vmsig_core_new(vmsig_ctx* ctx) {
    if (!ctx) return NULL;
    vmsig_core* c = calloc(1, sizeof *c);
    if (!c) return NULL;
    c->ctx = ctx;
    c->epfd = -1;
    c->wake_fd = -1;

    c->epfd = epoll_create1(EPOLL_CLOEXEC);
    if (c->epfd < 0) { free(c); return NULL; }

    c->wake_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    if (c->wake_fd < 0) { close(c->epfd); free(c); return NULL; }
    if (!core_register_fd(c, c->wake_fd, EPOLLIN, SLOT_WAKEUP)) {
        /* core_register_fd may already have allocated the slot table (and a slot it
         * then marked dead after epoll_ctl failed); release them before the core. */
        for (int i = 0; i < c->nslots; i++) free(c->slots[i]);
        free(c->slots);
        close(c->wake_fd);
        close(c->epfd);
        free(c);
        return NULL;
    }

    /* context pacing timerfds (created in ctx_new) as loop sources */
    for (int d = VMSIG_DIR_UP; d <= VMSIG_DIR_DOWN; d++) {
        int tfd = vmsig_ctx_timing_fd(ctx, (vmsig_dir)d);
        if (tfd >= 0) core_register_fd(c, tfd, EPOLLIN, SLOT_CTX_TIMING);
    }
    return c;
}
|
||||||
|
|
||||||
|
/* Open an adapter for `endpoint`, attach it, and register every fd it reports.
 *
 *   ops      - adapter vtable; open and attach are required, close is optional
 *   cfg      - opaque configuration forwarded to ops->open
 *   endpoint - endpoint the adapter serves
 *
 * Returns the adapter id (index in the core's adapter table), or -1 when arguments are
 * invalid, the table is full, open/attach fails, attach reports more fds than
 * VMSIG_ADAPTER_FDS, or an fd cannot be registered. On failure nothing of this adapter
 * remains registered in the core. */
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
                           const void* cfg, uint32_t endpoint) {
    if (!c || !ops || c->nadapters >= VMSIG_MAX_ADAPTERS) return -1;
    if (!ops->open || !ops->attach) return -1;

    vmsig_adapter* a = ops->open(cfg, endpoint);
    if (!a) return -1;

    vmsig_emit emit = { core_emit_up, core_register_memctx, core_unregister_memctx, c };
    vmsig_fd_reg reg[VMSIG_ADAPTER_FDS];
    memset(reg, 0, sizeof reg);

    int n = ops->attach(a, &emit, reg, VMSIG_ADAPTER_FDS);
    /* n above the array size would make the loop below read past reg[] */
    if (n < 0 || n > VMSIG_ADAPTER_FDS) {
        if (ops->close) ops->close(a);
        return -1;
    }

    core_slot* added[VMSIG_ADAPTER_FDS];
    int nadded = 0;
    for (int i = 0; i < n; i++) {
        uint32_t events = reg[i].epoll_events ? reg[i].epoll_events : (uint32_t)EPOLLIN;
        core_slot* s = core_register_fd(c, reg[i].fd, events, SLOT_ADAPTER);
        if (!s) {
            /* Undo the fds registered so far: otherwise they stay in the epoll set
             * pointing at an adapter that is about to be closed. Detach BEFORE close
             * so the descriptors are still valid for EPOLL_CTL_DEL. */
            while (nadded > 0) {
                core_slot* undo = added[--nadded];
                epoll_ctl(c->epfd, EPOLL_CTL_DEL, undo->fd, NULL);
                undo->ops     = NULL;
                undo->adapter = NULL;
                undo->role    = SLOT_DEAD;              /* reusable; the loop ignores it */
            }
            if (ops->close) ops->close(a);
            return -1;
        }
        s->ops     = ops;
        s->adapter = a;
        s->cookie  = reg[i].cookie;
        added[nadded++] = s;
    }

    int id = c->nadapters;
    c->adapters[id].ops      = ops;
    c->adapters[id].a        = a;
    c->adapters[id].endpoint = endpoint;
    c->nadapters++;
    return id;
}
|
||||||
|
|
||||||
|
int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ctl,
|
||||||
|
const vmsig_grant* grant) {
|
||||||
|
if (!c || !ops) return -1;
|
||||||
|
|
||||||
|
/* reuse a freed (reaped) slot; otherwise grow up to the ceiling */
|
||||||
|
int id = -1;
|
||||||
|
for (int i = 0; i < c->ncontrols; i++)
|
||||||
|
if (!c->controls[i].active) { id = i; break; }
|
||||||
|
if (id < 0) {
|
||||||
|
if (c->ncontrols >= VMSIG_MAX_CONTROLS) return -1;
|
||||||
|
id = c->ncontrols++;
|
||||||
|
}
|
||||||
|
core_control_ent* e = &c->controls[id];
|
||||||
|
uint16_t gen = e->gen; /* generation survives the slot memset */
|
||||||
|
memset(e, 0, sizeof *e);
|
||||||
|
e->gen = (uint16_t)(gen + 1); /* new generation for this (re)use */
|
||||||
|
e->ops = ops;
|
||||||
|
e->ctl = ctl;
|
||||||
|
e->active = 1;
|
||||||
|
if (grant) e->grant = *grant; /* otherwise stays zero => default-deny */
|
||||||
|
e->dctx.core = c;
|
||||||
|
e->dctx.ctl_id = id;
|
||||||
|
|
||||||
|
if (ops->subscribe) ops->subscribe(ctl, &e->sub);
|
||||||
|
/* emit_down token is our down_ctx, so emit_down can find this control's grant */
|
||||||
|
if (ops->set_emit_down) ops->set_emit_down(ctl, core_emit_down, &e->dctx);
|
||||||
|
|
||||||
|
int fd = ops->fd ? ops->fd(ctl) : -1;
|
||||||
|
if (fd >= 0) {
|
||||||
|
core_slot* s = core_register_fd(c, fd, EPOLLIN, SLOT_CONTROL);
|
||||||
|
if (!s) return -1;
|
||||||
|
s->cops = ops;
|
||||||
|
s->ctl = ctl;
|
||||||
|
e->slot = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Late subscriber: replay retained MEMCTX (if a context is already published and
|
||||||
|
* this control is qualified). For a control added BEFORE the first publication,
|
||||||
|
* the cell is not yet valid — it receives MEMCTX via the normal multicast in pump_up. */
|
||||||
|
core_memctx_replay(c, id);
|
||||||
|
|
||||||
|
return id; /* ncontrols already bumped when picking id (on growth); reuse does not grow it */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ===== MEMCTX registration: per-endpoint retain cell (called by the adapter on the loop thread) =====
|
||||||
|
* Registers the address-space context adapter's reg hooks. The core holds THIS and does
|
||||||
|
* NOT store a copy of the locator: on delivery/replay it calls reg.describe/share_fd.
|
||||||
|
* valid/epoch are maintained in route/epoch_bump (not here): register only records that
|
||||||
|
* "the adapter is connected". */
|
||||||
|
int core_register_memctx(void* token, const vmsig_memctx_reg* reg) {
|
||||||
|
vmsig_core* c = token;
|
||||||
|
if (!c || !reg || reg->endpoint >= 64) return -1;
|
||||||
|
core_memctx_cell* cell = &c->memctx[reg->endpoint];
|
||||||
|
cell->reg = *reg;
|
||||||
|
cell->registered = 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void core_unregister_memctx(void* token, uint32_t endpoint) {
|
||||||
|
vmsig_core* c = token;
|
||||||
|
if (!c || endpoint >= 64) return;
|
||||||
|
core_memctx_cell* cell = &c->memctx[endpoint];
|
||||||
|
cell->registered = 0;
|
||||||
|
cell->valid = 0;
|
||||||
|
memset(&cell->reg, 0, sizeof cell->reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Install (or, with cb == NULL, remove) the audit callback and its cookie.
 * A NULL core is ignored. */
void vmsig_core_set_audit(vmsig_core* c, void (*cb)(void* ud, const vmsig_audit* a), void* ud) {
    if (c) {
        c->audit_ud = ud;
        c->audit_cb = cb;
    }
}
|
||||||
|
|
||||||
|
/* Hand an audit record to the installed callback; no-op without a core or callback. */
void core_audit(vmsig_core* c, const vmsig_audit* a) {
    if (!c) return;
    if (!c->audit_cb) return;
    c->audit_cb(c->audit_ud, a);
}
|
||||||
|
|
||||||
|
/* Install the lease arbitration policy and its cookie. A NULL core is ignored.
 * The lease table itself is not touched: it starts zeroed because vmsig_core_new
 * allocates the core with calloc, i.e. every cell is free. */
void vmsig_core_set_arb_policy(vmsig_core* c, vmsig_arb_policy cb, void* ud) {
    if (c) {
        c->arb_ud = ud;
        c->arb_cb = cb;
    }
}
|
||||||
|
|
||||||
|
/* Register an arbitrary fd as a SLOT_SOURCE watched for EPOLLIN.
 *
 *   cb      - stored as the slot's on_source callback (required)
 *   user    - cookie passed to cb and on_free
 *   on_free - optional cleanup invoked from vmsig_core_free
 *
 * Returns 0, or -1 on a NULL core/callback, negative fd, or registration failure. */
int core_add_source(vmsig_core* c, int fd, void (*cb)(void* user, uint32_t events),
                    void* user, void (*on_free)(void* user)) {
    if (!c) return -1;
    if (fd < 0) return -1;
    if (!cb) return -1;

    core_slot* src = core_register_fd(c, fd, EPOLLIN, SLOT_SOURCE);
    if (!src) return -1;

    src->source_user = user;
    src->on_source   = cb;
    src->on_free     = on_free;
    return 0;
}
|
||||||
|
|
||||||
|
/* Flag a control for deferred reaping and wake the loop so a reap pass runs (the loop
 * is not stopped). Ignored for a NULL core or an id outside [0, ncontrols). */
void core_request_drop(vmsig_core* c, int ctl_id) {
    if (!c) return;
    int in_range = (ctl_id >= 0) && (ctl_id < c->ncontrols);
    if (!in_range) return;

    core_control_ent* target = &c->controls[ctl_id];
    target->reap = 1;
    core_wake(c);
}
|
||||||
|
|
||||||
|
/* Tear the core down. Order matters:
 *   1. adapters are closed FIRST -- per the original note, their close stops off-loop
 *      workers and unregisters their seams (e.g. memctx) before destruction;
 *   2. every still-active control is closed;
 *   3. on_free runs for each SLOT_SOURCE that has one (e.g. the unix listener);
 *   4. slots, the slot table, the wake fd, the epoll fd and the core are released.
 * The context is not freed: it belongs to its owner. NULL is ignored. */
void vmsig_core_free(vmsig_core* c) {
    if (!c) return;
    int i;

    for (i = 0; i < c->nadapters; i++) {
        core_adapter_ent* ad = &c->adapters[i];
        if (ad->ops->close) ad->ops->close(ad->a);
    }

    for (i = 0; i < c->ncontrols; i++) {
        core_control_ent* ce = &c->controls[i];
        if (!ce->active) continue;
        if (ce->ops->close) ce->ops->close(ce->ctl);
    }

    /* cleanup of fd sources (e.g. unix listener: close listen/janitor fd + free) */
    for (i = 0; i < c->nslots; i++) {
        core_slot* sl = c->slots[i];
        if (sl->role != SLOT_SOURCE) continue;
        if (sl->on_free) sl->on_free(sl->source_user);
    }

    for (i = 0; i < c->nslots; i++) free(c->slots[i]);
    free(c->slots);

    if (c->wake_fd >= 0) close(c->wake_fd);
    if (c->epfd >= 0) close(c->epfd);
    free(c);
}
|
||||||
@@ -0,0 +1,170 @@
|
|||||||
|
#ifndef VMSIG_CORE_INTERNAL_H
|
||||||
|
#define VMSIG_CORE_INTERNAL_H
|
||||||
|
#include "vmsig_core.h"
|
||||||
|
#include <signal.h>
|
||||||
|
|
||||||
|
/* Private internals of the epoll core. Each registered fd carries a
|
||||||
|
* core_slot* in epoll_event.data.ptr; the slot's role decides how to handle it. */
|
||||||
|
|
||||||
|
#define VMSIG_MAX_EVENTS 64
|
||||||
|
#define VMSIG_MAX_ADAPTERS 256 /* up to ~64 VMs * 3 adapters + slack (mode A) */
|
||||||
|
#define VMSIG_MAX_CONTROLS 64 /* concurrent pollers; more => processes (C) */
|
||||||
|
#define VMSIG_ADAPTER_FDS 8 /* max fds per adapter */
|
||||||
|
#define VMSIG_DOWN_PENDING_MAX 256 /* ceiling of DOWN commands per poller in ctx (fairness) */
|
||||||
|
|
||||||
|
/* What a registered fd is for; the loop dispatches on this. */
typedef enum {
    SLOT_WAKEUP,       /* the core's wake/stop eventfd */
    SLOT_ADAPTER,      /* fd reported by an adapter (timerfd/eventfd/socket) */
    SLOT_CTX_TIMING,   /* pacing timerfd of the transfer context */
    SLOT_CONTROL,      /* socket of an out-of-process control */
    SLOT_SOURCE,       /* generic fd with a callback (e.g. a listen fd) */
    SLOT_DEAD          /* detached/reaped: ignored by the loop, eligible for reuse */
} slot_role;
|
||||||
|
|
||||||
|
typedef struct core_slot {
|
||||||
|
slot_role role;
|
||||||
|
int fd;
|
||||||
|
/* for SLOT_ADAPTER */
|
||||||
|
const vmsig_adapter_ops* ops;
|
||||||
|
vmsig_adapter* adapter;
|
||||||
|
uint32_t cookie;
|
||||||
|
/* for SLOT_CONTROL */
|
||||||
|
const vmsig_control_ops* cops;
|
||||||
|
void* ctl;
|
||||||
|
/* for SLOT_SOURCE */
|
||||||
|
void (*on_source)(void* user, uint32_t events);
|
||||||
|
void (*on_free)(void* user); /* invoked at core_free (source cleanup) */
|
||||||
|
void* source_user;
|
||||||
|
} core_slot;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const vmsig_adapter_ops* ops;
|
||||||
|
vmsig_adapter* a;
|
||||||
|
uint32_t endpoint;
|
||||||
|
} core_adapter_ent;
|
||||||
|
|
||||||
|
|
||||||
|
/* ===== Retained address-space context (MEMCTX seam) =====
|
||||||
|
* The core retains per-endpoint "a current context exists in the current epoch" + the
|
||||||
|
* adapter's reg pointer (describe/share_fd/invalidate). Replays to a late qualified
|
||||||
|
* subscriber (CAP_MEMCTX + source_mask + endpoint) re-sharing the RO-fd. Does NOT store a
|
||||||
|
* copy of the locator: on delivery/replay it calls reg.describe (adapter snapshot) +
|
||||||
|
* reg.share_fd (fresh RO-fd). Invalidated on epoch change; cleared on unregister/free. */
|
||||||
|
typedef struct {
|
||||||
|
int registered; /* adapter called register_memctx (reg valid) */
|
||||||
|
int valid; /* a published context exists in the current epoch */
|
||||||
|
uint32_t epoch; /* snapshot epoch (== core epoch[ep] when valid) */
|
||||||
|
vmsig_memctx_reg reg; /* valid when registered */
|
||||||
|
} core_memctx_cell;
|
||||||
|
|
||||||
|
/* ===== Lease layer (arbitration of exclusive ownership of destructive resources) =====
 * There is one cell per (endpoint, lease class). It names the owner by origin and
 * keeps the owner's arb_prio as it was when the lease was taken; owner == 0 means the
 * cell is free. Storing a snapshot instead of consulting the live grant keeps the
 * policy stable if the owner's grant changes after acquisition. */
#define VMSIG_LEASE_CLASSES 3   /* INPUT, POWER, MEMWRITE (== VMSIG_LEASE_CLASS_MAX) */
typedef struct {
    uint32_t owner;        /* owner's origin, (gen<<16)|(id+1); 0 = free */
    uint32_t owner_prio;   /* owner's arb_prio captured at acquisition */
} core_lease_cell;
|
||||||
|
|
||||||
|
struct vmsig_core; /* fwd for core_down_ctx */
|
||||||
|
|
||||||
|
/* DOWN emission context. A pointer to this is the token a control receives through
 * set_emit_down, which lets emit_down tell WHICH control issued a command (grant
 * lookup and enforcement). Its address is stable because it lives inside the fixed
 * controls[] array. */
typedef struct {
    struct vmsig_core* core;     /* owning core */
    int                ctl_id;   /* index of the control in controls[] */
} core_down_ctx;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const vmsig_control_ops* ops;
|
||||||
|
void* ctl;
|
||||||
|
vmsig_sub sub;
|
||||||
|
vmsig_grant grant; /* poller's rights ceiling (default-deny) */
|
||||||
|
core_down_ctx dctx; /* token for emit_down */
|
||||||
|
int active; /* 0 = detached/reaped (slot free) */
|
||||||
|
int reap; /* reap requested (deferred) */
|
||||||
|
core_slot* slot; /* SLOT_CONTROL fd slot (or NULL) */
|
||||||
|
uint32_t pending; /* DOWN commands of this poller in ctx (fairness cap) */
|
||||||
|
uint16_t gen; /* slot generation: +1 on each (re)use */
|
||||||
|
} core_control_ent;
|
||||||
|
|
||||||
|
struct vmsig_core {
|
||||||
|
int epfd;
|
||||||
|
int wake_fd; /* eventfd: nudge + stop */
|
||||||
|
vmsig_ctx* ctx;
|
||||||
|
volatile sig_atomic_t stopping;
|
||||||
|
|
||||||
|
core_adapter_ent adapters[VMSIG_MAX_ADAPTERS];
|
||||||
|
int nadapters;
|
||||||
|
core_control_ent controls[VMSIG_MAX_CONTROLS];
|
||||||
|
int ncontrols;
|
||||||
|
|
||||||
|
core_slot** slots; /* all allocated slots (for free) */
|
||||||
|
int nslots;
|
||||||
|
int cap_slots;
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t epoch[64]; /* per-endpoint VM session epoch */
|
||||||
|
core_memctx_cell memctx[64]; /* per-endpoint retained context */
|
||||||
|
|
||||||
|
core_lease_cell lease[64][VMSIG_LEASE_CLASSES]; /* lease per (endpoint, class) */
|
||||||
|
vmsig_arb_policy arb_cb; /* preemption policy (NULL=default) */
|
||||||
|
void* arb_ud;
|
||||||
|
|
||||||
|
void (*audit_cb)(void* ud, const vmsig_audit* a);
|
||||||
|
void* audit_ud;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Emit an audit record (no-op if no callback is set). Defined in core.c. */
|
||||||
|
void core_audit(vmsig_core* c, const vmsig_audit* a);
|
||||||
|
|
||||||
|
/* Register an fd in epoll + create a slot (see core.c). */
|
||||||
|
core_slot* core_register_fd(vmsig_core* c, int fd, uint32_t epoll_events, slot_role role);
|
||||||
|
|
||||||
|
/* Register an arbitrary fd source with a callback (e.g. a socket listen-fd).
|
||||||
|
* The callback is called on the loop thread when the fd is ready. on_free (may be NULL)
|
||||||
|
* is called at vmsig_core_free to clean up the source's resource. 0/-1. */
|
||||||
|
int core_add_source(vmsig_core* c, int fd, void (*cb)(void* user, uint32_t events),
|
||||||
|
void* user, void (*on_free)(void* user));
|
||||||
|
|
||||||
|
/* Request detaching a control by id (deferred reap after the batch: epoll DEL,
|
||||||
|
* close fd, ops->close). Safe to call from the control's own on_readable. */
|
||||||
|
void core_request_drop(vmsig_core* c, int ctl_id);
|
||||||
|
|
||||||
|
/* emit hooks handed to adapters (UP) and controls (DOWN). Defined in loop.c. */
|
||||||
|
int core_emit_up (void* token, vmsig_event* ev);
|
||||||
|
int core_emit_down(void* token, vmsig_event* ev);
|
||||||
|
|
||||||
|
/* ===== Address-space context (MEMCTX seam; retained context) ===== */
|
||||||
|
/* Context registration hooks (handed to the adapter in vmsig_emit; defined in core.c). */
|
||||||
|
int core_register_memctx (void* token, const vmsig_memctx_reg* reg);
|
||||||
|
void core_unregister_memctx(void* token, uint32_t endpoint);
|
||||||
|
|
||||||
|
/* Multicast MEMCTX to qualified subscribers + mark the retain cell valid
|
||||||
|
* (from pump_up on the VMSIG_EV_MEMCTX trigger; defined in loop.c). */
|
||||||
|
void core_memctx_route(vmsig_core* c, const vmsig_event* trigger);
|
||||||
|
|
||||||
|
/* Replay retained MEMCTX to a single (late) subscriber (from vmsig_core_add_control;
|
||||||
|
* defined in loop.c). */
|
||||||
|
void core_memctx_replay(vmsig_core* c, int ctl_id);
|
||||||
|
|
||||||
|
/* Bump the endpoint's epoch on a destructive lifecycle transition: epoch++, invalidate
|
||||||
|
* the retain cell, emit MEMCTX_INVALIDATED, request re-bootstrap from the adapter.
|
||||||
|
* Observed by the core in pump_up on UP VM_LIFECYCLE (defined in loop.c). */
|
||||||
|
void core_epoch_bump(vmsig_core* c, uint32_t endpoint);
|
||||||
|
|
||||||
|
/* ===== Lease layer (defined in loop.c) ===== */
|
||||||
|
/* Intercept CMD_ACQUIRE/RELEASE/LEASE_STATUS (synchronously from core_emit_down, not in ctx). */
|
||||||
|
void core_lease_acquire(vmsig_core* c, int ctl_id, const vmsig_event* ev);
|
||||||
|
void core_lease_release(vmsig_core* c, int ctl_id, const vmsig_event* ev);
|
||||||
|
void core_lease_status (vmsig_core* c, int ctl_id, const vmsig_event* ev);
|
||||||
|
|
||||||
|
/* Reclaim the lease of a dead control (from core_reap, BEFORE e->active=0). */
|
||||||
|
void core_lease_reap_control(vmsig_core* c, int ctl_id);
|
||||||
|
|
||||||
|
/* Wake the loop (eventfd nudge). Defined in loop.c. */
|
||||||
|
void core_wake(vmsig_core* c);
|
||||||
|
|
||||||
|
#endif /* VMSIG_CORE_INTERNAL_H */
|
||||||
@@ -0,0 +1,620 @@
|
|||||||
|
/* loop.c — non-blocking epoll loop, dispatch, pump up/down, emit hooks,
|
||||||
|
* graceful shutdown. No sleep/polling/busy-wait: every wakeup is an fd. */
|
||||||
|
#include "core_internal.h"
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/epoll.h>
|
||||||
|
|
||||||
|
/* Consume 8-byte counter values from `fd` until a read returns anything other than a
 * full 8 bytes (error, EOF, or short read). */
static void drain_counter_fd(int fd) {
    uint64_t sink;
    for (;;) {
        ssize_t got = read(fd, &sink, sizeof sink);
        if (got != (ssize_t)sizeof sink) break;
    }
}
|
||||||
|
|
||||||
|
/* Nudge the loop by adding 1 to the wake eventfd. The write result is deliberately
 * ignored. */
void core_wake(vmsig_core* c) {
    const uint64_t tick = 1;
    ssize_t ignored = write(c->wake_fd, &tick, sizeof tick);
    (void)ignored;
}
|
||||||
|
|
||||||
|
/* UP emit hook given to adapters: submit the event to the context in the UP direction,
 * then wake the loop (covers emission from off the loop thread). The submit result is
 * returned unchanged. */
int core_emit_up(void* token, vmsig_event* ev) {
    vmsig_core* core = token;
    const int rc = vmsig_ctx_submit(core->ctx, VMSIG_DIR_UP, ev);
    core_wake(core);
    return rc;
}
|
||||||
|
|
||||||
|
/* Build an origin tag: the low 16 bits hold the control's id+1 and the high 16 bits
 * hold the slot generation, i.e. origin = (gen<<16)|(id+1). This lets a reply be
 * addressed to its initiator and lets a reused slot be told apart from the old one. */
static uint32_t origin_pack(int id, uint16_t gen) {
    const uint32_t low  = (uint32_t)(id + 1) & 0xFFFFu;
    const uint32_t high = (uint32_t)gen << 16;
    return high | low;
}
|
||||||
|
/* Live control by origin with generation check; NULL if gone/slot reused. */
|
||||||
|
static core_control_ent* origin_ctl(vmsig_core* c, uint32_t origin) {
|
||||||
|
if (!origin) return NULL;
|
||||||
|
int id = (int)(origin & 0xFFFFu) - 1;
|
||||||
|
uint16_t gen = (uint16_t)(origin >> 16);
|
||||||
|
if (id < 0 || id >= c->ncontrols) return NULL;
|
||||||
|
core_control_ent* e = &c->controls[id];
|
||||||
|
if (!e->active || e->gen != gen) return NULL;
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Capability for a DOWN command (unknown => deny). Destructive CMD_LIFECYCLE
|
||||||
|
* (powerdown/reset, code in inln[0]) requires CAP_POWER, safe ones CAP_LIFECYCLE. */
|
||||||
|
static uint32_t cap_for_down(const vmsig_event* ev) {
|
||||||
|
switch (ev->kind) {
|
||||||
|
case VMSIG_EV_CMD_INPUT:
|
||||||
|
case VMSIG_EV_CMD_QUERY_INPUT: return VMSIG_CAP_INPUT; /* injection / held-key query */
|
||||||
|
case VMSIG_EV_CMD_LIFECYCLE:
|
||||||
|
return (ev->inln[0] == VMSIG_LIFE_POWERDOWN || ev->inln[0] == VMSIG_LIFE_RESET)
|
||||||
|
? VMSIG_CAP_POWER : VMSIG_CAP_LIFECYCLE;
|
||||||
|
case VMSIG_EV_CMD_VM: /* op in inln[0] (vmsig_vm_cmd, op<256) */
|
||||||
|
return (ev->inln[0] == VMSIG_VMOP_RESET || ev->inln[0] == VMSIG_VMOP_POWERDOWN ||
|
||||||
|
ev->inln[0] == VMSIG_VMOP_QUIT) ? VMSIG_CAP_POWER : VMSIG_CAP_VM;
|
||||||
|
case VMSIG_EV_CMD_MEMWRITE: return VMSIG_CAP_MEMWRITE; /* atomic guest-memory write */
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* ===== Lease layer: classification and helpers ===== */
|
||||||
|
|
||||||
|
/* Lease class for a DOWN command. MIRRORS cap_for_down by destructiveness:
|
||||||
|
* - CMD_INPUT -> INPUT;
|
||||||
|
* - CMD_LIFECYCLE powerdown/reset -> POWER;
|
||||||
|
* - CMD_VM reset/powerdown/quit -> POWER;
|
||||||
|
* - everything else (safe/read-only/stream/query) -> -1 (not lease-gated).
|
||||||
|
* CMD_LIFECYCLE and CMD_VM route to DIFFERENT adapters (INPUT/VMHOST) but share ONE
|
||||||
|
* POWER class per endpoint: a single owner of VM destruction (intentional). */
|
||||||
|
static int lease_class_for_down(const vmsig_event* ev) {
|
||||||
|
switch (ev->kind) {
|
||||||
|
case VMSIG_EV_CMD_INPUT:
|
||||||
|
return VMSIG_LEASE_INPUT;
|
||||||
|
case VMSIG_EV_CMD_LIFECYCLE:
|
||||||
|
return (ev->inln[0] == VMSIG_LIFE_POWERDOWN || ev->inln[0] == VMSIG_LIFE_RESET)
|
||||||
|
? VMSIG_LEASE_POWER : -1;
|
||||||
|
case VMSIG_EV_CMD_VM:
|
||||||
|
return (ev->inln[0] == VMSIG_VMOP_RESET || ev->inln[0] == VMSIG_VMOP_POWERDOWN ||
|
||||||
|
ev->inln[0] == VMSIG_VMOP_QUIT) ? VMSIG_LEASE_POWER : -1;
|
||||||
|
case VMSIG_EV_CMD_MEMWRITE:
|
||||||
|
return VMSIG_LEASE_MEMWRITE; /* always destructive (write to shared guest memory) */
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Cap required to lease a class (probing/holding a class without the cap is forbidden). */
|
||||||
|
static uint32_t cap_for_lease_class(int cls) {
|
||||||
|
return cls == VMSIG_LEASE_INPUT ? VMSIG_CAP_INPUT
|
||||||
|
: cls == VMSIG_LEASE_POWER ? VMSIG_CAP_POWER
|
||||||
|
: cls == VMSIG_LEASE_MEMWRITE ? VMSIG_CAP_MEMWRITE : 0u;
|
||||||
|
}
|
||||||
|
/* Source bitmask permitted to hold a lease class: mirrors the grant's source ceiling
|
||||||
|
* (which grant_allows_down enforces on the command itself). Leasing is intercepted
|
||||||
|
* BEFORE grant_allows_down, so source is checked HERE — otherwise a principal without
|
||||||
|
* the required seam could hold someone else's cell (DoS), bypassing source_mask.
|
||||||
|
* INPUT -> SRC_INPUT; POWER -> SRC_INPUT (lifecycle) OR SRC_VMHOST (vm) — one
|
||||||
|
* destructive path suffices; MEMWRITE -> SRC_MEMCTX (lives on the MEMCTX seam). */
|
||||||
|
static uint32_t source_mask_for_lease_class(int cls) {
|
||||||
|
return cls == VMSIG_LEASE_INPUT ? (1u << VMSIG_SRC_INPUT)
|
||||||
|
: cls == VMSIG_LEASE_POWER ? ((1u << VMSIG_SRC_INPUT) | (1u << VMSIG_SRC_VMHOST))
|
||||||
|
: cls == VMSIG_LEASE_MEMWRITE ? (1u << VMSIG_SRC_MEMCTX) : 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Capability to receive an UP event: address-space context (MEMCTX/MEMCTX_INVALIDATED)
|
||||||
|
* -> CAP_MEMCTX; cursor is screen data, available to a GUI observer (OBSERVE) OR an
|
||||||
|
* input actor (INPUT); otherwise CAP_OBSERVE (frames/SEAM/generic). The grant_allows_up
|
||||||
|
* gate checks intersection, so OBSERVE|INPUT means "either of the two". */
|
||||||
|
static uint32_t cap_for_up(const vmsig_event* ev) {
|
||||||
|
if (ev->kind == VMSIG_EV_CURSOR_STATE) return VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT;
|
||||||
|
return (ev->source == VMSIG_SRC_MEMCTX) ? VMSIG_CAP_MEMCTX : VMSIG_CAP_OBSERVE;
|
||||||
|
}
|
||||||
|
static int grant_allows_down(const vmsig_grant* g, const vmsig_event* ev) {
|
||||||
|
if (ev->endpoint >= 64) return 0; /* 64-bit mask: <=64 VMs/cores */
|
||||||
|
if (!(g->endpoint_mask & (1ull << ev->endpoint))) return 0;
|
||||||
|
if (!(g->source_mask & (1u << ev->source))) return 0; /* source ceiling on DOWN too */
|
||||||
|
uint32_t need = cap_for_down(ev);
|
||||||
|
return need && (g->cap_mask & need);
|
||||||
|
}
|
||||||
|
static int grant_allows_up(const vmsig_grant* g, const vmsig_event* ev) {
|
||||||
|
if (ev->endpoint >= 64) return 0;
|
||||||
|
if (!(g->cap_mask & cap_for_up(ev))) return 0;
|
||||||
|
if (!(g->endpoint_mask & (1ull << ev->endpoint))) return 0;
|
||||||
|
if (!(g->source_mask & (1u << ev->source))) return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find an adapter by (endpoint, source). NULL if none. Used by pump_down to route a
|
||||||
|
* DOWN command to its adapter. */
|
||||||
|
static core_adapter_ent* core_find_adapter(vmsig_core* c, uint32_t endpoint,
|
||||||
|
vmsig_source source) {
|
||||||
|
for (int i = 0; i < c->nadapters; i++) {
|
||||||
|
core_adapter_ent* e = &c->adapters[i];
|
||||||
|
if (e->ops->source == source && e->endpoint == endpoint) return e;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== Lease layer: grant/release/status/finalization/reclaim =====
|
||||||
|
* Intercepted in core_emit_down BEFORE grant_allows_down (synchronous, not in ctx, does
|
||||||
|
* not touch pending). Addressed UP replies to the initiator via core_emit_up
|
||||||
|
* (origin+generation). */
|
||||||
|
|
||||||
|
/* Addressed UP reply to the initiator of a lease request. */
|
||||||
|
static void lease_reply(vmsig_core* c, const vmsig_event* req, vmsig_kind kind,
|
||||||
|
uint32_t cls, uint32_t reason) {
|
||||||
|
vmsig_event up;
|
||||||
|
memset(&up, 0, sizeof up);
|
||||||
|
up.kind = kind; up.source = VMSIG_SRC_CORE; up.dir = VMSIG_DIR_UP;
|
||||||
|
up.prio = VMSIG_PRIO_URGENT; up.endpoint = req->endpoint; up.origin = req->origin;
|
||||||
|
vmsig_lease_req lr = { cls, reason };
|
||||||
|
memcpy(up.inln, &lr, sizeof lr);
|
||||||
|
core_emit_up(c, &up);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lease denial: audit (visibility of authorization/contention denials — capability/
|
||||||
|
* endpoint enumeration via ACQUIRE is observable) + addressed LEASE_DENIED to initiator. */
|
||||||
|
static void lease_deny(vmsig_core* c, const vmsig_event* req, uint32_t principal,
|
||||||
|
uint32_t cls, uint32_t reason) {
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_LEASE_DENIED, principal, req->endpoint, cls, reason };
|
||||||
|
core_audit(c, &a);
|
||||||
|
lease_reply(c, req, VMSIG_EV_LEASE_DENIED, cls, reason);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Principal of the cell owner (for STATUS); 0 if owner is dead/absent. */
|
||||||
|
static uint32_t lease_owner_principal(vmsig_core* c, uint32_t owner) {
|
||||||
|
core_control_ent* e = origin_ctl(c, owner);
|
||||||
|
return e ? e->grant.principal : 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* IMPORTANT (layer isolation): signaling does NOT release held keys on lease loss and
|
||||||
|
* does NOT track held state at all. held is the ACTUATOR's record (vmctl); release is the
|
||||||
|
* control's decision. On owner change/reset the cell is simply freed; stuck keys remain
|
||||||
|
* the control's concern (it can issue CMD_QUERY_INPUT and release its own while owner). */
|
||||||
|
|
||||||
|
void core_lease_acquire(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
|
||||||
|
core_control_ent* e = &c->controls[ctl_id];
|
||||||
|
uint32_t cls = ((const vmsig_lease_req*)ev->inln)->cls;
|
||||||
|
uint32_t ep = ev->endpoint;
|
||||||
|
|
||||||
|
/* 1. validate class/endpoint/grant (default-deny; every denial is audited). */
|
||||||
|
if (cls >= VMSIG_LEASE_CLASS_MAX) {
|
||||||
|
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_BADCLASS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (ep >= 64 || !(e->grant.endpoint_mask & (1ull << ep))) {
|
||||||
|
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOGRANT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) {
|
||||||
|
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOCAP);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* source ceiling: holding a class without rights to its seam is forbidden (else a
|
||||||
|
* DoS hold of someone else's cell bypassing source_mask, since interception is
|
||||||
|
* BEFORE grant_allows_down). */
|
||||||
|
if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) {
|
||||||
|
lease_deny(c, ev, e->grant.principal, cls, VMSIG_LEASE_DENY_NOGRANT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
core_lease_cell* cell = &c->lease[ep][cls];
|
||||||
|
uint32_t me = ev->origin;
|
||||||
|
|
||||||
|
/* 2a. free OR dead owner (origin_ctl==NULL) => take as if free. */
|
||||||
|
core_control_ent* owner_e = cell->owner ? origin_ctl(c, cell->owner) : NULL;
|
||||||
|
if (cell->owner == 0 || !owner_e) {
|
||||||
|
cell->owner = me; cell->owner_prio = e->grant.arb_prio;
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_LEASE_GRANTED, e->grant.principal, ep, cls, 0 };
|
||||||
|
core_audit(c, &a);
|
||||||
|
lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 2b. owner is the caller itself => idempotent GRANTED. */
|
||||||
|
if (cell->owner == me) {
|
||||||
|
lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 2c. held by a LIVE other owner => policy. incumbent is the live grant. */
|
||||||
|
vmsig_arb_decision dec;
|
||||||
|
if (c->arb_cb) {
|
||||||
|
dec = c->arb_cb(c->arb_ud, ep, cls, &owner_e->grant, &e->grant);
|
||||||
|
} else {
|
||||||
|
dec = (e->grant.arb_prio > cell->owner_prio) ? VMSIG_ARB_PREEMPT : VMSIG_ARB_DENY;
|
||||||
|
}
|
||||||
|
if (dec != VMSIG_ARB_PREEMPT) {
|
||||||
|
/* equal priority => owner keeps it (HELD); strictly lower => LOWER_PRIO. */
|
||||||
|
uint32_t reason = (e->grant.arb_prio < cell->owner_prio)
|
||||||
|
? VMSIG_LEASE_DENY_LOWER_PRIO : VMSIG_LEASE_DENY_HELD;
|
||||||
|
lease_deny(c, ev, e->grant.principal, cls, reason);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* PREEMPT: notify the old owner (REVOKED), switch owner, grant to the new one.
|
||||||
|
* signaling does NOT release held keys (that is the control's decision): the
|
||||||
|
* ex-owner is responsible for its stuck keys; the new owner can query held
|
||||||
|
* (CMD_QUERY_INPUT) and release them. */
|
||||||
|
uint32_t old_owner = cell->owner;
|
||||||
|
{
|
||||||
|
vmsig_event rv;
|
||||||
|
memset(&rv, 0, sizeof rv);
|
||||||
|
rv.endpoint = ep; rv.origin = old_owner;
|
||||||
|
lease_reply(c, &rv, VMSIG_EV_LEASE_REVOKED, cls, 0);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_LEASE_REVOKED, owner_e->grant.principal, ep, cls, 0 };
|
||||||
|
core_audit(c, &a);
|
||||||
|
}
|
||||||
|
cell->owner = me; cell->owner_prio = e->grant.arb_prio;
|
||||||
|
{
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_LEASE_GRANTED, e->grant.principal, ep, cls, 0 };
|
||||||
|
core_audit(c, &a);
|
||||||
|
}
|
||||||
|
lease_reply(c, ev, VMSIG_EV_LEASE_GRANTED, cls, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle CMD_RELEASE from control `ctl_id`. The request is silently ignored unless the
 * class and endpoint are in range, the control's grant covers the endpoint, capability
 * and source for that class, and the cell is currently owned by `ev->origin`. On
 * success the cell is cleared and an addressed LEASE_RELEASED reply is sent. Held keys
 * are not touched here; releasing them is the control's own decision. */
void core_lease_release(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
    core_control_ent* e = &c->controls[ctl_id];

    /* Copy the request out of the inline area rather than aliasing it through a struct
     * pointer, mirroring how replies are written with memcpy. */
    vmsig_lease_req req;
    memcpy(&req, ev->inln, sizeof req);
    const uint32_t cls = req.cls;
    const uint32_t ep  = ev->endpoint;

    /* Same endpoint/cap/source gate as acquire, applied before any action. */
    if (cls >= VMSIG_LEASE_CLASS_MAX || ep >= 64) return;
    if (!(e->grant.endpoint_mask & (1ull << ep))) return;
    if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) return;
    if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) return;

    core_lease_cell* cell = &c->lease[ep][cls];
    if (cell->owner != ev->origin) return;   /* not the owner => no-op */

    cell->owner      = 0;
    cell->owner_prio = 0;
    lease_reply(c, ev, VMSIG_EV_LEASE_RELEASED, cls, 0);
}
|
||||||
|
|
||||||
|
/* Handle CMD_LEASE_STATUS from control `ctl_id`. The probe is silently ignored unless
 * the class and endpoint are in range and the control's grant covers the endpoint,
 * capability and source for that class, so busy-state and the owner's principal are
 * not disclosed to controls without those rights. Otherwise an addressed LEASE_STATUS
 * reply is sent carrying {cls, busy, owner principal}; a cell whose recorded owner no
 * longer resolves to a live control is reported as not busy with principal 0. */
void core_lease_status(vmsig_core* c, int ctl_id, const vmsig_event* ev) {
    core_control_ent* e = &c->controls[ctl_id];

    /* Copy the request out of the inline area rather than aliasing it through a struct
     * pointer, mirroring how replies are written with memcpy. */
    vmsig_lease_req req;
    memcpy(&req, ev->inln, sizeof req);
    const uint32_t cls = req.cls;
    const uint32_t ep  = ev->endpoint;

    if (cls >= VMSIG_LEASE_CLASS_MAX || ep >= 64) return;
    if (!(e->grant.endpoint_mask & (1ull << ep))) return;
    if (!(e->grant.cap_mask & cap_for_lease_class((int)cls))) return;
    if (!(e->grant.source_mask & source_mask_for_lease_class((int)cls))) return;

    /* One lookup answers both "is it busy" and "who holds it". */
    core_lease_cell*  cell   = &c->lease[ep][cls];
    core_control_ent* holder = cell->owner ? origin_ctl(c, cell->owner) : NULL;
    const uint32_t    busy   = holder ? 1u : 0u;

    vmsig_event up;
    memset(&up, 0, sizeof up);
    up.kind     = VMSIG_EV_LEASE_STATUS;
    up.source   = VMSIG_SRC_CORE;
    up.dir      = VMSIG_DIR_UP;
    up.prio     = VMSIG_PRIO_URGENT;
    up.endpoint = ep;
    up.origin   = ev->origin;

    vmsig_lease_status st = { cls, busy, holder ? holder->grant.principal : 0u };
    memcpy(up.inln, &st, sizeof st);
    core_emit_up(c, &up);
}
|
||||||
|
|
||||||
|
/* Free every lease cell owned by control slot `ctl_id` and audit each one as
 * LEASE_RECLAIMED. Ownership is matched against the origin tag built from the slot's
 * current generation, so this must run while that generation still identifies the
 * departing control (core_reap calls it before clearing `active`). Only the cells are
 * cleared; held keys are not this layer's concern. */
void core_lease_reap_control(vmsig_core* c, int ctl_id) {
    core_control_ent* leaving = &c->controls[ctl_id];
    const uint32_t    tag     = origin_pack(ctl_id, leaving->gen);

    for (uint32_t ep = 0; ep < 64; ep++) {
        for (int cls = 0; cls < VMSIG_LEASE_CLASSES; cls++) {
            core_lease_cell* cell = &c->lease[ep][cls];
            if (cell->owner == tag) {
                cell->owner      = 0;
                cell->owner_prio = 0;

                vmsig_audit a = { VMSIG_AUDIT_LEASE_RECLAIMED,
                                  leaving->grant.principal, ep, (uint32_t)cls, 0 };
                core_audit(c, &a);
            }
        }
    }
}
|
||||||
|
|
||||||
|
/* DOWN emit from a control: enforcement against THIS control's grant. */
|
||||||
|
int core_emit_down(void* token, vmsig_event* ev) {
|
||||||
|
core_down_ctx* d = token;
|
||||||
|
vmsig_core* c = d->core;
|
||||||
|
core_control_ent* e = &c->controls[d->ctl_id];
|
||||||
|
if (!e->active) { vmsig_payload_release(ev); return -1; }
|
||||||
|
|
||||||
|
/* Lease arbitration is intercepted HERE (synchronous, not in ctx, does not touch
|
||||||
|
* pending). origin is needed for the addressed reply and as the owner key. */
|
||||||
|
if (ev->kind == VMSIG_EV_CMD_ACQUIRE || ev->kind == VMSIG_EV_CMD_RELEASE ||
|
||||||
|
ev->kind == VMSIG_EV_CMD_LEASE_STATUS) {
|
||||||
|
ev->origin = origin_pack(d->ctl_id, e->gen);
|
||||||
|
if (ev->kind == VMSIG_EV_CMD_ACQUIRE) core_lease_acquire(c, d->ctl_id, ev);
|
||||||
|
else if (ev->kind == VMSIG_EV_CMD_RELEASE) core_lease_release(c, d->ctl_id, ev);
|
||||||
|
else core_lease_status(c, d->ctl_id, ev);
|
||||||
|
vmsig_payload_release(ev);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!grant_allows_down(&e->grant, ev)) {
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_DOWN_DENIED, e->grant.principal,
|
||||||
|
ev->endpoint, (uint32_t)ev->kind, 0 };
|
||||||
|
core_audit(c, &a); /* rejected by policy (endpoint/source/class) */
|
||||||
|
vmsig_payload_release(ev);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lease GATE: destruction is passed ONLY by the class's current owner.
|
||||||
|
* A non-owner (or an owner whose slot is dead) => drop + audit LEASE_DENIED
|
||||||
|
* (distinguishable from grant-deny). A free cell => also drop: destruction cannot be
|
||||||
|
* used without an explicit lease. Safe/read-only commands (cls<0) are not gated. */
|
||||||
|
{
|
||||||
|
int cls = lease_class_for_down(ev);
|
||||||
|
if (cls >= 0 && ev->endpoint < 64) {
|
||||||
|
uint32_t me = origin_pack(d->ctl_id, e->gen);
|
||||||
|
uint32_t owner = c->lease[ev->endpoint][cls].owner;
|
||||||
|
if (owner != me || !origin_ctl(c, owner)) {
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_LEASE_DENIED, e->grant.principal,
|
||||||
|
ev->endpoint, (uint32_t)ev->kind, 0 };
|
||||||
|
core_audit(c, &a);
|
||||||
|
vmsig_payload_release(ev);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (e->pending >= VMSIG_DOWN_PENDING_MAX) { /* fairness/DoS: DOWN cap per poller */
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_DOWN_DENIED, e->grant.principal,
|
||||||
|
ev->endpoint, (uint32_t)ev->kind, 0 };
|
||||||
|
core_audit(c, &a);
|
||||||
|
vmsig_payload_release(ev);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
ev->origin = origin_pack(d->ctl_id, e->gen); /* addressed reply + pending accounting */
|
||||||
|
e->pending++;
|
||||||
|
int r = vmsig_ctx_submit(c->ctx, VMSIG_DIR_DOWN, ev);
|
||||||
|
if (r != 0) e->pending--; /* not enqueued (drop/err) */
|
||||||
|
core_wake(c);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int sub_match(const vmsig_sub* sub, const vmsig_event* ev) {
|
||||||
|
if (sub->source_mask && !(sub->source_mask & (1u << ev->source))) return 0;
|
||||||
|
if (ev->prio < sub->prio_min) return 0;
|
||||||
|
if (sub->endpoint_mask) {
|
||||||
|
if (ev->endpoint >= 64 || !(sub->endpoint_mask & (1ull << ev->endpoint))) return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== Address-space context (MEMCTX seam): multicast / retain-replay / epoch =====
|
||||||
|
* The core vends ONE coherent datum per-endpoint: kcr3+locator paired with an RO-fd. A
|
||||||
|
* MEMCTX trigger from the adapter => the core builds the AUTHORITATIVE locator from the
|
||||||
|
* adapter snapshot (reg.describe) + stamps the epoch (single source of truth) and
|
||||||
|
* distributes to qualified subscribers with re-sharing of the RO-fd. The same path serves
|
||||||
|
* replay to a late subscriber. */
|
||||||
|
|
||||||
|
/* Build a MEMCTX delivery event for endpoint ep. segs are borrowed from the adapter's
|
||||||
|
* buffer (delivery is synchronous on the loop thread; ownership is not transferred).
|
||||||
|
* 1 — built. */
|
||||||
|
static int core_memctx_build(vmsig_core* c, uint32_t ep, vmsig_event* ev) {
|
||||||
|
if (ep >= 64) return 0;
|
||||||
|
core_memctx_cell* cell = &c->memctx[ep];
|
||||||
|
if (!cell->registered || !cell->reg.describe) return 0;
|
||||||
|
|
||||||
|
vmsig_memctx pod;
|
||||||
|
memset(&pod, 0, sizeof pod);
|
||||||
|
const vmsig_memseg* segs = NULL;
|
||||||
|
uint32_t nseg = 0;
|
||||||
|
cell->reg.describe(cell->reg.ctx, &pod, &segs, &nseg);
|
||||||
|
pod.epoch = c->epoch[ep]; /* core stamps the epoch */
|
||||||
|
pod.nseg = nseg;
|
||||||
|
pod.flags |= VMSIG_MEMCTX_RDONLY; /* outward — always read-only */
|
||||||
|
|
||||||
|
memset(ev, 0, sizeof *ev);
|
||||||
|
ev->kind = VMSIG_EV_MEMCTX; ev->source = VMSIG_SRC_MEMCTX; ev->dir = VMSIG_DIR_UP;
|
||||||
|
ev->prio = VMSIG_PRIO_NORMAL; ev->endpoint = ep;
|
||||||
|
memcpy(ev->inln, &pod, sizeof pod);
|
||||||
|
ev->payload.data = (void*)segs; /* borrowed: owner is the adapter */
|
||||||
|
ev->payload.len = (size_t)nseg * sizeof(vmsig_memseg);
|
||||||
|
ev->payload.codec = VMSIG_CODEC_MEMCTX;
|
||||||
|
ev->payload.flags = VMSIG_PL_BORROWED;
|
||||||
|
ev->payload.release = NULL;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Deliver MEMCTX to one qualified control: fresh RO-fd from reg.share_fd
|
||||||
|
* (socket -> cmsg, in-proc -> direct int), attach_memctx, close fd (the core does not own
|
||||||
|
* the fd). On success — audit MEMCTX_GRANTED. */
|
||||||
|
static void core_memctx_deliver_one(vmsig_core* c, core_memctx_cell* cell,
|
||||||
|
core_control_ent* e, const vmsig_event* ev) {
|
||||||
|
if (!e->ops->attach_memctx) return; /* control does not accept MEMCTX */
|
||||||
|
int fd = cell->reg.share_fd ? cell->reg.share_fd(cell->reg.ctx) : -1;
|
||||||
|
int r = e->ops->attach_memctx(e->ctl, ev, fd);
|
||||||
|
if (fd >= 0) close(fd); /* the core does not own the ro-fd */
|
||||||
|
if (r == 0) {
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_MEMCTX_GRANTED, e->grant.principal,
|
||||||
|
ev->endpoint, 0, 0 };
|
||||||
|
core_audit(c, &a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* React to a MEMCTX trigger: build the delivery event for the trigger's endpoint, mark
 * the endpoint's context as valid for the current epoch, and deliver it to every active
 * control whose grant and subscription both accept the event. Does nothing when the
 * endpoint is out of range, not registered, or the event cannot be built. */
void core_memctx_route(vmsig_core* c, const vmsig_event* trigger) {
    const uint32_t ep = trigger->endpoint;
    if (ep >= 64) return;

    core_memctx_cell* cell = &c->memctx[ep];
    if (!cell->registered) return;

    vmsig_event out;
    if (!core_memctx_build(c, ep, &out)) return;

    /* remember that a context for this epoch has been published */
    cell->valid = 1;
    cell->epoch = c->epoch[ep];

    for (int idx = 0; idx < c->ncontrols; idx++) {
        core_control_ent* ctl = &c->controls[idx];
        if (!ctl->active) continue;
        if (!grant_allows_up(&ctl->grant, &out)) continue;
        if (!sub_match(&ctl->sub, &out)) continue;
        core_memctx_deliver_one(c, cell, ctl, &out);
    }
}
|
||||||
|
|
||||||
|
/* Replay the retained address-space context to a single control: for every endpoint
 * that is registered and currently marked valid, rebuild the MEMCTX event and deliver it
 * if the control's grant and subscription accept it. Ignores an out-of-range or
 * inactive control. */
void core_memctx_replay(vmsig_core* c, int ctl_id) {
    if (ctl_id < 0 || ctl_id >= c->ncontrols) return;

    core_control_ent* ctl = &c->controls[ctl_id];
    if (!ctl->active) return;

    for (uint32_t ep = 0; ep < 64; ep++) {
        core_memctx_cell* cell = &c->memctx[ep];
        if (!cell->registered) continue;
        if (!cell->valid) continue;

        vmsig_event out;
        if (!core_memctx_build(c, ep, &out)) continue;

        const int wanted = grant_allows_up(&ctl->grant, &out) && sub_match(&ctl->sub, &out);
        if (wanted) core_memctx_deliver_one(c, cell, ctl, &out);
    }
}
|
||||||
|
|
||||||
|
/* Advance the epoch of `endpoint` and invalidate its retained context. Steps:
 *   1. increment the endpoint's epoch counter and clear the cell's `valid` flag, so the
 *      previous context is no longer replayed;
 *   2. emit an unaddressed URGENT MEMCTX_INVALIDATED event carrying {endpoint, new epoch};
 *   3. if the endpoint is registered with an invalidate hook, call it with the new epoch
 *      so the adapter can re-publish its context.
 * Out-of-range endpoints are ignored. */
void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
    if (endpoint >= 64) return;

    c->epoch[endpoint]++;
    core_memctx_cell* cell = &c->memctx[endpoint];
    cell->valid = 0;

    vmsig_memctx_inv notice = { endpoint, c->epoch[endpoint] };

    vmsig_event up;
    memset(&up, 0, sizeof up);
    up.kind     = VMSIG_EV_MEMCTX_INVALIDATED;
    up.source   = VMSIG_SRC_MEMCTX;
    up.dir      = VMSIG_DIR_UP;
    up.prio     = VMSIG_PRIO_URGENT;
    up.endpoint = endpoint;
    memcpy(up.inln, &notice, sizeof notice);
    core_emit_up(c, &up);   /* origin is 0, so pump_up broadcasts it through the grant gate */

    if (cell->registered && cell->reg.invalidate) {
        cell->reg.invalidate(cell->reg.ctx, c->epoch[endpoint]);
    }
}
|
||||||
|
|
||||||
|
/* UP: drain the context queue and dispatch to subscribed controls */
|
||||||
|
static void pump_up(vmsig_core* c) {
|
||||||
|
vmsig_event ev;
|
||||||
|
while (vmsig_ctx_next(c->ctx, VMSIG_DIR_UP, &ev) == 1) {
|
||||||
|
if (ev.kind == VMSIG_EV_MEMCTX) {
|
||||||
|
/* Context trigger: the core builds the authoritative locator (adapter snapshot
|
||||||
|
* + epoch stamp) and distributes to qualified holders with re-sharing of the
|
||||||
|
* RO-fd. The trigger itself is NOT delivered as an ordinary event. */
|
||||||
|
core_memctx_route(c, &ev);
|
||||||
|
vmsig_payload_release(&ev); /* inline trigger (release=NULL) — harmless */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (ev.kind == VMSIG_EV_VM_LIFECYCLE && ev.origin == 0) {
|
||||||
|
/* Epoch-transition observation: a destructive async transition (VMHOST
|
||||||
|
* broadcast) invalidates the address-space context. NOT continue — VM_LIFECYCLE
|
||||||
|
* still goes to subscribers below via the normal broadcast. */
|
||||||
|
const vmsig_vm_state* vs = (const vmsig_vm_state*)ev.inln;
|
||||||
|
if (vs->state == VMSIG_VM_RESET || vs->state == VMSIG_VM_POWERDOWN ||
|
||||||
|
vs->state == VMSIG_VM_SHUTDOWN)
|
||||||
|
core_epoch_bump(c, ev.endpoint);
|
||||||
|
}
|
||||||
|
if (ev.origin) {
|
||||||
|
/* addressed reply ONLY to the initiator (origin+generation). The command was
|
||||||
|
* already authorized by the grant => we deliver the reply without re-check; if
|
||||||
|
* the initiator is gone/slot reused — we drop (private data, not broadcast). */
|
||||||
|
core_control_ent* e = origin_ctl(c, ev.origin);
|
||||||
|
if (e && e->ops->deliver) e->ops->deliver(e->ctl, &ev);
|
||||||
|
} else {
|
||||||
|
/* unaddressed event — broadcast; effective = grant ∩ sub */
|
||||||
|
for (int i = 0; i < c->ncontrols; i++) {
|
||||||
|
core_control_ent* e = &c->controls[i];
|
||||||
|
if (!e->active) continue;
|
||||||
|
if (grant_allows_up(&e->grant, &ev) && sub_match(&e->sub, &ev) && e->ops->deliver)
|
||||||
|
e->ops->deliver(e->ctl, &ev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vmsig_payload_release(&ev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* DOWN: drain the queue and route the command to the adapter (source+endpoint) */
|
||||||
|
static void pump_down(vmsig_core* c) {
|
||||||
|
vmsig_event ev;
|
||||||
|
while (vmsig_ctx_next(c->ctx, VMSIG_DIR_DOWN, &ev) == 1) {
|
||||||
|
core_control_ent* oe = origin_ctl(c, ev.origin); /* command has left ctx */
|
||||||
|
if (oe && oe->pending) oe->pending--; /* THE ONLY decrement */
|
||||||
|
|
||||||
|
/* In-flight fencing: destruction whose origin is NO LONGER the class owner (lease
|
||||||
|
* lost between the emit_down gate and dequeue) is dropped BEFORE actuation. Does
|
||||||
|
* NOT finalize (finalization is done by acquire/reap) — else a double key-up.
|
||||||
|
* pending is NOT touched here (already decremented above). */
|
||||||
|
int cls = lease_class_for_down(&ev);
|
||||||
|
if (cls >= 0 && ev.endpoint < 64 && c->lease[ev.endpoint][cls].owner != ev.origin) {
|
||||||
|
/* dropping destruction that lost the lease is observable (origin owner's principal). */
|
||||||
|
vmsig_audit a = { VMSIG_AUDIT_LEASE_DENIED, lease_owner_principal(c, ev.origin),
|
||||||
|
ev.endpoint, (uint32_t)ev.kind, (uint32_t)cls };
|
||||||
|
core_audit(c, &a);
|
||||||
|
vmsig_payload_release(&ev);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
core_adapter_ent* e = core_find_adapter(c, ev.endpoint, ev.source);
|
||||||
|
if (e && e->ops->submit) e->ops->submit(e->a, &ev);
|
||||||
|
vmsig_payload_release(&ev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Deferred reap of detached controls: after the batch (safe — not inside their own
|
||||||
|
* on_readable). epoll DEL + mark slot dead + ops->close. */
|
||||||
|
static void core_reap(vmsig_core* c) {
|
||||||
|
for (int i = 0; i < c->ncontrols; i++) {
|
||||||
|
core_control_ent* e = &c->controls[i];
|
||||||
|
if (!e->reap || !e->active) continue;
|
||||||
|
if (e->slot) {
|
||||||
|
epoll_ctl(c->epfd, EPOLL_CTL_DEL, e->slot->fd, NULL);
|
||||||
|
e->slot->role = SLOT_DEAD;
|
||||||
|
}
|
||||||
|
core_lease_reap_control(c, i); /* return leases + finalize held BEFORE active=0 */
|
||||||
|
if (e->ops->close) e->ops->close(e->ctl);
|
||||||
|
e->active = 0;
|
||||||
|
e->reap = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int vmsig_core_run(vmsig_core* c) {
|
||||||
|
if (!c) return -1;
|
||||||
|
struct epoll_event evs[VMSIG_MAX_EVENTS];
|
||||||
|
while (!__atomic_load_n(&c->stopping, __ATOMIC_ACQUIRE)) {
|
||||||
|
int n = epoll_wait(c->epfd, evs, VMSIG_MAX_EVENTS, -1);
|
||||||
|
if (n < 0) { if (errno == EINTR) continue; return -1; }
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
core_slot* s = (core_slot*)evs[i].data.ptr;
|
||||||
|
switch (s->role) {
|
||||||
|
case SLOT_WAKEUP:
|
||||||
|
drain_counter_fd(s->fd); /* stopping is checked in while */
|
||||||
|
break;
|
||||||
|
case SLOT_ADAPTER:
|
||||||
|
if (s->ops->on_readiness)
|
||||||
|
s->ops->on_readiness(s->adapter, s->cookie, evs[i].events);
|
||||||
|
break;
|
||||||
|
case SLOT_CTX_TIMING:
|
||||||
|
drain_counter_fd(s->fd);
|
||||||
|
break;
|
||||||
|
case SLOT_CONTROL:
|
||||||
|
if (s->cops->on_readable)
|
||||||
|
s->cops->on_readable(s->ctl);
|
||||||
|
break;
|
||||||
|
case SLOT_SOURCE:
|
||||||
|
if (s->on_source)
|
||||||
|
s->on_source(s->source_user, evs[i].events);
|
||||||
|
break;
|
||||||
|
case SLOT_DEAD:
|
||||||
|
break; /* detached — ignore */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pump_up(c);
|
||||||
|
pump_down(c);
|
||||||
|
core_reap(c);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ask the loop to stop: set the `stopping` flag with release ordering (it may be set
 * from another thread) and wake the loop so it re-checks the flag. NULL is ignored. */
void vmsig_core_stop(vmsig_core* c) {
    if (c == NULL) return;

    __atomic_store_n(&c->stopping, 1, __ATOMIC_RELEASE);
    core_wake(c);
}
|
||||||
+203
@@ -0,0 +1,203 @@
|
|||||||
|
/* ctx.c — transfer context: priority, ordering, protocol timing.
|
||||||
|
* This is the SISC-critical seam. No behavioral timing here: commands arrive
|
||||||
|
* already decided by control; the context only orders and paces them. */
|
||||||
|
#include "ctx_internal.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <sys/timerfd.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
/* Default depth ceiling for a single band (per source,dir) when no policy is set. */
|
||||||
|
#define VMSIG_CTX_DEFAULT_INFLIGHT 4096
|
||||||
|
|
||||||
|
static uint64_t now_ns(void) {
|
||||||
|
struct timespec ts;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- node recycling (free-list under the shared mutex) ------------------- */
|
||||||
|
/* Obtain a node: pop the head of the context's free-list when it is non-empty, otherwise
 * allocate a fresh one with malloc (which may return NULL). The node's contents are not
 * initialised. */
static ev_node* node_get(vmsig_ctx* c) {
    ev_node* recycled = c->freelist;
    if (recycled == NULL) {
        return malloc(sizeof *recycled);
    }
    c->freelist = recycled->next;
    return recycled;
}
|
||||||
|
/* Return a node to the context's free-list by pushing it onto the head. The node is not
 * freed and its event is not released here. */
static void node_put(vmsig_ctx* c, ev_node* n) {
    ev_node* old_head = c->freelist;
    n->next     = old_head;
    c->freelist = n;
}
|
||||||
|
|
||||||
|
vmsig_ctx* vmsig_ctx_new(void) {
|
||||||
|
vmsig_ctx* c = calloc(1, sizeof *c);
|
||||||
|
if (!c) return NULL;
|
||||||
|
if (pthread_mutex_init(&c->lock, NULL) != 0) { free(c); return NULL; }
|
||||||
|
for (int d = 0; d < 2; d++) {
|
||||||
|
c->dir[d].timing_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
|
||||||
|
if (c->dir[d].timing_fd < 0) {
|
||||||
|
for (int k = 0; k < d; k++) close(c->dir[k].timing_fd);
|
||||||
|
pthread_mutex_destroy(&c->lock);
|
||||||
|
free(c);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destroy a context: release the payload of every event still queued in any
 * band, free the nodes, close both timing fds, free the recycled nodes,
 * destroy the mutex and free the context itself. NULL is accepted and ignored. */
void vmsig_ctx_free(vmsig_ctx* c) {
    if (c == NULL)
        return;

    for (int d = 0; d < 2; d++) {
        ctx_dir* side = &c->dir[d];
        for (int p = 0; p < VMSIG_PRIO_MAX; p++) {
            ev_node* cur = side->band[p].head;
            while (cur != NULL) {
                ev_node* after = cur->next;
                vmsig_payload_release(&cur->ev);   /* still-queued events own their payload */
                free(cur);
                cur = after;
            }
        }
        if (side->timing_fd >= 0)
            close(side->timing_fd);
    }

    /* free-list nodes carry no payload any more: plain free is enough */
    for (ev_node* spare = c->freelist; spare != NULL; ) {
        ev_node* after = spare->next;
        free(spare);
        spare = after;
    }

    pthread_mutex_destroy(&c->lock);
    free(c);
}
|
||||||
|
|
||||||
|
int vmsig_ctx_set_policy(vmsig_ctx* c, vmsig_source src, vmsig_dir dir,
|
||||||
|
vmsig_prio default_prio, const vmsig_timing* t) {
|
||||||
|
if (!c || src >= VMSIG_SRC_MAX || dir > VMSIG_DIR_DOWN) return -1;
|
||||||
|
pthread_mutex_lock(&c->lock);
|
||||||
|
ctx_policy* pol = &c->policy[src][dir];
|
||||||
|
pol->default_prio = default_prio;
|
||||||
|
if (t) pol->timing = *t; else memset(&pol->timing, 0, sizeof pol->timing);
|
||||||
|
pol->policy_set = 1;
|
||||||
|
pthread_mutex_unlock(&c->lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append node `n` at the tail of band `b` and bump the band's element count. */
static void band_push_tail(ev_band* b, ev_node* n) {
    n->next = NULL;
    if (b->tail == NULL)
        b->head = n;            /* empty band: n is also the head */
    else
        b->tail->next = n;
    b->tail = n;
    b->count += 1;
}
|
||||||
|
|
||||||
|
/* Queue one event on direction `dir`.
 *
 * The event is stamped in place (seq, ts_ns when it is still 0, effective
 * priority) and then copied into a node of the matching priority band; from
 * then on the queued copy owns the payload.
 *
 * Returns:
 *    0  queued as a new node;
 *    1  no new node was added: the event was either coalesced into an already
 *       queued event of the same kind+endpoint (which now carries the new
 *       data but keeps its old seq), or dropped by backpressure (the incoming
 *       payload is released here in the drop case);
 *   -1  NULL context/event, out-of-range dir, or no memory for a node. The
 *       event's payload is NOT released on this path. */
int vmsig_ctx_submit(vmsig_ctx* c, vmsig_dir dir, vmsig_event* ev) {
    if (!c || !ev || dir > VMSIG_DIR_DOWN) return -1;
    /* an out-of-range source falls back to the NONE policy slot */
    vmsig_source src = ev->source < VMSIG_SRC_MAX ? ev->source : VMSIG_SRC_NONE;

    pthread_mutex_lock(&c->lock);
    ctx_policy* pol = &c->policy[src][dir];

    /* effective priority = max(policy default, emitter request), clamped to
     * the highest existing band */
    vmsig_prio eff = ev->prio > pol->default_prio ? ev->prio : pol->default_prio;
    if (eff >= VMSIG_PRIO_MAX) eff = VMSIG_PRIO_MAX - 1;

    ev->seq = ++c->seq;
    if (ev->ts_ns == 0) ev->ts_ns = now_ns();
    ev->prio = eff;

    ev_band* band = &c->dir[dir].band[eff];

    /* coalescing: a burst of the same kind+endpoint is collapsed (newest wins).
     * Only the band selected by the effective priority is searched. */
    if (pol->timing.coalesce_ns) {
        for (ev_node* n = band->head; n; n = n->next) {
            if (n->ev.kind == ev->kind && n->ev.endpoint == ev->endpoint) {
                vmsig_payload_release(&n->ev);
                uint32_t keep_seq = n->ev.seq;   /* keep position in the order */
                n->ev = *ev;
                n->ev.seq = keep_seq;
                pthread_mutex_unlock(&c->lock);
                return 1;
            }
        }
    }

    /* backpressure: channel depth is bounded. When no policy is set
     * (max_inflight==0), a BUILT-IN default ceiling applies (drop newest),
     * so the queue does not grow without bound under a command flood. */
    uint32_t cap = pol->timing.max_inflight ? pol->timing.max_inflight
                                            : VMSIG_CTX_DEFAULT_INFLIGHT;
    uint8_t  dp  = pol->timing.max_inflight ? pol->timing.drop_policy
                                            : VMSIG_DROP_NEWEST;
    /* Compare in the unsigned domain: count is never negative, whereas
     * narrowing a max_inflight above INT_MAX to int yields a negative ceiling
     * on the usual ABIs and would make every submit look like an overflow. */
    if ((uint32_t)band->count >= cap) {
        if (dp == VMSIG_DROP_OLDEST) {
            ev_node* old = band->head;           /* drop the oldest */
            if (old) {
                band->head = old->next;
                if (!band->head) band->tail = NULL;
                band->count--;
                vmsig_payload_release(&old->ev);
                node_put(c, old);
            }
        } else {
            /* NEWEST / BLOCK (the loop must not block) — drop the incoming event */
            vmsig_payload_release(ev);
            pthread_mutex_unlock(&c->lock);
            return 1;
        }
    }

    ev_node* node = node_get(c);
    if (!node) { pthread_mutex_unlock(&c->lock); return -1; }
    node->ev = *ev;                              /* take ownership of the payload */
    band_push_tail(band, node);
    pthread_mutex_unlock(&c->lock);
    return 0;
}
|
||||||
|
|
||||||
|
/* Dequeue the next deliverable event of direction `dir` into *out.
 *
 * Bands are scanned from the highest priority down, each band head to tail,
 * and the first event whose source is not currently held back by its min_gap
 * is unlinked and returned (payload ownership moves to the caller). Events of
 * a paced source are stepped over, so they do not stall ready events of other
 * sources; within one source the queue order is kept.
 *
 * Returns 1 when *out was filled, 0 when nothing is deliverable right now
 * (if some event is waiting on pacing, the direction's timing fd is armed for
 * the shortest remaining wait), -1 on NULL arguments or an out-of-range dir. */
int vmsig_ctx_next(vmsig_ctx* c, vmsig_dir dir, vmsig_event* out) {
    if (c == NULL || out == NULL || dir > VMSIG_DIR_DOWN)
        return -1;

    pthread_mutex_lock(&c->lock);
    ctx_dir* side = &c->dir[dir];
    const uint64_t t_now = now_ns();
    uint64_t shortest_wait = 0;
    int someone_waiting = 0;

    for (int p = VMSIG_PRIO_MAX - 1; p >= 0; p--) {
        ev_band* band = &side->band[p];
        ev_node* before = NULL;
        for (ev_node* cur = band->head; cur != NULL; before = cur, cur = cur->next) {
            vmsig_source s = cur->ev.source < VMSIG_SRC_MAX ? cur->ev.source : VMSIG_SRC_NONE;
            ctx_policy* pol = &c->policy[s][dir];

            /* pacing: is this source still inside its minimum gap? */
            int held_back = 0;
            uint64_t wait = 0;
            if (pol->timing.min_gap_ns) {
                uint64_t ready_at = pol->last_emit_ns + pol->timing.min_gap_ns;
                if (t_now < ready_at) {
                    held_back = 1;
                    wait = ready_at - t_now;
                }
            }

            if (held_back) {
                /* remember the nearest maturity and step over the node */
                if (!someone_waiting || wait < shortest_wait) {
                    shortest_wait = wait;
                    someone_waiting = 1;
                }
                continue;
            }

            /* matured: unlink from the band and hand it out */
            if (before != NULL)
                before->next = cur->next;
            else
                band->head = cur->next;
            if (band->tail == cur)
                band->tail = before;
            band->count--;
            pol->last_emit_ns = t_now;
            *out = cur->ev;                      /* payload ownership -> caller */
            node_put(c, cur);
            pthread_mutex_unlock(&c->lock);
            return 1;
        }
    }

    /* nothing matured: arm the timing-fd for the nearest due time (if any waiting) */
    if (someone_waiting) {
        struct itimerspec arm;
        memset(&arm, 0, sizeof arm);
        arm.it_value.tv_sec  = (time_t)(shortest_wait / 1000000000ull);
        arm.it_value.tv_nsec = (long)(shortest_wait % 1000000000ull);
        /* an all-zero it_value would disarm the timer instead of firing it */
        if (arm.it_value.tv_sec == 0 && arm.it_value.tv_nsec == 0)
            arm.it_value.tv_nsec = 1;
        timerfd_settime(side->timing_fd, 0, &arm, NULL);
    }
    pthread_mutex_unlock(&c->lock);
    return 0;
}
|
||||||
|
|
||||||
|
/* Expose the pacing timerfd of one direction; -1 for a NULL context or an
 * out-of-range direction. */
int vmsig_ctx_timing_fd(vmsig_ctx* c, vmsig_dir dir) {
    int usable = (c != NULL) && !(dir > VMSIG_DIR_DOWN);
    return usable ? c->dir[dir].timing_fd : -1;
}
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
#ifndef VMSIG_CTX_INTERNAL_H
|
||||||
|
#define VMSIG_CTX_INTERNAL_H
|
||||||
|
#include "vmsig_ctx.h"
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* Private internals of the transfer context. Priority bands are simple
 * linked FIFOs (one node per event; spent nodes are recycled through the
 * context's free-list rather than freed on every dequeue). */
|
||||||
|
|
||||||
|
/* One queued event, stored by value, plus the link to the next node of the
 * same list (a priority band, or the free-list once the node is recycled). */
typedef struct ev_node {
|
||||||
|
vmsig_event ev; /* the event itself (copied in on submit) */
|
||||||
|
struct ev_node* next; /* next node in the list, NULL at the end */
|
||||||
|
} ev_node;
|
||||||
|
|
||||||
|
/* One priority band: a singly linked FIFO with head/tail pointers and an
 * element counter that the backpressure check compares against the cap. */
typedef struct {
|
||||||
|
ev_node* head; /* oldest queued node, NULL when the band is empty */
|
||||||
|
ev_node* tail; /* newest queued node, NULL when the band is empty */
|
||||||
|
int count; /* number of nodes currently linked in this band */
|
||||||
|
} ev_band;
|
||||||
|
|
||||||
|
/* Policy and pacing state for one (source, direction) pair. */
typedef struct {
|
||||||
|
vmsig_prio default_prio; /* lower bound for the effective priority of submitted events */
|
||||||
|
vmsig_timing timing; /* coalescing / min-gap / backpressure settings (all zero = none set) */
|
||||||
|
uint64_t last_emit_ns; /* for protocol min_gap (per source,dir) */
|
||||||
|
int policy_set; /* set to 1 once a policy has been installed for this slot */
|
||||||
|
} ctx_policy;
|
||||||
|
|
||||||
|
/* Per-direction state: the priority bands and the timerfd used for pacing. */
typedef struct {
|
||||||
|
ev_band band[VMSIG_PRIO_MAX]; /* 4 priority bands */
|
||||||
|
int timing_fd; /* pacing timerfd (created in ctx_new) */
|
||||||
|
} ctx_dir;
|
||||||
|
|
||||||
|
/* The transfer context. All fields below `lock` are accessed with `lock` held
 * by the public vmsig_ctx_* entry points that queue, dequeue or set policy. */
struct vmsig_ctx {
|
||||||
|
pthread_mutex_t lock; /* serializes submit / next / set_policy */
|
||||||
|
uint32_t seq; /* last sequence number handed out; incremented per submit */
|
||||||
|
ev_node* freelist; /* ev_node recycling */
|
||||||
|
ctx_dir dir[2]; /* indexed by VMSIG_DIR_UP/DOWN */
|
||||||
|
ctx_policy policy[VMSIG_SRC_MAX][2]; /* [source][dir] */
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* VMSIG_CTX_INTERNAL_H */
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
/* test_authz.c — authorization to receive the coherent address-space context (MEMCTX).
|
||||||
|
* Several co-resident controls on one endpoint; MEMCTX is multicast upward and reaches
|
||||||
|
* ONLY the qualified holder. We check the least-privilege matrix:
|
||||||
|
* GOOD (CAP_MEMCTX + source_mask MEMCTX + endpoint) -> receives MEMCTX;
|
||||||
|
* NOCAP (CAP_OBSERVE, no MEMCTX) -> does NOT receive (cap);
|
||||||
|
* NOSRC (CAP_MEMCTX, source_mask without MEMCTX bit) -> does NOT receive (source_mask);
|
||||||
|
* NOEP (CAP_MEMCTX, but endpoint outside the mask) -> does NOT receive (endpoint).
|
||||||
|
* A vmhost stub provides watchdog ticks (VM_LIFECYCLE) — a termination guarantee if
|
||||||
|
* MEMCTX somehow never arrives (then the asserts catch its absence). In-proc, under ASAN. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(cond, msg) do { \
|
||||||
|
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define EP 0u
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vmsig_core* core;
|
||||||
|
const char* tag;
|
||||||
|
int memctx; /* how many MEMCTX this control received */
|
||||||
|
int ticks; /* watchdog: VM_LIFECYCLE (only on GOOD) */
|
||||||
|
int is_good; /* GOOD stops the loop on the first MEMCTX */
|
||||||
|
} holder;
|
||||||
|
|
||||||
|
static int on_ev(void* user, const vmsig_event* ev) {
|
||||||
|
holder* h = user;
|
||||||
|
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
|
||||||
|
h->ticks++;
|
||||||
|
if (h->is_good && h->ticks > 20) vmsig_core_stop(h->core); /* failsafe */
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int on_memctx(void* user, const vmsig_event* ev, int fd) {
|
||||||
|
holder* h = user;
|
||||||
|
(void)ev; (void)fd;
|
||||||
|
h->memctx++;
|
||||||
|
if (h->is_good) vmsig_core_stop(h->core);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create an in-process control bound to holder `h` and register it with the
 * core under a grant built from the given capability, source and endpoint
 * masks (principal is fixed to 7). */
static void add_holder(vmsig_core* core, holder* h, uint32_t cap,
                       uint32_t source_mask, uint64_t endpoint_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user = h;
    cfg.on_event = on_ev;
    cfg.on_memctx = on_memctx;

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.principal = 7;
    grant.cap_mask = cap;
    grant.source_mask = source_mask;
    grant.endpoint_mask = endpoint_mask;

    void* ctl = vmsig_inproc_control_new(&cfg);
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
printf("test_authz (memctx least-privilege)\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
holder good = { core, "GOOD", 0, 0, 1 };
|
||||||
|
holder nocap = { core, "NOCAP", 0, 0, 0 };
|
||||||
|
holder nosrc = { core, "NOSRC", 0, 0, 0 };
|
||||||
|
holder noep = { core, "NOEP", 0, 0, 0 };
|
||||||
|
|
||||||
|
/* GOOD: CAP_MEMCTX (+OBSERVE for watchdog lifecycle ticks), source MEMCTX, ep0 -> receives. */
|
||||||
|
add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << EP);
|
||||||
|
/* NOCAP: OBSERVE only (no CAP_MEMCTX) -> deny by cap. */
|
||||||
|
add_holder(core, &nocap, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << EP);
|
||||||
|
/* NOSRC: CAP_MEMCTX, but source_mask without the MEMCTX bit -> deny by source_mask. */
|
||||||
|
add_holder(core, &nosrc, VMSIG_CAP_MEMCTX, ~(1u << VMSIG_SRC_MEMCTX), 1ull << EP);
|
||||||
|
/* NOEP: CAP_MEMCTX, source ok, but endpoint outside the mask (ep1) -> deny by endpoint. */
|
||||||
|
add_holder(core, &noep, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 1);
|
||||||
|
|
||||||
|
/* watchdog lifecycle ticks + address-space context on one endpoint (stub). */
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, EP) >= 0, "add vmhost adapter");
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, EP) >= 0, "add memctx adapter");
|
||||||
|
|
||||||
|
int rc = vmsig_core_run(core);
|
||||||
|
printf(" rc=%d GOOD.memctx=%d NOCAP=%d NOSRC=%d NOEP=%d\n",
|
||||||
|
rc, good.memctx, nocap.memctx, nosrc.memctx, noep.memctx);
|
||||||
|
|
||||||
|
CHECK(good.memctx >= 1, "GOOD receives MEMCTX (cap+source+endpoint)");
|
||||||
|
CHECK(nocap.memctx == 0, "NOCAP does not receive (no CAP_MEMCTX)");
|
||||||
|
CHECK(nosrc.memctx == 0, "NOSRC does not receive (source_mask without MEMCTX)");
|
||||||
|
CHECK(noep.memctx == 0, "NOEP does not receive (endpoint outside mask)");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
printf("authz tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,125 @@
|
|||||||
|
/* test_ctx.c — unit tests for the transfer context (public vmsig_ctx_* API):
|
||||||
|
* priority->seq, coalescing, backpressure (drop oldest/newest), protocol
|
||||||
|
* pacing via timing-fd. Links against libvmsig; run through ctest. */
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <poll.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(cond, msg) do { \
|
||||||
|
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/* Submit a DOWN event with the given prio/kind/endpoint/corr. */
|
||||||
|
static int put(vmsig_ctx* c, vmsig_prio p, vmsig_kind k, uint32_t ep, uint32_t corr) {
|
||||||
|
vmsig_event e;
|
||||||
|
memset(&e, 0, sizeof e);
|
||||||
|
e.source = VMSIG_SRC_NONE; e.dir = VMSIG_DIR_DOWN;
|
||||||
|
e.prio = p; e.kind = k; e.endpoint = ep; e.corr = corr;
|
||||||
|
return vmsig_ctx_submit(c, VMSIG_DIR_DOWN, &e);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 1. priority first, then FIFO by seq within a band ------------------- */
|
||||||
|
static void test_priority_seq(void) {
|
||||||
|
printf("test_priority_seq\n");
|
||||||
|
vmsig_ctx* c = vmsig_ctx_new();
|
||||||
|
put(c, VMSIG_PRIO_BULK, VMSIG_EV_CMD_INPUT, 0, 0xA);
|
||||||
|
put(c, VMSIG_PRIO_URGENT, VMSIG_EV_CMD_INPUT, 0, 0xB);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 0xC);
|
||||||
|
put(c, VMSIG_PRIO_BULK, VMSIG_EV_CMD_INPUT, 0, 0xD);
|
||||||
|
put(c, VMSIG_PRIO_URGENT, VMSIG_EV_CMD_INPUT, 0, 0xE);
|
||||||
|
uint32_t want[5] = { 0xB, 0xE, 0xC, 0xA, 0xD }; /* URGENT(seq) -> NORMAL -> BULK(seq) */
|
||||||
|
vmsig_event o;
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
int r = vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o);
|
||||||
|
CHECK(r == 1, "next must return an event");
|
||||||
|
CHECK(o.corr == want[i], "priority->seq order");
|
||||||
|
}
|
||||||
|
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "queue drained");
|
||||||
|
vmsig_ctx_free(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 2. coalescing: a burst of one kind+endpoint collapses (newest wins) - */
|
||||||
|
static void test_coalesce(void) {
|
||||||
|
printf("test_coalesce\n");
|
||||||
|
vmsig_ctx* c = vmsig_ctx_new();
|
||||||
|
vmsig_timing t; memset(&t, 0, sizeof t); t.coalesce_ns = 1;
|
||||||
|
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_VM, 0, 0x11);
|
||||||
|
int r2 = put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_VM, 0, 0x22); /* should coalesce */
|
||||||
|
CHECK(r2 == 1, "second submit coalesced (=1)");
|
||||||
|
vmsig_event o;
|
||||||
|
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "one event after coalescing");
|
||||||
|
CHECK(o.corr == 0x22, "newest data after coalescing");
|
||||||
|
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "nothing more");
|
||||||
|
vmsig_ctx_free(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 3. backpressure drop_oldest --------------------------------------- */
|
||||||
|
static void test_backpressure_oldest(void) {
|
||||||
|
printf("test_backpressure_oldest\n");
|
||||||
|
vmsig_ctx* c = vmsig_ctx_new();
|
||||||
|
vmsig_timing t; memset(&t, 0, sizeof t);
|
||||||
|
t.max_inflight = 2; t.drop_policy = VMSIG_DROP_OLDEST;
|
||||||
|
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 3); /* evicts 1 */
|
||||||
|
vmsig_event o;
|
||||||
|
int got[8], n = 0;
|
||||||
|
while (vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1) got[n++] = (int)o.corr;
|
||||||
|
CHECK(n == 2, "2 remain after drop_oldest");
|
||||||
|
CHECK(n == 2 && got[0] == 2 && got[1] == 3, "oldest evicted (1)");
|
||||||
|
vmsig_ctx_free(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 4. backpressure drop_newest --------------------------------------- */
|
||||||
|
static void test_backpressure_newest(void) {
|
||||||
|
printf("test_backpressure_newest\n");
|
||||||
|
vmsig_ctx* c = vmsig_ctx_new();
|
||||||
|
vmsig_timing t; memset(&t, 0, sizeof t);
|
||||||
|
t.max_inflight = 2; t.drop_policy = VMSIG_DROP_NEWEST;
|
||||||
|
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
|
||||||
|
int r3 = put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 3); /* dropped */
|
||||||
|
CHECK(r3 == 1, "third submit dropped (=1)");
|
||||||
|
vmsig_event o;
|
||||||
|
int got[8], n = 0;
|
||||||
|
while (vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1) got[n++] = (int)o.corr;
|
||||||
|
CHECK(n == 2 && got[0] == 1 && got[1] == 2, "newest dropped (3)");
|
||||||
|
vmsig_ctx_free(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 5. protocol pacing via timing-fd ---------------------------------- */
|
||||||
|
static void test_pacing(void) {
|
||||||
|
printf("test_pacing\n");
|
||||||
|
vmsig_ctx* c = vmsig_ctx_new();
|
||||||
|
vmsig_timing t; memset(&t, 0, sizeof t);
|
||||||
|
t.min_gap_ns = 20u * 1000000u; /* 20 ms gap */
|
||||||
|
vmsig_ctx_set_policy(c, VMSIG_SRC_NONE, VMSIG_DIR_DOWN, VMSIG_PRIO_NORMAL, &t);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 1);
|
||||||
|
put(c, VMSIG_PRIO_NORMAL, VMSIG_EV_CMD_INPUT, 0, 2);
|
||||||
|
vmsig_event o;
|
||||||
|
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "first delivered immediately");
|
||||||
|
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 0, "second paced (0 for now)");
|
||||||
|
int tfd = vmsig_ctx_timing_fd(c, VMSIG_DIR_DOWN);
|
||||||
|
CHECK(tfd >= 0, "timing-fd valid");
|
||||||
|
struct pollfd pfd = { .fd = tfd, .events = POLLIN };
|
||||||
|
int pr = poll(&pfd, 1, 1000); /* wait for it to fire, no longer than 1s */
|
||||||
|
CHECK(pr == 1, "timing-fd became ready within the gap");
|
||||||
|
CHECK(vmsig_ctx_next(c, VMSIG_DIR_DOWN, &o) == 1, "after the gap the second matured");
|
||||||
|
CHECK(o.corr == 2, "this is exactly the second event");
|
||||||
|
vmsig_ctx_free(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
test_priority_seq();
|
||||||
|
test_coalesce();
|
||||||
|
test_backpressure_oldest();
|
||||||
|
test_backpressure_newest();
|
||||||
|
test_pacing();
|
||||||
|
printf("ctx tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
/* test_inputobs.c — input observation:
|
||||||
|
* held-query: a control with CAP_INPUT, on CMD_QUERY_INPUT, receives INPUT_HELD from the
|
||||||
|
* vmctl record (stub without vmctl => count=0); without CAP_INPUT — DOWN_DENIED.
|
||||||
|
* (The cursor sensor moved out of signaling with the FRAME adapter: CURSOR_STATE is now
|
||||||
|
* emitted by the out-of-repo vgpu-perception shell-as-control, not by a signaling adapter.)
|
||||||
|
* In-proc, under ASAN. Links against libvmsig. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(c, m) do { if (!(c)) { printf(" FAIL: %s\n", (m)); g_fail = 1; } } while (0)
|
||||||
|
|
||||||
|
#define EP 0u
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
vmsig_core* core;
|
||||||
|
void* ctl;
|
||||||
|
int held; /* INPUT_HELD count */
|
||||||
|
int last_held_count;
|
||||||
|
int stop_held; /* stop after N held replies (0=no) */
|
||||||
|
} obs;
|
||||||
|
|
||||||
|
static int on_ev(void* u, const vmsig_event* ev) {
|
||||||
|
obs* o = u;
|
||||||
|
if (ev->kind == VMSIG_EV_INPUT_HELD) {
|
||||||
|
const vmsig_input_held* h = (const vmsig_input_held*)ev->inln;
|
||||||
|
o->held++; o->last_held_count = (int)h->count;
|
||||||
|
if (o->stop_held && o->held >= o->stop_held) vmsig_core_stop(o->core);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create an in-process control for observer `o` (subscription: given source
 * mask, every priority from BULK up), remember it in o->ctl and register it
 * with a grant for endpoint EP carrying the given capability and source mask. */
static void add_ctl(vmsig_core* core, obs* o, uint32_t cap, uint32_t src_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user = o;
    cfg.on_event = on_ev;
    cfg.sub.prio_min = VMSIG_PRIO_BULK;
    cfg.sub.source_mask = src_mask;
    o->ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.cap_mask = cap;
    grant.source_mask = src_mask;
    grant.endpoint_mask = 1ull << EP;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), o->ctl, &grant);
}
|
||||||
|
|
||||||
|
static void send_query_input(void* ctl) {
|
||||||
|
vmsig_event d; memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_QUERY_INPUT; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = EP; d.prio = VMSIG_PRIO_HIGH;
|
||||||
|
vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int g_down_denied = 0;
|
||||||
|
/* Audit hook: counts DOWN_DENIED records in g_down_denied, ignores the rest. */
static void audit_cb(void* ud, const vmsig_audit* a) {
    (void)ud;
    if (a->kind != VMSIG_AUDIT_DOWN_DENIED)
        return;
    g_down_denied += 1;
}
|
||||||
|
|
||||||
|
/* ---- held-query: CAP_INPUT -> INPUT_HELD (stub count=0); without cap -> DOWN_DENIED ---- */
|
||||||
|
static void test_held_query(void) {
|
||||||
|
printf("test_held_query\n");
|
||||||
|
g_down_denied = 0;
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||||
|
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, EP); /* stub input (no vmctl) */
|
||||||
|
|
||||||
|
obs a; memset(&a, 0, sizeof a); a.core = core; a.stop_held = 1;
|
||||||
|
add_ctl(core, &a, VMSIG_CAP_INPUT, 0xFFFFFFFFu);
|
||||||
|
send_query_input(a.ctl);
|
||||||
|
vmsig_core_run(core); /* pump_down -> INPUT_HELD -> pump_up */
|
||||||
|
CHECK(a.held == 1, "held: CAP_INPUT receives INPUT_HELD");
|
||||||
|
CHECK(a.last_held_count == 0, "held: stub without vmctl -> count=0");
|
||||||
|
|
||||||
|
/* without CAP_INPUT (OBSERVE only): CMD_QUERY_INPUT rejected BEFORE ctx (synchronously). */
|
||||||
|
obs b; memset(&b, 0, sizeof b); b.core = core;
|
||||||
|
add_ctl(core, &b, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu);
|
||||||
|
int before = g_down_denied;
|
||||||
|
send_query_input(b.ctl);
|
||||||
|
CHECK(b.held == 0, "held: without CAP_INPUT -> no INPUT_HELD");
|
||||||
|
CHECK(g_down_denied == before + 1, "held: without CAP_INPUT -> DOWN_DENIED");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
printf("test_inputobs\n");
|
||||||
|
test_held_query();
|
||||||
|
printf("inputobs tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,525 @@
|
|||||||
|
/* test_lease.c — arbitration layer (exclusive lease of destructive resources).
|
||||||
|
*
|
||||||
|
* 13 cases from the contract (docs/plans/lease-arbitration.md §Tests). In-proc,
|
||||||
|
* runs without a live loop where synchronous interception suffices
|
||||||
|
* (ACQUIRE/RELEASE/STATUS and the lease gate are synchronous in core_emit_down);
|
||||||
|
* a live vmsig_core_run() — to check finalization/fencing/reclaim, where the
|
||||||
|
* input-adapter worker is involved (actuation on the worker thread, ACK via the loop).
|
||||||
|
*
|
||||||
|
* Observability of finalization ordering: the input-worker FIFO => ACT_ACK order ==
|
||||||
|
* submit order. A synthesized key-up has origin=0 (broadcast), the owner's CMD_INPUT
|
||||||
|
* is an addressed ACK. A shared monotonic log records the relative ordering. */
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include "core_internal.h" /* core_request_drop: deterministic reclaim of an in-proc control */
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(cond, msg) do { \
|
||||||
|
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/* ---------- shared control infrastructure ---------- */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int granted, denied, released, revoked, status;
|
||||||
|
int last_deny_reason;
|
||||||
|
int last_status_busy;
|
||||||
|
uint32_t last_status_owner;
|
||||||
|
} lease_counts;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
void* core;
|
||||||
|
lease_counts cnt[4]; /* indexed by control */
|
||||||
|
/* log of ACT_ACK (actuations) in arrival order: tag = corr (0 = synthesized up) */
|
||||||
|
int ack_log[64];
|
||||||
|
int nack;
|
||||||
|
int stop_after_acks; /* stop the loop after N actuations (0=not auto) */
|
||||||
|
int total_replies; /* count of all lease UP events (GRANTED/DENIED/RELEASED/STATUS/REVOKED) */
|
||||||
|
int stop_replies; /* stop the loop when total_replies>=this (0=not auto) */
|
||||||
|
/* phase orchestration for finalization/fencing (on the loop thread via on_ev) */
|
||||||
|
void* ctl_a;
|
||||||
|
void* ctl_b;
|
||||||
|
int phase; /* count of scenario phases passed */
|
||||||
|
int scenario; /* 0=none, 1=preempt-finalize, 2=in-flight-fence */
|
||||||
|
} lease_state;
|
||||||
|
|
||||||
|
typedef struct { lease_state* s; int idx; } cref;
|
||||||
|
|
||||||
|
/* Registry of allocated crefs (control user-data): inproc close() frees the
|
||||||
|
* ctl itself but not the user-data => we free them centrally at the end (ASAN cleanliness). */
|
||||||
|
static cref* g_crefs[64];
|
||||||
|
static int g_ncrefs = 0;
|
||||||
|
/* Allocate the user-data record that ties a control to the shared lease state
 * and to its slot index, and remember it in g_crefs so cref_free_all() can
 * free it at the end. Aborts the test on allocation failure instead of
 * dereferencing NULL. Records beyond the 64 registry slots are still returned
 * but are not tracked (and therefore not freed centrally). */
static cref* cref_new(lease_state* s, int idx) {
    cref* r = calloc(1, sizeof *r);
    if (r == NULL) {
        fprintf(stderr, "cref_new: out of memory\n");
        abort();
    }
    r->s = s;
    r->idx = idx;
    if (g_ncrefs < 64) g_crefs[g_ncrefs++] = r;
    return r;
}
|
||||||
|
static void cref_free_all(void) {
|
||||||
|
for (int i = 0; i < g_ncrefs; i++) free(g_crefs[i]);
|
||||||
|
g_ncrefs = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* forward declarations of send helpers (used in on_ev for phase orchestration) */
|
||||||
|
static int acquire(void* ctl, uint32_t cls, uint32_t ep);
|
||||||
|
static int send_key(void* ctl, int code, int value, uint32_t corr, uint32_t ep);
|
||||||
|
|
||||||
|
/* Book-keeping for lease replies seen by control `idx`: bumps the matching
 * per-control counter, records the deny reason / status details, counts the
 * reply in total_replies and stops the loop once stop_replies is reached
 * (0 disables the auto-stop). Non-lease events only go through the stop check. */
static void on_lease_ev(lease_state* s, int idx, const vmsig_event* ev) {
    lease_counts* tally = &s->cnt[idx];
    int is_lease_reply = 1;

    switch (ev->kind) {
    case VMSIG_EV_LEASE_GRANTED:
        tally->granted++;
        break;
    case VMSIG_EV_LEASE_DENIED: {
        const vmsig_lease_req* req = (const vmsig_lease_req*)ev->inln;
        tally->denied++;
        tally->last_deny_reason = (int)req->reason;
        break;
    }
    case VMSIG_EV_LEASE_RELEASED:
        tally->released++;
        break;
    case VMSIG_EV_LEASE_REVOKED:
        tally->revoked++;
        break;
    case VMSIG_EV_LEASE_STATUS: {
        const vmsig_lease_status* st = (const vmsig_lease_status*)ev->inln;
        tally->status++;
        tally->last_status_busy = (int)st->busy;
        tally->last_status_owner = st->owner_principal;
        break;
    }
    default:
        is_lease_reply = 0;
        break;
    }

    if (is_lease_reply)
        s->total_replies++;
    if (s->stop_replies && s->total_replies >= s->stop_replies)
        vmsig_core_stop(s->core);
}
|
||||||
|
|
||||||
|
static int on_ev(void* user, const vmsig_event* ev) {
|
||||||
|
cref* r = user; lease_state* s = r->s;
|
||||||
|
on_lease_ev(s, r->idx, ev);
|
||||||
|
if (ev->kind == VMSIG_EV_ACT_ACK) {
|
||||||
|
if (s->nack < 64) s->ack_log[s->nack++] = (int)ev->corr;
|
||||||
|
|
||||||
|
/* Phase orchestration (loop thread): wait for the REAL actuation of A's key-down
|
||||||
|
* (the held-set is filled in pump_down=in_submit), and only THEN let B preempt —
|
||||||
|
* otherwise finalization on a synchronous acquire would run over an empty held-set. */
|
||||||
|
if (s->scenario == 1 && ev->corr == 11 && s->phase == 0) {
|
||||||
|
s->phase = 1;
|
||||||
|
acquire(s->ctl_b, VMSIG_LEASE_INPUT, 0); /* B preempts AFTER A's actuation */
|
||||||
|
send_key(s->ctl_b, 31, 1, 22, 0);
|
||||||
|
send_key(s->ctl_b, 31, 0, 23, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s->stop_after_acks && s->nack >= s->stop_after_acks)
|
||||||
|
vmsig_core_stop(s->core);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------- DOWN send helpers ---------- */
|
||||||
|
|
||||||
|
/* Send one HIGH-priority lease command of the given kind for resource class
 * `cls` on endpoint `ep`; the request record travels in the event's inline
 * area. Returns the result of vmsig_inproc_send. */
static int send_lease(void* ctl, vmsig_kind kind, uint32_t cls, uint32_t ep) {
    vmsig_lease_req request = { cls, 0 };

    vmsig_event down;
    memset(&down, 0, sizeof down);
    down.dir = VMSIG_DIR_DOWN;
    down.source = VMSIG_SRC_INPUT;
    down.kind = kind;
    down.prio = VMSIG_PRIO_HIGH;
    down.endpoint = ep;
    memcpy(down.inln, &request, sizeof request);

    return vmsig_inproc_send(ctl, &down);
}
|
||||||
|
static int acquire(void* ctl, uint32_t cls, uint32_t ep) {
|
||||||
|
return send_lease(ctl, VMSIG_EV_CMD_ACQUIRE, cls, ep);
|
||||||
|
}
|
||||||
|
static int release_(void* ctl, uint32_t cls, uint32_t ep) {
|
||||||
|
return send_lease(ctl, VMSIG_EV_CMD_RELEASE, cls, ep);
|
||||||
|
}
|
||||||
|
static int status(void* ctl, uint32_t cls, uint32_t ep) {
|
||||||
|
return send_lease(ctl, VMSIG_EV_CMD_LEASE_STATUS, cls, ep);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CMD_INPUT: KEY/BTN with value, corr for tracking. */
|
||||||
|
static int send_key(void* ctl, int code, int value, uint32_t corr, uint32_t ep) {
|
||||||
|
vmsig_event d;
|
||||||
|
memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_INPUT; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = ep; d.prio = VMSIG_PRIO_HIGH; d.corr = corr;
|
||||||
|
vmsig_input in; memset(&in, 0, sizeof in);
|
||||||
|
in.kind = VMSIG_INPUT_KEY; in.code = code; in.value = value;
|
||||||
|
memcpy(d.inln, &in, sizeof in);
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int send_life(void* ctl, int op, uint32_t corr, uint32_t ep) {
|
||||||
|
vmsig_event d;
|
||||||
|
memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_LIFECYCLE; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = ep; d.prio = VMSIG_PRIO_URGENT; d.corr = corr; d.inln[0] = (uint8_t)op;
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pump the loop until `n` more lease replies arrive (UP delivery via ctx
|
||||||
|
* requires pump_up). Lease DOWN intercepts are synchronous, but their UP replies are
|
||||||
|
* paced by the context => a live loop is needed. */
|
||||||
|
static void pump_n(lease_state* s, int n) {
|
||||||
|
vmsig_core* c = (vmsig_core*)s->core;
|
||||||
|
c->stopping = 0; /* white-box: reuse the loop between phases */
|
||||||
|
s->stop_replies = s->total_replies + n;
|
||||||
|
vmsig_core_run(c);
|
||||||
|
s->stop_replies = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create an in-process control and register it with `core`.
 *
 *   s, idx    - lease_state and counter slot handed to cref_new(); the
 *               resulting cref becomes the control's callback user pointer
 *   cap       - capability bits for the grant (CAP_OBSERVE is always added)
 *   arb_prio  - arbitration priority stored in the grant
 *   epmask    - endpoint mask stored in the grant
 *
 * Returns the control handle from vmsig_inproc_control_new(). */
static void* add_ctl(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
                     uint32_t arb_prio, uint64_t epmask) {
    cref* ref = cref_new(s, idx);

    /* Subscription: every source, every priority from BULK up, all VMs. */
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user              = ref;
    cfg.on_event          = on_ev;
    cfg.sub.source_mask   = 0xFFFFFFFFu;
    cfg.sub.prio_min      = VMSIG_PRIO_BULK;
    cfg.sub.endpoint_mask = 0;                 /* all VMs */
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = epmask;
    grant.source_mask   = 0xFFFFFFFFu;
    grant.cap_mask      = cap | VMSIG_CAP_OBSERVE;   /* OBSERVE => sees the broadcast finalization ACT_ACK */
    grant.arb_prio      = arb_prio;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
    return ctl;
}
|
||||||
|
|
||||||
|
/* ===== Lease request group: ACQUIRE/RELEASE/STATUS are intercepted synchronously on DOWN; each UP reply is collected by pumping the loop ===== */
|
||||||
|
|
||||||
|
static void test_sync_group(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||||
|
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
|
||||||
|
void* Lo= add_ctl(core, &s, 2, VMSIG_CAP_INPUT, 5, 1ull << 0); /* low priority */
|
||||||
|
void* X = add_ctl(core, &s, 3, VMSIG_CAP_INPUT, 10, 1ull << 1); /* grant on ep1, not ep0 */
|
||||||
|
|
||||||
|
/* Lease DOWN intercepts are synchronous, but the UP reply is delivered via ctx => after
|
||||||
|
* each lease request we pump the loop until the corresponding UP arrives. */
|
||||||
|
|
||||||
|
/* 1) acquire -> GRANTED; the owner's CMD_INPUT passes the gate (==0). */
|
||||||
|
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[0].granted == 1, "1: A gets GRANTED");
|
||||||
|
CHECK(send_key(A, 30, 1, 1, 0) == 0, "1: owner's CMD_INPUT passes the gate");
|
||||||
|
send_key(A, 30, 0, 2, 0); /* release, so as not to leave held for the next tests */
|
||||||
|
|
||||||
|
/* 2) gate: non-owner B -> CMD_INPUT dropped (-1). */
|
||||||
|
CHECK(send_key(B, 30, 1, 3, 0) == -1, "2: non-owner: CMD_INPUT dropped by the gate");
|
||||||
|
|
||||||
|
/* 3) equal priorities: a contender of the same prio -> DENIED{HELD}. */
|
||||||
|
{
|
||||||
|
void* C = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 10, 1ull << 0); (void)C;
|
||||||
|
int before = s.cnt[1].denied;
|
||||||
|
acquire(C, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[1].denied == before + 1, "3: equal prio -> DENIED");
|
||||||
|
CHECK(s.cnt[1].last_deny_reason == VMSIG_LEASE_DENY_HELD, "3: reason=HELD");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 7) without cap -> NOCAP; foreign endpoint -> NOGRANT. */
|
||||||
|
{
|
||||||
|
void* NC = add_ctl(core, &s, 2, 0u /* without INPUT */, 10, 1ull << 0);
|
||||||
|
int before = s.cnt[2].denied;
|
||||||
|
acquire(NC, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[2].denied == before + 1, "7: without cap -> DENIED");
|
||||||
|
CHECK(s.cnt[2].last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "7: reason=NOCAP");
|
||||||
|
}
|
||||||
|
{
|
||||||
|
int before = s.cnt[3].denied;
|
||||||
|
acquire(X, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* X has a grant on ep1, requests ep0 */
|
||||||
|
CHECK(s.cnt[3].denied == before + 1, "7: foreign endpoint -> DENIED");
|
||||||
|
CHECK(s.cnt[3].last_deny_reason == VMSIG_LEASE_DENY_NOGRANT, "7: reason=NOGRANT");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 8) per-endpoint / per-class independence. */
|
||||||
|
{
|
||||||
|
void* P0 = add_ctl(core, &s, 1, VMSIG_CAP_POWER, 50, 1ull << 0);
|
||||||
|
int gb = s.cnt[1].granted, gx = s.cnt[3].granted;
|
||||||
|
acquire(X, VMSIG_LEASE_INPUT, 1); pump_n(&s, 1); /* X on its own ep1 — free */
|
||||||
|
CHECK(s.cnt[3].granted == gx + 1, "8: X owns INPUT@ep1 independently");
|
||||||
|
acquire(P0, VMSIG_LEASE_POWER, 0); pump_n(&s, 1); /* POWER@ep0 is free, even though INPUT@ep0 is held by A */
|
||||||
|
CHECK(s.cnt[1].granted == gb + 1, "8: POWER@ep0 independent of INPUT@ep0");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 11) STATUS: busy=1 while A holds INPUT@ep0. */
|
||||||
|
{
|
||||||
|
int before = s.cnt[1].status;
|
||||||
|
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[1].status == before + 1, "11: STATUS replies");
|
||||||
|
CHECK(s.cnt[1].last_status_busy == 1, "11: busy=1 while A owns");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 6) release -> reacquire: A releases, Lo (low prio) now gets it. */
|
||||||
|
{
|
||||||
|
int rb = s.cnt[0].released;
|
||||||
|
release_(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[0].released == rb + 1, "6: A gets RELEASED");
|
||||||
|
int gb = s.cnt[2].granted;
|
||||||
|
acquire(Lo, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* free -> even low prio takes it */
|
||||||
|
CHECK(s.cnt[2].granted == gb + 1, "6: reacquire after release succeeds");
|
||||||
|
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[1].last_status_busy == 1, "11: busy=1 after reacquire");
|
||||||
|
release_(Lo, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
status(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[1].last_status_busy == 0, "11: busy=0 after releasing all");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 12) safe commands are NOT gated by the lease (nobody holds INPUT@ep0). */
|
||||||
|
{
|
||||||
|
void* SAFE = add_ctl(core, &s, 1, VMSIG_CAP_LIFECYCLE | VMSIG_CAP_INPUT, 1, 1ull << 0);
|
||||||
|
/* PAUSE = safe lifecycle: lease_class_for_down -> -1 => not gated. */
|
||||||
|
CHECK(send_life(SAFE, VMSIG_LIFE_PAUSE, 90, 0) == 0,
|
||||||
|
"12: safe lifecycle (PAUSE) is not gated by the lease");
|
||||||
|
}
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== 13) policy seam: custom "always DENY" ===== */
|
||||||
|
static vmsig_arb_decision policy_always_deny(void* ud, uint32_t ep, uint32_t cls,
|
||||||
|
const vmsig_grant* inc, const vmsig_grant* con) {
|
||||||
|
(void)ud; (void)ep; (void)cls; (void)inc; (void)con;
|
||||||
|
return VMSIG_ARB_DENY;
|
||||||
|
}
|
||||||
|
static void test_policy_group(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
vmsig_core_set_arb_policy(core, policy_always_deny, NULL);
|
||||||
|
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||||
|
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 999, 1ull << 0); /* highest prio */
|
||||||
|
|
||||||
|
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[0].granted == 1, "13: A owns");
|
||||||
|
acquire(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1); /* high prio, but policy=DENY */
|
||||||
|
CHECK(s.cnt[1].granted == 0, "13: custom DENY => high prio does NOT preempt");
|
||||||
|
CHECK(s.cnt[1].denied == 1, "13: B got DENIED");
|
||||||
|
CHECK(s.cnt[0].revoked == 0, "13: A not revoked");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== 4) preemption: high prio takes the lease away (REVOKED to the old, GRANTED to the new).
|
||||||
|
* signaling does NOT release what is held (rolling back finalization is the control's decision). ===== */
|
||||||
|
static void test_preempt(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||||
|
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
|
||||||
|
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0); /* stub input */
|
||||||
|
s.ctl_a = A; s.ctl_b = B; s.scenario = 1;
|
||||||
|
|
||||||
|
acquire(A, VMSIG_LEASE_INPUT, 0);
|
||||||
|
send_key(A, 30, 1, 11, 0); /* A injects a KEY (corr=11) */
|
||||||
|
/* B preempts from on_ev AFTER ack corr=11. There is NO finalization => wait for 3 actuations:
|
||||||
|
* A-down(11), B-down(22), B-up(23). */
|
||||||
|
s.stop_after_acks = 3;
|
||||||
|
vmsig_core_run(core);
|
||||||
|
|
||||||
|
CHECK(s.cnt[1].granted == 1, "4: B gets GRANTED on preemption");
|
||||||
|
CHECK(s.cnt[0].revoked == 1, "4: A gets LEASE_REVOKED");
|
||||||
|
int saw22 = 0;
|
||||||
|
for (int i = 0; i < s.nack; i++) if (s.ack_log[i] == 22) saw22 = 1;
|
||||||
|
CHECK(saw22, "4: the new owner's (B) input is actuated after preemption");
|
||||||
|
/* in-flight fencing of the ex-owner — covered separately in test_inflight_fence. */
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== reacquire by the owner with a key held down does NOT synthesize an up (self-preemption) ===== */
|
||||||
|
static void test_self_reacquire(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||||
|
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0);
|
||||||
|
|
||||||
|
acquire(A, VMSIG_LEASE_INPUT, 0);
|
||||||
|
send_key(A, 30, 1, 11, 0); /* hold down */
|
||||||
|
acquire(A, VMSIG_LEASE_INPUT, 0); /* reacquire by the same owner -> idempotent */
|
||||||
|
send_key(A, 31, 1, 22, 0); /* another key */
|
||||||
|
send_key(A, 30, 0, 33, 0);
|
||||||
|
send_key(A, 31, 0, 44, 0);
|
||||||
|
|
||||||
|
s.stop_after_acks = 4; /* there must be NO synthesized up (corr=0) */
|
||||||
|
vmsig_core_run(core);
|
||||||
|
|
||||||
|
CHECK(s.cnt[0].granted == 2, "self: repeated ACQUIRE -> another GRANTED");
|
||||||
|
int saw_zero = 0;
|
||||||
|
for (int i = 0; i < s.nack; i++) if (s.ack_log[i] == 0) saw_zero = 1;
|
||||||
|
CHECK(!saw_zero, "self: reacquire by the owner does NOT synthesize a key-up");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== 9) reclaim-on-death: the slot is freed, RECLAIMED, B GRANTED =====
|
||||||
|
* We model death via core_request_drop(id): core_reap -> core_lease_reap_control
|
||||||
|
* frees the owner's slot (held is NOT finalized — that's a vmctl write / control's decision). */
|
||||||
|
static int g_reclaimed = 0;
|
||||||
|
static int g_lease_denied = 0;
|
||||||
|
static void audit_cb(void* ud, const vmsig_audit* a) {
|
||||||
|
(void)ud;
|
||||||
|
if (a->kind == VMSIG_AUDIT_LEASE_RECLAIMED) g_reclaimed++;
|
||||||
|
if (a->kind == VMSIG_AUDIT_LEASE_DENIED) g_lease_denied++;
|
||||||
|
}
|
||||||
|
/* On RECLAIMED we stop the loop (to end the reap run deterministically): ud=core. */
|
||||||
|
static void reclaim_audit_cb(void* ud, const vmsig_audit* a) {
|
||||||
|
if (a->kind == VMSIG_AUDIT_LEASE_RECLAIMED) {
|
||||||
|
g_reclaimed++;
|
||||||
|
if (ud) vmsig_core_stop((vmsig_core*)ud);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Variant of add_ctl that returns the control's id (via out). */
|
||||||
|
static void* add_ctl_id(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
|
||||||
|
uint32_t arb_prio, uint64_t epmask, int* out_id) {
|
||||||
|
cref* r = cref_new(s, idx);
|
||||||
|
vmsig_inproc_cfg cfg; memset(&cfg, 0, sizeof cfg);
|
||||||
|
cfg.on_event = on_ev; cfg.user = r;
|
||||||
|
cfg.sub.source_mask = 0xFFFFFFFFu; cfg.sub.prio_min = VMSIG_PRIO_BULK;
|
||||||
|
void* ctl = vmsig_inproc_control_new(&cfg);
|
||||||
|
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||||
|
g.endpoint_mask = epmask; g.source_mask = 0xFFFFFFFFu;
|
||||||
|
g.cap_mask = cap | VMSIG_CAP_OBSERVE; g.arb_prio = arb_prio;
|
||||||
|
int id = vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &g);
|
||||||
|
if (out_id) *out_id = id;
|
||||||
|
return ctl;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_reclaim(void) {
|
||||||
|
g_reclaimed = 0;
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
vmsig_core_set_audit(core, reclaim_audit_cb, core); /* RECLAIMED -> stop the loop */
|
||||||
|
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
int a_id = -1;
|
||||||
|
void* A = add_ctl_id(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0, &a_id);
|
||||||
|
void* B = add_ctl_id(core, &s, 1, VMSIG_CAP_INPUT, 5, 1ull << 0, NULL); /* LOW prio */
|
||||||
|
|
||||||
|
acquire(A, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[0].granted == 1, "9: A owns");
|
||||||
|
|
||||||
|
/* A's death: reap frees its slot (RECLAIMED); the audit-cb stops the loop. */
|
||||||
|
core_request_drop(core, a_id);
|
||||||
|
core->stopping = 0; /* white-box: reuse the loop (like pump_n) */
|
||||||
|
vmsig_core_run(core);
|
||||||
|
CHECK(g_reclaimed == 1, "9: audit RECLAIMED on owner death");
|
||||||
|
|
||||||
|
/* the slot is free: B (low prio) takes it without preemption */
|
||||||
|
acquire(B, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[1].granted == 1, "9: B GRANTED after reclaim (slot is free)");
|
||||||
|
|
||||||
|
(void)A; (void)B;
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== 10) in-flight fencing: losing the lease BEFORE pump_down -> drop ===== */
|
||||||
|
/* A owns it, queues CMD_INPUT into ctx (via emit_down -> ctx), then B preempts
|
||||||
|
* SYNCHRONOUSLY (acquire does not go through ctx). By the time pump_down reaches A's CMD_INPUT,
|
||||||
|
* the owner is already B => the fence drops A's command (there must be NO actuation of A). */
|
||||||
|
static void test_inflight_fence(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* A = add_ctl(core, &s, 0, VMSIG_CAP_INPUT, 10, 1ull << 0);
|
||||||
|
void* B = add_ctl(core, &s, 1, VMSIG_CAP_INPUT, 100, 1ull << 0);
|
||||||
|
vmsig_core_add_adapter(core, vmsig_input_ops(), NULL, 0);
|
||||||
|
|
||||||
|
acquire(A, VMSIG_LEASE_INPUT, 0);
|
||||||
|
/* A queues a command into ctx (corr=55) — it passes the gate (A owns it), lands in the DOWN queue */
|
||||||
|
send_key(A, 30, 1, 55, 0);
|
||||||
|
/* B preempts SYNCHRONOUSLY (before the loop has called pump_down) */
|
||||||
|
acquire(B, VMSIG_LEASE_INPUT, 0);
|
||||||
|
/* B sends its own command (corr=66) */
|
||||||
|
send_key(B, 31, 1, 66, 0);
|
||||||
|
send_key(B, 31, 0, 67, 0);
|
||||||
|
|
||||||
|
/* Expected actuations: finalization on preemption (corr=0, but A held nothing by the
|
||||||
|
* moment of preemption — A's down is still in ctx, the held-set is empty => finalize=0 ups),
|
||||||
|
* then B's 66 and 67. A's 55 MUST be dropped by the fence (no corr=55). */
|
||||||
|
s.stop_after_acks = 2; /* B's down + up */
|
||||||
|
vmsig_core_run(core);
|
||||||
|
|
||||||
|
int saw55 = 0;
|
||||||
|
for (int i = 0; i < s.nack; i++) if (s.ack_log[i] == 55) saw55 = 1;
|
||||||
|
CHECK(!saw55, "10: in-flight ex-owner's command dropped by the fence");
|
||||||
|
CHECK(s.cnt[1].granted == 1, "10: B owns after preemption");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ===== lease source gate + audit on acquire-deny =====
|
||||||
|
* Regression: ACQUIRE is intercepted BEFORE grant_allows_down, so source_mask and
|
||||||
|
* audit of the deny paths must be checked/emitted IN THE lease layer ITSELF (otherwise: holding
|
||||||
|
* someone else's slot bypassing source_mask = DoS; audit-invisible probing of caps/endpoints). */
|
||||||
|
/* Variant of add_ctl() with a caller-chosen grant source mask.
 * The subscription still listens to every source; only the grant's
 * source_mask is taken from `source_mask`. Arbitration priority is fixed at
 * 10 and CAP_OBSERVE is always added to `cap`. Returns the control handle. */
static void* add_ctl_src(vmsig_core* core, lease_state* s, int idx, uint32_t cap,
                         uint64_t epmask, uint32_t source_mask) {
    cref* ref = cref_new(s, idx);

    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user            = ref;
    cfg.on_event        = on_ev;
    cfg.sub.source_mask = 0xFFFFFFFFu;
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = epmask;
    grant.source_mask   = source_mask;
    grant.cap_mask      = cap | VMSIG_CAP_OBSERVE;
    grant.arb_prio      = 10;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
    return ctl;
}
|
||||||
|
static void test_audit_and_source(void) {
|
||||||
|
g_lease_denied = 0;
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||||
|
lease_state s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
/* cap=INPUT, but source_mask WITHOUT SRC_INPUT: the INPUT lease cannot be acquired (DoS bypass). */
|
||||||
|
void* NS = add_ctl_src(core, &s, 0, VMSIG_CAP_INPUT, 1ull << 0, ~(1u << VMSIG_SRC_INPUT));
|
||||||
|
int before = g_lease_denied;
|
||||||
|
acquire(NS, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[0].denied == 1, "src: acquire without the SRC_INPUT bit -> DENIED");
|
||||||
|
CHECK(s.cnt[0].last_deny_reason == VMSIG_LEASE_DENY_NOGRANT, "src: reason=NOGRANT");
|
||||||
|
CHECK(g_lease_denied == before + 1, "audit: source-deny emits LEASE_DENIED");
|
||||||
|
|
||||||
|
/* without cap: NOCAP + audit (previously acquire-deny was invisible to the audit). */
|
||||||
|
void* NC = add_ctl_src(core, &s, 1, 0u, 1ull << 0, 0xFFFFFFFFu);
|
||||||
|
before = g_lease_denied;
|
||||||
|
acquire(NC, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[1].last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "src: without cap -> NOCAP");
|
||||||
|
CHECK(g_lease_denied == before + 1, "audit: NOCAP-deny emits LEASE_DENIED");
|
||||||
|
|
||||||
|
/* control case: cap+source -> GRANTED, without a spurious deny audit. */
|
||||||
|
void* OK = add_ctl_src(core, &s, 2, VMSIG_CAP_INPUT, 1ull << 0, 0xFFFFFFFFu);
|
||||||
|
before = g_lease_denied;
|
||||||
|
acquire(OK, VMSIG_LEASE_INPUT, 0); pump_n(&s, 1);
|
||||||
|
CHECK(s.cnt[2].granted == 1, "src: cap+source -> GRANTED");
|
||||||
|
CHECK(g_lease_denied == before, "audit: successful acquire does not emit a deny");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
printf("test_lease\n");
|
||||||
|
test_sync_group();
|
||||||
|
test_policy_group();
|
||||||
|
test_preempt();
|
||||||
|
test_self_reacquire();
|
||||||
|
test_inflight_fence();
|
||||||
|
test_reclaim();
|
||||||
|
test_audit_and_source();
|
||||||
|
cref_free_all();
|
||||||
|
|
||||||
|
printf("lease tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,339 @@
|
|||||||
|
/* test_memctx.c — seam for the coherent address-space context (MEMCTX).
|
||||||
|
*
|
||||||
|
* 1) multicast + RO-fd + decode: a holder with CAP_MEMCTX receives MEMCTX, kcr3,
|
||||||
|
* epoch=0, nseg=1; the received fd mmaps PROT_READ, while PROT_WRITE -> EACCES
|
||||||
|
* (RO physically enforced); the vmsig_memctx_segs helper reconstructs segs[];
|
||||||
|
* a co-resident holder without CAP_MEMCTX does NOT receive it (deny);
|
||||||
|
* 2) epoch: a synthetic destructive VM_LIFECYCLE -> MEMCTX_INVALIDATED ->
|
||||||
|
* re-multicast at epoch+1 with a NEW kcr3;
|
||||||
|
* 3) retain/replay: a LATE subscriber (attached AFTER publication) receives
|
||||||
|
* the retained MEMCTX with a valid re-shared RO-fd (synchronously on add_control);
|
||||||
|
* 4) multi-VM: two endpoints, isolation (a VM holder does not see another's context);
|
||||||
|
* 5) socket E2E: MEMCTX travels as a vmsig_wire frame + RO-fd in cmsg (SCM_RIGHTS), the
|
||||||
|
* client mmaps RO via the received fd.
|
||||||
|
* In-proc (except 5) and under ASAN. SISC: not a single control name in the adapter. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include "vmsig_socket.h" /* vmsig_wire, vmsig_socket_attach */
|
||||||
|
#include "core_internal.h" /* core_emit_up (synthetic lifecycle injection) */
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
/* Sticky failure flag: set by CHECK, never cleared. */
static int g_fail = 0;

/* CHECK(cond, msg): when `cond` is false, print `msg` on a FAIL line and
 * latch g_fail. Execution continues either way. */
#define CHECK(cond, msg)                      \
    do {                                      \
        if (!(cond)) {                        \
            printf(" FAIL: %s\n", (msg));     \
            g_fail = 1;                       \
        }                                     \
    } while (0)
|
||||||
|
|
||||||
|
/* ===== in-proc holder ===== */
|
||||||
|
typedef struct holder holder;
|
||||||
|
struct holder {
|
||||||
|
vmsig_core* core;
|
||||||
|
holder* peer; /* multi-VM: stop when both are ready (or NULL) */
|
||||||
|
int is_driver; /* stops the loop on a condition */
|
||||||
|
uint32_t expect_ep;
|
||||||
|
int memctx, invalidated, ticks, bad_ep;
|
||||||
|
uint64_t last_kcr3, kcr3_e0;
|
||||||
|
uint32_t last_epoch, last_nseg;
|
||||||
|
int ro_ok, rw_eacces, seg0_ok;
|
||||||
|
int inject_reset, injected;
|
||||||
|
int stop_epoch; /* stop when last_epoch>=stop_epoch (-1 = else) */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Stop the core loop when this holder's stop condition is met.
 * Only driver holders ever stop the loop. Conditions, in priority order:
 *   - failsafe: more than 30 lifecycle ticks seen;
 *   - stop_epoch >= 0: at least one context received and last_epoch has
 *     reached stop_epoch;
 *   - a peer is set: both this holder and the peer have a context;
 *   - otherwise: this holder has a context. */
static void maybe_stop(holder* h) {
    if (!h->is_driver) {
        return;
    }

    const int have_ctx = (h->memctx >= 1);
    int done;

    if (h->ticks > 30) {
        done = 1;                                   /* failsafe (vmhost ticks) */
    } else if (h->stop_epoch >= 0) {
        done = ((int)h->last_epoch >= h->stop_epoch) && have_ctx;
    } else if (h->peer) {
        done = have_ctx && (h->peer->memctx >= 1);
    } else {
        done = have_ctx;
    }

    if (done) {
        vmsig_core_stop(h->core);
    }
}
|
||||||
|
|
||||||
|
static int h_on_ev(void* u, const vmsig_event* ev) {
|
||||||
|
holder* h = u;
|
||||||
|
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) h->ticks++;
|
||||||
|
else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) h->invalidated++;
|
||||||
|
maybe_stop(h);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int h_on_memctx(void* u, const vmsig_event* ev, int fd) {
|
||||||
|
holder* h = u;
|
||||||
|
const vmsig_memctx* m = (const vmsig_memctx*)ev->inln;
|
||||||
|
h->memctx++;
|
||||||
|
if (ev->endpoint != h->expect_ep) h->bad_ep++;
|
||||||
|
h->last_kcr3 = m->kcr3; h->last_epoch = m->epoch;
|
||||||
|
if (m->epoch == 0) h->kcr3_e0 = m->kcr3;
|
||||||
|
|
||||||
|
uint32_t n = 0;
|
||||||
|
const vmsig_memseg* segs = vmsig_memctx_segs(ev, &n);
|
||||||
|
h->last_nseg = n;
|
||||||
|
if (segs && n >= 1 && segs[0].gpa == 0 && segs[0].len == m->low) h->seg0_ok = 1;
|
||||||
|
|
||||||
|
if (fd >= 0 && m->low) {
|
||||||
|
void* ro = mmap(NULL, (size_t)m->low, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
|
if (ro != MAP_FAILED) { h->ro_ok = 1; munmap(ro, (size_t)m->low); }
|
||||||
|
void* rw = mmap(NULL, (size_t)m->low, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||||
|
if (rw == MAP_FAILED) h->rw_eacces = 1; else munmap(rw, (size_t)m->low);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* epoch test: on the first context (epoch0) inject a destructive transition. */
|
||||||
|
if (h->inject_reset && !h->injected && m->epoch == 0) {
|
||||||
|
h->injected = 1;
|
||||||
|
vmsig_event lc; memset(&lc, 0, sizeof lc);
|
||||||
|
lc.kind = VMSIG_EV_VM_LIFECYCLE; lc.source = VMSIG_SRC_VMHOST; lc.dir = VMSIG_DIR_UP;
|
||||||
|
lc.prio = VMSIG_PRIO_URGENT; lc.endpoint = h->expect_ep; lc.origin = 0;
|
||||||
|
vmsig_vm_state vs = { VMSIG_VM_RESET, 0 };
|
||||||
|
memcpy(lc.inln, &vs, sizeof vs);
|
||||||
|
core_emit_up(h->core, &lc); /* core: epoch++ + invalidate + re-multicast */
|
||||||
|
}
|
||||||
|
maybe_stop(h);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Attach holder `h` to `core` as an in-process control.
 * Both callbacks (generic events and MEMCTX) point at `h`; the grant uses
 * principal 5 together with the given capability, source and endpoint masks. */
static void add_holder(vmsig_core* core, holder* h, uint32_t cap,
                       uint32_t source_mask, uint64_t endpoint_mask) {
    vmsig_inproc_cfg cfg;
    memset(&cfg, 0, sizeof cfg);
    cfg.user      = h;
    cfg.on_event  = h_on_ev;
    cfg.on_memctx = h_on_memctx;
    void* ctl = vmsig_inproc_control_new(&cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.principal     = 5;
    grant.cap_mask      = cap;
    grant.source_mask   = source_mask;
    grant.endpoint_mask = endpoint_mask;

    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);
}
|
||||||
|
|
||||||
|
/* ---- 1. multicast + RO-fd + decode + deny ---------------------------------- */
|
||||||
|
static void test_multicast(void) {
|
||||||
|
printf("test_multicast\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
holder good; memset(&good, 0, sizeof good);
|
||||||
|
good.core = core; good.is_driver = 1; good.expect_ep = 0; good.stop_epoch = -1;
|
||||||
|
holder deny; memset(&deny, 0, sizeof deny);
|
||||||
|
deny.core = core; deny.expect_ep = 0; deny.stop_epoch = -1;
|
||||||
|
|
||||||
|
add_holder(core, &good, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||||
|
add_holder(core, &deny, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0); /* no MEMCTX */
|
||||||
|
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||||
|
|
||||||
|
vmsig_core_run(core);
|
||||||
|
|
||||||
|
CHECK(good.memctx >= 1, "GOOD received MEMCTX");
|
||||||
|
CHECK(good.last_kcr3 != 0, "kcr3 nonzero");
|
||||||
|
CHECK(good.last_epoch == 0, "first publication is epoch 0");
|
||||||
|
CHECK(good.last_nseg == 1, "nseg=1 (single-low)");
|
||||||
|
CHECK(good.seg0_ok, "segs[] decoded by the helper (gpa=0,len=low)");
|
||||||
|
CHECK(good.ro_ok, "RO-fd: mmap(PROT_READ) ok");
|
||||||
|
CHECK(good.rw_eacces, "RO-fd: mmap(PROT_WRITE) -> EACCES (RO enforced)");
|
||||||
|
CHECK(good.bad_ep == 0, "delivery endpoint is correct");
|
||||||
|
CHECK(deny.memctx == 0, "deny without CAP_MEMCTX does NOT receive MEMCTX");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 2. epoch: invalidation + re-multicast epoch+1 ------------------------- */
|
||||||
|
static void test_epoch(void) {
|
||||||
|
printf("test_epoch\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
holder h; memset(&h, 0, sizeof h);
|
||||||
|
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.inject_reset = 1; h.stop_epoch = 1;
|
||||||
|
|
||||||
|
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||||
|
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||||
|
|
||||||
|
vmsig_core_run(core);
|
||||||
|
|
||||||
|
CHECK(h.memctx >= 2, "contexts for epochs 0 and 1 received");
|
||||||
|
CHECK(h.invalidated >= 1, "MEMCTX_INVALIDATED delivered on epoch change");
|
||||||
|
CHECK(h.last_epoch == 1, "re-multicast at epoch+1");
|
||||||
|
CHECK(h.kcr3_e0 != 0 && h.last_kcr3 != 0 && h.last_kcr3 != h.kcr3_e0,
|
||||||
|
"new kcr3 after re-bootstrap (epoch 1 kcr3 != epoch 0)");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 3. retain/replay to a late subscriber --------------------------------- */
|
||||||
|
static void test_retain(void) {
|
||||||
|
printf("test_retain\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
holder a; memset(&a, 0, sizeof a);
|
||||||
|
a.core = core; a.is_driver = 1; a.expect_ep = 0; a.stop_epoch = -1;
|
||||||
|
|
||||||
|
add_holder(core, &a, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||||
|
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||||
|
|
||||||
|
vmsig_core_run(core); /* A receives MEMCTX, loop stopped */
|
||||||
|
CHECK(a.memctx >= 1, "early subscriber A received MEMCTX");
|
||||||
|
|
||||||
|
/* LATE subscriber B: attaches AFTER publication. Replay of the retained context
|
||||||
|
* happens SYNCHRONOUSLY in add_control (cell valid) — without a second loop run. */
|
||||||
|
holder b; memset(&b, 0, sizeof b);
|
||||||
|
b.core = core; b.expect_ep = 0; b.stop_epoch = -1;
|
||||||
|
add_holder(core, &b, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0);
|
||||||
|
|
||||||
|
CHECK(b.memctx >= 1, "late subscriber B received the retained MEMCTX (replay)");
|
||||||
|
CHECK(b.last_kcr3 != 0, "B: kcr3 nonzero in the replay");
|
||||||
|
CHECK(b.ro_ok, "B: re-shared RO-fd mmaps PROT_READ");
|
||||||
|
CHECK(b.rw_eacces, "B: re-shared fd is RO (PROT_WRITE -> EACCES)");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 4. multi-VM: endpoint isolation --------------------------------------- */
|
||||||
|
static void test_multivm(void) {
|
||||||
|
printf("test_multivm\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
holder h0; memset(&h0, 0, sizeof h0);
|
||||||
|
holder h1; memset(&h1, 0, sizeof h1);
|
||||||
|
h0.core = core; h0.is_driver = 1; h0.expect_ep = 0; h0.stop_epoch = -1; h0.peer = &h1;
|
||||||
|
h1.core = core; h1.is_driver = 1; h1.expect_ep = 1; h1.stop_epoch = -1; h1.peer = &h0;
|
||||||
|
|
||||||
|
/* each holder is scoped to its OWN endpoint (+OBSERVE for watchdog lifecycle ticks on ep0). */
|
||||||
|
add_holder(core, &h0, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||||
|
add_holder(core, &h1, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 1);
|
||||||
|
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost ep0 (watchdog)");
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx ep0");
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 1) >= 0, "add memctx ep1");
|
||||||
|
|
||||||
|
vmsig_core_run(core);
|
||||||
|
|
||||||
|
CHECK(h0.memctx >= 1 && h0.bad_ep == 0, "VM0 receives ONLY its own context (ep0)");
|
||||||
|
CHECK(h1.memctx >= 1 && h1.bad_ep == 0, "VM1 receives ONLY its own context (ep1)");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 5. socket end-to-end: MEMCTX frame + fd in cmsg ----------------------- */
|
||||||
|
#define SOCK_EP 3u
|
||||||
|
static vmsig_grant sock_policy(uint32_t uid, uint32_t pid, void* ud) {
|
||||||
|
(void)pid; (void)ud;
|
||||||
|
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||||
|
g.principal = uid; g.endpoint_mask = 1ull << SOCK_EP;
|
||||||
|
g.source_mask = 0xFFFFFFFFu; g.cap_mask = VMSIG_CAP_MEMCTX;
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
|
||||||
|
|
||||||
|
/* Connect a stream socket to a Linux abstract-namespace unix address.
 *
 * `name` is given in the "@name" notation: its first character stands for the
 * leading NUL byte of the abstract address and is not copied; the remaining
 * characters form the address. The address length passed to connect() covers
 * exactly those bytes, with no trailing NUL.
 *
 * Returns the connected fd, or -1 when `name` is NULL, empty, too long to fit
 * in sun_path, or when socket()/connect() fails. The length is validated
 * before anything is copied: an unchecked copy would overflow sun_path for a
 * long name and underflow `n - 1` for an empty one. */
static int connect_abstract(const char* name) {
    struct sockaddr_un a;

    if (name == NULL) return -1;
    size_t n = strlen(name);
    /* One byte for the leading NUL plus n - 1 name bytes must fit in sun_path. */
    if (n == 0 || n > sizeof a.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0;                          /* abstract namespace marker */
    memcpy(a.sun_path + 1, name + 1, n - 1);

    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    if (connect(fd, (struct sockaddr*)&a, alen) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
|
||||||
|
|
||||||
|
/* Read ONE 80-byte vmsig_wire frame; the adjacent fd (cmsg) -> into *out_fd. */
|
||||||
|
static int recv_wire(int fd, vmsig_wire* w, int* out_fd) {
|
||||||
|
*out_fd = -1;
|
||||||
|
struct iovec iov = { .iov_base = w, .iov_len = sizeof *w };
|
||||||
|
union { char buf[CMSG_SPACE(sizeof(int))]; struct cmsghdr a; } cm;
|
||||||
|
memset(&cm, 0, sizeof cm);
|
||||||
|
struct msghdr mh; memset(&mh, 0, sizeof mh);
|
||||||
|
mh.msg_iov = &iov; mh.msg_iovlen = 1;
|
||||||
|
mh.msg_control = cm.buf; mh.msg_controllen = sizeof cm.buf;
|
||||||
|
size_t got = 0;
|
||||||
|
while (got < sizeof *w) {
|
||||||
|
iov.iov_base = (char*)w + got; iov.iov_len = sizeof *w - got;
|
||||||
|
ssize_t n = recvmsg(fd, &mh, MSG_CMSG_CLOEXEC);
|
||||||
|
if (n <= 0) return (got == 0) ? 0 : -1;
|
||||||
|
for (struct cmsghdr* c = CMSG_FIRSTHDR(&mh); c; c = CMSG_NXTHDR(&mh, c))
|
||||||
|
if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS)
|
||||||
|
memcpy(out_fd, CMSG_DATA(c), sizeof(int));
|
||||||
|
got += (size_t)n;
|
||||||
|
mh.msg_control = NULL; mh.msg_controllen = 0; /* fd only on the first recvmsg */
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_socket(void) {
|
||||||
|
printf("test_socket\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, SOCK_EP) >= 0, "add memctx");
|
||||||
|
const char* SOCK = "@vmsig-memctx-e2e";
|
||||||
|
CHECK(vmsig_socket_attach(core, SOCK, sock_policy, NULL) == 0, "socket attach");
|
||||||
|
|
||||||
|
pthread_t th; pthread_create(&th, NULL, loop_main, core);
|
||||||
|
|
||||||
|
int c = connect_abstract(SOCK);
|
||||||
|
CHECK(c >= 0, "client connected");
|
||||||
|
if (c < 0) { vmsig_core_stop(core); pthread_join(th, NULL); vmsig_core_free(core); vmsig_ctx_free(ctx); return; }
|
||||||
|
|
||||||
|
struct timeval tv = { .tv_sec = 3, .tv_usec = 0 };
|
||||||
|
setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
|
||||||
|
|
||||||
|
int got_ctx = 0, ro_fd = -1, ro_ok = 0, rw_eacces = 0;
|
||||||
|
vmsig_memctx pod; memset(&pod, 0, sizeof pod);
|
||||||
|
for (int iter = 0; iter < 20 && !got_ctx; iter++) {
|
||||||
|
vmsig_wire w; int wfd = -1;
|
||||||
|
int r = recv_wire(c, &w, &wfd);
|
||||||
|
if (r != 1) break;
|
||||||
|
if (w.kind == VMSIG_EV_MEMCTX) {
|
||||||
|
got_ctx = 1; ro_fd = wfd;
|
||||||
|
memcpy(&pod, w.inln, sizeof pod);
|
||||||
|
CHECK(ro_fd >= 0, "MEMCTX frame carries an RO-fd in cmsg");
|
||||||
|
CHECK(pod.flags & VMSIG_MEMCTX_RDONLY, "RDONLY flag is set");
|
||||||
|
if (ro_fd >= 0 && pod.low) {
|
||||||
|
void* ro = mmap(NULL, (size_t)pod.low, PROT_READ, MAP_SHARED, ro_fd, 0);
|
||||||
|
if (ro != MAP_FAILED) { ro_ok = 1; munmap(ro, (size_t)pod.low); }
|
||||||
|
void* rw = mmap(NULL, (size_t)pod.low, PROT_READ | PROT_WRITE, MAP_SHARED, ro_fd, 0);
|
||||||
|
if (rw == MAP_FAILED) rw_eacces = 1; else munmap(rw, (size_t)pod.low);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CHECK(got_ctx == 1, "MEMCTX frame arrived over the socket (wire framing)");
|
||||||
|
CHECK(ro_ok, "mmap RO via the received fd");
|
||||||
|
CHECK(rw_eacces, "write-mmap via the received fd fails (RO)");
|
||||||
|
|
||||||
|
if (ro_fd >= 0) close(ro_fd);
|
||||||
|
close(c);
|
||||||
|
vmsig_core_stop(core);
|
||||||
|
pthread_join(th, NULL);
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
test_multicast();
|
||||||
|
test_epoch();
|
||||||
|
test_retain();
|
||||||
|
test_multivm();
|
||||||
|
test_socket();
|
||||||
|
printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,227 @@
|
|||||||
|
/* test_memwrite.c — write-signaled seam (MEMWRITE): atomic guest-memory write under an
|
||||||
|
* exclusive lease. Stub mode (no VM): proves the full path cap -> grant -> lease-gate ->
|
||||||
|
* route -> adapter -> ACT_ACK without actuation, plus the default-deny and fencing
|
||||||
|
* invariants. The adapter never sees a control name (SISC).
|
||||||
|
*
|
||||||
|
* 1) happy path: CAP_MEMWRITE + a MEMWRITE lease -> CMD_MEMWRITE -> ACT_ACK{ok=1};
|
||||||
|
* 2) extent default-deny: len > VMSIG_MEMWRITE_MAX and a missing SRC flag -> ACK{ok=0};
|
||||||
|
* 3) lease gate: CMD_MEMWRITE WITHOUT an acquired lease -> dropped at the gate (no ACK);
|
||||||
|
* 4) cap gate: a control WITHOUT CAP_MEMWRITE cannot acquire the lease (DENIED{NOCAP});
|
||||||
|
* 5) in-flight fence: A holds the lease, queues a write, B preempts SYNCHRONOUSLY -> A's
|
||||||
|
* queued write is dropped by the fence (no ACK for A's corr), B's write actuates.
|
||||||
|
* In-proc, under ASAN. */
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include "memctx.h" /* VMSIG_MEMWRITE_MAX: the adapter's extent bound (private) */
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(cond, msg) do { \
|
||||||
|
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/* ---- in-proc control: records lease replies + MEMWRITE ACKs ---- */
|
||||||
|
/* Recorder shared by the in-proc test controls: counts lease replies and
 * stores every MEMWRITE ACK (up to 64) in arrival order. */
typedef struct {
    void*    core;              /* loop to stop; cast back to vmsig_core* by users */
    int      granted;           /* LEASE_GRANTED events seen */
    int      denied;            /* LEASE_DENIED events seen */
    int      last_deny_reason;  /* reason field of the most recent denial */
    int      ack_ok[64];        /* ok flag per ACK in arrival order */
    uint32_t ack_corr[64];      /* corr per ACK */
    int      nack;              /* number of ACKs recorded so far */
    int      stop_replies;      /* stop the loop after N lease replies (0=off) */
    int      replies;           /* lease replies (granted + denied) seen so far */
    int      stop_acks;         /* stop the loop after N acks (0=off) */
} cstate;
|
||||||
|
|
||||||
|
typedef struct { cstate* s; } cref;
|
||||||
|
static cref* g_refs[16]; static int g_nrefs = 0;
|
||||||
|
/* Allocate a cref pointing at `s` and remember it in g_refs so that
 * cref_free_all() can release it at the end of the run.
 * Aborts on allocation failure instead of dereferencing NULL.
 * NOTE: only the first 16 refs are tracked; any further ones are still
 * returned but will not be freed by cref_free_all(). */
static cref* cref_new(cstate* s) {
    cref* r = calloc(1, sizeof *r);
    if (!r) {
        printf("cref_new: out of memory\n");
        abort();
    }
    r->s = s;
    if (g_nrefs < 16) g_refs[g_nrefs++] = r;
    return r;
}
|
||||||
|
/* Free every cref tracked in g_refs and reset the count to zero. */
static void cref_free_all(void) {
    while (g_nrefs > 0)
        free(g_refs[--g_nrefs]);
}
|
||||||
|
|
||||||
|
static int on_ev(void* user, const vmsig_event* ev) {
|
||||||
|
cref* r = user; cstate* s = r->s;
|
||||||
|
switch (ev->kind) {
|
||||||
|
case VMSIG_EV_LEASE_GRANTED: s->granted++; s->replies++; break;
|
||||||
|
case VMSIG_EV_LEASE_DENIED:
|
||||||
|
s->denied++;
|
||||||
|
s->last_deny_reason = (int)((const vmsig_lease_req*)ev->inln)->reason;
|
||||||
|
s->replies++;
|
||||||
|
break;
|
||||||
|
case VMSIG_EV_ACT_ACK:
|
||||||
|
if (s->nack < 64) {
|
||||||
|
/* inln layout from mc_memwrite_ack: {int ok; uint32_t corr; uint32_t origin}. */
|
||||||
|
int ok; memcpy(&ok, ev->inln, sizeof ok);
|
||||||
|
s->ack_ok[s->nack] = ok;
|
||||||
|
s->ack_corr[s->nack] = ev->corr;
|
||||||
|
s->nack++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
if (s->stop_replies && s->replies >= s->stop_replies) vmsig_core_stop(s->core);
|
||||||
|
if (s->stop_acks && s->nack >= s->stop_acks) vmsig_core_stop(s->core);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create an in-proc control that reports into `s` and register it with `core`.
 * The control subscribes to all sources from VMSIG_PRIO_BULK upward; its grant
 * covers endpoint 0, all sources, `cap` plus OBSERVE, with arbitration
 * priority `arb_prio`. Returns the control handle. */
static void* add_ctl(vmsig_core* core, cstate* s, uint32_t cap, uint32_t arb_prio) {
    cref* ref = cref_new(s);
    vmsig_inproc_cfg cfg;
    vmsig_grant grant;

    memset(&cfg, 0, sizeof cfg);
    cfg.on_event        = on_ev;
    cfg.user            = ref;
    cfg.sub.source_mask = 0xFFFFFFFFu;
    cfg.sub.prio_min    = VMSIG_PRIO_BULK;
    void* ctl = vmsig_inproc_control_new(&cfg);

    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = 1ull << 0;
    grant.source_mask   = 0xFFFFFFFFu;
    grant.cap_mask      = cap | VMSIG_CAP_OBSERVE;
    grant.arb_prio      = arb_prio;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);

    return ctl;
}
|
||||||
|
|
||||||
|
/* ---- DOWN send helpers ---- */
|
||||||
|
static int acquire_mw(void* ctl) {
|
||||||
|
vmsig_event d; memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_ACQUIRE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH;
|
||||||
|
vmsig_lease_req lr = { VMSIG_LEASE_MEMWRITE, 0 };
|
||||||
|
memcpy(d.inln, &lr, sizeof lr);
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CMD_MEMWRITE with inline SRC; corr for tracking. flags: VMSIG_MW_SRC_* (0 => no SRC). */
|
||||||
|
static int send_write(void* ctl, uint64_t gva, uint32_t len, uint32_t flags,
|
||||||
|
const void* src, uint32_t corr) {
|
||||||
|
vmsig_event d; memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_MEMWRITE; d.source = VMSIG_SRC_MEMCTX; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = 0; d.prio = VMSIG_PRIO_HIGH; d.corr = corr;
|
||||||
|
vmsig_memwrite mw = { gva, len, flags };
|
||||||
|
memcpy(d.inln, &mw, sizeof mw);
|
||||||
|
if ((flags & VMSIG_MW_SRC_INLINE) && src && len <= VMSIG_MEMWRITE_INLINE)
|
||||||
|
memcpy(d.inln + sizeof mw, src, len);
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Run the loop until N acks (used after queuing actuated writes). */
|
||||||
|
static void run_until_acks(cstate* s, int n) {
|
||||||
|
vmsig_core* c = (vmsig_core*)s->core;
|
||||||
|
s->stop_acks = n; s->stop_replies = 0;
|
||||||
|
vmsig_core_run(c);
|
||||||
|
s->stop_acks = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 1+2+3: happy path, extent default-deny, lease gate -------------------- */
|
||||||
|
static void test_path_and_deny(void) {
|
||||||
|
printf("test_path_and_deny\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
cstate s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10);
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||||
|
|
||||||
|
/* 3) lease gate: without ACQUIRE the write is dropped at the gate (-1, no actuation). */
|
||||||
|
uint8_t pat[8] = { 0xDE, 0xAD, 0xBE, 0xEF, 1, 2, 3, 4 };
|
||||||
|
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 99) == -1,
|
||||||
|
"3: CMD_MEMWRITE without a lease is dropped by the gate");
|
||||||
|
|
||||||
|
/* acquire the MEMWRITE lease (synchronous intercept; UP reply paced by ctx). */
|
||||||
|
CHECK(acquire_mw(A) == 0, "acquire submitted");
|
||||||
|
|
||||||
|
/* 1) happy path: inline write -> queued -> ACT_ACK{ok=1}. Also drains the GRANTED reply. */
|
||||||
|
CHECK(send_write(A, 0x1000, 8, VMSIG_MW_SRC_INLINE, pat, 11) == 0,
|
||||||
|
"1: owner's CMD_MEMWRITE passes the gate");
|
||||||
|
|
||||||
|
/* 2) extent: len > MAX -> ACK{ok=0}, NOT actuated (queued ack on the loop thread). */
|
||||||
|
CHECK(send_write(A, 0x2000, VMSIG_MEMWRITE_MAX + 1, VMSIG_MW_SRC_INLINE, pat, 22) == 0,
|
||||||
|
"2: over-extent write is accepted by the gate (denied inside the adapter)");
|
||||||
|
/* 2b) missing SRC flag -> ACK{ok=0}. */
|
||||||
|
CHECK(send_write(A, 0x3000, 4, 0u, NULL, 33) == 0,
|
||||||
|
"2b: no-SRC-flag write is accepted by the gate (denied inside the adapter)");
|
||||||
|
|
||||||
|
/* expect 3 ACKs (corr 11/22/33) + the GRANTED reply. */
|
||||||
|
run_until_acks(&s, 3);
|
||||||
|
|
||||||
|
CHECK(s.granted == 1, "lease GRANTED once");
|
||||||
|
int saw11_ok = -1, saw22_ok = -1, saw33_ok = -1, saw99 = 0;
|
||||||
|
for (int i = 0; i < s.nack; i++) {
|
||||||
|
if (s.ack_corr[i] == 11) saw11_ok = s.ack_ok[i];
|
||||||
|
if (s.ack_corr[i] == 22) saw22_ok = s.ack_ok[i];
|
||||||
|
if (s.ack_corr[i] == 33) saw33_ok = s.ack_ok[i];
|
||||||
|
if (s.ack_corr[i] == 99) saw99 = 1;
|
||||||
|
}
|
||||||
|
CHECK(saw11_ok == 1, "1: happy-path write ACKs ok=1 (stub)");
|
||||||
|
CHECK(saw22_ok == 0, "2: over-extent write ACKs ok=0 (default-deny)");
|
||||||
|
CHECK(saw33_ok == 0, "2b: no-SRC-flag write ACKs ok=0 (default-deny)");
|
||||||
|
CHECK(!saw99, "3: the gate-dropped write produced no ACK");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 4: cap gate — no CAP_MEMWRITE cannot acquire the lease ----------------- */
|
||||||
|
static void test_cap_gate(void) {
|
||||||
|
printf("test_cap_gate\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
cstate s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* NC = add_ctl(core, &s, 0u /* no MEMWRITE */, 10);
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||||
|
|
||||||
|
CHECK(acquire_mw(NC) == 0, "acquire submitted");
|
||||||
|
s.stop_replies = 1; vmsig_core_run(core); s.stop_replies = 0;
|
||||||
|
|
||||||
|
CHECK(s.denied == 1, "4: acquire without CAP_MEMWRITE -> DENIED");
|
||||||
|
CHECK(s.last_deny_reason == VMSIG_LEASE_DENY_NOCAP, "4: reason=NOCAP");
|
||||||
|
CHECK(s.granted == 0, "4: not granted");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---- 5: in-flight fence — losing the lease before pump_down drops the write -- */
|
||||||
|
static void test_inflight_fence(void) {
|
||||||
|
printf("test_inflight_fence\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
cstate s; memset(&s, 0, sizeof s); s.core = core;
|
||||||
|
|
||||||
|
void* A = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 10);
|
||||||
|
void* B = add_ctl(core, &s, VMSIG_CAP_MEMWRITE, 100); /* higher prio: preempts */
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx");
|
||||||
|
|
||||||
|
uint8_t pat[4] = { 1, 2, 3, 4 };
|
||||||
|
CHECK(acquire_mw(A) == 0, "A acquires");
|
||||||
|
/* A queues a write (corr=55): passes the gate (A owns), lands in the DOWN queue. */
|
||||||
|
CHECK(send_write(A, 0x1000, 4, VMSIG_MW_SRC_INLINE, pat, 55) == 0, "A queues write 55");
|
||||||
|
/* B preempts SYNCHRONOUSLY (acquire does not go through ctx). */
|
||||||
|
CHECK(acquire_mw(B) == 0, "B preempts");
|
||||||
|
/* B's own write (corr=66) — should actuate. */
|
||||||
|
CHECK(send_write(B, 0x2000, 4, VMSIG_MW_SRC_INLINE, pat, 66) == 0, "B queues write 66");
|
||||||
|
|
||||||
|
run_until_acks(&s, 1); /* B's 66 acks; A's 55 must be fenced (no ack) */
|
||||||
|
|
||||||
|
int saw55 = 0, saw66 = 0;
|
||||||
|
for (int i = 0; i < s.nack; i++) {
|
||||||
|
if (s.ack_corr[i] == 55) saw55 = 1;
|
||||||
|
if (s.ack_corr[i] == 66) saw66 = 1;
|
||||||
|
}
|
||||||
|
CHECK(!saw55, "5: ex-owner A's in-flight write is dropped by the fence");
|
||||||
|
CHECK(saw66, "5: new owner B's write actuates after preemption");
|
||||||
|
CHECK(s.granted == 2, "5: A and B each got GRANTED");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
printf("test_memwrite\n");
|
||||||
|
test_path_and_deny();
|
||||||
|
test_cap_gate();
|
||||||
|
test_inflight_fence();
|
||||||
|
cref_free_all();
|
||||||
|
printf("memwrite tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
/* test_mvm.c — mode A (single core, multiple VMs): per-endpoint multiplexing and
|
||||||
|
* per-VM grant scoping on UP delivery. Two vmhost endpoints in one core (each stub
|
||||||
|
* ticks VM_LIFECYCLE per endpoint); a poller granted only VM0 must see only ep0
|
||||||
|
* lifecycle events, the VM1 poller — only ep1. */
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(cond, msg) do { \
|
||||||
|
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/* Shared tally for the two pollers. */
typedef struct {
    vmsig_core* core;       /* loop to stop once both pollers have seen enough */
    int         seen[2][2]; /* seen[control][endpoint] */
} mvm;

/* Per-control callback context: the shared tally plus this control's row. */
typedef struct {
    mvm* d;
    int  which;
} ctlref;
|
||||||
|
|
||||||
|
static int on_ev(void* user, const vmsig_event* ev) {
|
||||||
|
ctlref* r = user; mvm* d = r->d;
|
||||||
|
if (ev->kind == VMSIG_EV_VM_LIFECYCLE && ev->endpoint < 2)
|
||||||
|
d->seen[r->which][ev->endpoint]++;
|
||||||
|
if (d->seen[0][0] >= 2 && d->seen[1][1] >= 2) vmsig_core_stop(d->core);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
mvm d; memset(&d, 0, sizeof d); d.core = core;
|
||||||
|
ctlref r0 = { &d, 0 }, r1 = { &d, 1 };
|
||||||
|
|
||||||
|
vmsig_inproc_cfg c0; memset(&c0, 0, sizeof c0); c0.on_event = on_ev; c0.user = &r0;
|
||||||
|
vmsig_inproc_cfg c1; memset(&c1, 0, sizeof c1); c1.on_event = on_ev; c1.user = &r1;
|
||||||
|
void* ctl0 = vmsig_inproc_control_new(&c0);
|
||||||
|
void* ctl1 = vmsig_inproc_control_new(&c1);
|
||||||
|
|
||||||
|
/* grants segregate the pollers per VM */
|
||||||
|
vmsig_grant g0; memset(&g0, 0, sizeof g0);
|
||||||
|
g0.endpoint_mask = 1ull << 0; g0.source_mask = 0xFFFFFFFFu; g0.cap_mask = VMSIG_CAP_OBSERVE;
|
||||||
|
vmsig_grant g1; memset(&g1, 0, sizeof g1);
|
||||||
|
g1.endpoint_mask = 1ull << 1; g1.source_mask = 0xFFFFFFFFu; g1.cap_mask = VMSIG_CAP_OBSERVE;
|
||||||
|
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl0, &g0);
|
||||||
|
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl1, &g1);
|
||||||
|
|
||||||
|
/* two VMs in one core: a vmhost adapter per endpoint (stub ticks VM_LIFECYCLE) */
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "VM0 adapter");
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 1) >= 0, "VM1 adapter");
|
||||||
|
|
||||||
|
int rc = vmsig_core_run(core);
|
||||||
|
printf("test_mvm rc=%d c0[ep0=%d ep1=%d] c1[ep0=%d ep1=%d]\n",
|
||||||
|
rc, d.seen[0][0], d.seen[0][1], d.seen[1][0], d.seen[1][1]);
|
||||||
|
|
||||||
|
CHECK(d.seen[0][0] >= 2, "control0 sees lifecycle of its own VM0");
|
||||||
|
CHECK(d.seen[0][1] == 0, "control0 does NOT see VM1 (grant scoping)");
|
||||||
|
CHECK(d.seen[1][1] >= 2, "control1 sees lifecycle of its own VM1");
|
||||||
|
CHECK(d.seen[1][0] == 0, "control1 does NOT see VM0");
|
||||||
|
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
printf("multi-vm tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,121 @@
|
|||||||
|
/* test_sec.c — security layer: grant enforcement on DOWN commands.
|
||||||
|
* Checks capability split (OBSERVE != INPUT != POWER != VM), source_mask
|
||||||
|
* on DOWN, destructive vs safe lifecycle/VM, foreign endpoint, default-deny.
|
||||||
|
* (Memory is no longer a DOWN command: the address-space context is multicast
|
||||||
|
* upward and gated by CAP_MEMCTX — see test_authz/test_memctx; here only DOWN
|
||||||
|
* actuation.) vmsig_inproc_send returns the result of core_emit_down (the grant of
|
||||||
|
* THIS specific control) — no need to run the loop. */
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(cond, msg) do { \
|
||||||
|
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static int g_denied = 0;
|
||||||
|
static void audit_cb(void* ud, const vmsig_audit* a) {
|
||||||
|
(void)ud;
|
||||||
|
if (a->kind == VMSIG_AUDIT_DOWN_DENIED) g_denied++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* DOWN command of kind on endpoint ep; source derived from kind */
|
||||||
|
static int send(void* ctl, vmsig_kind kind, uint32_t ep) {
|
||||||
|
vmsig_event d;
|
||||||
|
memset(&d, 0, sizeof d);
|
||||||
|
d.kind = kind; d.dir = VMSIG_DIR_DOWN; d.endpoint = ep; d.prio = VMSIG_PRIO_NORMAL;
|
||||||
|
d.source = (kind == VMSIG_EV_CMD_INPUT || kind == VMSIG_EV_CMD_LIFECYCLE) ? VMSIG_SRC_INPUT
|
||||||
|
: VMSIG_SRC_VMHOST;
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
/* CMD_LIFECYCLE with a specific operation (code in inln[0]) */
|
||||||
|
static int send_life(void* ctl, int op, uint32_t ep) {
|
||||||
|
vmsig_event d;
|
||||||
|
memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_LIFECYCLE; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = ep; d.prio = VMSIG_PRIO_NORMAL; d.inln[0] = (uint8_t)op;
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
/* Acquire a lease of class cls on ep (destructive/input now requires a lease). */
|
||||||
|
static int acq(void* ctl, uint32_t cls, uint32_t ep) {
|
||||||
|
vmsig_event d;
|
||||||
|
memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_ACQUIRE; d.source = VMSIG_SRC_INPUT; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = ep; d.prio = VMSIG_PRIO_HIGH;
|
||||||
|
vmsig_lease_req lr = { cls, 0 };
|
||||||
|
memcpy(d.inln, &lr, sizeof lr);
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CMD_VM with an operation (vmsig_vm_cmd in inln) */
|
||||||
|
static int send_vm(void* ctl, int op, uint32_t ep) {
|
||||||
|
vmsig_event d;
|
||||||
|
memset(&d, 0, sizeof d);
|
||||||
|
d.kind = VMSIG_EV_CMD_VM; d.source = VMSIG_SRC_VMHOST; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.endpoint = ep; d.prio = VMSIG_PRIO_NORMAL;
|
||||||
|
vmsig_vm_cmd c = { (uint32_t)op };
|
||||||
|
memcpy(d.inln, &c, sizeof c);
|
||||||
|
return vmsig_inproc_send(ctl, &d);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create an in-proc control with no event callback and no subscription (the
 * test needs no UP traffic) and register it with a grant for endpoint 0 using
 * the given capability and source masks. Returns the control handle. */
static void* add_ctl(vmsig_core* core, uint32_t cap, uint32_t source_mask) {
    vmsig_inproc_cfg cfg;
    vmsig_grant grant;

    memset(&cfg, 0, sizeof cfg);    /* on_event=NULL, sub=0 */
    void* ctl = vmsig_inproc_control_new(&cfg);

    memset(&grant, 0, sizeof grant);
    grant.endpoint_mask = 1ull << 0;
    grant.source_mask   = source_mask;
    grant.cap_mask      = cap;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &grant);

    return ctl;
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||||
|
|
||||||
|
void* A = add_ctl(core, VMSIG_CAP_OBSERVE, 0xFFFFFFFFu); /* screen observer */
|
||||||
|
void* B = add_ctl(core, VMSIG_CAP_INPUT | VMSIG_CAP_LIFECYCLE, 0xFFFFFFFFu);/* input + safe lifecycle */
|
||||||
|
void* P = add_ctl(core, VMSIG_CAP_POWER, 0xFFFFFFFFu); /* destructive power */
|
||||||
|
void* S = add_ctl(core, VMSIG_CAP_INPUT, 1u << VMSIG_SRC_FRAME); /* INPUT, but source=FRAME */
|
||||||
|
void* V = add_ctl(core, VMSIG_CAP_VM, 0xFFFFFFFFu); /* VM control (safe) */
|
||||||
|
void* C = vmsig_inproc_control_new(&(vmsig_inproc_cfg){0}); /* default-deny */
|
||||||
|
vmsig_core_add_control(core, vmsig_inproc_control_ops(), C, NULL);
|
||||||
|
|
||||||
|
printf("test_security\n");
|
||||||
|
/* A — screen observer: does NOT actuate input/lifecycle (split CAP) */
|
||||||
|
CHECK(send(A, VMSIG_EV_CMD_INPUT, 0) == -1, "OBSERVE != input"); /* deny 1 */
|
||||||
|
CHECK(send_life(A, VMSIG_LIFE_PAUSE, 0) == -1, "OBSERVE != lifecycle"); /* deny 2 */
|
||||||
|
|
||||||
|
/* B — input + SAFE lifecycle, but NOT destructive power. Destructive/input
|
||||||
|
* now passes ONLY while holding a class lease => ACQUIRE first. */
|
||||||
|
acq(B, VMSIG_LEASE_INPUT, 0);
|
||||||
|
CHECK(send(B, VMSIG_EV_CMD_INPUT, 0) == 0, "INPUT => input allowed");
|
||||||
|
CHECK(send_life(B, VMSIG_LIFE_PAUSE, 0) == 0, "LIFECYCLE => pause allowed");
|
||||||
|
CHECK(send_life(B, VMSIG_LIFE_POWERDOWN, 0) == -1,"powerdown requires CAP_POWER"); /* deny 3 */
|
||||||
|
|
||||||
|
/* P — destructive power (with a POWER class lease) */
|
||||||
|
acq(P, VMSIG_LEASE_POWER, 0);
|
||||||
|
CHECK(send_life(P, VMSIG_LIFE_POWERDOWN, 0) == 0, "POWER => powerdown allowed");
|
||||||
|
|
||||||
|
/* S — has INPUT, but source_mask lacks SRC_INPUT: DOWN input denied */
|
||||||
|
CHECK(send(S, VMSIG_EV_CMD_INPUT, 0) == -1, "source_mask on DOWN: SRC_INPUT denied"); /* deny 4 */
|
||||||
|
|
||||||
|
/* V — VM control: safe ops yes, destructive ones require CAP_POWER */
|
||||||
|
CHECK(send_vm(V, VMSIG_VMOP_CONT, 0) == 0, "CAP_VM => cont allowed");
|
||||||
|
CHECK(send_vm(V, VMSIG_VMOP_POWERDOWN, 0) == -1, "VM powerdown requires CAP_POWER"); /* deny 5 */
|
||||||
|
CHECK(send_vm(P, VMSIG_VMOP_POWERDOWN, 0) == 0, "CAP_POWER => VM powerdown allowed");
|
||||||
|
|
||||||
|
/* C — default-deny */
|
||||||
|
CHECK(send_vm(C, VMSIG_VMOP_QUERY, 0) == -1, "default-deny is deaf"); /* deny 6 */
|
||||||
|
|
||||||
|
/* audit recorded all 6 DOWN denials */
|
||||||
|
CHECK(g_denied == 6, "audit: all DOWN denials recorded");
|
||||||
|
|
||||||
|
vmsig_core_free(core); /* closes/frees all controls */
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
|
||||||
|
printf("security tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
/* test_sock.c — out-of-process control: wire codec + authentication/admission.
|
||||||
|
* Bring up two listeners (one admitting, one rejecting) on abstract sockets, run
|
||||||
|
* the core in a separate thread, connect clients and check: policy invoked,
|
||||||
|
* valid poller admitted, unauthorized rejected (EOF), reap without a crash. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include "vmsig_socket.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdatomic.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
static int g_fail = 0;
|
||||||
|
#define CHECK(cond, msg) do { \
|
||||||
|
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static atomic_int g_auth = 0;
|
||||||
|
static atomic_int g_deny = 0;
|
||||||
|
static atomic_int g_admit = 0;
|
||||||
|
static atomic_int g_reject = 0;
|
||||||
|
|
||||||
|
static void audit_cb(void* ud, const vmsig_audit* a) {
|
||||||
|
(void)ud;
|
||||||
|
if (a->kind == VMSIG_AUDIT_ADMIT) atomic_fetch_add(&g_admit, 1);
|
||||||
|
else if (a->kind == VMSIG_AUDIT_REJECT) atomic_fetch_add(&g_reject, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static vmsig_grant pol_ok(uint32_t uid, uint32_t pid, void* ud) {
|
||||||
|
(void)pid; (void)ud;
|
||||||
|
atomic_fetch_add(&g_auth, 1);
|
||||||
|
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||||
|
g.principal = uid; g.endpoint_mask = 1u << 0;
|
||||||
|
g.source_mask = 0xFFFFFFFFu; g.cap_mask = VMSIG_CAP_OBSERVE;
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
static vmsig_grant pol_deny(uint32_t uid, uint32_t pid, void* ud) {
|
||||||
|
(void)uid; (void)pid; (void)ud;
|
||||||
|
atomic_fetch_add(&g_deny, 1);
|
||||||
|
vmsig_grant g; memset(&g, 0, sizeof g); /* empty => reject */
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Connect a stream socket to a Linux abstract-namespace unix address.
 *
 * `name` is written "@rest": its first character only stands in for the
 * leading NUL byte of an abstract address, so bytes name+1.. are copied and
 * the address length covers exactly strlen(name) bytes of sun_path.
 *
 * Returns the connected fd, or -1 on any failure (NULL or empty name, name
 * longer than sun_path, socket() or connect() error). */
static int connect_abstract(const char* name) {
    struct sockaddr_un addr;
    size_t n = name ? strlen(name) : 0;

    /* n == 0 would make `n - 1` wrap to SIZE_MAX in the memcpy below, and a
     * name longer than sun_path would write past the address: refuse both. */
    if (n == 0 || n > sizeof addr.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&addr, 0, sizeof addr);
    addr.sun_family = AF_UNIX;
    /* sun_path[0] stays NUL from the memset: that selects the abstract namespace. */
    memcpy(addr.sun_path + 1, name + 1, n - 1);

    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    if (connect(fd, (struct sockaddr*)&addr, alen) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
|
||||||
|
|
||||||
|
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
|
||||||
|
|
||||||
|
/* Poll `*a` until it reaches at least `want`, sleeping 1 ms between polls and
 * giving up after `ms` polls. Returns nothing: callers re-read the counter to
 * learn whether the wait succeeded. */
static void wait_atomic(atomic_int* a, int want, int ms) {
    const struct timespec tick = { .tv_sec = 0, .tv_nsec = 1000000 };
    int polls_left = ms;

    while (polls_left-- > 0) {
        if (atomic_load(a) >= want)
            break;
        nanosleep(&tick, NULL);
    }
}
|
||||||
|
|
||||||
|
static void test_wire(void) {
|
||||||
|
printf("test_wire\n");
|
||||||
|
vmsig_event ev; memset(&ev, 0, sizeof ev);
|
||||||
|
ev.kind = VMSIG_EV_CMD_VM; ev.source = VMSIG_SRC_VMHOST; ev.dir = VMSIG_DIR_DOWN;
|
||||||
|
ev.prio = VMSIG_PRIO_HIGH; ev.endpoint = 0; ev.corr = 0xABCD;
|
||||||
|
for (int i = 0; i < 48; i++) ev.inln[i] = (uint8_t)i;
|
||||||
|
|
||||||
|
vmsig_wire w; vmsig_wire_encode(&w, &ev);
|
||||||
|
vmsig_event d;
|
||||||
|
CHECK(vmsig_wire_decode(&w, &d) == 0, "decode ok");
|
||||||
|
CHECK(d.kind == ev.kind && d.source == ev.source &&
|
||||||
|
d.endpoint == ev.endpoint && d.corr == ev.corr, "frame fields match");
|
||||||
|
CHECK(memcmp(d.inln, ev.inln, 48) == 0, "inln matches");
|
||||||
|
|
||||||
|
vmsig_wire bad = w; bad.magic = 0; vmsig_event x;
|
||||||
|
CHECK(vmsig_wire_decode(&bad, &x) == -1, "bad magic rejected");
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
test_wire();
|
||||||
|
|
||||||
|
printf("test_socket\n");
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
vmsig_core_set_audit(core, audit_cb, NULL);
|
||||||
|
const char* OK = "@vmsig-sock-ok-test";
|
||||||
|
const char* DENY = "@vmsig-sock-deny-test";
|
||||||
|
CHECK(vmsig_socket_attach(core, OK, pol_ok, NULL) == 0, "attach ok listener");
|
||||||
|
CHECK(vmsig_socket_attach(core, DENY, pol_deny, NULL) == 0, "attach deny listener");
|
||||||
|
|
||||||
|
pthread_t th;
|
||||||
|
pthread_create(&th, NULL, loop_main, core);
|
||||||
|
|
||||||
|
/* valid poller: connect -> policy -> admission */
|
||||||
|
int c1 = connect_abstract(OK);
|
||||||
|
CHECK(c1 >= 0, "client connected (ok)");
|
||||||
|
wait_atomic(&g_auth, 1, 1000);
|
||||||
|
CHECK(atomic_load(&g_auth) >= 1, "policy invoked — poller authenticated/admitted");
|
||||||
|
if (c1 >= 0) close(c1); /* disconnect -> deferred reap (no crash) */
|
||||||
|
|
||||||
|
/* unauthorized: connect -> server closes -> EOF on the client */
|
||||||
|
int c2 = connect_abstract(DENY);
|
||||||
|
CHECK(c2 >= 0, "client connected (deny)");
|
||||||
|
wait_atomic(&g_deny, 1, 1000);
|
||||||
|
CHECK(atomic_load(&g_deny) >= 1, "deny policy invoked");
|
||||||
|
if (c2 >= 0) {
|
||||||
|
struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
|
||||||
|
setsockopt(c2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
|
||||||
|
char b; ssize_t r = read(c2, &b, 1);
|
||||||
|
CHECK(r == 0, "connection rejected by server (EOF)");
|
||||||
|
close(c2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* slot reuse: churn > MAX_CONTROLS(64). Without returning slots the listener
|
||||||
|
* would die after 64 cycles. Each cycle: connect(ok) -> wait auth++ -> close. */
|
||||||
|
int base = atomic_load(&g_auth);
|
||||||
|
const int churn = 70;
|
||||||
|
for (int k = 0; k < churn; k++) {
|
||||||
|
int fc = connect_abstract(OK);
|
||||||
|
if (fc < 0) { CHECK(0, "churn connect"); break; }
|
||||||
|
wait_atomic(&g_auth, base + k + 1, 1000);
|
||||||
|
close(fc);
|
||||||
|
struct timespec ts = { .tv_sec = 0, .tv_nsec = 2 * 1000000 };
|
||||||
|
nanosleep(&ts, NULL); /* let the loop reap before the next connection */
|
||||||
|
}
|
||||||
|
CHECK(atomic_load(&g_auth) >= base + churn,
|
||||||
|
"slots reused: churn > MAX_CONTROLS admitted");
|
||||||
|
|
||||||
|
/* audit recorded admissions and rejections */
|
||||||
|
CHECK(atomic_load(&g_admit) >= 1, "audit: poller admission");
|
||||||
|
CHECK(atomic_load(&g_reject) >= 1, "audit: rejection (deny listener)");
|
||||||
|
|
||||||
|
struct timespec t = { .tv_sec = 0, .tv_nsec = 50 * 1000000 };
|
||||||
|
nanosleep(&t, NULL); /* let the loop process the reaps */
|
||||||
|
vmsig_core_stop(core);
|
||||||
|
pthread_join(th, NULL);
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
|
||||||
|
printf("socket tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
/* test_vmhost.c — QEMU/QMP host-plane, armed path: fake QMP server (this test)
|
||||||
|
* <-> real QMP client vmhost. We verify: handshake (greeting -> qmp_capabilities
|
||||||
|
* -> return -> SEAM_UP), async events -> VM_LIFECYCLE (broadcast), CMD_VM{QUERY}
|
||||||
|
* -> command to server -> return -> addressed VM_LIFECYCLE to the initiator, EOF -> SEAM_DOWN. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include "vmsig.h"
|
||||||
|
#include "vmhost.h" /* private cfg (CMake provides the include path) */
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdatomic.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
/* Sticky failure flag: set by CHECK on the first failed expectation and
 * turned into the process exit status at the end of main(). */
static int g_fail = 0;

/* CHECK(cond, msg): when cond is false, print msg as a failure line and mark
 * the whole run as failed. Execution continues so later checks still report. */
#define CHECK(cond, msg)                    \
    do {                                    \
        if (!(cond)) {                      \
            printf(" FAIL: %s\n", (msg));   \
            g_fail = 1;                     \
        }                                   \
    } while (0)

/* Observation flags raised by on_ev() and polled by main(). */
static atomic_int g_seamup = 0;         /* SEAM_UP seen from the vmhost source */
static atomic_int g_seamdown = 0;       /* SEAM_DOWN seen from the vmhost source */
static atomic_int g_paused = 0;         /* broadcast VM_LIFECYCLE with state PAUSED */
static atomic_int g_running_bcast = 0;  /* broadcast VM_LIFECYCLE with state RUNNING */
static atomic_int g_query_reply = 0;    /* addressed VM_LIFECYCLE with state RUNNING */

/* In-process control handle; on_ev() uses it to send the status query. */
static void* g_ctl = NULL;
|
||||||
|
|
||||||
|
static int on_ev(void* user, const vmsig_event* ev) {
|
||||||
|
(void)user;
|
||||||
|
if (ev->kind == VMSIG_EV_SEAM_UP && ev->source == VMSIG_SRC_VMHOST) {
|
||||||
|
atomic_store(&g_seamup, 1);
|
||||||
|
vmsig_event d; memset(&d, 0, sizeof d); /* once ready — query status */
|
||||||
|
d.kind = VMSIG_EV_CMD_VM; d.source = VMSIG_SRC_VMHOST; d.dir = VMSIG_DIR_DOWN;
|
||||||
|
d.prio = VMSIG_PRIO_NORMAL; d.endpoint = 0; d.corr = 0x55;
|
||||||
|
vmsig_vm_cmd c = { VMSIG_VMOP_QUERY }; memcpy(d.inln, &c, sizeof c);
|
||||||
|
vmsig_inproc_send(g_ctl, &d);
|
||||||
|
} else if (ev->kind == VMSIG_EV_SEAM_DOWN && ev->source == VMSIG_SRC_VMHOST) {
|
||||||
|
atomic_store(&g_seamdown, 1);
|
||||||
|
} else if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
|
||||||
|
vmsig_vm_state vs; memcpy(&vs, ev->inln, sizeof vs);
|
||||||
|
if (ev->origin) { /* addressed reply to our QUERY */
|
||||||
|
if (vs.state == VMSIG_VM_RUNNING) atomic_store(&g_query_reply, 1);
|
||||||
|
} else { /* broadcast async event */
|
||||||
|
if (vs.state == VMSIG_VM_PAUSED) atomic_store(&g_paused, 1);
|
||||||
|
if (vs.state == VMSIG_VM_RUNNING) atomic_store(&g_running_bcast, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void* loop_main(void* p) { vmsig_core_run((vmsig_core*)p); return NULL; }
|
||||||
|
|
||||||
|
/* Create a listening AF_UNIX stream socket in the abstract namespace.
 *
 * `name` is given with a one-character placeholder prefix (the caller uses
 * '@'); that first character is replaced by the leading NUL byte of an
 * abstract address and the rest of the string is copied verbatim.
 *
 * Returns the listening fd, or -1 when the name is empty, does not fit into
 * sun_path, or any of socket/bind/listen fails. */
static int srv_listen(const char* name) {
    struct sockaddr_un a;
    size_t n = strlen(name);

    /* n bytes of sun_path are used (leading NUL + n-1 name bytes). Reject an
     * empty name (n - 1 would wrap around) and one that would overflow. */
    if (n == 0 || n > sizeof a.sun_path) return -1;

    int fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return -1;

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    a.sun_path[0] = 0;
    memcpy(a.sun_path + 1, name + 1, n - 1);

    /* Abstract addresses are length-delimited, so pass the exact length. */
    socklen_t alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);

    if (bind(fd, (struct sockaddr*)&a, alen) < 0) { close(fd); return -1; }
    if (listen(fd, 4) < 0) { close(fd); return -1; }
    return fd;
}
|
||||||
|
/* Best-effort send of the whole NUL-terminated string `s` on `fd`.
 * Continues after short writes and retries on EINTR so a message is not
 * silently truncated; any other write error ends the attempt quietly. */
static void srv_send(int fd, const char* s) {
    size_t left = strlen(s);
    while (left > 0) {
        ssize_t w = write(fd, s, left);
        if (w < 0) {
            if (errno == EINTR) continue;
            return;             /* hard error: give up, caller's checks will notice */
        }
        if (w == 0) return;     /* no progress possible */
        s += w;
        left -= (size_t)w;
    }
}
|
||||||
|
/* Read from `fd` until the accumulated text contains `needle`.
 *
 * Returns 1 as soon as the needle is seen, 0 on EOF, on a hard read error,
 * or after 200 read attempts. A read that fails with EAGAIN/EWOULDBLOCK/EINTR
 * is followed by a 10 ms pause, so the total wait is 200 * (10 ms + whatever
 * receive timeout is configured on fd, if any).
 *
 * When the 1 KiB scratch buffer fills up, the last strlen(needle)-1 bytes are
 * carried over so that a needle straddling the wrap is still found. */
static int srv_expect(int fd, const char* needle) {
    char buf[1024];
    size_t len = 0;

    /* Tail to keep across a buffer wrap; capped so reads always make progress. */
    size_t keep = strlen(needle);
    keep = keep ? keep - 1 : 0;
    if (keep > sizeof buf / 2) keep = sizeof buf / 2;

    for (int i = 0; i < 200; i++) {
        ssize_t r = read(fd, buf + len, sizeof buf - 1 - len);
        if (r > 0) {
            len += (size_t)r;
            buf[len] = 0;
            if (strstr(buf, needle)) return 1;
        } else if (r == 0) {
            return 0;                                   /* peer closed */
        } else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
            struct timespec t = { 0, 10 * 1000000 };    /* nothing yet: back off 10 ms */
            nanosleep(&t, NULL);
        } else {
            return 0;                                   /* hard error: retrying cannot help */
        }
        if (len >= sizeof buf - 1) {
            memmove(buf, buf + len - keep, keep);
            len = keep;
        }
    }
    return 0;
}
|
||||||
|
/* Poll `*a` until it becomes non-zero, checking once before each of at most
 * `ms` sleeps of 1 ms. Returns early when the flag is seen set; otherwise
 * returns after the last sleep. Callers re-read the flag themselves. */
static void wait_atomic(atomic_int* a, int ms) {
    const struct timespec tick = { 0, 1000000 };
    for (int left = ms; left > 0; left--) {
        if (atomic_load(a) != 0)
            return;
        nanosleep(&tick, NULL);
    }
}
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
const char* QMP = "@vmsig-qmp-fake-test";
|
||||||
|
int srv = srv_listen(QMP);
|
||||||
|
if (srv < 0) { printf("srv_listen failed\n"); return 1; }
|
||||||
|
|
||||||
|
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||||
|
vmsig_core* core = vmsig_core_new(ctx);
|
||||||
|
|
||||||
|
vmsig_inproc_cfg cc; memset(&cc, 0, sizeof cc); cc.on_event = on_ev;
|
||||||
|
void* ctl = vmsig_inproc_control_new(&cc);
|
||||||
|
g_ctl = ctl;
|
||||||
|
vmsig_grant g; memset(&g, 0, sizeof g);
|
||||||
|
g.endpoint_mask = 1ull << 0; g.source_mask = 0xFFFFFFFFu;
|
||||||
|
g.cap_mask = VMSIG_CAP_OBSERVE | VMSIG_CAP_VM;
|
||||||
|
vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &g);
|
||||||
|
|
||||||
|
/* armed vmhost: it will connect to our fake QMP */
|
||||||
|
vmsig_vmhost_cfg vcfg; memset(&vcfg, 0, sizeof vcfg); vcfg.qmp_path = QMP;
|
||||||
|
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), &vcfg, 0) >= 0, "vmhost armed attach");
|
||||||
|
|
||||||
|
pthread_t th; pthread_create(&th, NULL, loop_main, core);
|
||||||
|
|
||||||
|
/* === QMP server role === */
|
||||||
|
int c = accept(srv, NULL, NULL);
|
||||||
|
CHECK(c >= 0, "server accepted vmhost connection");
|
||||||
|
if (c >= 0) {
|
||||||
|
struct timeval tv = { 0, 50 * 1000 };
|
||||||
|
setsockopt(c, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv);
|
||||||
|
|
||||||
|
srv_send(c, "{\"QMP\": {\"version\": {}, \"capabilities\": []}}\r\n");
|
||||||
|
CHECK(srv_expect(c, "qmp_capabilities"), "client sent qmp_capabilities");
|
||||||
|
srv_send(c, "{\"return\": {}}\r\n"); /* -> READY -> SEAM_UP */
|
||||||
|
|
||||||
|
srv_send(c, "{\"event\": \"STOP\"}\r\n"); /* -> broadcast PAUSED */
|
||||||
|
CHECK(srv_expect(c, "query-status"), "client sent query-status (from CMD_VM)");
|
||||||
|
srv_send(c, "{\"return\": {\"status\": \"running\"}, \"id\": 1}\r\n"); /* -> addressed reply */
|
||||||
|
srv_send(c, "{\"event\": \"RESUME\"}\r\n"); /* -> broadcast RUNNING */
|
||||||
|
|
||||||
|
wait_atomic(&g_seamup, 1000);
|
||||||
|
wait_atomic(&g_paused, 1000);
|
||||||
|
wait_atomic(&g_query_reply, 1000);
|
||||||
|
wait_atomic(&g_running_bcast, 1000);
|
||||||
|
|
||||||
|
close(c); /* EOF -> SEAM_DOWN */
|
||||||
|
wait_atomic(&g_seamdown, 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK(atomic_load(&g_seamup), "handshake complete (SEAM_UP)");
|
||||||
|
CHECK(atomic_load(&g_paused), "async STOP -> VM_LIFECYCLE PAUSED (broadcast)");
|
||||||
|
CHECK(atomic_load(&g_query_reply), "CMD_VM QUERY -> addressed VM_LIFECYCLE RUNNING");
|
||||||
|
CHECK(atomic_load(&g_running_bcast),"async RESUME -> VM_LIFECYCLE RUNNING (broadcast)");
|
||||||
|
CHECK(atomic_load(&g_seamdown), "EOF QMP -> SEAM_DOWN");
|
||||||
|
|
||||||
|
vmsig_core_stop(core);
|
||||||
|
pthread_join(th, NULL);
|
||||||
|
vmsig_core_free(core);
|
||||||
|
vmsig_ctx_free(ctx);
|
||||||
|
close(srv);
|
||||||
|
|
||||||
|
printf("vmhost tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||||
|
return g_fail ? 1 : 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user