vmsig: management daemon, runtime endpoint lifecycle, roster, discovery, in-tree drivers, packaging

- core: runtime attach/detach of a per-endpoint adapter trio (runtime-safe add_adapter + vmsig_core_detach_endpoint, deferred reap)
- roster: VMSIG_EV_ROSTER + CAP_ROSTER, retained per-endpoint and replayed to late subscribers
- discovery: inotify trigger dir, vmid/endpoint slot allocator, host probe; vmsigd daemon with config + per-uid admission
- input driver and vgpu perception built in-tree; vgpu perception as a separate library
- memctx: own the supplied ro_fd (closed at detach)
- deb packaging: install rules, systemd unit, tmpfiles, default config
This commit is contained in:
2026-06-22 17:25:06 +03:00
parent 0d387a4249
commit 9bde398b6c
55 changed files with 4703 additions and 61 deletions
+1
View File
@@ -3,3 +3,4 @@ cmake-*/
compile*
Testing/
CLAUDE.md
dist/
+130 -21
View File
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.16)
project(vmsig VERSION 0.2.0 C)
project(vmsig VERSION 0.3.0 LANGUAGES C)
set(CMAKE_C_STANDARD 17)
set(CMAKE_C_STANDARD_REQUIRED ON)
@@ -10,12 +10,11 @@ option(VMSIG_LTO "Enable LTO" OFF)
# builds against headers only: the SI calls are hidden behind these flags, and the
# stub mode proves the seam without a real VM.
option(VMSIG_WITH_VMIE "Link real vmie (libvmie.a, PIC) for armed memctx" OFF)
option(VMSIG_WITH_VMCTL "Link real vmctl (libvmctl.a, PIC) for armed input" OFF)
# ---- Sibling library sources (set these to your local checkouts) ------------
# Only needed for the armed builds below; the default stub build needs neither.
# ---- Sibling library source (set to your local checkout) --------------------
# vmie stays an EXTERNAL library (.so/.deb); only needed for the armed memctx build.
# The input driver (vmctl) is ABSORBED in-tree (src/si/input/) — no external flag.
set(LIBVMIE_PATH "" CACHE PATH "Path to the vmie library sources (for VMSIG_WITH_VMIE)")
set(LIBVMCTL_PATH "" CACHE PATH "Path to the vmctl library sources (for VMSIG_WITH_VMCTL)")
find_package(Threads REQUIRED)
@@ -29,7 +28,17 @@ add_library(vmsig SHARED
src/adapter/input/input.c
src/adapter/vmhost/vmhost.c
src/control/inproc.c
src/control/socket.c)
src/control/socket.c
src/discovery/slot.c
src/discovery/linux/host_probe.c
src/discovery/discovery.c
# SI input driver (vmctl), absorbed in-tree (host-only: QMP + uinput)
src/si/input/open.c
src/si/input/qmp.c
src/si/input/qmp_driver.c
src/si/input/keymap.c
src/si/input/power.c
src/si/input/linux/uinput_driver.c)
target_include_directories(vmsig
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
@@ -38,30 +47,24 @@ target_include_directories(vmsig
${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/include
${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/memctx/include
${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/input/include
${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/vmhost/include)
${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/vmhost/include
${CMAKE_CURRENT_SOURCE_DIR}/src/discovery/include
${CMAKE_CURRENT_SOURCE_DIR}/src/si/input/include)
target_link_libraries(vmsig PRIVATE Threads::Threads)
# armed: the real vmie as a pre-built PIC .a (IMPORTED) — headers and symbols.
# armed: vmie stays an EXTERNAL shared library (.so/.deb) — pre-built, IMPORTED. Both
# libvmsig (armed memctx) and libvgpu-perception link it dynamically (no duplication; the
# package Depends on libvmie). Headers + symbols come from the imported target.
if(VMSIG_WITH_VMIE)
add_library(vmie STATIC IMPORTED)
add_library(vmie SHARED IMPORTED)
set_target_properties(vmie PROPERTIES
IMPORTED_LOCATION ${LIBVMIE_PATH}/.build/libvmie.a
IMPORTED_LOCATION ${LIBVMIE_PATH}/.build/libvmie.so
INTERFACE_INCLUDE_DIRECTORIES ${LIBVMIE_PATH}/include)
target_link_libraries(vmsig PRIVATE vmie)
target_compile_definitions(vmsig PRIVATE VMSIG_WITH_VMIE)
endif()
# armed: the real vmctl as a pre-built PIC .a (IMPORTED).
if(VMSIG_WITH_VMCTL)
add_library(vmctl STATIC IMPORTED)
set_target_properties(vmctl PROPERTIES
IMPORTED_LOCATION ${LIBVMCTL_PATH}/.build/libvmctl.a
INTERFACE_INCLUDE_DIRECTORIES ${LIBVMCTL_PATH}/include)
target_link_libraries(vmsig PRIVATE vmctl)
target_compile_definitions(vmsig PRIVATE VMSIG_WITH_VMCTL)
endif()
target_compile_options(vmsig PRIVATE -O2 -Wall -Wextra)
if(VMSIG_LTO)
target_compile_options(vmsig PRIVATE -flto)
@@ -73,6 +76,50 @@ add_executable(vmsig_cli src/cli.c)
target_link_libraries(vmsig_cli PRIVATE vmsig)
target_compile_options(vmsig_cli PRIVATE -Wall -Wextra)
# ---- vgpu-perception: host-side vgpu Sensor S-lib (absorbed in-tree) ---------
# A SEPARATE shipped library (NOT fused into libvmsig — it is consumed by the shell, not the
# signaling core). Host-only: reads the vgpu shared region from its own RO vmie_mem. Built
# only when armed (needs vmie). The in-guest Windows producer (vgpu-streamer.exe) stays in a
# separate repo and is NOT part of this delivery.
if(VMSIG_WITH_VMIE)
  # Host-side vgpu perception library, built as its own shared object.
  add_library(vgpu-perception SHARED
    src/si/vgpu-perception/discover.c
    src/si/vgpu-perception/sample.c
    src/si/vgpu-perception/control.c)
  target_include_directories(vgpu-perception
    PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
    PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/si/vgpu-perception/include)
  # PUBLIC on purpose: memmodel.h / win32.h reach consumers via the vmie target.
  target_link_libraries(vgpu-perception PUBLIC vmie)
  target_compile_options(vgpu-perception PRIVATE -O2 -Wall -Wextra)

  # Test executable for the perception library; it also sees the library's
  # private include directory.
  add_executable(vgpu_perceptiontest src/test/test_perception.c)
  target_include_directories(vgpu_perceptiontest PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/si/vgpu-perception/include)
  target_link_libraries(vgpu_perceptiontest PRIVATE vgpu-perception)
  target_compile_options(vgpu_perceptiontest PRIVATE -O2 -Wall -Wextra)

  add_test(NAME vgpu_perception COMMAND vgpu_perceptiontest)
  # Point the loader at the directories the two shared libraries actually live
  # in, derived from the targets themselves. CMAKE_BINARY_DIR is the top of the
  # WHOLE build, so it is wrong when vmsig is built as a subproject or when a
  # library output directory is configured; the imported vmie location is taken
  # from its IMPORTED_LOCATION instead of being spelled out a second time.
  set_tests_properties(vgpu_perception PROPERTIES
    ENVIRONMENT
      "LD_LIBRARY_PATH=$<TARGET_FILE_DIR:vmie>:$<TARGET_FILE_DIR:vgpu-perception>")
endif()
# ---- vmsigd: the management daemon -----------------------------------------
# Links libvmsig (works in stub or armed; armed memctx needs vmie at runtime). Discovery +
# socket + a coarse per-uid admission policy; serves whatever appears under the watch dir.
add_executable(vmsigd
  src/daemon/vmsigd.c
  src/daemon/config.c
  src/daemon/admission.c)
# Daemon-private headers, plus the discovery and core headers it includes.
target_include_directories(vmsigd
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/daemon/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/discovery/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/core/include)
target_link_libraries(vmsigd
  PRIVATE
    vmsig
    Threads::Threads)
# -flto is appended to both the compile and the link line only when the
# VMSIG_LTO option is ON; when it is OFF the expressions expand to nothing.
target_compile_options(vmsigd
  PRIVATE
    -O2 -Wall -Wextra
    $<$<BOOL:${VMSIG_LTO}>:-flto>)
target_link_options(vmsigd
  PRIVATE
    $<$<BOOL:${VMSIG_LTO}>:-flto>)
# ---- transfer-context tests (ctest) -----------------------------------------
enable_testing()
add_executable(vmsig_test src/test/test_ctx.c)
@@ -97,6 +144,43 @@ target_link_libraries(vmsig_mvmtest PRIVATE vmsig)
target_compile_options(vmsig_mvmtest PRIVATE -Wall -Wextra)
add_test(NAME mvm COMMAND vmsig_mvmtest)
# dynep: links Threads::Threads directly in addition to libvmsig.
add_executable(vmsig_dyneptest src/test/test_dynep.c)
target_compile_options(vmsig_dyneptest PRIVATE -Wall -Wextra)
target_link_libraries(vmsig_dyneptest PRIVATE vmsig Threads::Threads)
add_test(NAME dynep COMMAND vmsig_dyneptest)

# roster: additionally sees the core-private headers.
add_executable(vmsig_rostertest src/test/test_roster.c)
target_include_directories(vmsig_rostertest
  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/core/include)
target_compile_options(vmsig_rostertest PRIVATE -Wall -Wextra)
target_link_libraries(vmsig_rostertest PRIVATE vmsig)
add_test(NAME roster COMMAND vmsig_rostertest)

# slot and discovery have the same shape: one source file, libvmsig, and the
# discovery-private headers.
foreach(vmsig_suite IN ITEMS slot discovery)
  add_executable(vmsig_${vmsig_suite}test src/test/test_${vmsig_suite}.c)
  target_include_directories(vmsig_${vmsig_suite}test
    PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/discovery/include)
  target_compile_options(vmsig_${vmsig_suite}test PRIVATE -Wall -Wextra)
  target_link_libraries(vmsig_${vmsig_suite}test PRIVATE vmsig)
  add_test(NAME ${vmsig_suite} COMMAND vmsig_${vmsig_suite}test)
endforeach()

# daemoncfg: compiles the daemon's config.c and admission.c straight into the
# test binary instead of linking the vmsigd executable.
add_executable(vmsig_daemoncfgtest
  src/test/test_daemoncfg.c
  src/daemon/config.c
  src/daemon/admission.c)
target_include_directories(vmsig_daemoncfgtest
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src/daemon/include
    ${CMAKE_CURRENT_SOURCE_DIR}/src/discovery/include)
target_compile_options(vmsig_daemoncfgtest PRIVATE -Wall -Wextra)
target_link_libraries(vmsig_daemoncfgtest PRIVATE vmsig)
add_test(NAME daemoncfg COMMAND vmsig_daemoncfgtest)
add_executable(vmsig_authztest src/test/test_authz.c)
target_link_libraries(vmsig_authztest PRIVATE vmsig)
target_compile_options(vmsig_authztest PRIVATE -Wall -Wextra)
@@ -105,7 +189,8 @@ add_test(NAME authz COMMAND vmsig_authztest)
add_executable(vmsig_memctxtest src/test/test_memctx.c)
target_link_libraries(vmsig_memctxtest PRIVATE vmsig Threads::Threads)
target_include_directories(vmsig_memctxtest PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/core/include)
${CMAKE_CURRENT_SOURCE_DIR}/src/core/include
${CMAKE_CURRENT_SOURCE_DIR}/src/adapter/memctx/include)
target_compile_options(vmsig_memctxtest PRIVATE -Wall -Wextra)
add_test(NAME memctx COMMAND vmsig_memctxtest)
@@ -137,3 +222,27 @@ add_test(NAME memwrite COMMAND vmsig_memwritetest)
# the demonstrator doubles as an end-to-end seam test (self-terminates rc=0)
add_test(NAME cli COMMAND vmsig_cli)
# ---- install rules (for the .deb stage) -------------------------------------
option(VMSIG_INSTALL "Generate install() rules (daemon/lib/headers/unit/config)" OFF)
if(VMSIG_INSTALL)
  include(GNUInstallDirs)

  # Daemon goes to sbin, the shared library to the library directory.
  install(TARGETS vmsigd vmsig
    RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})

  # The perception library only exists as a target in armed (VMSIG_WITH_VMIE)
  # builds; ship it next to libvmsig when it was built.
  if(TARGET vgpu-perception)
    install(TARGETS vgpu-perception
      LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
  endif()

  # Public headers land under <includedir>/vmsig. Only the listed name
  # patterns are copied from include/.
  install(
    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vmsig
    FILES_MATCHING
      PATTERN "vmsig*.h"
      PATTERN "vmctl.h"
      PATTERN "vgpu_stream.h"
      PATTERN "vgpu_perception.h")

  # Packaging payload. The unit and tmpfiles destinations are relative, so they
  # resolve under the install prefix.
  install(
    FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/systemd/vmsigd.service
    DESTINATION lib/systemd/system)
  install(
    FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/tmpfiles/vmsig.conf
    DESTINATION lib/tmpfiles.d)
  # NOTE(review): absolute destination, so it ignores the install prefix and
  # only honours DESTDIR; presumably deliberate so the config lands in /etc
  # under a /usr prefix. Confirm before making it prefix-relative.
  install(
    FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/config/vmsigd.conf
    DESTINATION /etc/vmsig)
endif()
+46
View File
@@ -0,0 +1,46 @@
# vmsig packaging — `make deb` builds the .deb over a `cmake --install` stage.
# Private values are NOT baked into the tree: pass them via the variables below (the
# defaults are neutral placeholders; CI overrides them from vars/secrets).
#
# make deb LIBVMIE_PATH=/path/to/vmie VERSION=1.2.3 \
# MAINTAINER="Name <addr>" DEPENDS="libc6, libvmie0"
VERSION ?= 0.0.0
MAINTAINER ?= vmsig packaging <root@localhost>
# libvmie0 is vmie's own runtime package (SONAME libvmie.so.0): libvmsig.so and
# libvgpu-perception.so dynamically link it, so it is a HARD runtime dependency.
DEPENDS ?= libc6, libvmie0
ARCH ?= amd64
LIBVMIE_PATH ?=
BUILD_DIR ?= .build-pkg
STAGE ?= $(CURDIR)/dist/stage
DIST ?= $(CURDIR)/dist
.PHONY: deb clean
# Armed package: the shipped daemon needs vmie for memctx. vmie stays an external dependency:
# the package Depends on its runtime (libvmie0 is already in the default DEPENDS; override
# DEPENDS to change the dependency list).
#
# Steps: configure + build an armed Release tree, stage it with `cmake --install` under
# $(STAGE), render DEBIAN/control from packaging/deb/control.in, copy the maintainer scripts,
# then build the .deb into $(DIST).
#
# Every path expansion is quoted: STAGE/DIST default to paths under $(CURDIR), and an unquoted
# directory name containing a space would split into separate arguments (worst case for the
# `rm -rf`, which would then remove unrelated paths).
deb:
	@test -n "$(LIBVMIE_PATH)" || { echo "set LIBVMIE_PATH=/path/to/vmie sources (armed memctx)"; exit 1; }
	rm -rf "$(STAGE)"
	cmake -S . -B "$(BUILD_DIR)" -DCMAKE_BUILD_TYPE=Release -DVMSIG_INSTALL=ON \
		-DVMSIG_WITH_VMIE=ON -DLIBVMIE_PATH="$(LIBVMIE_PATH)"
	cmake --build "$(BUILD_DIR)" -j
	DESTDIR="$(STAGE)" cmake --install "$(BUILD_DIR)" --prefix /usr
	mkdir -p "$(STAGE)/DEBIAN"
	sed -e 's/@VERSION@/$(VERSION)/' \
		-e 's|@MAINTAINER@|$(MAINTAINER)|' \
		-e 's/@DEPENDS@/$(DEPENDS)/' \
		packaging/deb/control.in > "$(STAGE)/DEBIAN/control"
	cp packaging/deb/conffiles "$(STAGE)/DEBIAN/conffiles"
	install -m 0755 packaging/deb/postinst "$(STAGE)/DEBIAN/postinst"
	install -m 0755 packaging/deb/prerm "$(STAGE)/DEBIAN/prerm"
# strip inherited setgid from staged dirs (a setgid build tree => dpkg-deb rejects DEBIAN)
	find "$(STAGE)" -type d -exec chmod g-s {} +
	mkdir -p "$(DIST)"
	dpkg-deb --root-owner-group --build "$(STAGE)" "$(DIST)/vmsig_$(VERSION)_$(ARCH).deb"
	@echo "built: $(DIST)/vmsig_$(VERSION)_$(ARCH).deb"
# Remove the packaging build tree and everything under $(DIST) (stage + built .deb).
# Paths are quoted so a directory name containing a space cannot split into
# separate arguments to rm -rf.
clean:
	rm -rf "$(BUILD_DIR)" "$(DIST)"
+270
View File
@@ -0,0 +1,270 @@
#ifndef VGPU_PERCEPTION_H
#define VGPU_PERCEPTION_H
/* vgpu_perception.h — host-side, read-only perception over the vgpu region.
*
* A pure functional core that builds vgpu semantics ON TOP OF a guest
* address-space root handed in by the caller. It only PERCEIVES: it discovers
* the region by structural invariants, samples frames and reads cursor /
* geometry / lifecycle, and returns SNAPSHOTS (POD values). It never owns
* coherence, never opens RW guest memory, never decides control or behavioural
* timing, never emits events upward.
*
* Where the region lives (the correction that shapes this API): the region is a
* RW shared mapping projected into the USER address space of a producer PROCESS,
* NOT a kernel VA in the System address space. So the core is handed a RO win32
* context (which the caller opened with the System kcr3), enumerates processes
* with proc_list, and finds the region in a process user-AS under that process's
* own cr3 (process.cr3). The System kcr3 is needed ONLY to open the context and
* walk processes; once the region is found, it is always read under the
* producer's process.cr3 (cached in the handle). The handle carries proc_cr3.
*
* What this core does NOT do (by design — those belong to the caller):
* - It does NOT own the vmie_win32 context / vmie_mem: both are BORROWED. The
* caller opens the RO win32 context (its lifetime is tied to the guest
* address-space mapping epoch) and closes it when that mapping goes stale.
* The core never opens or closes either.
* - It does NOT sleep / poll / spawn threads / arm timers: the two-phase
* liveness handshake is two calls; the WAIT between them is the caller's.
* - It does NOT transport frames. Frame transport is the caller's concern;
* the core is a PULL source — the caller takes desc+bytes from
* vgpup_sample_frame and routes them. No sink callback here.
* - It does NOT write control. vgpup_build_control_write only BUILDS the
* desired frame + offsets; the actual write is performed elsewhere, by a
* component that holds read-write access to the region.
*
* Two epochs + producer restart (the caller owns the policy; the core only
* reports facts — this is a flat pull model, no polling from below):
* - Address-space invalidation (new kcr3 / new epoch): the caller closes the
* win32 context, drops the old vgpup_region, opens a fresh context on the
* new epoch and re-discovers (vgpup_open). The old handle is invalid (a
* different address space entirely).
* - vgpu run_epoch advance while the context stays live (session break, same
* process): vgpup_read_status records r->run_epoch; vgpup_run_epoch reports
* it. The caller compares and decides whether to reset vgpu state — the
* region/process are unchanged. The core holds no reset policy.
* - Producer process restart (new pid/cr3 under the same live kcr3): the win32
* context is still valid (kernel alive), but the old handle's proc_cr3 /
* region_gva point at a dead process address space. Symptom: a read under
* r->proc_cr3 returns <0 (the process pages are gone). The core only REPORTS
* this (<0 from a read); the DECISION to re-discover is the caller's — it
* calls vgpup_close(old) + vgpup_open(v) so a fresh proc_list finds the
* restarted producer with its new cr3.
*
* Ownership convention:
* - vmie_win32* v, vmie_mem* m — BORROWED. The caller owns their lifecycle
* (tied to the address-space mapping). The core only reads through them.
* - vgpup_region* — heap-owned by the core (small private state). Create with
* vgpup_open, release with vgpup_close. Closing it does NOT touch v / m.
*
* Conventions (mirror memmodel.h):
* - The System kcr3 opens the RO win32 context; the REGION lives in the USER
* address space of the producer process and is read under its process.cr3
* (cached in the handle as proc_cr3). A "GVA" is a 64-bit guest VA in that
* process address space.
* - All guest reads go through gva_read into a local copy; no borrowed
* pointer into guest memory ever escapes a seqlock window or this API.
* - Integer returns: 0 success / negative failure for deterministic calls.
* Lossy read calls (sample/cursor/geometry) are tristate: 1 = consistent
* snapshot produced, 0 = no fresh data / writer kept it busy past the retry
* limit / would not fit (a SKIP, never an error — do not block), <0 = a
* hard memory-read error (page gone / process restarted — the caller
* re-discovers; see "Two epochs + producer restart" above).
*
* Example (the caller drives the two-phase liveness and the read loop):
*
* // caller already opened a RO win32 context with the System kcr3:
* vmie_win32* v = caller_ctx; // BORROWED by the core
* vmie_mem* m = vmie_win32_mem(v); // BORROWED; for the generic gva_*
*
* vgpup_region* r = vgpup_open(v); // phase 1: find producer + candidate
* if (!r) { return; } // no region in any process
*
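* // phase 2 is the caller's: take a phase-1 snapshot (or reuse open's), wait
* // >= VGPU_HEARTBEAT_PERIOD_MS, then confirm that the heartbeat advanced.
* uint64_t proc_cr3, region_gva, hb0;
* if (vgpup_discover_candidate(v, &proc_cr3, &region_gva, &hb0) < 0) {
*     vgpup_close(r);   // no candidate / read failed: outputs are not valid
*     return;
* }
* // ... the caller sleeps here, NOT the core ...
* int alive = vgpup_confirm_alive(m, proc_cr3, region_gva, hb0);
* // alive: 1 = heartbeat advanced, 0 = did not tick, <0 = read error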
*
* // sampling (lossy pull):
* static uint8_t buf[VGPU_SLOT_STRIDE];
* vgpup_frame_info fi;
* if (vgpup_sample_frame(r, m, buf, sizeof buf, &fi) == 1) {
* // route fi.desc + buf[0..fi.bytes) to the chosen transport
* }
*
* vgpup_close(r); // frees core state only; v / m stay with the caller
*/
#include <stdint.h>
#include <stddef.h>
#include "vgpu_stream.h" /* region ABI: producer/control types, slot geometry */
#include "win32.h" /* vmie_win32*, proc_list, process, vmie_win32_mem;
* pulls in memmodel.h for vmie_mem / gva_* the
* producer is found via proc_list under the System
* kcr3, then the region is read under process.cr3 */
/* Opaque found vgpu region in a producer's user address space. Heap-owned by the
* core; holds only small private state (proc_cr3, region/ctrl/ring GVA, last
* frame_id, last run_epoch). It does NOT own v / m — those are passed back in on
* every read. */
typedef struct vgpup_region vgpup_region;
/* ---- handle / lifecycle (the core does NOT own the win32 context) --------- */
/* Phase-1 discover + bind: enumerate processes (proc_list) over the BORROWED RO
* win32 context v, scan each process user-AS by structural invariants, snapshot
* hb0, and build a handle carrying the producer's proc_cr3 + region/ctrl/ring
* GVA. v is BORROWED — the core reads through it but never closes it (its
* lifetime is the caller's, tied to the address-space mapping epoch). Returns a
* heap-owned vgpup_region*, or NULL if no region is found in any process.
* Liveness is NOT yet proven: the caller must call vgpup_confirm_alive after
* waiting >= VGPU_HEARTBEAT_PERIOD_MS. Sampling before confirmation is allowed
* (lossy); "producer alive" is true only after a positive confirm.
*
* If a later read returns <0, the producer process may have restarted (its
* pages are gone): the caller re-discovers via vgpup_close(r) + vgpup_open(v). */
vgpup_region* vgpup_open(vmie_win32* v);
/* Release ONLY the core state. Does NOT touch v / m — the caller closes those
* (their lifetime is the caller's). Safe on NULL. */
void vgpup_close(vgpup_region* r);
/* ---- two-phase discovery (the WAIT belongs to the caller) ----------------- */
/* Phase 1: find a producer and a candidate region in its user-AS (no liveness).
* Walks proc_list over v and, for each process, scans its user-AS under
* process.cr3 by structural invariants. On the first hit writes the producer's
* cr3 to *out_proc_cr3, the region base GVA to *out_region_gva and the heartbeat
* snapshot to *out_hb0, and returns 0. Returns <0 if no candidate is found in
* any process or a read fails. Pure; does NOT wait. Needs v for proc_list. */
int vgpup_discover_candidate(vmie_win32* v, uint64_t* out_proc_cr3,
uint64_t* out_region_gva, uint64_t* out_hb0);
/* Phase 2: confirm liveness. The caller calls this >= VGPU_HEARTBEAT_PERIOD_MS
* after phase 1. Re-reads heartbeat at region_gva under proc_cr3 and returns 1
* if it advanced (alive producer), 0 if it did not tick (dead / not the region),
* <0 on a read error. Takes vmie_mem* m (== vmie_win32_mem(v)) and proc_cr3 —
* the win32 surface is no longer needed here, only gva_read. Pure; does NOT
* wait — the inter-phase delay is the caller's. */
int vgpup_confirm_alive(vmie_mem* m, uint64_t proc_cr3,
uint64_t region_gva, uint64_t hb0);
/* ---- snapshots (POD values; read under their seqlock discipline) ---------- */
/* Descriptor snapshot of the most recently published frame, taken under the
 * per-slot seqlock (seq[slot]). Carries the same fields, in the same order, as
 * the wire descriptor vgpu_desc_t. */
typedef struct {
    uint32_t width;
    uint32_t height;
    uint32_t stride;
    uint32_t format;
    uint64_t frame_id;
    uint64_t timestamp_ns;
} vgpup_frame_desc;
/* Result of a frame sample: the descriptor plus the count of bytes copied into
 * the caller's buffer (== height*stride, tight). Filled by vgpup_sample_frame
 * when it returns 1. */
typedef struct {
    vgpup_frame_desc desc;  /* descriptor of the frame that was copied */
    size_t bytes;           /* bytes written to the caller's dst buffer */
} vgpup_frame_info;
/* Cursor snapshot, read under the cursor_seq acquire gate. The observed seq is
 * returned with the data so the caller can distinguish "cursor idle" from
 * "producer stopped reporting". */
typedef struct {
    uint32_t seq;      /* cursor_seq value this snapshot was taken at */
    uint32_t visible;  /* 1 = shown, 0 = hidden */
    int32_t  x;        /* signed, unpacked from cursor_pos */
    int32_t  y;        /* signed, unpacked from cursor_pos */
    uint16_t hot_x;    /* unpacked from cursor_hotspot */
    uint16_t hot_y;    /* unpacked from cursor_hotspot */
    uint16_t glyph_w;  /* unpacked from cursor_glyph */
    uint16_t glyph_h;  /* unpacked from cursor_glyph */
    uint32_t id;       /* VGPU_CURSOR_ID_* */
} vgpup_cursor;
/* Display-geometry snapshot, read under the geom_seq seqlock. Field names match
 * the geometry fields of vgpu_producer_t. */
typedef struct {
    int32_t  virt_x;
    int32_t  virt_y;
    uint32_t virt_w;
    uint32_t virt_h;
    int32_t  cap_x;
    int32_t  cap_y;
    uint32_t dpi;
    uint32_t refresh_mhz;
} vgpup_geometry;
/* Lifecycle / status snapshot (cold line; single naturally-aligned atomic
 * fields, no seqlock — "fresh enough" by the lossy contract). Filled by
 * vgpup_read_status. Field meanings below follow the like-named cold-line
 * fields of vgpu_producer_t in vgpu_stream.h. */
typedef struct {
    uint64_t heartbeat;          /* monotonic; ticks even when STOPPED/PAUSED */
    uint32_t run_epoch;          /* +1 per producer start (session break) */
    uint32_t status; /* VGPU_ST_* */
    uint32_t backend; /* VGPU_BK_* */
    uint32_t error_code;         /* 0 = none; else fatal detail */
    uint32_t applied_fps;        /* publish-rate cap the producer applies */
    uint32_t supported_formats;  /* bitmask (1u << VGPU_FMT_*) */
    uint32_t ctrl_ack;           /* echo of control.ctrl_gen applied */
    uint32_t full_frame_ack;     /* echo of control.full_frame_req honored */
    uint64_t content_change_ns;  /* stamp of the last scene-content change */
} vgpup_status;
/* ---- read API (lossy; seqlock discipline lives inside) -------------------- *
* All read functions read under r->proc_cr3 (the producer's cr3, cached in the
* handle at discovery). m is a BORROWED vmie_mem* (== vmie_win32_mem(v)); the
* cr3 is NOT in the signature — it travels in the handle. A <0 return is a hard
* memory-read error: the producer process may have restarted, so the caller
* re-discovers (see "Two epochs + producer restart" in the file header). */
/* Sample the latest frame. Seqlock-reads latest/seq[slot]/desc, copies the slot
* bytes out of the RING via gva_read, then re-checks seq[slot] in one window.
* dst is the caller's buffer, cap its capacity. Returns 1 = a fresh frame was
* copied (info filled), 0 = no new frame / writer busy past the retry limit /
* frame would not fit cap (lossy SKIP, not an error), <0 = a memory-read error.
* "Fresh" dedups by frame_id: a frame_id <= the last sampled one returns 0. */
int vgpup_sample_frame(vgpup_region* r, vmie_mem* m,
uint8_t* dst, size_t cap, vgpup_frame_info* info);
/* Read the cursor under the cursor_seq acquire gate. 1 = consistent snapshot,
* 0 = writer busy past the retry limit, <0 = read error. */
int vgpup_read_cursor(vgpup_region* r, vmie_mem* m, vgpup_cursor* out);
/* Read display geometry under the geom_seq seqlock. Returns as read_cursor. */
int vgpup_read_geometry(vgpup_region* r, vmie_mem* m, vgpup_geometry* out);
/* Read the cold-line status/lifecycle. 0 = success, <0 = read error. The single
* atomic fields carry no seqlock; the snapshot is "fresh enough" (lossy). */
int vgpup_read_status(vgpup_region* r, vmie_mem* m, vgpup_status* out);
/* The run_epoch from the last vgpup_read_status — a session-break detector for
* the caller while the address space stays live. The core only reports the raw
* value; it holds no reset policy (what to reset is the caller's decision). */
uint32_t vgpup_run_epoch(const vgpup_region* r);
/* ---- control-write — SEAM ONLY (this never writes) ------------------------ */
/* Desired control-block value (host-RW fields). The caller builds it and later
* forwards it to the writer; the actual gva_write is performed elsewhere, by the
* component that holds read-write access to the region. */
typedef struct {
uint32_t desired_state; /* VGPU_CMD_* */
uint32_t target_fps; /* 0 = producer default */
uint32_t draw_cursor; /* 0/1 */
uint32_t full_frame_req; /* edge counter (caller bumps vs the previous) */
} vgpup_control_intent;
/* Build a control frame WITHOUT writing: fill a vgpu_control_t image from `in`,
* and report the control-block GVA plus the offset/length of the significant
* field range, so an external read-write writer can perform an atomic write
* under the ctrl_gen seqlock. This NEVER touches guest memory (the RO fd would
* not allow it anyway). ctrl_gen is left zero here: the writer owns it under the
* seqlock. The significant range is desired_state .. full_frame_req;
* consumer_tick/attached carry separate heartbeat/intent semantics and are NOT
* part of this intent.
* out_frame — filled vgpu_control_t (significant fields from `in`)
* out_ctrl_gva — control-block GVA (region base + VGPU_CONTROL_OFFSET). This
* GVA is valid in the PRODUCER's user address space: the
* external write MUST be performed under r->proc_cr3, NOT the
* System kcr3.
* out_off — offset of the first significant field (offsetof desired_state)
* out_len — length of the significant range (through full_frame_req)
* Returns 0 on success, <0 if r is NULL. The write itself is performed
* elsewhere; there is no live gva_write here and there must not be. */
int vgpup_build_control_write(vgpup_region* r, const vgpup_control_intent* in,
vgpu_control_t* out_frame, uint64_t* out_ctrl_gva,
uint32_t* out_off, uint32_t* out_len);
#endif /* VGPU_PERCEPTION_H */
+169
View File
@@ -0,0 +1,169 @@
#ifndef VGPU_STREAM_H
#define VGPU_STREAM_H
#include <stdint.h>
#include <stddef.h> /* offsetof */
#include <stdalign.h> /* alignas */
#include <assert.h> /* static_assert */
/* ===== Geometry — single source of truth (bare ABI, both ends agree) =====
 * Derived values, for orientation:
 *   VGPU_SLOT_STRIDE  = 32 MiB = 33,554,432 bytes per slot
 *   VGPU_RING_OFFSET  =  2 MiB
 *   VGPU_REGION_BYTES =  2 MiB + 3 * 32 MiB = 98 MiB
 *   largest tight BGRA frame = 3840 * 2160 * 4 = 33,177,600 bytes (fits a slot;
 *   enforced by the static_assert below). */
#define VGPU_PAGE 4096u /* producer block must fit page 0; control block starts at page 1 */
#define VGPU_SLOT_COUNT 3u /* number of frame slots; sizes seq[] and desc[] */
#define VGPU_SLOT_STRIDE (32u * 1024u * 1024u) /* bytes reserved per slot */
#define VGPU_RING_OFFSET (2u * 1024u * 1024u) /* presumably the ring's offset from the region base */
#define VGPU_PRODUCER_OFFSET 0u
#define VGPU_CONTROL_OFFSET VGPU_PAGE /* control-block GVA = region base + this */
#define VGPU_REGION_BYTES (VGPU_RING_OFFSET + (uint64_t)VGPU_SLOT_COUNT * VGPU_SLOT_STRIDE)
#define VGPU_MAX_WIDTH 3840u
#define VGPU_MAX_HEIGHT 2160u
#define VGPU_HEARTBEAT_PERIOD_MS 250u /* producer ticks heartbeat >= 4 Hz always */
#define VGPU_LATEST_NONE 0xFFFFFFFFu /* producer.latest until the first frame is published */
/* Compile-time proof that the largest supported mode fits in one slot. The
 * uint64_t cast keeps the product from being evaluated in 32-bit arithmetic. */
static_assert((uint64_t)VGPU_MAX_WIDTH * VGPU_MAX_HEIGHT * 4u <= VGPU_SLOT_STRIDE,
"max-mode tight BGRA must fit one slot");
/* enum values travel as uint32 wire-values (not as enum fields → no width instability) */
enum { VGPU_FMT_BGRA8888 = 0 };
enum { VGPU_ST_INIT=0, VGPU_ST_CAPTURING=1, VGPU_ST_PAUSED=2, VGPU_ST_STOPPED=3, VGPU_ST_ERROR=4 };
enum { VGPU_BK_NONE=0, VGPU_BK_NVFBC=1, VGPU_BK_DDA=2, VGPU_BK_GDI=3 };
enum { VGPU_CMD_STOP=0, VGPU_CMD_RUN=1, VGPU_CMD_PAUSE=2 };
/* cursor shape identity (wire-uint32); UNKNOWN=0 → custom/unrecognized glyph */
enum { VGPU_CURSOR_ID_UNKNOWN=0, VGPU_CURSOR_ID_ARROW=1, VGPU_CURSOR_ID_IBEAM=2,
VGPU_CURSOR_ID_WAIT=3, VGPU_CURSOR_ID_CROSS=4, VGPU_CURSOR_ID_HAND=5,
VGPU_CURSOR_ID_SIZENS=6, VGPU_CURSOR_ID_SIZEWE=7, VGPU_CURSOR_ID_SIZENWSE=8,
VGPU_CURSOR_ID_SIZENESW=9, VGPU_CURSOR_ID_SIZEALL=10, VGPU_CURSOR_ID_NO=11,
VGPU_CURSOR_ID_APPSTARTING=12 };
/* ===== Per-slot descriptor (under hot.seq[slot]) ===== */
typedef struct {
uint32_t width; /* pixels */
uint32_t height; /* pixels */
uint32_t stride; /* bytes/row; INVARIANT: == width*4 (tight) */
uint32_t format; /* VGPU_FMT_* */
uint64_t frame_id; /* == producer.frame_id at publish time */
uint64_t timestamp_ns; /* capture time, monotonic */
} vgpu_desc_t;
static_assert(sizeof(vgpu_desc_t) == 32, "desc layout");
static_assert(offsetof(vgpu_desc_t, width) == 0, "desc.width");
static_assert(offsetof(vgpu_desc_t, height) == 4, "desc.height");
static_assert(offsetof(vgpu_desc_t, stride) == 8, "desc.stride");
static_assert(offsetof(vgpu_desc_t, format) == 12, "desc.format");
static_assert(offsetof(vgpu_desc_t, frame_id) == 16, "desc.frame_id");
static_assert(offsetof(vgpu_desc_t, timestamp_ns) == 24, "desc.timestamp_ns");
/* ===== Producer block (host-RO): hot publish line + cold status line =====
 * The "@N" markers are byte offsets from the start of the struct. The layout
 * is ABI; offsets are pinned by the static_asserts that follow the struct. */
typedef struct {
    /* --- hot publish line (@0 .. @127) --- */
    alignas(64)
    uint32_t latest;                 /* @0: index of last; VGPU_LATEST_NONE until 1st frame */
    uint32_t _r0;                    /* @4: padding; keeps frame_id at @8 */
    uint64_t frame_id;               /* @8: monotonic frame counter (8-aligned) */
    uint32_t seq[VGPU_SLOT_COUNT];   /* @16: per-slot seqlock: even=stable, odd=writing */
    uint32_t _r1;                    /* @28: padding; keeps desc at @32 */
    vgpu_desc_t desc[VGPU_SLOT_COUNT]; /* @32: self-describing slots (3 x 32 bytes) */
    /* --- cold status line --- */
    alignas(64)
    uint64_t heartbeat;              /* @128: monotonic; ticks always (even STOPPED/PAUSED) */
    uint32_t run_epoch;              /* @136: +1 per start (session break for host) */
    uint32_t status;                 /* @140: VGPU_ST_* */
    uint32_t backend;                /* @144: VGPU_BK_* */
    uint32_t error_code;             /* @148: 0=none; else fatal detail */
    uint32_t applied_fps;            /* @152: publish-rate cap the producer actually applies;
                                        actual rate may be lower on static content or
                                        backend limits — host measures real fps from
                                        desc.timestamp_ns */
    uint32_t supported_formats;      /* @156: bitmask (1u<<VGPU_FMT_*) */
    uint32_t ctrl_ack;               /* @160: echo of control.ctrl_gen (even) applied */
    uint32_t full_frame_ack;         /* @164: echo of control.full_frame_req honored */
    /* --- cursor reporting (host-RO; position is sensor data, independent
     * of control.draw_cursor / cursor compositing) --- */
    uint32_t cursor_seq;             /* @168: monotonic; bumps each cursor publish.
                                        Host reads it last (acquire) to gate a
                                        consistent {cursor_pos,cursor_visible}; lets the
                                        host tell "cursor idle" from "producer stopped
                                        reporting". */
    uint32_t cursor_visible;         /* @172: 1=cursor shown (CURSOR_SHOWING), 0=hidden */
    uint64_t cursor_pos;             /* @176: packed screen position, 8-aligned single
                                        atomic MOV. low 32 bits = x, high 32 = y, each a
                                        signed int32 (two's-complement; multi-monitor →
                                        negatives). Pair never tears (one 64-bit store). */
    /* --- cursor Tier-1 (host-RO; same cursor_seq gate as cursor_pos/visible) --- */
    uint32_t cursor_hotspot;         /* @184: low16=hot_x, high16=hot_y (unsigned) */
    uint32_t cursor_glyph;           /* @188: low16=glyph_w, high16=glyph_h (unsigned) */
    uint32_t cursor_id;              /* @192: VGPU_CURSOR_ID_* shape identity */
    /* --- graphics static-idle: monotonic stamp of last scene-content change ---
     * alignas(8) places it at @200; bytes @196..@199 are padding. */
    alignas(8) uint64_t content_change_ns; /* @200: host derives idle-ms vs its own clock */
    /* --- display geometry (own cache line; geom_seq seqlock; sampled rarely) ---
     * captured-surface SIZE is NOT here: it is desc.width/height (authoritative, tight). */
    alignas(64)
    uint32_t geom_seq;               /* @256: even=stable, odd=writing (frame-seqlock) */
    int32_t virt_x;                  /* @260: virtual-desktop origin (signed) */
    int32_t virt_y;                  /* @264 */
    uint32_t virt_w;                 /* @268: virtual-desktop bbox size (interprets neg pos) */
    uint32_t virt_h;                 /* @272 */
    int32_t cap_x;                   /* @276: captured-output origin in virtual-desktop coords */
    int32_t cap_y;                   /* @280: (captured size = desc.width/height, not here) */
    uint32_t dpi;                    /* @284: captured-output effective DPI; 96=100%; 0=unknown */
    uint32_t refresh_mhz;            /* @288: captured-output refresh in milli-Hz; 0=unknown */
} vgpu_producer_t;
/* Compile-time ABI guard for vgpu_producer_t: alignment, page fit, and the byte
 * offset of every host-read field. A layout change that moves any of these
 * fails the build instead of silently breaking readers of the shared page. */
static_assert(alignof(vgpu_producer_t) == 64, "producer align");
static_assert(sizeof(vgpu_producer_t) <= VGPU_PAGE, "producer fits page 0");
/* host-read field layout frozen as ABI */
/* hot publish group (@0..) */
static_assert(offsetof(vgpu_producer_t, latest) == 0, "producer.latest");
static_assert(offsetof(vgpu_producer_t, frame_id) == 8, "producer.frame_id");
static_assert(offsetof(vgpu_producer_t, seq) == 16, "producer.seq");
static_assert(offsetof(vgpu_producer_t, desc) == 32, "producer.desc");
/* cold status group (@128..) */
static_assert(offsetof(vgpu_producer_t, heartbeat) == 128, "producer.heartbeat");
static_assert(offsetof(vgpu_producer_t, run_epoch) == 136, "producer.run_epoch");
static_assert(offsetof(vgpu_producer_t, status) == 140, "producer.status");
static_assert(offsetof(vgpu_producer_t, backend) == 144, "producer.backend");
static_assert(offsetof(vgpu_producer_t, error_code) == 148, "producer.error_code");
static_assert(offsetof(vgpu_producer_t, applied_fps) == 152, "producer.applied_fps");
static_assert(offsetof(vgpu_producer_t, supported_formats) == 156, "producer.supported_formats");
static_assert(offsetof(vgpu_producer_t, ctrl_ack) == 160, "producer.ctrl_ack");
static_assert(offsetof(vgpu_producer_t, full_frame_ack) == 164, "producer.full_frame_ack");
/* cursor reporting (gated by cursor_seq) */
static_assert(offsetof(vgpu_producer_t, cursor_seq) == 168, "producer.cursor_seq");
static_assert(offsetof(vgpu_producer_t, cursor_visible) == 172, "producer.cursor_visible");
static_assert(offsetof(vgpu_producer_t, cursor_pos) == 176, "producer.cursor_pos");
/* cursor Tier-1 (cursor line, gated by cursor_seq) */
static_assert(offsetof(vgpu_producer_t, cursor_hotspot) == 184, "producer.cursor_hotspot");
static_assert(offsetof(vgpu_producer_t, cursor_glyph) == 188, "producer.cursor_glyph");
static_assert(offsetof(vgpu_producer_t, cursor_id) == 192, "producer.cursor_id");
/* graphics static-idle (bytes 196..199 are alignment padding before the 64-bit stamp) */
static_assert(offsetof(vgpu_producer_t, content_change_ns) == 200, "producer.content_change_ns");
/* display geometry (own cache line; captured SIZE is desc.width/height, not here) */
static_assert(offsetof(vgpu_producer_t, geom_seq) == 256, "producer.geom_seq");
static_assert(offsetof(vgpu_producer_t, virt_x) == 260, "producer.virt_x");
static_assert(offsetof(vgpu_producer_t, virt_y) == 264, "producer.virt_y");
static_assert(offsetof(vgpu_producer_t, virt_w) == 268, "producer.virt_w");
static_assert(offsetof(vgpu_producer_t, virt_h) == 272, "producer.virt_h");
static_assert(offsetof(vgpu_producer_t, cap_x) == 276, "producer.cap_x");
static_assert(offsetof(vgpu_producer_t, cap_y) == 280, "producer.cap_y");
static_assert(offsetof(vgpu_producer_t, dpi) == 284, "producer.dpi");
static_assert(offsetof(vgpu_producer_t, refresh_mhz) == 288, "producer.refresh_mhz");
/* ===== Control block (host-RW), own page, generation-guarded =====
 * The host writes this block; ctrl_gen is the generation seqlock around a
 * host update (odd while writing, even when stable). The producer reports what
 * it has applied through producer.ctrl_ack / producer.full_frame_ack.
 * 64-byte aligned, must fit in one VGPU_PAGE; field offsets are pinned by the
 * static_asserts below (layout frozen as ABI). */
typedef struct {
alignas(64)
uint32_t ctrl_gen; /* @0: generation seqlock: even=stable, odd=writing (host writes) */
uint32_t desired_state; /* @4: VGPU_CMD_* (STOP/RUN/PAUSE) */
uint32_t target_fps; /* @8: desired fps; 0=producer default */
uint32_t draw_cursor; /* @12: 1=compose cursor */
uint32_t full_frame_req; /* @16: edge counter: bump → force fresh full frame */
uint32_t consumer_tick; /* @20: host heartbeat (producer watches with timeout) */
uint32_t attached; /* @24: 1=host attached (intent, not death-proof) */
} vgpu_control_t;
/* Compile-time ABI guard: alignment, page fit, and every host-written offset. */
static_assert(alignof(vgpu_control_t) == 64, "control align");
static_assert(sizeof(vgpu_control_t) <= VGPU_PAGE, "control fits page 1");
/* host-write field layout frozen as ABI */
static_assert(offsetof(vgpu_control_t, ctrl_gen) == 0, "control.ctrl_gen");
static_assert(offsetof(vgpu_control_t, desired_state) == 4, "control.desired_state");
static_assert(offsetof(vgpu_control_t, target_fps) == 8, "control.target_fps");
static_assert(offsetof(vgpu_control_t, draw_cursor) == 12, "control.draw_cursor");
static_assert(offsetof(vgpu_control_t, full_frame_req) == 16, "control.full_frame_req");
static_assert(offsetof(vgpu_control_t, consumer_tick) == 20, "control.consumer_tick");
static_assert(offsetof(vgpu_control_t, attached) == 24, "control.attached");
#endif
+108
View File
@@ -0,0 +1,108 @@
#ifndef VMCTL_H
#define VMCTL_H
#include <stddef.h>
/* vmctl.h — public API for a QEMU VM Input layer (actuator): input injection +
 * power/lifecycle actuation. One handle; the input driver is selected
 * declaratively through vmctl_config. OS-agnostic surface.
 *
 * Return convention visible in this header: int-returning actuation calls
 * document 0=ok / -1=err (see vmctl_batch_send and the power section);
 * vmctl_open returns NULL on error. */
typedef struct vmctl vmctl_t; /* opaque handle */
/* ===== Input drivers + open ===== */
typedef enum {
VMCTL_DRIVER_QMP, /* QMP input-send-event (no guest driver required) */
VMCTL_DRIVER_UINPUT /* host uinput source; optional passthrough into guest */
/* via QEMU virtio-input-host-pci (Linux). uinput != virtio. */
} vmctl_driver;
/* Pointer-device mode for the UINPUT driver (vmctl_config.ptr_mode).
 * Numerically VMCTL_PTR_BOTH == (VMCTL_PTR_ABS | VMCTL_PTR_REL). */
#define VMCTL_PTR_ABS 1 /* uinput: absolute tablet */
#define VMCTL_PTR_REL 2 /* uinput: relative mouse */
#define VMCTL_PTR_BOTH 3 /* uinput: two devices A=abs B=rel */
/* Identity the created uinput device reports; passed via vmctl_config.uinput_id. */
typedef struct {
unsigned bustype; /* HID bus type, e.g. 0x0003 (USB) */
unsigned vendor; /* vendor id */
unsigned product; /* product id */
unsigned version; /* device version */
const char* name; /* device name; library copies it */
} vmctl_uinput_id;
/* Declarative open-time configuration; which fields apply depends on `driver`. */
typedef struct {
vmctl_driver driver;
const char* qmp_path; /* QMP unix socket; required for QMP, optional (passthrough) for UINPUT */
const char* input_bus; /* virtio-input-host-pci bus "pci.0" for passthrough; "" = none */
int ptr_mode; /* UINPUT VMCTL_PTR_*; 0 for QMP */
const vmctl_uinput_id* uinput_id; /* UINPUT only; NULL = built-in defaults */
} vmctl_config;
vmctl_t* vmctl_open (const vmctl_config* cfg); /* NULL on error */
void vmctl_close(vmctl_t* v); /* safe on NULL */
/* ===== Input constants ===== */
#define VMCTL_ABS_MAX 32767 /* abs coordinates 0..VMCTL_ABS_MAX */
#define VMCTL_AXIS_X 0
#define VMCTL_AXIS_Y 1
#define VMCTL_SCROLL_V 0 /* vertical */
#define VMCTL_SCROLL_H 1 /* horizontal */
/* Button indices; these are also the bit positions in vmctl_btns_snapshot(). */
#define VMCTL_BTN_LEFT 0
#define VMCTL_BTN_RIGHT 1
#define VMCTL_BTN_MIDDLE 2
#define VMCTL_BTN_SIDE 3
#define VMCTL_BTN_EXTRA 4
#define VMCTL_BTN_FORWARD 5
#define VMCTL_BTN_BACK 6
#define VMCTL_BTN_TASK 7
#define VMCTL_KEY_CODE_MAX 0x2ff /* highest supported evdev key code (inclusive) */
/* One bit per key code 0..VMCTL_KEY_CODE_MAX: (0x2ff + 1) / 8 = 96 bytes. */
#define VMCTL_KEYS_SNAPSHOT_BYTES ((VMCTL_KEY_CODE_MAX + 1) / 8) /* bytes for vmctl_keys_snapshot */
/* ===== Event batch (value-type, stack; build ONLY via builders — ev[] is not API) ===== */
#define VMCTL_BATCH_MAX 64
typedef struct {
int kind; /* internal event-kind code; set by builders */
int code; /* axis / button / evdev-code (per kind) */
int value; /* abs-value / rel-delta / down(0|1) */
double scroll; /* scroll magnitude (scroll only) */
} vmctl_event;
/* NOTE(review): what a builder does once VMCTL_BATCH_MAX events are queued is not
 * visible in this header (builders return void) — confirm against the implementation. */
typedef struct { vmctl_event ev[VMCTL_BATCH_MAX]; int count; } vmctl_batch;
void vmctl_batch_init (vmctl_batch* b);
void vmctl_batch_abs (vmctl_batch* b, int axis, int value);
void vmctl_batch_rel (vmctl_batch* b, int axis, int delta);
void vmctl_batch_btn (vmctl_batch* b, int btn, int down);
void vmctl_batch_key (vmctl_batch* b, int evdev_code, int down);
void vmctl_batch_scroll(vmctl_batch* b, int axis, double value);
int vmctl_batch_send (vmctl_t* v, vmctl_batch* b); /* one round-trip; 0=ok, -1=err */
/* ===== Single events (wrappers over a 1-event batch) ===== */
int vmctl_abs (vmctl_t* v, int axis, int value); /* 0..VMCTL_ABS_MAX */
int vmctl_rel (vmctl_t* v, int axis, int delta);
int vmctl_btn (vmctl_t* v, int btn, int down); /* VMCTL_BTN_* */
int vmctl_key (vmctl_t* v, int evdev_code, int down); /* Linux KEY_* */
int vmctl_scroll(vmctl_t* v, int axis, double value); /* VMCTL_SCROLL_* */
/* ===== Held-state receipt (read-only) =====
 * "held" = key/button state as THIS handle last actuated it, not guest truth.
 * It is the actuator's record of its own last output (sensing the guest belongs
 * to the sensors layer, not here). Updated only after a successful send; the
 * send path NEVER reads this map (no dedup, no auto-release, no autorepeat). */
int vmctl_key_held (vmctl_t* v, int evdev_code); /* Linux KEY_*; 1=down 0=up */
int vmctl_btn_held (vmctl_t* v, int btn); /* VMCTL_BTN_*; 1=down 0=up */
int vmctl_keys_snapshot(vmctl_t* v, unsigned char* bits, size_t nbytes);
/* copy key down-bits (EVIOCGKEY-style);
   returns bytes written, -1 on bad args.
   A buffer of VMCTL_KEYS_SNAPSHOT_BYTES holds the full map; behaviour for a
   smaller nbytes is not specified here — TODO confirm. */
unsigned vmctl_btns_snapshot(vmctl_t* v); /* VMCTL_BTN_* down-bits as a mask (bits 0..7) */
/* ===== Power/lifecycle actuation (requires a QMP connection; -1 if there is none) ===== */
int vmctl_powerdown(vmctl_t* v); /* system_powerdown (ACPI soft-off) */
int vmctl_reset (vmctl_t* v); /* system_reset */
int vmctl_wakeup (vmctl_t* v); /* system_wakeup (from S3/S4) */
int vmctl_pause (vmctl_t* v); /* stop */
int vmctl_resume (vmctl_t* v); /* cont */
/* Transfer sequencing/context belongs to signaling; timing and decisions to
 * control; reading VM state to sensors. Here, in the Input layer, only atomic
 * actuation. */
#endif /* VMCTL_H */
+3
View File
@@ -37,6 +37,9 @@ typedef struct {
#define VMSIG_CAP_MEMWRITE 0x100u /* CMD_MEMWRITE: atomic write-signaled mutation of shared guest memory
* (separate from the freed CAP_MEMREAD bit — read != write; fresh bit
* avoids stale-grant aliasing to this privileged cap). */
#define VMSIG_CAP_ROSTER 0x200u /* SUBSCRIPTION to the VM roster (UP VMSIG_EV_ROSTER): which VMs occupy
* which endpoints, by name/state. Distinct from CAP_OBSERVE — this is
* host-wide inventory enumeration, not observing one VM's content. */
typedef struct {
uint32_t principal; /* id for auditing (uid/token) */
+11 -1
View File
@@ -51,10 +51,20 @@ void vmsig_core_set_arb_policy(vmsig_core* c, vmsig_arb_policy cb, void*
/* Register an adapter for VM `endpoint`: open(cfg,endpoint) -> attach(...),
* enroll each yielded fd into epoll and into the dispatch table fd->(adapter,cookie).
* Returns the adapter id (>=0) or -1. */
* Returns the adapter id (>=0) or -1. Runtime-safe: may be called AFTER vmsig_core_run
* has started, from a loop-thread callback (e.g. a discovery SLOT_SOURCE), to hot-plug
* a VM's adapters; a freed adapter slot is reused so churn does not exhaust the table. */
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
const void* cfg, uint32_t endpoint);
/* Request runtime detach of EVERY adapter currently attached to `endpoint` (the whole
* VM trio). Deferred: the teardown (epoch settle + SEAM_DOWN + lease release + epoll DEL
* + ops->close) runs after the current event batch, like core_request_drop for controls.
* Safe to call from a loop-thread callback (e.g. inotify discovery). No-op if endpoint
* is not attached or >= 64. The composing of the trio at attach is the caller's job
* (3x add_adapter); detach is by endpoint so the caller needs no per-adapter ids. */
void vmsig_core_detach_endpoint(vmsig_core* c, uint32_t endpoint);
/* Attach a control endpoint (in-process or socket) with a GRANT (capability set).
* grant == NULL => default-deny (poller inert). The core sees only the neutral
* vtable + grant + (opt.) fd. Returns the control id (>=0) or -1. */
+5
View File
@@ -63,6 +63,11 @@ typedef enum {
/* --- UP: cursor (vgpu sensor; emitted by the vgpu-perception shell-as-control) --- */
VMSIG_EV_CURSOR_STATE = 37, /* cursor position/visibility; inln=vmsig_cursor; cap OBSERVE|INPUT */
/* --- UP: VM roster (inventory coherence; daemon-originated, source=CORE) --- */
VMSIG_EV_ROSTER = 38, /* which VM occupies this endpoint: inln=vmsig_roster
* {vmid,state,action,name}, endpoint in the header; retained
* per-endpoint + replayed to late subscribers; cap ROSTER */
/* --- UP: input/lifecycle ack (INPUT seam) --- */
VMSIG_EV_ACT_ACK = 48, /* down-command completed (ok/err) */
VMSIG_EV_VM_LIFECYCLE = 49, /* power/lifecycle state report */
+40
View File
@@ -0,0 +1,40 @@
#ifndef VMSIG_ROSTER_H
#define VMSIG_ROSTER_H
#include <assert.h> /* static_assert */
#include <stddef.h> /* offsetof */
#include <stdint.h>

/* vmsig_roster.h — NEUTRAL inventory-coherence contract.
 *
 * The signaling daemon owns the discovery namespace and assigns each running VM a stable
 * ENDPOINT slot [0,64). The roster is the per-endpoint datum "which VM currently occupies
 * this slot, by what name, in what coarse lifecycle state". It is published as an UP event
 * VMSIG_EV_ROSTER (source=CORE), retained per endpoint and replayed to a late subscriber —
 * exactly like the MEMCTX datum, but carrying identity rather than an address-space handle.
 *
 * This is COHERENCE of shared state (the endpoint roster is shared across all controls),
 * NOT perception and NOT access-brokering. A consumer decodes it WITHOUT any host/Proxmox
 * knowledge: `endpoint` rides in the event header (ev->endpoint), the rest in inln[48].
 * CAP_ROSTER gates RECEIVING the datum (subscription), not access — access stays OS-DAC. */

/* Roster transition (entry->action). */
enum {
    VMSIG_ROSTER_ATTACH = 0, /* endpoint is now occupied by `vmid` */
    VMSIG_ROSTER_DETACH = 1, /* endpoint vacated (the slot bit is being released) */
    VMSIG_ROSTER_UPDATE = 2  /* same vmid on the slot; state and/or name changed */
};

/* roster->flags bits */
#define VMSIG_ROSTER_NAME_TRUNC 0x1u /* the VM name did not fit and was truncated */
#define VMSIG_ROSTER_NAME_MAX 32     /* inline, NUL-terminated, truncated name */

/* The roster datum, carried inline (inln[48]). `endpoint` is NOT here — it is the event
 * header's ev->endpoint (where every event carries it, and what the wire serializes). */
typedef struct {
    uint32_t vmid;   /* host VM id (e.g. Proxmox vmid 100..1e9) — does NOT fit endpoint */
    uint32_t state;  /* coarse lifecycle: VMSIG_VM_* (vmsig_event.h), from the host plane */
    uint32_t action; /* VMSIG_ROSTER_ATTACH/DETACH/UPDATE */
    uint32_t flags;  /* VMSIG_ROSTER_* (e.g. NAME_TRUNC) */
    char name[VMSIG_ROSTER_NAME_MAX]; /* NUL-terminated, truncated display name */
} vmsig_roster; /* 4+4+4+4+32 = 48 — exactly inln[48] */

/* Layout frozen as ABI: the datum is copied by value into the 48-byte inline payload and
 * decoded by consumers at fixed offsets. Enforce the size/offset contract at compile time
 * so a field added or reordered here fails the build instead of overflowing inln[48]. */
static_assert(sizeof(vmsig_roster) == 48, "roster fills inln[48] exactly");
static_assert(offsetof(vmsig_roster, vmid) == 0, "roster.vmid");
static_assert(offsetof(vmsig_roster, state) == 4, "roster.state");
static_assert(offsetof(vmsig_roster, action) == 8, "roster.action");
static_assert(offsetof(vmsig_roster, flags) == 12, "roster.flags");
static_assert(offsetof(vmsig_roster, name) == 16, "roster.name");

#endif /* VMSIG_ROSTER_H */
+27
View File
@@ -0,0 +1,27 @@
# vmsigd.conf — vmsig management daemon configuration.
# Installed as a dpkg conffile (operator edits are preserved across upgrades).
# Control listener the consumer dials. '@' prefix => abstract namespace.
socket = /run/vmsig/vmsigd.sock
# Discovery namespace: a VM is managed iff its guest-RAM backing file appears here as
# vm-<vmid>-ram. The daemon owns this directory (created at boot via tmpfiles).
watch = /dev/shm/vmsig
# Inventory source of truth (read on demand; not watched) and the QMP socket directory.
pve_conf = /etc/pve/qemu-server
qmp_dir = /var/run/qemu-server
# vmid<->endpoint slot persistence (tmpfs; re-derived per daemon restart). "" => off.
slots = /dev/shm/vmsig/.slots
# ---- Admission policy: one [grant uid=N] stanza per local uid. -----------------------
# Entitlements are COARSE (the control enforces per-user caps behind the grant). `vmids`
# is either `*` (all VMs) or a list of vmids; it is translated to endpoint bits at connect
# time. `caps` is a comma list of: observe,input,lifecycle,power,vm,memctx,memwrite,roster.
#
# Example (edit before enabling the service):
# [grant uid=0]
# vmids = *
# caps = observe,input,lifecycle,power,vm,memctx,memwrite,roster
# arb_prio = 100
+1
View File
@@ -0,0 +1 @@
/etc/vmsig/vmsigd.conf
+13
View File
@@ -0,0 +1,13 @@
Package: vmsig
Version: @VERSION@
Section: admin
Priority: optional
Architecture: amd64
Depends: @DEPENDS@
Maintainer: @MAINTAINER@
Description: VM signaling coherence daemon and host SI libraries
vmsig serves a unix-socket control plane over the signaling layer for the VMs it
discovers: lifecycle/state, coherent guest address-space context handoff, and arbitrated
input and memory-write actuation. Ships the daemon (vmsigd), the signaling library, the
host-side vgpu perception library, and a systemd unit. Configured via
/etc/vmsig/vmsigd.conf.
+19
View File
@@ -0,0 +1,19 @@
#!/bin/sh
# postinst for the vmsig package.
# On "configure": refresh the linker cache, tighten the policy file's mode, make sure the
# discovery directory exists, and (only when systemd is the running init) reload units,
# apply the tmpfiles rule and enable — but never start — vmsigd.service.
# Every step except the /dev/shm line is best-effort ("|| true"), so it cannot fail the
# package configuration under `set -e`.
set -e
case "$1" in
configure)
ldconfig || true
mkdir -p /etc/vmsig
chmod 0640 /etc/vmsig/vmsigd.conf 2>/dev/null || true # carries the uid->grant policy
# NOTE: under `set -e`, a failing mkdir inside this && list does not abort the script
# (non-final commands of an AND list are exempt); only a failing chmod would.
mkdir -p /dev/shm/vmsig && chmod 0755 /dev/shm/vmsig # also (re)created at boot via tmpfiles
# /run/systemd/system exists only when systemd is the running init; skip the
# systemctl/tmpfiles calls otherwise (e.g. chroot or container image builds).
if [ -d /run/systemd/system ]; then
systemctl daemon-reload || true
systemd-tmpfiles --create /usr/lib/tmpfiles.d/vmsig.conf || true
systemctl enable vmsigd.service || true # enable, but do NOT start
fi
# The operator must review the grant policy before the first start; say so on stderr.
echo "vmsig: review the [grant] policy in /etc/vmsig/vmsigd.conf, then: systemctl start vmsigd" >&2
;;
# Error-unwind invocations: nothing to roll back.
abort-upgrade|abort-remove|abort-deconfigure)
;;
esac
exit 0
+12
View File
@@ -0,0 +1,12 @@
#!/bin/sh
# prerm for the vmsig package.
# remove/deconfigure: stop and disable vmsigd.service, but only when systemd is the
# running init (best-effort: a systemctl failure never blocks package removal).
# upgrade/failed-upgrade: leave the service untouched.
set -e

case "$1" in
    remove | deconfigure)
        # No /run/systemd/system => systemd is not running; nothing to stop or disable.
        [ ! -d /run/systemd/system ] || systemctl disable --now vmsigd.service || true
        ;;
    upgrade | failed-upgrade)
        ;;
esac

exit 0
+41
View File
@@ -0,0 +1,41 @@
# vmsigd.service — systemd unit for the vmsig management daemon.
# Runs as root, confined by mount namespace and an empty capability set; serves a
# unix-socket control plane under /run/vmsig and watches /dev/shm/vmsig for VMs.

[Unit]
Description=vmsig VM signaling coherence daemon
# No host/VM is named here: the daemon serves whatever appears under its watch dir.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# root: reads QEMU-owned /dev/shm RAM backings, dials per-VM QMP, reads /etc/pve (OS-DAC).
# The security boundary is the per-uid grant, not the process uid; per-VM isolation, if
# required, is the deployment's job (process-per-VM), not this daemon's.
User=root
ExecStart=/usr/sbin/vmsigd
Restart=on-failure
RestartSec=2
# systemd creates and owns /run/vmsig (the control socket dir) and cleans it on stop.
RuntimeDirectory=vmsig
RuntimeDirectoryMode=0755
StandardOutput=journal
StandardError=journal

# ---- hardening: contain a root daemon by namespace/capability, not by uid ----
NoNewPrivileges=true
ProtectSystem=strict
ReadWritePaths=/dev/shm/vmsig /run/vmsig
# The leading '-' makes each path optional. Without it, a host where /etc/pve or the
# qemu-server run dir does not exist (non-Proxmox host, or pve_conf/qmp_dir pointed
# elsewhere in vmsigd.conf) fails at mount-namespace setup (status 226/NAMESPACE)
# before vmsigd is even executed, and Restart=on-failure then loops on it.
ReadOnlyPaths=-/etc/pve -/var/run/qemu-server
ProtectHome=true
PrivateTmp=true
# Unix sockets only (control listener + per-VM QMP); no IP networking.
RestrictAddressFamilies=AF_UNIX
# Empty assignments: drop every capability from the bounding and ambient sets.
CapabilityBoundingSet=
AmbientCapabilities=
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
LockPersonality=true

[Install]
WantedBy=multi-user.target
+3
View File
@@ -0,0 +1,3 @@
# /dev/shm is tmpfs (wiped on reboot): (re)create the discovery namespace before the unit.
# Type Path Mode UID GID Age Argument
d /dev/shm/vmsig 0755 root root -
+4 -4
View File
@@ -1,10 +1,10 @@
#ifndef VMSIG_INPUT_H
#define VMSIG_INPUT_H
/* Private config of the input adapter (vmctl). cfg==NULL => stub mode. Armed mode
* (VMSIG_WITH_VMCTL) opens vmctl_open() and actuates for real. Injection is ALWAYS
* uinput (orphaned host uinput + external QEMU input-linux). qmp_path is kept for the
* SERVICE path (power/lifecycle via vmctl QMP), not for input injection. */
/* Private config of the input adapter (vmctl, in-tree at src/si/input/). cfg==NULL or
* stub!=0 => stub mode (ack without actuation). stub==0 opens vmctl_open() and actuates for
* real. Injection is ALWAYS uinput (orphaned host uinput + external QEMU input-linux);
* qmp_path is kept for the SERVICE path (power/lifecycle via vmctl QMP), not for injection. */
typedef struct {
int stub;
const char* qmp_path; /* for power/lifecycle (vmctl QMP); NOT input injection */
+5 -17
View File
@@ -3,19 +3,17 @@
* Mechanism (recommended): vmctl is a blocking QMP round-trip; we run it on a
* worker thread, completion ack via a completion-eventfd. The uinput path is a
* local instantaneous write; when armed it would be done inline (see comment in submit).
* Real actuation is under VMSIG_WITH_VMCTL; otherwise the stub acks (spine without a VM). */
* Real actuation when cfg.stub==0 (vmctl opened); otherwise the stub acks (spine without a VM).
* vmctl is the in-tree input driver (src/si/input/, absorbed); cfg.stub gates opening it. */
#include "vmsig_adapter.h"
#include "adapter_util.h"
#include "input.h"
#include "vmctl.h"
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/epoll.h>
#ifdef VMSIG_WITH_VMCTL
#include "vmctl.h"
#endif
/* POD request/result of the worker. */
typedef struct {
int cmd; /* 0 = input event, 1 = lifecycle */
@@ -40,9 +38,7 @@ struct vmsig_adapter {
vmsig_emit emit;
vmsig_worker* worker;
const char* qmp_path; /* borrowed from cfg (valid through attach); SERVICE power/lifecycle */
#ifdef VMSIG_WITH_VMCTL
vmctl_t* vmctl;
#endif
vmctl_t* vmctl; /* NULL in stub mode (cfg.stub) — no actuator opened */
};
static int input_job(void* user, const void* reqp, void* resp) {
@@ -53,7 +49,6 @@ static int input_job(void* user, const void* reqp, void* resp) {
rs->corr = rq->corr;
rs->origin = rq->origin;
rs->noack = rq->noack;
#ifdef VMSIG_WITH_VMCTL
if (a->vmctl) {
int r = -1;
if (rq->cmd == 0) {
@@ -87,9 +82,8 @@ static int input_job(void* user, const void* reqp, void* resp) {
rs->ok = (r == 0);
return r;
}
#endif
(void)a;
rs->ok = 1; /* stub: ack without actuation */
rs->ok = 1; /* stub: ack without actuation (vmctl not opened) */
return 0;
}
@@ -109,7 +103,6 @@ static int in_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg
a->worker = vmsig_worker_new(input_job, a, 1, 64); /* QMP is a serial channel, cap 64 */
if (!a->worker) return -1;
#ifdef VMSIG_WITH_VMCTL
if (!a->stub) {
/* armed: open the actuator. Injection is ALWAYS uinput (orphaned host uinput + external
* QEMU input-linux). PTR_BOTH gives both pointer forms a device (A=abs tablet, B=rel
@@ -125,7 +118,6 @@ static int in_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg
a->vmctl = vmctl_open(&vcfg);
if (!a->vmctl) { vmsig_worker_free(a->worker); a->worker = NULL; return -1; }
}
#endif
reg[0].fd = vmsig_worker_evfd(a->worker);
reg[0].epoll_events = EPOLLIN;
@@ -166,7 +158,6 @@ static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
* actuate — nothing to hold). */
vmsig_input_held h;
memset(&h, 0, sizeof h);
#ifdef VMSIG_WITH_VMCTL
if (a->vmctl) {
const uint32_t capn = (uint32_t)(sizeof h.ent / sizeof h.ent[0]);
unsigned char bits[VMCTL_KEYS_SNAPSHOT_BYTES];
@@ -184,7 +175,6 @@ static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
else h.flags |= VMSIG_INPUT_HELD_TRUNC;
}
}
#endif
vmsig_event up;
memset(&up, 0, sizeof up);
up.kind = VMSIG_EV_INPUT_HELD; up.source = VMSIG_SRC_INPUT; up.dir = VMSIG_DIR_UP;
@@ -223,9 +213,7 @@ static int in_submit(vmsig_adapter* a, const vmsig_event* ev) {
static void in_close(vmsig_adapter* a) {
if (!a) return;
vmsig_worker_free(a->worker);
#ifdef VMSIG_WITH_VMCTL
if (a->vmctl) vmctl_close(a->vmctl);
#endif
free(a);
}
+4 -2
View File
@@ -8,8 +8,10 @@ typedef struct {
int stub; /* 1 => synthetic kcr3/RO-fd (spine without a VM) */
const char* ram_path; /* armed: path to guest RAM backing (NOT published outward) */
uint64_t low; /* below-4G split (vmie_win32_open / locator.low) */
int ro_fd; /* >=0 => infra supplied a pre-sealed RO-fd (policy); */
/* <0 => default: open(ram_path, O_RDONLY) / stub-memfd */
int ro_fd; /* >=0 => infra hands a pre-sealed RO-fd (policy); OWNERSHIP */
/* TRANSFERS to the adapter (closed in close()) — the */
/* caller dups first if it must keep its own copy. */
/* <0 => default: open(ram_path, O_RDONLY) / stub-memfd */
} vmsig_memctx_cfg;
/* Max SRC bytes per atomic gva_write (bounds the worker POD slot; mc_req header + src
+6 -2
View File
@@ -85,7 +85,7 @@ struct vmsig_adapter {
int stub;
const char* ram_path; /* armed: RAM-backing path (NOT published outward) */
uint64_t low;
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (policy); <0 => default */
int cfg_ro_fd; /* >=0 => infra-sealed RO-fd (owned by adapter, closed in mc_close); <0 => default */
vmsig_emit emit;
int registered; /* register_memctx already called */
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
@@ -398,7 +398,11 @@ static void mc_close(vmsig_adapter* a) {
if (a->win) vmie_win32_close(a->win); /* AFTER worker join: no in-flight gva_write */
#endif
if (a->stub_fd >= 0) close(a->stub_fd);
/* cfg_ro_fd belongs to the infrastructure (the open caller) — do NOT close it. */
/* ro_fd ownership transferred to the adapter at open(): close it here so a re-grant
* (detach + re-attach with a fresh infra ro_fd) does not leak the prior one. Infra
* that must keep its own copy dups before handing it in — symmetric to the holder
* side, which dups the borrowed RO-fd it receives. */
if (a->cfg_ro_fd >= 0) close(a->cfg_ro_fd);
free(a);
}
+49 -10
View File
@@ -68,10 +68,22 @@ vmsig_core* vmsig_core_new(vmsig_ctx* ctx) {
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
const void* cfg, uint32_t endpoint) {
if (!c || !ops || c->nadapters >= VMSIG_MAX_ADAPTERS) return -1;
if (!c || !ops) return -1;
/* Reuse a reaped (inactive) adapter entry so runtime detach/re-attach churn does
* not exhaust the fixed table; otherwise grow up to the ceiling. */
int id = -1;
for (int i = 0; i < c->nadapters; i++)
if (!c->adapters[i].active) { id = i; break; }
if (id < 0) {
if (c->nadapters >= VMSIG_MAX_ADAPTERS) return -1;
id = c->nadapters++;
}
core_adapter_ent* e = &c->adapters[id];
uint16_t gen = e->gen; /* generation survives the memset below */
vmsig_adapter* a = ops->open(cfg, endpoint);
if (!a) return -1;
if (!a) return -1; /* entry stays inactive (reusable) */
vmsig_emit emit = { core_emit_up, core_register_memctx, core_unregister_memctx, c };
vmsig_fd_reg reg[VMSIG_ADAPTER_FDS];
@@ -80,23 +92,48 @@ int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
int n = ops->attach(a, &emit, reg, VMSIG_ADAPTER_FDS);
if (n < 0) { ops->close(a); return -1; }
memset(e, 0, sizeof *e);
e->ops = ops;
e->a = a;
e->endpoint = endpoint;
e->active = 1;
e->gen = (uint16_t)(gen + 1);
e->nslot = 0;
for (int i = 0; i < n; i++) {
uint32_t events = reg[i].epoll_events ? reg[i].epoll_events : (uint32_t)EPOLLIN;
core_slot* s = core_register_fd(c, reg[i].fd, events, SLOT_ADAPTER);
if (!s) { ops->close(a); return -1; }
if (!s) {
/* roll back: deregister the fds enrolled so far, then close + free the entry. */
for (int k = 0; k < e->nslot; k++) {
epoll_ctl(c->epfd, EPOLL_CTL_DEL, e->slots[k]->fd, NULL);
e->slots[k]->role = SLOT_DEAD;
}
ops->close(a);
e->active = 0; e->a = NULL; e->nslot = 0;
return -1;
}
s->ops = ops;
s->adapter = a;
s->cookie = reg[i].cookie;
if (e->nslot < VMSIG_ADAPTER_FDS) e->slots[e->nslot++] = s;
}
int id = c->nadapters;
c->adapters[c->nadapters].ops = ops;
c->adapters[c->nadapters].a = a;
c->adapters[c->nadapters].endpoint = endpoint;
c->nadapters++;
return id;
}
/* Runtime detach request for a whole endpoint. Only MARKS work: every active adapter
 * entry bound to `endpoint` gets its `reap` flag set, and the loop is woken once if at
 * least one entry was marked. The actual teardown (epoch settle, SEAM_DOWN, lease release,
 * epoll DEL, ops->close) happens later in core_reap_adapters on the loop thread — the same
 * deferred pattern core_request_drop uses for controls. NULL core or endpoint >= 64 is a
 * silent no-op, as is an endpoint with nothing attached. */
void vmsig_core_detach_endpoint(vmsig_core* c, uint32_t endpoint) {
    if (!c) return;
    if (endpoint >= 64) return;

    int marked = 0;
    for (int idx = 0; idx < c->nadapters; idx++) {
        core_adapter_ent* ent = &c->adapters[idx];
        if (!ent->active) continue;             /* free/reaped slot */
        if (ent->endpoint != endpoint) continue; /* belongs to another VM */
        ent->reap = 1;
        marked = 1;
    }

    /* Wake the loop only when there is something to reap. */
    if (marked) core_wake(c);
}
int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ctl,
const vmsig_grant* grant) {
if (!c || !ops) return -1;
@@ -137,6 +174,7 @@ int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ct
* this control is qualified). For a control added BEFORE the first publication,
* the cell is not yet valid — it receives MEMCTX via the normal multicast in pump_up. */
core_memctx_replay(c, id);
core_roster_replay(c, id); /* late subscriber: retained VM roster (CAP_ROSTER) */
return id; /* ncontrols already bumped when picking id (on growth); reuse does not grow it */
}
@@ -205,7 +243,8 @@ void vmsig_core_free(vmsig_core* c) {
* FIRST: their close stops off-loop workers and unregisters their seams (e.g.
* memctx) BEFORE destruction. */
for (int i = 0; i < c->nadapters; i++)
if (c->adapters[i].ops->close) c->adapters[i].ops->close(c->adapters[i].a);
if (c->adapters[i].active && c->adapters[i].ops->close)
c->adapters[i].ops->close(c->adapters[i].a);
for (int i = 0; i < c->ncontrols; i++)
if (c->controls[i].active && c->controls[i].ops->close)
c->controls[i].ops->close(c->controls[i].ctl);
+24
View File
@@ -1,6 +1,7 @@
#ifndef VMSIG_CORE_INTERNAL_H
#define VMSIG_CORE_INTERNAL_H
#include "vmsig_core.h"
#include "vmsig_roster.h"
#include <signal.h>
/* Private internals of the epoll core. Each registered fd carries a
@@ -41,6 +42,11 @@ typedef struct {
const vmsig_adapter_ops* ops;
vmsig_adapter* a;
uint32_t endpoint;
int active; /* 0 = free/reaped slot (reusable) */
int reap; /* deferred runtime detach requested */
uint16_t gen; /* +1 on each (re)use (ABA guard / debug) */
core_slot* slots[VMSIG_ADAPTER_FDS]; /* epoll slots we registered */
int nslot;
} core_adapter_ent;
@@ -57,6 +63,15 @@ typedef struct {
vmsig_memctx_reg reg; /* valid when registered */
} core_memctx_cell;
/* ===== Retained VM roster (inventory coherence; daemon-published) =====
* One value snapshot per endpoint: the last published roster datum. Simpler than the
* MEMCTX cell — roster carries no fd and no borrowed buffer, so the cell is pure POD and
* delivery is the ordinary broadcast (no re-describe / re-share). valid=0 on DETACH. */
typedef struct {
int valid; /* a roster entry is published for this endpoint */
vmsig_roster entry; /* last published {vmid,state,action,name} (by value) */
} core_roster_cell;
/* ===== Lease layer (arbitration of exclusive ownership of destructive resources) =====
* One cell per (endpoint, lease-class): who owns it (origin) + a snapshot of arb_prio at
* acquisition time. owner=0 => free. The snapshot (rather than the live grant) makes the
@@ -108,6 +123,7 @@ struct vmsig_core {
uint32_t epoch[64]; /* per-endpoint VM session epoch */
core_memctx_cell memctx[64]; /* per-endpoint retained context */
core_roster_cell roster[64]; /* per-endpoint retained roster */
core_lease_cell lease[64][VMSIG_LEASE_CLASSES]; /* lease per (endpoint, class) */
vmsig_arb_policy arb_cb; /* preemption policy (NULL=default) */
@@ -150,6 +166,14 @@ void core_memctx_route(vmsig_core* c, const vmsig_event* trigger);
* defined in loop.c). */
void core_memctx_replay(vmsig_core* c, int ctl_id);
/* ===== VM roster (inventory coherence; defined in loop.c alongside the memctx seam) ===== */
/* Publish a roster transition for `endpoint`: retain the datum (valid=0 on DETACH) and
* broadcast VMSIG_EV_ROSTER to qualified subscribers (CAP_ROSTER + source + endpoint). */
void core_roster_publish(vmsig_core* c, uint32_t endpoint, const vmsig_roster* entry);
/* Replay the retained roster to a single (late) subscriber (from vmsig_core_add_control). */
void core_roster_replay(vmsig_core* c, int ctl_id);
/* Bump the endpoint's epoch on a destructive lifecycle transition: epoch++, invalidate
* the retain cell, emit MEMCTX_INVALIDATED, request re-bootstrap from the adapter.
* Observed by the core in pump_up on UP VM_LIFECYCLE (defined in loop.c). */
+115 -4
View File
@@ -105,6 +105,7 @@ static uint32_t source_mask_for_lease_class(int cls) {
* input actor (INPUT); otherwise CAP_OBSERVE (frames/SEAM/generic). The grant_allows_up
* gate checks intersection, so OBSERVE|INPUT means "either of the two". */
static uint32_t cap_for_up(const vmsig_event* ev) {
    /* Kind-specific gates win over the source-based default below. */
    if (ev->kind == VMSIG_EV_ROSTER)
        return VMSIG_CAP_ROSTER;                    /* host-wide inventory */
    if (ev->kind == VMSIG_EV_CURSOR_STATE)
        return VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT; /* either capability qualifies */

    /* Everything else is gated by where it came from. */
    if (ev->source == VMSIG_SRC_MEMCTX)
        return VMSIG_CAP_MEMCTX;
    return VMSIG_CAP_OBSERVE;
}
@@ -129,7 +130,7 @@ static core_adapter_ent* core_find_adapter(vmsig_core* c, uint32_t endpoint,
vmsig_source source) {
for (int i = 0; i < c->nadapters; i++) {
core_adapter_ent* e = &c->adapters[i];
if (e->ops->source == source && e->endpoint == endpoint) return e;
if (e->active && e->ops->source == source && e->endpoint == endpoint) return e;
}
return NULL;
}
@@ -316,6 +317,22 @@ void core_lease_reap_control(vmsig_core* c, int ctl_id) {
}
}
/* Endpoint-keyed lease reap: free every lease class still owned on `endpoint` and audit
 * each release as LEASE_RECLAIMED with the former owner's principal. Called from endpoint
 * detach BEFORE the adapters close, so a lease cannot outlive its VM and carry over to
 * whichever VM later reuses the same endpoint bit. Counterpart of
 * core_lease_reap_control, which is keyed by owner instead. */
static void core_lease_reap_endpoint(vmsig_core* c, uint32_t endpoint) {
    if (endpoint >= 64)
        return;

    core_lease_cell* row = c->lease[endpoint];
    for (int cls = 0; cls < VMSIG_LEASE_CLASSES; cls++) {
        core_lease_cell* cell = &row[cls];
        if (cell->owner == 0)
            continue;                           /* this class is free */

        /* Resolve the principal while the owner is still recorded, then clear the cell. */
        uint32_t who = lease_owner_principal(c, cell->owner);
        cell->owner_prio = 0;
        cell->owner = 0;

        vmsig_audit rec = { VMSIG_AUDIT_LEASE_RECLAIMED, who, endpoint, (uint32_t)cls, 0 };
        core_audit(c, &rec);
    }
}
/* DOWN emit from a control: enforcement against THIS control's grant. */
int core_emit_down(void* token, vmsig_event* ev) {
core_down_ctx* d = token;
@@ -472,7 +489,57 @@ void core_memctx_replay(vmsig_core* c, int ctl_id) {
}
}
void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
/* ===== VM roster (inventory coherence): retain + broadcast + replay-to-late =====
 * Same retain-cell idea as MEMCTX, except the datum is a plain inline POD (no fd, no
 * borrowed buffer), so it travels through the ordinary broadcast (ops->deliver) and is
 * NOT intercepted in pump_up. Publishing is SYNCHRONOUS (like core_memctx_route), which
 * gives each control the datum exactly once: current subscribers through the broadcast,
 * a late subscriber through core_roster_replay. */

/* Fill `ev` with an urgent UP VMSIG_EV_ROSTER event from the core for endpoint `ep`,
 * carrying a copy of `*r` in the inline payload area. */
static void core_roster_build(uint32_t ep, const vmsig_roster* r, vmsig_event* ev) {
    memset(ev, 0, sizeof *ev);

    ev->kind          = VMSIG_EV_ROSTER;
    ev->source        = VMSIG_SRC_CORE;
    ev->dir           = VMSIG_DIR_UP;
    ev->prio          = VMSIG_PRIO_URGENT;
    ev->endpoint      = ep;
    ev->payload.flags = VMSIG_PL_INLINE;

    memcpy(ev->inln, r, sizeof *r);
}
/* Retain `entry` as the roster datum of `endpoint`, then synchronously deliver it as a
 * VMSIG_EV_ROSTER event to every active control whose grant and subscription admit it.
 * Null arguments and endpoints >= 64 are ignored. */
void core_roster_publish(vmsig_core* c, uint32_t endpoint, const vmsig_roster* entry) {
    if (c == NULL || entry == NULL || endpoint >= 64)
        return;

    /* A DETACH vacates the retained slot (nothing to replay to a late subscriber), yet the
     * DETACH event itself still goes out below so current subscribers drop the VM. */
    core_roster_cell* slot = &c->roster[endpoint];
    slot->entry = *entry;
    slot->valid = (entry->action != VMSIG_ROSTER_DETACH);

    vmsig_event ev;
    core_roster_build(endpoint, entry, &ev);

    for (int i = 0; i < c->ncontrols; i++) {
        core_control_ent* peer = &c->controls[i];
        if (!peer->active || !peer->ops->deliver)
            continue;
        if (!grant_allows_up(&peer->grant, &ev))
            continue;
        if (!sub_match(&peer->sub, &ev))
            continue;
        peer->ops->deliver(peer->ctl, &ev);
    }
}
/* Deliver every retained (valid) roster entry to the single control `ctl_id`, subject to
 * the same grant and subscription checks as the live broadcast. Does nothing for a null
 * core, an out-of-range id, an inactive control, or one without a deliver op. */
void core_roster_replay(vmsig_core* c, int ctl_id) {
    if (!c)
        return;
    if (ctl_id < 0 || ctl_id >= c->ncontrols)
        return;

    core_control_ent* late = &c->controls[ctl_id];
    if (!late->active || !late->ops->deliver)
        return;

    for (uint32_t ep = 0; ep < 64; ep++) {
        core_roster_cell* cell = &c->roster[ep];
        if (cell->valid) {
            vmsig_event ev;
            core_roster_build(ep, &cell->entry, &ev);
            if (grant_allows_up(&late->grant, &ev) && sub_match(&late->sub, &ev))
                late->ops->deliver(late->ctl, &ev);
        }
    }
}
/* Bump the endpoint epoch and broadcast MEMCTX_INVALIDATED to holders. When `rebootstrap`
* is set, ask the adapter to re-bootstrap (it re-emits MEMCTX{epoch+1} when ready) — the
* normal destructive-lifecycle path. On endpoint TEARDOWN (detach) `rebootstrap` is 0: the
* adapter is about to be closed, so kicking a re-bootstrap on a worker we are joining would
* be wasted; holders still settle via the INVALIDATED broadcast + the bumped epoch. */
static void core_epoch_invalidate_emit(vmsig_core* c, uint32_t endpoint, int rebootstrap) {
if (endpoint >= 64) return;
c->epoch[endpoint]++;
core_memctx_cell* cell = &c->memctx[endpoint];
@@ -486,11 +553,14 @@ void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
memcpy(up.inln, &inv, sizeof inv);
core_emit_up(c, &up); /* broadcast to holders (CAP_MEMCTX gate) */
/* request re-bootstrap from the adapter: it re-emits MEMCTX{epoch+1} when ready. */
if (cell->registered && cell->reg.invalidate)
if (rebootstrap && cell->registered && cell->reg.invalidate)
cell->reg.invalidate(cell->reg.ctx, c->epoch[endpoint]);
}
/* Public epoch bump for a destructive lifecycle transition on `endpoint`: delegates to
 * core_epoch_invalidate_emit with rebootstrap=1, so the adapter is also asked to
 * re-bootstrap. */
void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
    core_epoch_invalidate_emit(c, endpoint, 1); /* destructive lifecycle: re-bootstrap */
}
/* UP: drain the context queue and dispatch to subscribed controls */
static void pump_up(vmsig_core* c) {
vmsig_event ev;
@@ -575,6 +645,46 @@ static void core_reap(vmsig_core* c) {
}
}
/* Deferred reap of runtime-detached adapters (after the batch). Two passes:
 * 1) per-endpoint coherence settle ONCE: release leases + bump epoch / broadcast
 *    MEMCTX_INVALIDATED (no re-bootstrap — we are tearing down). Done while the memctx
 *    cell is still registered.
 * 2) per-adapter teardown: SEAM_DOWN (close is silent on administrative detach), epoll
 *    DEL + mark slots dead (so the loop never dispatches a half-closed adapter), then
 *    ops->close (joins the worker, closes the SI handle AFTER the join).
 * Deferred (reap flag set elsewhere) so no live slot is flipped to DEAD inside the batch.
 * Only entries with both `reap` and `active` set are touched; each is left inactive with
 * its reap flag cleared. */
static void core_reap_adapters(vmsig_core* c) {
    uint64_t settled = 0; /* endpoints already coherence-settled this pass */
    /* Pass 1: several adapters can share an endpoint; the bitmask makes the lease release
     * and epoch bump happen once per endpoint rather than once per adapter. */
    for (int i = 0; i < c->nadapters; i++) {
        core_adapter_ent* e = &c->adapters[i];
        if (!e->reap || !e->active) continue;
        uint32_t ep = e->endpoint;
        if (ep < 64 && !(settled & (1ull << ep))) {
            settled |= (1ull << ep);
            core_lease_reap_endpoint(c, ep);
            core_epoch_invalidate_emit(c, ep, 0); /* settle holders; no re-bootstrap */
        }
    }
    /* Pass 2: tear down each flagged adapter individually. */
    for (int i = 0; i < c->nadapters; i++) {
        core_adapter_ent* e = &c->adapters[i];
        if (!e->reap || !e->active) continue;
        /* Announce the seam going down on behalf of the adapter, tagged with its own
         * source and endpoint. */
        vmsig_event sd;
        memset(&sd, 0, sizeof sd);
        sd.kind = VMSIG_EV_SEAM_DOWN; sd.source = e->ops->source; sd.dir = VMSIG_DIR_UP;
        sd.prio = VMSIG_PRIO_URGENT; sd.endpoint = e->endpoint;
        core_emit_up(c, &sd);
        /* Unhook every fd from epoll and mark its slot dead BEFORE closing the adapter. */
        for (int k = 0; k < e->nslot; k++) {
            if (!e->slots[k]) continue;
            epoll_ctl(c->epfd, EPOLL_CTL_DEL, e->slots[k]->fd, NULL);
            e->slots[k]->role = SLOT_DEAD;
        }
        if (e->ops->close) e->ops->close(e->a);
        /* Retire the table entry: no handle, no slots, inactive, reap request consumed. */
        e->a = NULL; e->nslot = 0; e->active = 0; e->reap = 0;
    }
}
int vmsig_core_run(vmsig_core* c) {
if (!c) return -1;
struct epoll_event evs[VMSIG_MAX_EVENTS];
@@ -609,6 +719,7 @@ int vmsig_core_run(vmsig_core* c) {
pump_up(c);
pump_down(c);
core_reap(c);
core_reap_adapters(c);
}
return 0;
}
+37
View File
@@ -0,0 +1,37 @@
/* admission.c — vmsigd coarse admission policy (see vmsigd_admission.h). Translates a uid to
* a vmsig_grant, resolving entitled vmids to live endpoint bits via the discovery slot map. */
#define _GNU_SOURCE
#include "vmsigd_admission.h"
#include "discovery.h" /* vmsig_discovery_slot_of_vmid */
#include <string.h>
static const vmsigd_grant_rule* rule_for_uid(const vmsigd_config* cfg, uint32_t uid) {
for (int i = 0; i < cfg->ngrants; i++)
if (cfg->grants[i].uid == uid) return &cfg->grants[i];
return NULL;
}
/* Socket admission policy: map the connecting peer's uid to a coarse grant.
 *   uid - peer uid, looked up in the config's grant rules
 *   pid - unused
 *   ud  - vmsigd_admission* (config + live discovery)
 * Returns an all-zero grant when there is no context, no config, or no rule for `uid`.
 * Otherwise the rule's caps and arbitration priority are copied, every source is allowed,
 * and the endpoint mask is either all ones (`all_vms`) or the OR of the endpoint bits that
 * the listed vmids currently resolve to. */
vmsig_grant vmsigd_policy(uint32_t uid, uint32_t pid, void* ud) {
    (void)pid;
    vmsigd_admission* adm = ud;

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);

    if (!adm || !adm->cfg)
        return grant;                   /* no policy context => empty grant */
    const vmsigd_grant_rule* rule = rule_for_uid(adm->cfg, uid);
    if (!rule)
        return grant;                   /* no stanza => empty grant => REJECT */

    grant.principal   = uid;
    grant.source_mask = 0xFFFFFFFFu;    /* coarse: control enforces source finer behind us */
    grant.cap_mask    = rule->cap_mask;
    grant.arb_prio    = rule->arb_prio;

    if (rule->all_vms) {
        grant.endpoint_mask = ~0ull;    /* covers all current + future endpoints */
        return grant;
    }

    for (int i = 0; i < rule->nvmids; i++) {
        if (!adm->disc)
            continue;                   /* no discovery => nothing resolves */
        int ep = vmsig_discovery_slot_of_vmid(adm->disc, rule->vmids[i]);
        if (ep < 0 || ep >= 64)
            continue;                   /* vmid not bound to an endpoint right now */
        grant.endpoint_mask |= (1ull << ep);
    }
    return grant;
}
+126
View File
@@ -0,0 +1,126 @@
/* config.c — vmsigd config parser (see vmsigd.h). INI-ish: `key = value` globals + repeated
* `[grant uid=N]` stanzas. Pure libc; no core/vmie dependency (unit-testable in any build). */
#define _GNU_SOURCE
#include "vmsigd.h"
#include "vmsig_control.h" /* VMSIG_CAP_* */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
/* Reset `c` to the built-in defaults: everything zeroed (so no grant rules), then the five
 * path fields set to their stock locations. `c` must be non-NULL. */
void vmsigd_config_defaults(vmsigd_config* c) {
    memset(c, 0, sizeof *c);
    snprintf(c->socket, sizeof c->socket, "%s", "/run/vmsig/vmsigd.sock");
    snprintf(c->watch, sizeof c->watch, "%s", "/dev/shm/vmsig");
    snprintf(c->pve_conf, sizeof c->pve_conf, "%s", "/etc/pve/qemu-server");
    snprintf(c->qmp_dir, sizeof c->qmp_dir, "%s", "/var/run/qemu-server");
    snprintf(c->slots, sizeof c->slots, "%s", "/dev/shm/vmsig/.slots");
}
/* Convert a list of capability keywords separated by commas, spaces or tabs into a
 * VMSIG_CAP_* bit mask. Matching is exact and case-sensitive; unrecognised words add
 * nothing. A NULL or empty string yields 0. */
uint32_t vmsigd_caps_from_str(const char* s) {
    static const struct { const char* word; uint32_t cap; } keywords[] = {
        { "observe",   VMSIG_CAP_OBSERVE   },
        { "input",     VMSIG_CAP_INPUT     },
        { "lifecycle", VMSIG_CAP_LIFECYCLE },
        { "power",     VMSIG_CAP_POWER     },
        { "vm",        VMSIG_CAP_VM        },
        { "memctx",    VMSIG_CAP_MEMCTX    },
        { "memwrite",  VMSIG_CAP_MEMWRITE  },
        { "roster",    VMSIG_CAP_ROSTER    },
    };
    static const char seps[] = ", \t";
    const size_t nkeywords = sizeof keywords / sizeof keywords[0];

    uint32_t caps = 0;
    if (!s)
        return caps;

    while (*s) {
        s += strspn(s, seps);                   /* skip separators before the word */
        size_t wlen = strcspn(s, seps);         /* word runs to the next separator/NUL */
        for (size_t k = 0; k < nkeywords; k++) {
            if (wlen != strlen(keywords[k].word))
                continue;
            if (strncmp(s, keywords[k].word, wlen) != 0)
                continue;
            caps |= keywords[k].cap;
            break;
        }
        s += wlen;
    }
    return caps;
}
/* Strip surrounding whitespace in place. Leading spaces, tabs and carriage returns are
 * skipped; trailing spaces, tabs, CRs and newlines are overwritten with NUL. Returns a
 * pointer to the first retained character inside the caller's buffer. */
static char* trim(char* s) {
    s += strspn(s, " \t\r");
    size_t len = strlen(s);
    while (len > 0 && strchr(" \t\r\n", s[len - 1]) != NULL)
        s[--len] = '\0';
    return s;
}
/* Copy `v` into the fixed-size path buffer `dst` of `cap` bytes; snprintf truncates an
 * over-long value and always NUL-terminates. */
static void set_path(char* dst, size_t cap, const char* v) { snprintf(dst, cap, "%s", v); }
/* Parse the value of a `vmids =` key into rule `g`.
 *   - Any '*' in the value selects every VM (all_vms = 1, list left empty).
 *   - Otherwise each run of decimal digits is one vmid; anything between runs is treated
 *     as a separator. Zero is not a valid vmid and is skipped, as are ids that do not fit
 *     in 32 bits. The previous strtoul-and-cast let such ids wrap modulo 2^32, silently
 *     entitling the uid to a different VM.
 *   - At most VMSIGD_MAX_VMIDS ids are kept; the rest are dropped.
 * The rule's previous vmid selection is always discarded first. */
static void parse_vmids(vmsigd_grant_rule* g, const char* v) {
    g->all_vms = 0;
    g->nvmids = 0;
    if (strchr(v, '*')) { g->all_vms = 1; return; }

    while (*v) {
        if (*v < '0' || *v > '9') { v++; continue; }    /* separator or stray character */

        /* Accumulate the whole digit run; once it overflows 32 bits keep consuming digits
         * (so the remainder is not misread as a second id) but stop accumulating. */
        uint64_t id = 0;
        int overflow = 0;
        while (*v >= '0' && *v <= '9') {
            if (!overflow) {
                id = id * 10 + (uint64_t)(*v - '0');
                if (id > 0xFFFFFFFFull) overflow = 1;
            }
            v++;
        }
        if (overflow || id == 0) continue;
        if (g->nvmids < VMSIGD_MAX_VMIDS) g->vmids[g->nvmids++] = (uint32_t)id;
    }
}
/* Parse the decimal uid that follows "uid=" in a `[grant uid=N]` header. Leading blanks
 * are allowed; the digits must be followed by ']', a blank, or end of line, and the value
 * must fit in 32 bits. Returns 0 and stores the uid, or -1 if the text is not a clean
 * number (so a typo can never be read as uid 0). */
static int parse_grant_uid(const char* s, uint32_t* out) {
    while (*s == ' ' || *s == '\t') s++;
    if (*s < '0' || *s > '9') return -1;
    uint64_t v = 0;
    while (*s >= '0' && *s <= '9') {
        v = v * 10 + (uint64_t)(*s - '0');
        if (v > 0xFFFFFFFFull) return -1;
        s++;
    }
    if (*s && *s != ']' && *s != ' ' && *s != '\t') return -1;
    *out = (uint32_t)v;
    return 0;
}

/* Parse an INI-ish config held in memory on top of the values already in `c`.
 *
 * Lines are split on '\n' and trimmed; blank lines and lines starting with '#' or ';' are
 * skipped. A line starting with '[' opens a section: if it contains "uid=" it starts a
 * new grant rule for that uid, otherwise the following keys are read as globals. Inside a
 * grant the keys `vmids`, `caps` and `arb_prio` are recognised; the globals are `socket`,
 * `watch`, `pve_conf`, `qmp_dir` and `slots`. Unknown keys are ignored.
 *
 * A grant header whose uid is not a clean 32-bit decimal number, or that arrives when the
 * grant table is full, is reported on stderr and the whole stanza is skipped (its keys
 * are NOT reinterpreted as globals).
 *
 * Returns 0 on success, -1 if an argument is NULL or memory cannot be allocated. */
int vmsigd_config_parse_buf(vmsigd_config* c, const char* buf) {
    if (!c || !buf) return -1;
    char* copy = strdup(buf);           /* strtok_r and trim write into the text */
    if (!copy) return -1;

    vmsigd_grant_rule* cur = NULL;      /* current [grant] stanza, or NULL for globals */
    int skip = 0;                       /* inside a rejected grant stanza: ignore its keys */
    char* save = NULL;
    for (char* line = strtok_r(copy, "\n", &save); line; line = strtok_r(NULL, "\n", &save)) {
        char* p = trim(line);
        if (!*p || *p == '#' || *p == ';') continue;

        if (*p == '[') {
            cur = NULL;
            skip = 0;
            /* [grant uid=N] */
            char* u = strstr(p, "uid=");
            if (!u) continue;           /* not a grant header: keys below are globals */

            uint32_t uid = 0;
            if (parse_grant_uid(u + 4, &uid) != 0) {
                fprintf(stderr, "vmsigd: config: malformed uid in '%s', stanza ignored\n", p);
                skip = 1;
                continue;
            }
            if (c->ngrants >= VMSIGD_MAX_GRANTS) {
                fprintf(stderr, "vmsigd: config: grant table full (%d), stanza '%s' ignored\n",
                        VMSIGD_MAX_GRANTS, p);
                skip = 1;
                continue;
            }
            cur = &c->grants[c->ngrants++];
            memset(cur, 0, sizeof *cur);
            cur->uid = uid;
            continue;
        }

        if (skip) continue;

        char* eq = strchr(p, '=');
        if (!eq) continue;
        *eq = 0;                        /* split "key = value" at the first '=' */
        char* key = trim(p);
        char* val = trim(eq + 1);
        if (cur) {
            if (!strcmp(key, "vmids")) parse_vmids(cur, val);
            else if (!strcmp(key, "caps")) cur->cap_mask = vmsigd_caps_from_str(val);
            else if (!strcmp(key, "arb_prio")) cur->arb_prio = (uint32_t)strtoul(val, NULL, 10);
        } else {
            if (!strcmp(key, "socket")) set_path(c->socket, sizeof c->socket, val);
            else if (!strcmp(key, "watch")) set_path(c->watch, sizeof c->watch, val);
            else if (!strcmp(key, "pve_conf")) set_path(c->pve_conf, sizeof c->pve_conf, val);
            else if (!strcmp(key, "qmp_dir")) set_path(c->qmp_dir, sizeof c->qmp_dir, val);
            else if (!strcmp(key, "slots")) set_path(c->slots, sizeof c->slots, val);
        }
    }
    free(copy);
    return 0;
}
/* Read the config file at `path` and parse it over the values already in `c`.
 *
 * The file is read whole into a fixed 16 KiB buffer. A file that does not fit is rejected
 * (with a message on stderr) rather than parsed in truncated form: a cut at an arbitrary
 * byte could shorten a uid or vmid and grant access to the wrong principal.
 *
 * Returns the result of vmsigd_config_parse_buf, or -1 if an argument is NULL, the file
 * cannot be opened or read, or it is too large. On -1 `c` is left unmodified. */
int vmsigd_config_parse_file(vmsigd_config* c, const char* path) {
    if (!c || !path) return -1;
    int fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd < 0) return -1;

    char buf[16 * 1024];
    const size_t cap = sizeof buf - 1;  /* keep one byte for the terminating NUL */
    size_t got = 0;
    while (got < cap) {
        ssize_t n = read(fd, buf + got, cap - got);
        if (n < 0) { close(fd); return -1; }
        if (n == 0) break;              /* EOF */
        got += (size_t)n;
    }

    if (got == cap) {
        /* Buffer is full: the file is acceptable only if it ends exactly here. */
        char extra;
        ssize_t n = read(fd, &extra, 1);
        if (n != 0) {
            if (n > 0)
                fprintf(stderr, "vmsigd: config %s is larger than %zu bytes, not loaded\n",
                        path, cap);
            close(fd);
            return -1;
        }
    }

    close(fd);
    buf[got] = 0;
    return vmsigd_config_parse_buf(c, buf);
}
+47
View File
@@ -0,0 +1,47 @@
#ifndef VMSIGD_H
#define VMSIGD_H
#include <stdint.h>
/* vmsigd.h — private config model of the vmsig daemon.
*
* The daemon owns the /dev/shm/vmsig discovery namespace and serves a unix-socket control
* plane over the signaling layer for the VMs discovered there. Its only policy is a COARSE
* admission grant per uid (SISC: signaling is not a fine-grained access broker — the control
* enforces per-user caps behind the grant). Entitlements are expressed in vmid terms and
* translated to an endpoint_mask at connect time against the live slot map. */
#define VMSIGD_MAX_GRANTS 64
#define VMSIGD_MAX_VMIDS 64
#define VMSIGD_PATH_MAX 256
/* One `[grant uid=N]` stanza: what a connecting peer with this uid is entitled to. */
typedef struct {
    uint32_t uid;                       /* peer uid this rule applies to */
    int all_vms;                        /* `vmids = *` */
    uint32_t vmids[VMSIGD_MAX_VMIDS];   /* entitled vmids when all_vms is 0 */
    int nvmids;                         /* number of valid entries in vmids[] */
    uint32_t cap_mask;                  /* VMSIG_CAP_* (from `caps =` keywords) */
    uint32_t arb_prio;                  /* from `arb_prio =`; copied into the grant */
} vmsigd_grant_rule;
/* Whole daemon configuration: five path settings plus the admission table. */
typedef struct {
    char socket[VMSIGD_PATH_MAX];       /* control listener ('@' => abstract) */
    char watch[VMSIGD_PATH_MAX];        /* discovery dir (/dev/shm/vmsig) */
    char pve_conf[VMSIGD_PATH_MAX];     /* /etc/pve/qemu-server */
    char qmp_dir[VMSIGD_PATH_MAX];      /* /var/run/qemu-server */
    char slots[VMSIGD_PATH_MAX];        /* slot persistence ("" => off) */
    vmsigd_grant_rule grants[VMSIGD_MAX_GRANTS];    /* admission rules, in file order */
    int ngrants;                        /* number of valid entries in grants[] */
} vmsigd_config;
/* Populate with built-in defaults. */
void vmsigd_config_defaults(vmsigd_config* c);
/* Parse the INI-ish config (globals + repeated [grant uid=N] stanzas) over the defaults
* already in `c`. Unknown keys are ignored. Returns 0, or -1 on open/usage error. */
int vmsigd_config_parse_file(vmsigd_config* c, const char* path);
int vmsigd_config_parse_buf (vmsigd_config* c, const char* buf); /* same, from memory (tests) */
/* Translate a comma/space-separated cap keyword list to a VMSIG_CAP_* mask. */
uint32_t vmsigd_caps_from_str(const char* s);
#endif /* VMSIGD_H */
+21
View File
@@ -0,0 +1,21 @@
#ifndef VMSIGD_ADMISSION_H
#define VMSIGD_ADMISSION_H
#include "vmsigd.h"
#include "vmsig_control.h" /* vmsig_grant */
struct vmsig_discovery;
/* Admission context handed to the socket listener as policy `ud`. The config is read-only at
* connect time; the live discovery resolves entitled vmids to their current endpoint bits. */
typedef struct {
    const vmsigd_config* cfg;       /* grant rules consulted per connection; NULL => every
                                     * uid gets an empty grant */
    struct vmsig_discovery* disc;   /* resolves vmids to endpoint slots; NULL => a vmid
                                     * list contributes no endpoint bits */
} vmsigd_admission;
/* vmsig_socket_policy: uid from SO_PEERCRED -> a coarse grant. No matching [grant uid=N]
* stanza => empty grant (the listener rejects). `vmids = *` => endpoint_mask covers all 64;
* a vmid list resolves each currently-attached vmid to its endpoint bit (an unbound entitled
* vmid contributes no bit yet — the peer learns liveness via the roster). */
vmsig_grant vmsigd_policy(uint32_t uid, uint32_t pid, void* ud);
#endif /* VMSIGD_ADMISSION_H */
+156
View File
@@ -0,0 +1,156 @@
/* vmsigd.c — the vmsig management daemon.
*
* Owns the /dev/shm/vmsig discovery namespace and serves a unix-socket control plane over the
* signaling layer for the VMs found there. It wires nothing VM-specific: discovery hot-plugs
* each VM's adapter trio and publishes the roster; the daemon only supplies the loop, the
* discovery roots, the control socket, and a coarse per-uid admission policy.
*
* Real input/memctx actuation needs an armed library build (memctx -> vmie). A stub build
* still runs (socket/admission/discovery machinery), but memctx will not bootstrap.
*
* Usage: vmsigd [--config PATH] [--socket S] [--watch DIR] [--pve-conf DIR] [--qmp-dir DIR]
* [--slots PATH] [--foreground]
* precedence: argv > environment (VMSIGD_*) > config file > built-in defaults. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "vmsig_socket.h"
#include "discovery.h"
#include "core_internal.h" /* core_add_source (in-repo daemon, intimate with the core) */
#include "vmsigd.h"
#include "vmsigd_admission.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <sys/signalfd.h>
static vmsig_core* g_core;
static vmsigd_config g_cfg;
static char g_cfg_path[VMSIGD_PATH_MAX];
/* Audit sink: print one line per audit record (admissions/denials, lease and memctx
 * grants) to stderr, which systemd routes to the journal. Runs on the loop thread. Kinds
 * beyond VMSIG_AUDIT_MEMCTX_GRANTED are printed as "?". */
static void on_audit(void* ud, const vmsig_audit* a) {
    static const char* labels[] = {
        "ADMIT",
        "REJECT",
        "DOWN_DENIED",
        "LEASE_GRANTED",
        "LEASE_DENIED",
        "LEASE_REVOKED",
        "LEASE_RECLAIMED",
        "MEMCTX_GRANTED"
    };
    (void)ud;

    const char* label = "?";
    if (a->kind <= VMSIG_AUDIT_MEMCTX_GRANTED)
        label = labels[a->kind];

    fprintf(stderr, "vmsigd: audit %-14s principal=%u ep=%u cmd=%u detail=%u\n",
            label, a->principal, a->endpoint, a->cmd, a->detail);
}
/* Signals arrive as fd readiness (signalfd) on the loop thread — no async-handler hazards.
 * TERM/INT => graceful stop; HUP => reload ONLY the admission table from the config file
 * (paths/socket/adapters are untouched; already-connected grants are not retroactively
 * changed — a peer reconnects to pick up a changed entitlement).
 *
 * `user` points at the signalfd descriptor; `events` is unused. All queued signals are
 * drained. A HUP that cannot be honoured (no config path, or the file fails to load) keeps
 * the current table and says so on stderr, so an operator is not left believing a reload
 * took effect. */
static void on_signal(void* user, uint32_t events) {
    (void)events;
    int sfd = *(int*)user;
    struct signalfd_siginfo si;
    while (read(sfd, &si, sizeof si) == (ssize_t)sizeof si) {
        if (si.ssi_signo == SIGINT || si.ssi_signo == SIGTERM) {
            vmsig_core_stop(g_core);
            continue;
        }
        if (si.ssi_signo != SIGHUP)
            continue;

        if (!g_cfg_path[0]) {
            fprintf(stderr, "vmsigd: reload skipped: no config path\n");
            continue;
        }
        /* Parse into a scratch config so a failed load cannot disturb the live table. */
        vmsigd_config fresh;
        vmsigd_config_defaults(&fresh);
        if (vmsigd_config_parse_file(&fresh, g_cfg_path) != 0) {
            fprintf(stderr, "vmsigd: reload of %s failed; keeping %d grant rule(s)\n",
                    g_cfg_path, g_cfg.ngrants);
            continue;
        }
        memcpy(g_cfg.grants, fresh.grants, sizeof g_cfg.grants);
        g_cfg.ngrants = fresh.ngrants;  /* swap admission table only */
        fprintf(stderr, "vmsigd: reloaded %d grant rule(s)\n", g_cfg.ngrants);
    }
}
/* Fetch the value of the option at argv[*i]. For `--opt=value` the text after the first
 * '=' is returned and *i is left alone; for `--opt value` the next argument is returned
 * and *i is advanced onto it. When there is no next argument the result is "". */
static const char* arg_val(int argc, char** argv, int* i) {
    const char* inline_val = strchr(argv[*i], '=');
    if (inline_val != NULL)
        return inline_val + 1;

    int next = *i + 1;
    if (next >= argc)
        return "";
    *i = next;
    return argv[next];
}
/* Overlay the path settings of `c` with any VMSIGD_* environment variables that are set.
 * Unset variables leave the field untouched; over-long values are truncated. */
static void apply_env(vmsigd_config* c) {
    const struct { const char* var; char* dst; size_t cap; } binds[] = {
        { "VMSIGD_SOCKET",   c->socket,   sizeof c->socket   },
        { "VMSIGD_WATCH",    c->watch,    sizeof c->watch    },
        { "VMSIGD_PVE_CONF", c->pve_conf, sizeof c->pve_conf },
        { "VMSIGD_QMP_DIR",  c->qmp_dir,  sizeof c->qmp_dir  },
        { "VMSIGD_SLOTS",    c->slots,    sizeof c->slots    },
    };
    for (size_t k = 0; k < sizeof binds / sizeof binds[0]; k++) {
        const char* val = getenv(binds[k].var);
        if (val)
            snprintf(binds[k].dst, binds[k].cap, "%s", val);
    }
}
/* Daemon entry point. Resolves the configuration (built-in defaults, then config file,
 * then VMSIGD_* environment, then argv), routes SIGINT/SIGTERM/SIGHUP through a signalfd
 * on the loop, creates the context and core, starts discovery and the control socket, and
 * runs the loop until it exits. Returns the loop's return code, 0 for --help, or 1 when
 * any setup step fails.
 *
 * NOTE(review): options are matched by prefix (strncmp), so an argument that merely
 * begins with e.g. `--watch` is also accepted as that option; and an option given last
 * without a value stores an empty string (arg_val returns ""). Unrecognised arguments are
 * ignored silently. */
int main(int argc, char** argv) {
    /* config path: argv --config > env > default. */
    const char* cfg_path = getenv("VMSIGD_CONFIG");
    if (!cfg_path) cfg_path = "/etc/vmsig/vmsigd.conf";
    for (int i = 1; i < argc; i++)
        if (!strncmp(argv[i], "--config", 8)) { cfg_path = arg_val(argc, argv, &i); }

    vmsigd_config_defaults(&g_cfg);
    vmsigd_config_parse_file(&g_cfg, cfg_path); /* missing file => defaults (not fatal) */
    /* Remember the path so a later SIGHUP can re-read the same file. */
    snprintf(g_cfg_path, sizeof g_cfg_path, "%s", cfg_path);
    apply_env(&g_cfg);

    /* Second argv pass: per-setting overrides. --config was consumed above, so here its
     * value is only skipped to keep the index in step. */
    for (int i = 1; i < argc; i++) {
        char* a = argv[i];
        if (!strncmp(a, "--config", 8)) { (void)arg_val(argc, argv, &i); }
        else if (!strncmp(a, "--socket", 8)) snprintf(g_cfg.socket, sizeof g_cfg.socket, "%s", arg_val(argc, argv, &i));
        else if (!strncmp(a, "--watch", 7)) snprintf(g_cfg.watch, sizeof g_cfg.watch, "%s", arg_val(argc, argv, &i));
        else if (!strncmp(a, "--pve-conf", 10)) snprintf(g_cfg.pve_conf, sizeof g_cfg.pve_conf, "%s", arg_val(argc, argv, &i));
        else if (!strncmp(a, "--qmp-dir", 9)) snprintf(g_cfg.qmp_dir, sizeof g_cfg.qmp_dir, "%s", arg_val(argc, argv, &i));
        else if (!strncmp(a, "--slots", 7)) snprintf(g_cfg.slots, sizeof g_cfg.slots, "%s", arg_val(argc, argv, &i));
        else if (!strcmp(a, "--foreground")) { /* default; systemd Type=simple */ }
        else if (!strcmp(a, "-h") || !strcmp(a, "--help")) {
            fprintf(stderr, "usage: %s [--config P][--socket S][--watch D][--pve-conf D]"
                    "[--qmp-dir D][--slots P][--foreground]\n", argv[0]);
            return 0;
        }
    }

    /* Signals via signalfd, serviced on the loop thread. SIGPIPE ignored (dead-peer writes). */
    signal(SIGPIPE, SIG_IGN);
    sigset_t mask;
    sigemptyset(&mask);
    sigaddset(&mask, SIGINT); sigaddset(&mask, SIGTERM); sigaddset(&mask, SIGHUP);
    /* Block normal delivery so these signals are only ever read from the signalfd. */
    sigprocmask(SIG_BLOCK, &mask, NULL);
    int sfd = signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC);
    if (sfd < 0) { perror("vmsigd: signalfd"); return 1; }

    vmsig_ctx* ctx = vmsig_ctx_new();
    if (!ctx) { fprintf(stderr, "vmsigd: ctx_new failed\n"); close(sfd); return 1; }
    g_core = vmsig_core_new(ctx);
    if (!g_core) { fprintf(stderr, "vmsigd: core_new failed\n"); vmsig_ctx_free(ctx); close(sfd); return 1; }
    vmsig_core_set_audit(g_core, on_audit, NULL);

    /* on_signal receives &sfd as its user pointer; sfd lives in this frame for the whole
     * run. No on_free is registered, so main closes the descriptor itself. */
    if (core_add_source(g_core, sfd, on_signal, &sfd, NULL) != 0) {
        fprintf(stderr, "vmsigd: signal source registration failed\n");
        vmsig_core_free(g_core); vmsig_ctx_free(ctx); close(sfd); return 1;
    }

    /* NULL probe/sink select discovery's built-in defaults; an empty slots path turns
     * slot persistence off. */
    vmsig_discovery* disc = vmsig_discovery_new(
        g_core, g_cfg.watch, g_cfg.pve_conf, g_cfg.qmp_dir,
        g_cfg.slots[0] ? g_cfg.slots : NULL, NULL, NULL);
    if (!disc) {
        fprintf(stderr, "vmsigd: discovery_new(%s) failed\n", g_cfg.watch);
        vmsig_core_free(g_core); vmsig_ctx_free(ctx); close(sfd); return 1;
    }

    /* The admission context is handed to the socket layer by pointer and must outlive the
     * loop; it lives in this frame, which does. */
    vmsigd_admission adm = { &g_cfg, disc };
    if (vmsig_socket_attach(g_core, g_cfg.socket, vmsigd_policy, &adm) != 0) {
        fprintf(stderr, "vmsigd: socket_attach(%s) failed\n", g_cfg.socket);
        vmsig_core_free(g_core); vmsig_ctx_free(ctx); close(sfd); return 1;
    }

    fprintf(stderr, "vmsigd: serving %s (watch=%s pve=%s qmp=%s) %d grant rule(s)\n",
            g_cfg.socket, g_cfg.watch, g_cfg.pve_conf, g_cfg.qmp_dir, g_cfg.ngrants);

    int rc = vmsig_core_run(g_core);
    fprintf(stderr, "vmsigd: loop exit rc=%d\n", rc);

    vmsig_core_free(g_core); /* reaps discovery (source on_free) + closes the socket listener */
    vmsig_ctx_free(ctx);
    close(sfd);
    return rc;
}
+363
View File
@@ -0,0 +1,363 @@
/* discovery.c — runtime VM discovery state machine (see discovery.h).
*
* Single-threaded on the loop thread (inotify + timer sources via core_add_source). On a
* "vm-<vmid>-ram" file appearing it corroborates the candidate (host-probe seam), assigns a
* stable endpoint slot, hot-plugs the trio (sink), and publishes the roster; on the file
* disappearing it tears the endpoint down and publishes a roster DETACH. QMP-not-up-yet is a
* transient retry driven by a timerfd (no busy-wait); config errors / stale files drop. */
#define _GNU_SOURCE
#include "discovery.h"
#include "slot.h"
#include "core_internal.h" /* core_roster_publish */
#include "memctx.h" /* vmsig_memctx_cfg */
#include "vmhost.h" /* vmsig_vmhost_cfg */
#include "input.h" /* vmsig_input_cfg */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include <dirent.h>
#include <sys/inotify.h>
#include <sys/timerfd.h>
#define DISC_PATH_MAX 256
#define DISC_RETRY_MAX 40 /* give up after ~tens of seconds of QMP-not-up */
#define DISC_BACKOFF_BASE 50000000ull /* 50 ms */
#define DISC_BACKOFF_CAP 2000000000ull /* 2 s */
/* Candidate lifecycle: FREE (table entry unused), PROBING (trigger file seen, host probe
 * not yet conclusive), ATTACHED (endpoint assigned and adapters attached). */
typedef enum { CAND_FREE = 0, CAND_PROBING, CAND_ATTACHED } cand_state;
/* One tracked VM candidate, keyed by vmid. */
typedef struct {
    cand_state state;
    uint32_t vmid;              /* 0 while the entry is free */
    int endpoint;               /* -1 until attached */
    int attempts;               /* probe retries so far (bounded by DISC_RETRY_MAX) */
    uint64_t next_probe_ns;     /* monotonic deadline for the next retry; 0 = none pending */
    vmsig_host_facts facts;     /* probe working copy */
} cand_ent;
/* Discovery instance state. Allocated by vmsig_discovery_new and released by
 * vmsig_discovery_free. */
struct vmsig_discovery {
    vmsig_core* core;                   /* core the sources and adapters are attached to */
    char watch_dir[DISC_PATH_MAX];      /* directory watched for vm-<vmid>-ram files */
    char slots_path[DISC_PATH_MAX];     /* slot persistence file (used when persist != 0) */
    int persist;                        /* nonzero when a slots path was supplied */
    vmsig_host_probe probe;             /* host-probe seam (config + live checks) */
    vmsig_discovery_sink sink;          /* attach/detach callbacks for an endpoint */
    int ifd;                            /* inotify */
    int wd;                             /* inotify watch descriptor for watch_dir */
    int tfd;                            /* retry timerfd */
    slot_table slots;                   /* vmid <-> endpoint slot bindings */
    cand_ent cand[VMSIG_SLOT_COUNT];    /* candidate table */
    /* Stable per-endpoint home for the adapter cfg strings (ram_path/qmp_path): the adapters
     * keep pointers, and detach is deferred, so this must outlive the candidate. Overwritten
     * only on the NEXT attach to the endpoint, which never races a still-open prior adapter. */
    vmsig_host_facts ep_facts[VMSIG_SLOT_COUNT];
};
/* Current CLOCK_MONOTONIC reading expressed in nanoseconds. */
static uint64_t now_ns(void) {
    struct timespec t;
    clock_gettime(CLOCK_MONOTONIC, &t);
    uint64_t whole_seconds_ns = (uint64_t)t.tv_sec * 1000000000ull;
    return whole_seconds_ns + (uint64_t)t.tv_nsec;
}
/* Retry delay for the given attempt count: the base delay doubled per attempt (the
 * doubling stops after six), clamped to DISC_BACKOFF_CAP. */
static uint64_t backoff_ns(int attempts) {
    int shift = attempts;
    if (shift >= 6)
        shift = 6;
    uint64_t delay = DISC_BACKOFF_BASE << shift;
    if (delay > DISC_BACKOFF_CAP)
        delay = DISC_BACKOFF_CAP;
    return delay;
}
/* Extract the vmid from a file name of the exact form "vm-<digits>-ram". Returns 0 when
 * the name has any other shape, when the number does not fit in 32 bits, or when the
 * number itself is 0. */
static uint32_t parse_vmid(const char* name) {
    static const char prefix[] = "vm-";
    static const char suffix[] = "-ram";

    if (strncmp(name, prefix, sizeof prefix - 1) != 0)
        return 0;
    const char* cur = name + (sizeof prefix - 1);
    if (*cur < '0' || *cur > '9')
        return 0;                               /* at least one digit is required */

    uint64_t acc = 0;
    for (; *cur >= '0' && *cur <= '9'; cur++) {
        acc = acc * 10 + (uint64_t)(*cur - '0');
        if (acc > 0xFFFFFFFFull)
            return 0;                           /* does not fit a 32-bit vmid */
    }
    return strcmp(cur, suffix) == 0 ? (uint32_t)acc : 0;
}
/* Look up the in-use candidate entry for `vmid`; NULL when the vmid is not tracked. */
static cand_ent* cand_find(vmsig_discovery* d, uint32_t vmid) {
    int idx = 0;
    while (idx < VMSIG_SLOT_COUNT) {
        cand_ent* ent = &d->cand[idx];
        if (ent->state != CAND_FREE && ent->vmid == vmid)
            return ent;
        idx++;
    }
    return NULL;
}
/* Return the candidate entry for `vmid`: the existing one if the vmid is already tracked,
 * otherwise the first free entry, zeroed and stamped with the vmid and endpoint -1 (its
 * state is still CAND_FREE; the caller promotes it). NULL when the table is full. */
static cand_ent* cand_alloc(vmsig_discovery* d, uint32_t vmid) {
    cand_ent* known = cand_find(d, vmid);
    if (known)
        return known;

    for (int idx = 0; idx < VMSIG_SLOT_COUNT; idx++) {
        cand_ent* ent = &d->cand[idx];
        if (ent->state != CAND_FREE)
            continue;
        memset(ent, 0, sizeof *ent);
        ent->vmid = vmid;
        ent->endpoint = -1;
        return ent;
    }
    return NULL; /* 64-candidate ceiling */
}
/* Arm the retry timer to the soonest pending probe, or disarm if none pending. */
static void rearm_timer(vmsig_discovery* d) {
    uint64_t soonest = 0; int any = 0;
    /* Earliest deadline among candidates that are still probing AND have a retry
     * scheduled (next_probe_ns == 0 means nothing is pending for that candidate). */
    for (int i = 0; i < VMSIG_SLOT_COUNT; i++)
        if (d->cand[i].state == CAND_PROBING && d->cand[i].next_probe_ns) {
            if (!any || d->cand[i].next_probe_ns < soonest) soonest = d->cand[i].next_probe_ns;
            any = 1;
        }
    struct itimerspec its;
    memset(&its, 0, sizeof its);        /* all-zero it_value disarms; no interval => one-shot */
    if (any) {
        uint64_t now = now_ns();
        /* Relative delay. A deadline already in the past becomes 1 ms rather than 0,
         * because a zero it_value would disarm the timer instead of firing it. */
        uint64_t dt = soonest > now ? soonest - now : 1000000ull; /* >=1ms */
        its.it_value.tv_sec = (time_t)(dt / 1000000000ull);
        its.it_value.tv_nsec = (long)(dt % 1000000000ull);
    }
    timerfd_settime(d->tfd, 0, &its, NULL); /* it_value 0 => disarm */
}
/* Build a roster entry from the given fields and publish it for endpoint `ep` through the
 * core. `name` may be NULL (the name is left empty); a name too long for the roster field
 * is cut to fit and flagged with VMSIG_ROSTER_NAME_TRUNC. */
static void publish_roster(vmsig_discovery* d, uint32_t ep, uint32_t vmid, uint32_t state,
                           uint32_t action, const char* name) {
    vmsig_roster entry;
    memset(&entry, 0, sizeof entry);    /* also guarantees the name stays NUL-terminated */
    entry.vmid   = vmid;
    entry.state  = state;
    entry.action = action;

    if (name != NULL) {
        size_t len = strlen(name);
        if (len >= VMSIG_ROSTER_NAME_MAX) {
            len = VMSIG_ROSTER_NAME_MAX - 1;
            entry.flags |= VMSIG_ROSTER_NAME_TRUNC;
        }
        memcpy(entry.name, name, len);
    }

    core_roster_publish(d->core, ep, &entry);
}
/* Return a candidate entry to the free pool: state FREE, no vmid, no endpoint, retry
 * bookkeeping cleared. The probe facts are left as they are. */
static void cand_drop(cand_ent* c) {
    c->state         = CAND_FREE;
    c->vmid          = 0;
    c->endpoint      = -1;
    c->attempts      = 0;
    c->next_probe_ns = 0;
}
/* Attach a successfully probed candidate: allocate its endpoint slot, hand the facts to
 * the sink to attach the endpoint, mark the candidate ATTACHED, publish a roster ATTACH
 * and persist the slot table when persistence is on. If no slot is free or the sink fails,
 * the candidate is dropped (and the slot released again). */
static void do_attach(vmsig_discovery* d, cand_ent* c) {
    int ep = slot_alloc(&d->slots, c->vmid);
    if (ep < 0) {
        fprintf(stderr, "vmsig discovery: no free endpoint for vmid %u (64-VM ceiling)\n", c->vmid);
        cand_drop(c);
        return;
    }
    d->ep_facts[ep] = c->facts; /* stable home for cfg strings the adapters keep */
    /* The sink is given the per-endpoint copy, not the candidate's working copy. */
    if (d->sink.attach(d->sink.ud, d->core, c->vmid, (uint32_t)ep, &d->ep_facts[ep]) != 0) {
        slot_free(&d->slots, c->vmid);
        fprintf(stderr, "vmsig discovery: attach failed for vmid %u\n", c->vmid);
        cand_drop(c);
        return;
    }
    c->state = CAND_ATTACHED; c->endpoint = ep;
    publish_roster(d, (uint32_t)ep, c->vmid, (uint32_t)c->facts.vm_state, VMSIG_ROSTER_ATTACH,
                   c->facts.name);
    if (d->persist) slot_save(&d->slots, d->slots_path);
}
/* Tear down an attached candidate. In order: publish the roster DETACH, ask the sink to
 * detach the endpoint, release the slot, persist the slot table if enabled. The candidate
 * entry is freed in every case, including when it never had an endpoint. */
static void do_detach(vmsig_discovery* d, cand_ent* c) {
    int ep = c->endpoint;
    if (ep >= 0) {
        publish_roster(d, (uint32_t)ep, c->vmid, VMSIG_VM_SHUTDOWN, VMSIG_ROSTER_DETACH,
                       c->facts.name);
        d->sink.detach(d->sink.ud, d->core, c->vmid, (uint32_t)ep); /* deferred teardown */
        slot_free(&d->slots, c->vmid); /* bit vacated (ordered) */
        if (d->persist) slot_save(&d->slots, d->slots_path);
        /* ep_facts[ep] is intentionally NOT cleared: the deferred adapter reap still reads the
         * cfg strings; it is overwritten on the next attach to this endpoint. */
    }
    cand_drop(c);
}
/* Run one probe round for candidate `c`: the config probe first, then the live probe.
 * Outcomes: config probe not ok => dropped; live probe asks for a retry => rescheduled
 * with backoff (dropped once DISC_RETRY_MAX attempts are exceeded); live probe not ok =>
 * dropped; otherwise the candidate is attached. */
static void try_probe(vmsig_discovery* d, cand_ent* c) {
    d->probe.config(&d->probe, c->vmid, &c->facts);
    if (!c->facts.ok) { cand_drop(c); return; } /* not ours / no share=on */
    d->probe.live(&d->probe, &c->facts);
    if (c->facts.retry) {
        if (++c->attempts > DISC_RETRY_MAX) {
            fprintf(stderr, "vmsig discovery: vmid %u QMP never came up, giving up\n", c->vmid);
            cand_drop(c);
            return;
        }
        /* Schedule the next attempt and make sure the timer covers it. */
        c->next_probe_ns = now_ns() + backoff_ns(c->attempts);
        rearm_timer(d);
        return;
    }
    if (!c->facts.ok) { cand_drop(c); return; } /* stale: file present, VM dead/unparsable */
    do_attach(d, c);
}
/* A vm-<vmid>-ram file exists (from inotify or the bootstrap scan): start tracking the
 * vmid if needed and probe it immediately. Ignored when the VM is already attached or the
 * candidate table is full. */
static void on_file_appear(vmsig_discovery* d, uint32_t vmid) {
    cand_ent* c = cand_alloc(d, vmid);
    if (!c) { fprintf(stderr, "vmsig discovery: candidate table full, vmid %u ignored\n", vmid); return; }
    if (c->state == CAND_ATTACHED) return; /* already live (duplicate event) */
    /* A fresh entry starts probing from attempt 0; one that is already probing keeps its
     * attempt count and is simply probed again now. */
    if (c->state == CAND_FREE) { c->state = CAND_PROBING; c->attempts = 0; }
    c->next_probe_ns = 0;   /* cancel any scheduled retry: probing right away */
    try_probe(d, c);
}
/* The trigger file for `vmid` disappeared: detach the VM if it was attached, otherwise
 * just stop probing it. Unknown vmids are ignored. */
static void on_file_gone(vmsig_discovery* d, uint32_t vmid) {
    cand_ent* ent = cand_find(d, vmid);
    if (ent == NULL)
        return;
    if (ent->state != CAND_ATTACHED) {
        cand_drop(ent);                 /* was still probing */
        return;
    }
    do_detach(d, ent);
}
/* ---- loop sources ------------------------------------------------------------ */
/* inotify source callback (`user` is the vmsig_discovery*; `events` is unused). Drains the
 * inotify fd and maps each event whose name parses as vm-<vmid>-ram to on_file_appear
 * (create / moved-in / close-after-write) or on_file_gone (delete / moved-out). */
static void on_inotify(void* user, uint32_t events) {
    (void)events;
    vmsig_discovery* d = user;
    /* Buffer aligned for struct inotify_event, since it is read in place via a cast. */
    char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
    for (;;) {
        ssize_t n = read(d->ifd, buf, sizeof buf);
        /* Retry on EINTR; any other error or a zero-length read ends the drain. */
        if (n <= 0) { if (n < 0 && errno == EINTR) continue; break; }
        for (char* p = buf; p < buf + n; ) {
            struct inotify_event* ev = (struct inotify_event*)p;
            /* Events without a name (len == 0) are skipped.
             * NOTE(review): a queue-overflow notification presumably carries no name and
             * would be skipped here too — confirm whether a rescan is wanted then. */
            if (ev->len) {
                uint32_t vmid = parse_vmid(ev->name);
                if (vmid) {
                    if (ev->mask & (IN_CREATE | IN_MOVED_TO | IN_CLOSE_WRITE)) on_file_appear(d, vmid);
                    else if (ev->mask & (IN_DELETE | IN_MOVED_FROM)) on_file_gone(d, vmid);
                }
            }
            /* Records are variable length: fixed header plus `len` bytes of name. */
            p += sizeof(struct inotify_event) + ev->len;
        }
    }
}
/* Retry-timer source callback (`user` is the vmsig_discovery*; `events` is unused).
 * Acknowledges the timerfd, re-probes every candidate whose retry deadline has passed,
 * then re-arms the timer for whatever is still pending. */
static void on_timer(void* user, uint32_t events) {
    (void)events;
    vmsig_discovery* d = user;
    uint64_t v;
    while (read(d->tfd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain */ }
    uint64_t now = now_ns();
    for (int i = 0; i < VMSIG_SLOT_COUNT; i++) {
        cand_ent* c = &d->cand[i];
        if (c->state == CAND_PROBING && c->next_probe_ns && c->next_probe_ns <= now)
            try_probe(d, c);
    }
    rearm_timer(d);
}
/* Initial sweep: treat every vm-<vmid>-ram file already in the watch dir as if it had just
 * appeared, then release slot bindings that did not end up attached, and persist the slot
 * table when persistence is on. Does nothing if the directory cannot be opened. */
static void bootstrap_scan(vmsig_discovery* d) {
    DIR* dir = opendir(d->watch_dir);
    if (!dir) return;
    struct dirent* de;
    while ((de = readdir(dir)) != NULL) {
        uint32_t vmid = parse_vmid(de->d_name);
        if (vmid) on_file_appear(d, vmid);
    }
    closedir(dir);
    /* GC persisted-but-not-live slots: a vmid bound in .slots with no live file (it died while
     * the daemon was down) keeps its bit pinned; free it so the ceiling is not leaked.
     * NOTE(review): the test below frees every binding whose candidate is not ATTACHED,
     * which includes a candidate that is still PROBING (file present, probe awaiting a
     * retry) — confirm that dropping its persisted binding is intended. */
    for (int e = 0; e < VMSIG_SLOT_COUNT; e++) {
        uint32_t vmid = d->slots.ent[e].vmid;
        if (!vmid) continue;
        cand_ent* c = cand_find(d, vmid);
        if (!c || c->state != CAND_ATTACHED) slot_free(&d->slots, vmid);
    }
    if (d->persist) slot_save(&d->slots, d->slots_path);
}
/* ---- default sink: wire the core adapter trio ------------------------------- */
/* Default sink attach: add the memctx, vmhost and input adapters for `endpoint`, all in
 * non-stub mode, configured from the probed facts `f`. `ud` and `vmid` are unused.
 * Returns 0 when all three were added; on any failure requests a detach of the endpoint
 * (rolling back whatever was added) and returns -1. The cfg structs hold the string
 * pointers from `f`, not copies. */
static int default_attach(void* ud, vmsig_core* core, uint32_t vmid, uint32_t endpoint,
                          const vmsig_host_facts* f) {
    (void)ud; (void)vmid;
    vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
    mc.stub = 0; mc.ram_path = f->ram_path; mc.low = f->low; mc.ro_fd = -1;
    vmsig_vmhost_cfg vh; memset(&vh, 0, sizeof vh);
    vh.stub = 0; vh.qmp_path = f->qmp_path;
    vmsig_input_cfg in; memset(&in, 0, sizeof in);
    in.stub = 0; in.qmp_path = NULL; /* input is uinput; power/lifecycle via the vmhost seam */
    if (vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, endpoint) < 0) goto fail;
    if (vmsig_core_add_adapter(core, vmsig_vmhost_ops(), &vh, endpoint) < 0) goto fail;
    if (vmsig_core_add_adapter(core, vmsig_input_ops(), &in, endpoint) < 0) goto fail;
    return 0;
fail:
    vmsig_core_detach_endpoint(core, endpoint); /* roll back any partial trio (deferred) */
    return -1;
}
/* Default sink detach: ask the core to detach everything on `endpoint`. `ud` and `vmid`
 * are unused. */
static void default_detach(void* ud, vmsig_core* core, uint32_t vmid, uint32_t endpoint) {
    (void)ud; (void)vmid;
    vmsig_core_detach_endpoint(core, endpoint);
}
/* ---- lifecycle --------------------------------------------------------------- */
void vmsig_discovery_free(void* user) {
vmsig_discovery* d = user;
if (!d) return;
if (d->ifd >= 0) close(d->ifd);
if (d->tfd >= 0) close(d->tfd);
free(d);
}
/* Create a discovery instance over `core`.
 *
 *   watch_dir   trigger directory to scan once and inotify-watch (required)
 *   pve_conf    host config dir for the default probe (NULL => /etc/pve/qemu-server)
 *   qmp_dir     QMP socket dir for the default probe (NULL => /var/run/qemu-server)
 *   slots_path  slot-table persistence file; NULL or "" => no persistence
 *   probe       host probe to copy; NULL => default Proxmox probe
 *   sink        actuation sink to copy; NULL => default core adapter trio
 *
 * Returns the handle, or NULL on error. A path that does not fit its internal buffer is
 * rejected rather than silently truncated (a truncated path would watch or persist to a
 * different location than the one requested).
 *
 * The default probe keeps `pve_conf`/`qmp_dir` by pointer, so the caller must keep those
 * strings alive. If registering the timer source fails, NULL is returned but the inotify
 * source (and with it the object) stays owned by the core until it is freed there. */
vmsig_discovery* vmsig_discovery_new(vmsig_core* core,
                                     const char* watch_dir, const char* pve_conf,
                                     const char* qmp_dir, const char* slots_path,
                                     const vmsig_host_probe* probe,
                                     const vmsig_discovery_sink* sink) {
    if (!core || !watch_dir) return NULL;
    vmsig_discovery* d = calloc(1, sizeof *d);
    if (!d) return NULL;
    d->core = core;
    d->ifd = d->tfd = d->wd = -1;

    /* Fail closed on over-long paths instead of operating on a truncated one. */
    int n = snprintf(d->watch_dir, sizeof d->watch_dir, "%s", watch_dir);
    if (n < 0 || (size_t)n >= sizeof d->watch_dir) { vmsig_discovery_free(d); return NULL; }
    if (slots_path && *slots_path) {
        n = snprintf(d->slots_path, sizeof d->slots_path, "%s", slots_path);
        if (n < 0 || (size_t)n >= sizeof d->slots_path) { vmsig_discovery_free(d); return NULL; }
        d->persist = 1;
    }

    for (int i = 0; i < VMSIG_SLOT_COUNT; i++) d->cand[i].endpoint = -1;
    if (probe) d->probe = *probe;
    else d->probe = host_probe_proxmox(d->watch_dir,
                                       pve_conf ? pve_conf : "/etc/pve/qemu-server",
                                       qmp_dir ? qmp_dir : "/var/run/qemu-server");
    if (sink) d->sink = *sink;
    else { d->sink.attach = default_attach; d->sink.detach = default_detach; d->sink.ud = NULL; }
    slot_load(&d->slots, d->persist ? d->slots_path : NULL);

    d->ifd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
    if (d->ifd < 0) { vmsig_discovery_free(d); return NULL; }
    d->wd = inotify_add_watch(d->ifd, d->watch_dir,
        IN_CREATE | IN_MOVED_TO | IN_DELETE | IN_MOVED_FROM | IN_CLOSE_WRITE | IN_ONLYDIR);
    /* a missing watch dir is not fatal: the dir may be created later; bootstrap finds nothing. */
    d->tfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    if (d->tfd < 0) { vmsig_discovery_free(d); return NULL; }

    /* The inotify source owns the discovery lifetime (on_free frees ifd+tfd+d); the timer
     * source shares the handle with on_free=NULL. */
    if (core_add_source(core, d->ifd, on_inotify, d, vmsig_discovery_free) != 0) {
        vmsig_discovery_free(d); return NULL;
    }
    if (core_add_source(core, d->tfd, on_timer, d, NULL) != 0) {
        /* ifd already enrolled with the on_free; closing here would double-free at core_free.
         * Leave it to core_free to reap. Return NULL to signal partial failure is not clean. */
        return NULL;
    }
    bootstrap_scan(d);
    rearm_timer(d);
    return d;
}
/* Endpoint bound to `vmid` in the slot table, or -1 when `d` is NULL or the vmid has no
 * slot bound. */
int vmsig_discovery_slot_of_vmid(vmsig_discovery* d, uint32_t vmid) {
    return (d != NULL) ? slot_lookup(&d->slots, vmid) : -1;
}
/* ---- TEST-ONLY hooks: drive the state machine deterministically (no inotify/timer) ---- */
/* TEST-ONLY: inject a trigger-file event for `vmid` directly — non-zero `present` means the
 * file appeared, zero means it went away. `d` must be non-NULL. */
void vmsig_discovery_feed(vmsig_discovery* d, uint32_t vmid, int present) {
    if (!present) {
        on_file_gone(d, vmid);
        return;
    }
    on_file_appear(d, vmid);
}
/* TEST-ONLY: run try_probe() once on every candidate currently in CAND_PROBING state,
 * regardless of its retry deadline. `d` must be non-NULL. */
void vmsig_discovery_tick(vmsig_discovery* d) {
    cand_ent* const end = d->cand + VMSIG_SLOT_COUNT;
    for (cand_ent* c = d->cand; c != end; ++c) {
        if (c->state != CAND_PROBING)
            continue;
        try_probe(d, c);
    }
}
+46
View File
@@ -0,0 +1,46 @@
#ifndef VMSIG_DISCOVERY_H
#define VMSIG_DISCOVERY_H
#include "vmsig_core.h"
#include "host_probe.h"
/* discovery.h — runtime VM discovery (private to the discovery module).
*
* Watches a tmpfs trigger dir for "vm-<vmid>-ram" files, corroborates each candidate via the
* host-probe seam, assigns a stable endpoint slot, hot-plugs the VM (sink), and publishes the
* roster. The state machine + slot allocation are decoupled from actuation by a sink seam, so
* the orchestration is unit-testable without armed adapters. */
typedef struct vmsig_discovery vmsig_discovery;
/* Actuation seam: bring a discovered VM up / tear it down. Default (NULL) wires the core
* adapter trio (memctx+vmhost+input via vmsig_core_add_adapter) and detach_endpoint. A test
* injects a recording sink to verify the state machine without real adapters. Roster publish
* is owned by discovery (not the sink): ATTACH after a successful attach, DETACH before tear-down. */
/* Pair of actuation callbacks plus an opaque context. The struct is copied by value into
 * the discovery object at creation. */
typedef struct {
/* Bring the VM up on `endpoint` using the probed host facts `f`. */
int (*attach)(void* ud, vmsig_core* core, uint32_t vmid, uint32_t endpoint,
const vmsig_host_facts* f); /* 0 = up, -1 = failed (slot freed) */
/* Tear the VM on `endpoint` down again. */
void (*detach)(void* ud, vmsig_core* core, uint32_t vmid, uint32_t endpoint);
void* ud; /* sink-private context; presumably handed back as the callbacks' first argument — confirm in discovery.c */
} vmsig_discovery_sink;
/* Create discovery over `core`. `watch_dir` (e.g. /dev/shm/vmsig) is scanned once and
* inotify-watched. `probe` NULL => default Proxmox probe over (watch_dir, pve_conf, qmp_dir);
* `sink` NULL => default core trio; `slots_path` NULL => no persistence. Registers the inotify
* + retry-timer loop sources and runs a bootstrap scan. The core owns the lifetime (freed at
* vmsig_core_free via the source on_free). NULL on error. */
vmsig_discovery* vmsig_discovery_new(vmsig_core* core,
const char* watch_dir, const char* pve_conf,
const char* qmp_dir, const char* slots_path,
const vmsig_host_probe* probe,
const vmsig_discovery_sink* sink);
/* Resolve vmid -> pinned endpoint for the admission policy (WS4); -1 if `d` is NULL or the
 * vmid has no slot bound in the slot table. */
int vmsig_discovery_slot_of_vmid(vmsig_discovery* d, uint32_t vmid);
/* TEST-ONLY: drive a file appear(present=1)/gone(present=0) directly, bypassing inotify; and
* force a re-probe of every probing candidate, bypassing the retry timer. Lets the state
* machine be unit-tested deterministically without threads/timers. */
void vmsig_discovery_feed(vmsig_discovery* d, uint32_t vmid, int present);
void vmsig_discovery_tick(vmsig_discovery* d);
#endif /* VMSIG_DISCOVERY_H */
+48
View File
@@ -0,0 +1,48 @@
#ifndef VMSIG_HOST_PROBE_H
#define VMSIG_HOST_PROBE_H
#include <stdint.h>
/* host_probe.h — the platform-coupled discovery seam (private to the discovery module).
*
* This is the ONLY surface that knows the host's config convention (/etc/pve/qemu-server),
* the QMP socket path convention, and the `info mtree` text. It produces a NEUTRAL facts
* struct; discovery.c consumes ONLY that and never names a path convention. A non-Proxmox
* host (or a unit test) injects its own vmsig_host_probe with the same two-stage contract. */
#define VMSIG_HF_NAME_MAX 32
#define VMSIG_HF_PATH_MAX 128
/* Neutral, host-convention-free description of one VM. Stage 1 (config) fills the
 * identity/path/config fields; stage 2 (live) refines vm_state, low, ok and retry. */
typedef struct {
uint32_t vmid;
char name[VMSIG_HF_NAME_MAX]; /* host VM name (truncated) */
char ram_path[VMSIG_HF_PATH_MAX]; /* guest-RAM backing file (the trigger) */
char qmp_path[VMSIG_HF_PATH_MAX]; /* QMP socket ('@' prefix => abstract) */
uint64_t cfg_ram_bytes; /* RAM size from host config (sanity) */
uint64_t low; /* below-4G split (memctx locator); 0=unknown */
int vm_state; /* VMSIG_VM_* from the liveness oracle */
int share_on; /* memory-backend share=on verified */
int ok; /* 1 => all fail-closed gates passed (attach) */
int retry; /* 1 => transient (QMP not up yet) — back off */
} vmsig_host_facts;
/* Two-stage probe. Stage 1 reads host config (cheap, local). Stage 2 corroborates liveness
* and derives `low` (QMP round-trip, bounded). Splitting them lets the state machine treat
* "config error" (permanent, drop) apart from "QMP not up yet" (transient, retry). */
/* Probe vtable: two callbacks plus an implementation-private context. Both callbacks get
 * the probe itself as first argument so they can reach `ud`. */
typedef struct vmsig_host_probe {
/* Populate paths + name + cfg_ram_bytes + share_on from host config; stat the RAM file.
* Sets out->ok=0 on any permanent gate failure (no share=on, missing/oversized file).
* Returns 0 when `out` was populated, -1 on a usage error. */
int (*config)(const struct vmsig_host_probe* p, uint32_t vmid, vmsig_host_facts* out);
/* Corroborate liveness + derive `low` via QMP. Mutates `io`: sets vm_state, low, ok; or
* retry=1 (QMP not reachable yet) / ok=0 (stale: file present but VM dead / unparsable).
* NOTE(review): the return value is not specified here; the default implementation
* always returns 0 and reports the outcome through `io` only. */
int (*live)(const struct vmsig_host_probe* p, vmsig_host_facts* io);
void* ud; /* implementation-private */
} vmsig_host_probe;
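/* The default Proxmox probe over (watch_dir, pve_conf, qmp_dir). `qmp_dir` is the QMP socket
 * dir (Proxmox: /var/run/qemu-server, socket "<qmp_dir>/<vmid>.qmp"). The returned struct
 * references the path strings by pointer — the caller keeps them alive. The configuration
 * lives in a single static object, so every call re-points that shared object: only one
 * default probe configuration can be in use per process. */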
vmsig_host_probe host_probe_proxmox(const char* watch_dir, const char* pve_conf,
const char* qmp_dir);
#endif /* VMSIG_HOST_PROBE_H */
+49
View File
@@ -0,0 +1,49 @@
#ifndef VMSIG_SLOT_H
#define VMSIG_SLOT_H
#include <stdint.h>
/* slot.h — vmid <-> endpoint allocator (private to the discovery module).
*
* The signaling core addresses VMs by an ENDPOINT bit in a 64-bit mask (endpoint < 64). A
* Proxmox vmid (100..1e9) does NOT fit 6 bits, so the binding is a PINNED table, not a pure
* function: a vmid keeps the SAME endpoint across VM restarts (so a control's endpoint_mask
* stays coherent), and the table is persisted so a daemon restart re-derives the same map.
*
* Bit reuse is a coherence event, not a silent alias: a freed bit is handed to a DIFFERENT
* vmid only AFTER the roster DETACH for the old occupant has been published. The discovery
* loop is single-threaded and publishes DETACH synchronously before any later attach, so the
* ordering itself enforces this — the allocator only needs to never double-assign a live bit. */
#define VMSIG_SLOT_COUNT 64
/* One endpoint slot: the array index is the endpoint number, the value is the vmid pinned
 * to it. */
typedef struct {
uint32_t vmid; /* 0 => slot free */
} slot_ent;
/* The whole vmid <-> endpoint binding. `used_mask` duplicates the occupancy of `ent` as a
 * bitmask so the lowest free endpoint can be found with one bit scan; the two must be
 * kept in sync by every mutator. */
typedef struct {
slot_ent ent[VMSIG_SLOT_COUNT];
uint64_t used_mask; /* mirror: bit e set <=> ent[e].vmid != 0 */
} slot_table;
/* Reset to all-free. */
void slot_init(slot_table* t);
/* Endpoint pinned to `vmid`, or -1 if `vmid` is not bound (or 0). */
int slot_lookup(const slot_table* t, uint32_t vmid);
/* Pin `vmid` to a stable endpoint. Idempotent: if `vmid` is already bound, returns its
* existing endpoint. Otherwise assigns the lowest free bit. Returns the endpoint [0,64),
* or -1 if `vmid`==0 or the table is full (the 64-VM ceiling). */
int slot_alloc(slot_table* t, uint32_t vmid);
/* Release the slot bound to `vmid` (no-op if not bound). */
void slot_free(slot_table* t, uint32_t vmid);
/* Persist the table to `path` atomically (tmp + rename), mode 0600. 0 / -1. */
int slot_save(const slot_table* t, const char* path);
/* Load the table from `path`. On a NULL path or a missing/short/corrupt/wrong-version file,
 * initializes empty and returns 0 (a fresh start is valid). -1 is reserved for a hard
 * error; the current implementation never returns it. */
int slot_load(slot_table* t, const char* path);
#endif /* VMSIG_SLOT_H */
+244
View File
@@ -0,0 +1,244 @@
/* host_probe.c — the default Proxmox host-probe (see host_probe.h). The ONLY TU that knows
* /etc/pve/qemu-server, the QMP socket path convention, and `info mtree`. Pure libc +
* AF_UNIX + files; no vmie/vmctl. config() is cheap+local; live() does a bounded blocking
* QMP round-trip (query-status + info mtree) and is fail-closed: anything it cannot confirm
* leaves ok=0 (the VM is not brought up rather than guessed). */
#define _GNU_SOURCE
#include "host_probe.h"
#include "vmsig_event.h" /* VMSIG_VM_* */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stddef.h>
#include <errno.h>
/* Private context of the default Proxmox probe (reached through vmsig_host_probe.ud).
 * The three strings are borrowed, not copied: whoever configured the probe owns them.
 * The trailing comments show the usual Proxmox values. */
typedef struct {
const char* watch_dir; /* /dev/shm/vmsig */
const char* pve_conf; /* /etc/pve/qemu-server */
const char* qmp_dir; /* /var/run/qemu-server */
} hp_cfg;
/* ---- /etc/pve config (stage 1) ----------------------------------------------- */
/* Read a whole small file into a freshly malloc'd, NUL-terminated buffer.
 *
 *   path  file to read
 *   cap   largest accepted file size in bytes
 *
 * Returns the buffer (caller frees), or NULL when the file cannot be opened/read, the
 * allocation fails, or the file holds MORE than `cap` bytes. An oversized file is rejected
 * rather than returned truncated, so a caller never parses a cut-off config. */
static char* read_file(const char* path, size_t cap) {
    int fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd < 0) return NULL;
    char* buf = malloc(cap + 1);
    if (!buf) { close(fd); return NULL; }
    size_t got = 0;
    /* Ask for up to cap+1 bytes in total: the extra byte exists only to tell "exactly cap
     * bytes" apart from "more than cap bytes"; it fits the spare byte kept for the NUL. */
    while (got <= cap) {
        ssize_t n = read(fd, buf + got, cap + 1 - got);
        if (n < 0) { if (errno == EINTR) continue; free(buf); close(fd); return NULL; }
        if (n == 0) break;
        got += (size_t)n;
    }
    close(fd);
    if (got > cap) { free(buf); return NULL; } /* oversize */
    buf[got] = 0;
    return buf;
}
/* Look up a top-level "key: value" line in a Proxmox-style config text.
 *
 *   conf  NUL-terminated config text
 *   key   key name without the colon; must match from column 0 and be followed by ':'
 *   out   receives the value, trimmed of leading blanks/tabs and trailing blanks/tabs/CR,
 *         truncated to cap-1 bytes and NUL-terminated
 *   cap   size of `out`
 *
 * Returns 1 if found, 0 otherwise (`out` is untouched on 0). The scan stops at the first
 * line that opens a "[section]": everything after it belongs to a snapshot/pending
 * section, not to the live top-level config, and must not satisfy the lookup. */
static int conf_val(const char* conf, const char* key, char* out, size_t cap) {
    if (!out || cap == 0) return 0; /* nowhere to store even the terminator */
    size_t klen = strlen(key);
    const char* p = conf;
    while (p && *p) {
        const char* line = p;
        if (*line == '[') break; /* end of the top-level block */
        const char* nl = strchr(p, '\n');
        size_t llen = nl ? (size_t)(nl - line) : strlen(line);
        if (llen > klen && strncmp(line, key, klen) == 0 && line[klen] == ':') {
            const char* v = line + klen + 1;
            while (*v == ' ' || *v == '\t') v++;
            size_t vlen = (size_t)((line + llen) - v);
            while (vlen && (v[vlen-1] == ' ' || v[vlen-1] == '\t' || v[vlen-1] == '\r')) vlen--;
            if (vlen >= cap) vlen = cap - 1;
            memcpy(out, v, vlen); out[vlen] = 0;
            return 1;
        }
        p = nl ? nl + 1 : NULL;
    }
    return 0;
}
/* Stage 1 of the default probe: derive the facts that come from host config alone.
 *
 * Clears `out`, then fills vmid, ram_path ("<watch_dir>/vm-<vmid>-ram"), qmp_path
 * ("<qmp_dir>/<vmid>.qmp"), name (falls back to "vm-<vmid>"), cfg_ram_bytes (the "memory"
 * key, taken as MiB) and share_on. out->ok is 1 only when the config file was readable and
 * contains "share=on". Always returns 0; failure is reported through out->ok == 0.
 *
 * A path that does not fit its buffer fails closed (ok stays 0) instead of being used
 * truncated, which would point the probe at a different file or socket.
 *
 * NOTE(review): share=on is detected by a plain substring search over the whole file, so
 * a match inside a comment or a snapshot section also counts. The RAM file is not stat'ed
 * here although the probe contract mentions it — confirm whether that gate lives elsewhere. */
static int hp_config(const struct vmsig_host_probe* p, uint32_t vmid, vmsig_host_facts* out) {
    const hp_cfg* c = p->ud;
    memset(out, 0, sizeof *out); /* ok = 0 from here on until every gate has passed */
    out->vmid = vmid;

    int n = snprintf(out->ram_path, sizeof out->ram_path, "%s/vm-%u-ram", c->watch_dir, vmid);
    if (n < 0 || (size_t)n >= sizeof out->ram_path) return 0;
    n = snprintf(out->qmp_path, sizeof out->qmp_path, "%s/%u.qmp", c->qmp_dir, vmid);
    if (n < 0 || (size_t)n >= sizeof out->qmp_path) return 0;

    char path[VMSIG_HF_PATH_MAX + 32];
    n = snprintf(path, sizeof path, "%s/%u.conf", c->pve_conf, vmid);
    if (n < 0 || (size_t)n >= sizeof path) return 0;

    char* conf = read_file(path, 64 * 1024);
    if (!conf) { out->ok = 0; return 0; } /* no host config => not a known VM */
    char tmp[VMSIG_HF_NAME_MAX];
    if (conf_val(conf, "name", out->name, sizeof out->name) == 0)
        snprintf(out->name, sizeof out->name, "vm-%u", vmid);
    if (conf_val(conf, "memory", tmp, sizeof tmp))
        out->cfg_ram_bytes = (uint64_t)strtoull(tmp, NULL, 10) * 1024ull * 1024ull;
    /* share=on is mandatory: without it the host mmap is a private copy, not guest RAM. */
    out->share_on = (strstr(conf, "share=on") != NULL) ? 1 : 0;
    free(conf);
    out->ok = out->share_on ? 1 : 0; /* config-level pass; liveness is stage 2 */
    return 0;
}
/* ---- QMP liveness + mtree low (stage 2) -------------------------------------- */
/* Open a connected AF_UNIX stream socket to the QMP endpoint `path`. A leading '@' selects
 * the abstract namespace (the '@' is replaced by a NUL byte and the address length covers
 * exactly the name); any other path is a filesystem socket. Send and receive are each
 * bounded to 250 ms. Returns the descriptor, or -1 on any failure (including a path that
 * does not fit sun_path). */
static int qmp_connect(const char* path) {
    int sock = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
    if (sock < 0)
        return -1;

    struct timeval io_bound = { .tv_sec = 0, .tv_usec = 250000 }; /* 250ms bound on each recv */
    setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &io_bound, sizeof io_bound);
    setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, &io_bound, sizeof io_bound);

    struct sockaddr_un addr;
    memset(&addr, 0, sizeof addr);
    addr.sun_family = AF_UNIX;

    const size_t len = strlen(path);
    const size_t base = offsetof(struct sockaddr_un, sun_path);
    socklen_t addr_len;
    if (path[0] != '@') {
        /* filesystem socket: needs room for the terminating NUL */
        if (len >= sizeof addr.sun_path)
            goto fail;
        memcpy(addr.sun_path, path, len);
        addr_len = (socklen_t)(base + len + 1);
    } else {
        /* abstract namespace: leading NUL + name, no terminator counted */
        if (len > sizeof addr.sun_path)
            goto fail;
        addr.sun_path[0] = 0;
        memcpy(addr.sun_path + 1, path + 1, len - 1);
        addr_len = (socklen_t)(base + len);
    }

    if (connect(sock, (struct sockaddr*)&addr, addr_len) < 0)
        goto fail;
    return sock;

fail:
    close(sock);
    return -1;
}
/* Read from `fd` until the buffer ends on a QMP message boundary ('\n'). QMP frames each
 * JSON object on one line, and an HMP string return keeps its newlines escaped, so every
 * message is a single line. The buffer may end up holding more than one complete line if
 * the peer sent several back to back.
 *
 *   buf/cap   destination; always NUL-terminated when 1 or 0 is returned
 *   out_len   receives the number of bytes stored (excluding the NUL)
 *
 * Returns 1 when data was read (complete line(s), a final unterminated chunk before EOF,
 * or a line longer than cap-1 that was truncated), 0 on EOF with no data, -1 on a
 * transport error/timeout or cap == 0.
 *
 * Reading continues while the last byte received is not '\n': returning as soon as ANY
 * newline is present would hand the caller a cut-off trailing message whenever a complete
 * line and the start of the next one arrive in the same read. */
static int qmp_read_line(int fd, char* buf, size_t cap, size_t* out_len) {
    size_t got = 0;
    if (cap == 0) return -1;
    buf[0] = 0;
    *out_len = 0;
    while (got + 1 < cap) {
        ssize_t r = read(fd, buf + got, cap - 1 - got);
        if (r < 0) { if (errno == EINTR) continue; return -1; } /* timeout/error */
        if (r == 0) {
            /* EOF: still terminate and report what was received so callers can scan it */
            buf[got] = 0;
            *out_len = got;
            return (got > 0) ? 1 : 0;
        }
        got += (size_t)r;
        if (buf[got - 1] == '\n') { *out_len = got; buf[got] = 0; return 1; }
    }
    *out_len = got; buf[got] = 0;
    return 1; /* line longer than cap: truncated but usable for our scans */
}
/* Consume QMP messages until one mentions "error" or "return". Anything else (the greeting
 * {"QMP":...} or an async {"event":...}) is skipped. At most 64 reads are attempted.
 * Returns 1 for a return, 0 for an error reply or a closed connection, -1 on a transport
 * error or when the read budget is exhausted. The last message read stays in `buf`.
 * "error" is tested before "return", so a buffer containing both counts as an error. */
static int qmp_await_return(int fd, char* buf, size_t cap) {
    enum { MAX_MESSAGES = 64 };
    int budget = MAX_MESSAGES;
    while (budget-- > 0) {
        size_t ignored_len = 0;
        const int rc = qmp_read_line(fd, buf, cap, &ignored_len);
        if (rc <= 0)
            return rc;
        if (strstr(buf, "\"error\"") != NULL)
            return 0;
        if (strstr(buf, "\"return\"") != NULL)
            return 1;
        /* greeting or async event: read the next message */
    }
    return -1;
}
/* Send one QMP command (`json`, already newline-terminated by the caller) and wait for its
 * reply in `buf`. Returns what qmp_await_return() returns (1 return, 0 error/closed, -1
 * transport), or -1 if the command could not be sent completely.
 *
 * The command is sent with MSG_NOSIGNAL so that a peer which already closed the socket
 * yields an EPIPE error instead of a process-killing SIGPIPE, and the loop tolerates
 * EINTR and short sends. `fd` must be a socket. */
static int qmp_cmd(int fd, const char* json, char* buf, size_t cap) {
    const char* p = json;
    size_t left = strlen(json);
    while (left > 0) {
        ssize_t w = send(fd, p, left, MSG_NOSIGNAL);
        if (w < 0) { if (errno == EINTR) continue; return -1; }
        if (w == 0) return -1; /* no progress: treat as a dead connection */
        p += w;
        left -= (size_t)w;
    }
    return qmp_await_return(fd, buf, cap);
}
/* Translate the "status" word of a query-status reply into a VMSIG_VM_* value. The word is
 * located as the first quoted string after the first ':' following "status", and matched
 * by prefix. "prelaunch" is reported as paused; "guest-panicked" and "internal-error" as
 * crashed; a missing field or any other word as VMSIG_VM_UNKNOWN. */
static int qmp_status_word(const char* buf) {
    const struct { const char* word; int state; } known[] = {
        { "running",        VMSIG_VM_RUNNING  },
        { "paused",         VMSIG_VM_PAUSED   },
        { "prelaunch",      VMSIG_VM_PAUSED   },
        { "shutdown",       VMSIG_VM_SHUTDOWN },
        { "guest-panicked", VMSIG_VM_CRASHED  },
        { "internal-error", VMSIG_VM_CRASHED  },
    };

    const char* cur = strstr(buf, "\"status\"");
    if (cur != NULL) cur = strchr(cur, ':');
    if (cur != NULL) cur = strchr(cur, '"');
    if (cur == NULL)
        return VMSIG_VM_UNKNOWN;
    cur++; /* step over the opening quote of the value */

    for (size_t i = 0; i < sizeof known / sizeof known[0]; ++i) {
        if (strncmp(cur, known[i].word, strlen(known[i].word)) == 0)
            return known[i].state;
    }
    return VMSIG_VM_UNKNOWN;
}
/* Derive the below-4G split from an `info mtree` reply: the size of the first range that
 * starts at guest-physical address 0 and whose descriptor (the text up to the next escaped
 * newline, or at most 64 bytes) contains "ram)". For a range [0, end] the result is end+1.
 * FAIL-CLOSED: returns 0 when no such range is found, or when end is 0 or all-ones.
 * NOTE: this parses HMP text (not a stable QMP schema) — verify against real `info mtree`
 * output. NOTE(review): presumably a flat view that splits low RAM into several pieces
 * would make the first GPA-0 range shorter than the real split — confirm. */
static uint64_t mtree_low(const char* ret) {
    /* The reply is a JSON string, so line breaks inside it appear as the two characters
     * backslash + 'n'. A candidate line looks like:
     *   " 0000000000000000-<end16> (prio N, ram): ..." */
    static const char GPA0[] = "0000000000000000-"; /* 16 zeros + '-' */
    const size_t gpa0_len = sizeof GPA0 - 1;

    const char* hit = strstr(ret, GPA0);
    while (hit != NULL) {
        const char* range_end = hit + gpa0_len;
        char* after = NULL;
        const unsigned long long last = strtoull(range_end, &after, 16);

        /* the descriptor after the range must mark it RAM (not the i/o "system" root) */
        const char* desc = after ? after : range_end;
        const char* eol = strstr(desc, "\\n");
        const char* limit = eol ? eol : (desc + 64);
        int ram = 0;
        for (const char* q = desc; q < limit && *q; ++q) {
            if (strncmp(q, "ram)", 4) == 0) {
                ram = 1;
                break;
            }
        }

        if (ram && last > 0 && last != ~0ull)
            return last + 1ull; /* [0, end] => low=end+1 */

        hit = strstr(range_end, GPA0);
    }
    return 0;
}
/* Stage 2 of the default probe: corroborate liveness over QMP and derive `low`.
 *
 * Connects to io->qmp_path, negotiates capabilities, runs query-status and — only when the
 * VM is running or paused — `info mtree -f` to obtain the below-4G split. Always returns 0;
 * the outcome is in `io`:
 *   retry = 1, ok = 0   connect or buffer allocation failed, or the handshake/status
 *                       exchange did not yield a return (transient)
 *   ok = 1              VM alive and io->low non-zero
 *   ok = 0, retry = 0   VM not alive, or no split available
 * io->low is only overwritten when the mtree command returned; otherwise it keeps the
 * value it had on entry. `p` is unused. */
static int hp_live(const struct vmsig_host_probe* p, vmsig_host_facts* io) {
    enum { QMP_BUF = 256 * 1024 };
    (void)p;
    io->retry = 0;

    const int sock = qmp_connect(io->qmp_path);
    if (sock < 0) {
        /* QMP not up yet => transient */
        io->retry = 1;
        io->ok = 0;
        return 0;
    }
    char* reply = malloc(QMP_BUF);
    if (reply == NULL) {
        close(sock);
        io->retry = 1;
        io->ok = 0;
        return 0;
    }

    const int have_status =
        qmp_cmd(sock, "{\"execute\":\"qmp_capabilities\"}\n", reply, QMP_BUF) == 1 &&
        qmp_cmd(sock, "{\"execute\":\"query-status\"}\n", reply, QMP_BUF) == 1;

    int alive = 0;
    if (!have_status) {
        io->retry = 1; /* handshake failed mid-way => transient */
    } else {
        io->vm_state = qmp_status_word(reply);
        alive = (io->vm_state == VMSIG_VM_RUNNING || io->vm_state == VMSIG_VM_PAUSED);
    }

    if (alive &&
        qmp_cmd(sock,
                "{\"execute\":\"human-monitor-command\","
                "\"arguments\":{\"command-line\":\"info mtree -f\"}}\n", reply, QMP_BUF) == 1) {
        io->low = mtree_low(reply);
    }

    free(reply);
    close(sock);

    /* fail-closed: alive AND a parsed split => bring up; else not (stale / unparsable). */
    io->ok = (alive && io->low != 0) ? 1 : 0;
    return 0;
}
vmsig_host_probe host_probe_proxmox(const char* watch_dir, const char* pve_conf,
const char* qmp_dir) {
static hp_cfg cfg; /* single daemon-wide probe; paths are process-lifetime strings */
cfg.watch_dir = watch_dir;
cfg.pve_conf = pve_conf;
cfg.qmp_dir = qmp_dir;
vmsig_host_probe p = { hp_config, hp_live, &cfg };
return p;
}
+91
View File
@@ -0,0 +1,91 @@
/* slot.c — vmid <-> endpoint allocator (see slot.h). Pure logic + a tiny pointer-free
* on-disk format; no core dependency. */
#define _GNU_SOURCE
#include "slot.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
/* Reset the table to all-free: every entry's vmid is 0 and no bit is set in used_mask. */
void slot_init(slot_table* t) {
    *t = (slot_table){0};
}
int slot_lookup(const slot_table* t, uint32_t vmid) {
if (!vmid) return -1;
for (int e = 0; e < VMSIG_SLOT_COUNT; e++)
if (t->ent[e].vmid == vmid) return e;
return -1;
}
/* Pin `vmid` to an endpoint. If it is already bound the existing endpoint is returned
 * (idempotent); otherwise the lowest endpoint whose bit is clear in used_mask is taken.
 * Returns the endpoint in [0,64), or -1 when `vmid` is 0 or every bit is in use. */
int slot_alloc(slot_table* t, uint32_t vmid) {
    if (vmid == 0)
        return -1;

    const int bound = slot_lookup(t, vmid);
    if (bound >= 0)
        return bound; /* idempotent pin */

    const uint64_t free_bits = ~t->used_mask;
    if (free_bits == 0)
        return -1; /* table full (64-VM ceiling) */

    /* index of the lowest set bit of the complement == lowest free endpoint */
    const int e = __builtin_ctzll(free_bits);
    t->ent[e].vmid = vmid;
    t->used_mask |= (1ull << e);
    return e;
}
/* Release the endpoint bound to `vmid`: clear its entry and its bit in used_mask. Does
 * nothing when `vmid` is 0 or not bound. */
void slot_free(slot_table* t, uint32_t vmid) {
    const int e = slot_lookup(t, vmid);
    if (e >= 0) {
        t->used_mask &= ~(1ull << e);
        t->ent[e].vmid = 0;
    }
}
/* ---- persistence: magic + version + 64 * uint32 vmid (native byte order, tmpfs-local) ---- */
#define SLOT_MAGIC 0x534C4F54u /* "SLOT" */
#define SLOT_VERSION 1u
/* On-disk image of the slot table, written and read as one raw struct in native byte
 * order. vmid[e] is the vmid pinned to endpoint e (0 = free); used_mask is not stored and
 * is rebuilt from the vmids on load. */
typedef struct {
uint32_t magic; /* SLOT_MAGIC, else the file is rejected */
uint32_t version; /* SLOT_VERSION, else the file is rejected */
uint32_t vmid[VMSIG_SLOT_COUNT];
} slot_blob;
/* Persist the table to `path` by writing "<path>.tmp" (created mode 0600) and renaming it
 * over `path`. Returns 0 on success, -1 on any failure (NULL path, temp name longer than
 * the internal buffer, open/write/fsync/close/rename error); on failure the temp file is
 * removed and an existing `path` is left untouched.
 *
 * The temp file is fsync'ed before the rename: without it the rename can reach the disk
 * before the data, so a crash could leave `path` pointing at an empty or partial file. */
int slot_save(const slot_table* t, const char* path) {
    if (!path) return -1;
    slot_blob b;
    memset(&b, 0, sizeof b);
    b.magic = SLOT_MAGIC; b.version = SLOT_VERSION;
    for (int e = 0; e < VMSIG_SLOT_COUNT; e++) b.vmid[e] = t->ent[e].vmid;

    char tmp[512];
    int n = snprintf(tmp, sizeof tmp, "%s.tmp", path);
    if (n < 0 || (size_t)n >= sizeof tmp) return -1;

    int fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0600);
    if (fd < 0) return -1;
    ssize_t w = write(fd, &b, sizeof b);
    int rc = (w == (ssize_t)sizeof b) ? 0 : -1;
    if (rc == 0 && fsync(fd) != 0) rc = -1; /* data durable before it becomes visible */
    if (close(fd) != 0) rc = -1;
    if (rc == 0 && rename(tmp, path) != 0) rc = -1;
    if (rc != 0) unlink(tmp);
    return rc;
}
/* Load the table from `path`. The table is always reset first; it stays empty when `path`
 * is NULL, the file cannot be opened, or the file is short or carries the wrong magic or
 * version. Otherwise each stored vmid is copied into its entry and used_mask is rebuilt.
 * Always returns 0. */
int slot_load(slot_table* t, const char* path) {
    slot_init(t);
    if (path == NULL)
        return 0;

    const int fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd < 0)
        return 0; /* no file => fresh start (valid) */

    slot_blob blob;
    const ssize_t nread = read(fd, &blob, sizeof blob);
    close(fd);

    /* the size test comes first so the header is only inspected when fully read */
    const int valid = nread == (ssize_t)sizeof blob &&
                      blob.magic == SLOT_MAGIC &&
                      blob.version == SLOT_VERSION;
    if (!valid) {
        slot_init(t); /* corrupt/old => fresh start */
        return 0;
    }

    for (int e = 0; e < VMSIG_SLOT_COUNT; e++) {
        const uint32_t id = blob.vmid[e];
        t->ent[e].vmid = id;
        if (id != 0)
            t->used_mask |= (1ull << e);
    }
    return 0;
}
+39
View File
@@ -0,0 +1,39 @@
#ifndef VMCTL_DRIVER_H
#define VMCTL_DRIVER_H
#include "vmctl.h"
#include "qmp.h"
/* driver.h — input-driver vtable, the concrete vmctl handle, and the shared
* event-kind enum. The event kind is the SINGLE source of truth that every
* driver switches on (never on magic numbers). */
/* Kind tag of one input event in a batch. As handled by the uinput driver:
 * ABS = absolute pointer axis, REL = relative pointer axis, BTN = pointer button (index
 * 0..7), KEY = keyboard key (evdev code), SCROLL = vertical/horizontal wheel. */
typedef enum {
VMCTL_EV_ABS, VMCTL_EV_REL, VMCTL_EV_BTN, VMCTL_EV_KEY, VMCTL_EV_SCROLL
} vmctl_ev_kind;
/* Per-driver vtable stored in every vmctl handle. */
typedef struct {
int (*send)(vmctl_t* v, const vmctl_batch* b); /* deliver an input batch */
void (*close)(vmctl_t* v); /* release driver resources */
} vmctl_driver_ops;
/* The concrete input handle behind the opaque vmctl_t. One instance per opened driver. */
struct vmctl {
vmctl_driver_ops ops; /* send/close entry points of the backing driver */
vmctl_driver driver; /* which driver backs this handle (e.g. VMCTL_DRIVER_UINPUT) */
qmp_conn* qmp; /* control channel; NULL if none */
int ui_fd_a; /* uinput driver: device A; -1 for QMP */
int ui_fd_b; /* uinput driver: device B (BOTH); -1 */
int ptr_mode; /* uinput driver: VMCTL_PTR_*; 0 for QMP */
/* Held-state receipt: key/btn down-bits as THIS handle last actuated them
* (not guest truth). Written only after a successful send in
* vmctl_batch_send; the send path never reads them. Zero-initialised by
* calloc at open = all up. Single-threaded (one handle owner): no locks. */
unsigned char keys_held[VMCTL_KEYS_SNAPSHOT_BYTES]; /* evdev-indexed key down-bits */
unsigned btns_held; /* VMCTL_BTN_* 0..7 down-bits */
};
/* driver factories (called from open.c per cfg->driver) */
vmctl_t* vmctl_open_qmp_driver (const vmctl_config* cfg);
vmctl_t* vmctl_open_uinput_driver(const vmctl_config* cfg);
#endif /* VMCTL_DRIVER_H */
+18
View File
@@ -0,0 +1,18 @@
#ifndef VMCTL_KEYMAP_H
#define VMCTL_KEYMAP_H
#include <stddef.h>
/* keymap.h — the single source of truth for keyboard keys. One descriptor maps
* a Linux evdev code to a QEMU QKeyCode name. Both the QMP and uinput drivers
* derive everything from this table. */
/* NOTE: named vmctl_keymap, not vmctl_key — the public API uses the ordinary
* identifier vmctl_key for the key-injection function (include/vmctl.h), and a
* typedef would collide with it. */
/* One key descriptor: Linux evdev key code -> QEMU QKeyCode name. */
typedef struct { int evdev; const char* qcode; } vmctl_keymap;
extern const vmctl_keymap VMCTL_KEYS[]; /* sorted by evdev (for bsearch) */
extern const int VMCTL_KEYS_LEN;
const char* vmctl_evdev_to_qcode(int evdev); /* NULL if absent */
#endif /* VMCTL_KEYMAP_H */
+14
View File
@@ -0,0 +1,14 @@
#ifndef VMCTL_QMP_H
#define VMCTL_QMP_H
#include <stddef.h>
/* qmp.h — minimal QMP client over an AF_UNIX socket: connect (with capability
* negotiation), disconnect, and synchronous command execution. */
typedef struct qmp_conn qmp_conn;
qmp_conn* qmp_connect(const char* sock_path); /* connect + qmp_capabilities; NULL on error */
void qmp_disconnect(qmp_conn* c);
int qmp_exec(qmp_conn* c, const char* cmd, char* resp, size_t cap); /* 0=return, -1=error */
#endif /* VMCTL_QMP_H */
+115
View File
@@ -0,0 +1,115 @@
/* keymap.c — the single source of truth for keyboard keys. VMCTL_KEYS maps
* Linux evdev codes to QEMU QKeyCode names (sorted by evdev for bsearch);
* vmctl_evdev_to_qcode is the sole lookup, consumed by the QMP driver. */
#include "keymap.h"
#include <linux/input-event-codes.h>
#include <stdlib.h>
/* Key table: one row per supported key, { evdev code, QEMU QKeyCode name }.
 * INVARIANT: rows are in strictly ascending evdev-code order, because the lookup below
 * uses bsearch(). Insert a new key at the position of its numeric evdev code, not at the
 * end. The uinput driver also enables exactly these key bits on the devices it creates. */
const vmctl_keymap VMCTL_KEYS[] = {
/* ---- main block: escape, number row, editing ---- */
{ KEY_ESC, "esc" },
{ KEY_1, "1" },
{ KEY_2, "2" },
{ KEY_3, "3" },
{ KEY_4, "4" },
{ KEY_5, "5" },
{ KEY_6, "6" },
{ KEY_7, "7" },
{ KEY_8, "8" },
{ KEY_9, "9" },
{ KEY_0, "0" },
{ KEY_MINUS, "minus" },
{ KEY_EQUAL, "equal" },
{ KEY_BACKSPACE, "backspace" },
{ KEY_TAB, "tab" },
/* ---- letter rows and punctuation ---- */
{ KEY_Q, "q" },
{ KEY_W, "w" },
{ KEY_E, "e" },
{ KEY_R, "r" },
{ KEY_T, "t" },
{ KEY_Y, "y" },
{ KEY_U, "u" },
{ KEY_I, "i" },
{ KEY_O, "o" },
{ KEY_P, "p" },
{ KEY_LEFTBRACE, "bracket_left" },
{ KEY_RIGHTBRACE, "bracket_right" },
{ KEY_ENTER, "ret" },
{ KEY_LEFTCTRL, "ctrl" },
{ KEY_A, "a" },
{ KEY_S, "s" },
{ KEY_D, "d" },
{ KEY_F, "f" },
{ KEY_G, "g" },
{ KEY_H, "h" },
{ KEY_J, "j" },
{ KEY_K, "k" },
{ KEY_L, "l" },
{ KEY_SEMICOLON, "semicolon" },
{ KEY_APOSTROPHE, "apostrophe" },
{ KEY_GRAVE, "grave_accent" },
{ KEY_LEFTSHIFT, "shift" },
{ KEY_BACKSLASH, "backslash" },
{ KEY_Z, "z" },
{ KEY_X, "x" },
{ KEY_C, "c" },
{ KEY_V, "v" },
{ KEY_B, "b" },
{ KEY_N, "n" },
{ KEY_M, "m" },
{ KEY_COMMA, "comma" },
{ KEY_DOT, "dot" },
{ KEY_SLASH, "slash" },
{ KEY_RIGHTSHIFT, "shift_r" },
{ KEY_LEFTALT, "alt" },
{ KEY_SPACE, "spc" },
{ KEY_CAPSLOCK, "caps_lock" },
/* ---- function keys and locks ---- */
{ KEY_F1, "f1" },
{ KEY_F2, "f2" },
{ KEY_F3, "f3" },
{ KEY_F4, "f4" },
{ KEY_F5, "f5" },
{ KEY_F6, "f6" },
{ KEY_F7, "f7" },
{ KEY_F8, "f8" },
{ KEY_F9, "f9" },
{ KEY_F10, "f10" },
{ KEY_NUMLOCK, "num_lock" },
{ KEY_SCROLLLOCK, "scroll_lock" },
{ KEY_102ND, "less" },
{ KEY_F11, "f11" },
{ KEY_F12, "f12" },
/* ---- right-hand modifiers, navigation, system keys ---- */
{ KEY_RIGHTCTRL, "ctrl_r" },
{ KEY_SYSRQ, "print" },
{ KEY_RIGHTALT, "alt_r" },
{ KEY_HOME, "home" },
{ KEY_UP, "up" },
{ KEY_PAGEUP, "pgup" },
{ KEY_LEFT, "left" },
{ KEY_RIGHT, "right" },
{ KEY_END, "end" },
{ KEY_DOWN, "down" },
{ KEY_PAGEDOWN, "pgdn" },
{ KEY_INSERT, "insert" },
{ KEY_DELETE, "delete" },
{ KEY_POWER, "power" },
{ KEY_PAUSE, "pause" },
{ KEY_LEFTMETA, "meta_l" },
{ KEY_RIGHTMETA, "meta_r" },
{ KEY_SLEEP, "sleep" },
{ KEY_WAKEUP, "wake" },
};
/* Number of rows in VMCTL_KEYS, computed from the array itself. */
const int VMCTL_KEYS_LEN = (int)(sizeof VMCTL_KEYS / sizeof VMCTL_KEYS[0]);
/* bsearch() comparator: orders two vmctl_keymap entries by evdev code. Returns <0, 0 or >0.
 * Uses comparisons instead of subtraction: the search key carries a caller-supplied code,
 * and `a - b` is undefined behaviour when the difference does not fit an int. */
static int key_cmp(const void* a, const void* b) {
    const int x = ((const vmctl_keymap*)a)->evdev;
    const int y = ((const vmctl_keymap*)b)->evdev;
    return (x > y) - (x < y);
}
/* QEMU QKeyCode name for a Linux evdev key code, or NULL when the code is not in
 * VMCTL_KEYS. Binary search over the table, which must be sorted by evdev code. */
const char* vmctl_evdev_to_qcode(int evdev) {
    const vmctl_keymap probe = { .evdev = evdev, .qcode = NULL };
    const vmctl_keymap* hit = bsearch(&probe, VMCTL_KEYS, (size_t)VMCTL_KEYS_LEN,
                                      sizeof VMCTL_KEYS[0], key_cmp);
    if (hit == NULL)
        return NULL;
    return hit->qcode;
}
+274
View File
@@ -0,0 +1,274 @@
/* uinput_driver.c — Linux uinput input driver (host source) plus optional
* passthrough into the guest. TWO distinct layers, not to be confused:
*
* (1) uinput — the host side: the library creates a /dev/input/eventN node
* and writes struct input_event into it on the hot path (uinput_driver_send).
*
* (2) virtio-input-host-pci — a QEMU device that forwards that host evdev node
* into the guest. It is an OPTIONAL setup step performed over QMP at open
* (device_add) and undone at close (device_del). It is NOT a per-event
* mechanism and lives entirely in the hotplug helpers below.
*
* uinput != virtio. Without qmp_path/input_bus the uinput device is created
* orphaned (an external layer may forward it). The driver switches on
* vmctl_ev_kind (never on magic numbers). */
#include "driver.h"
#include "keymap.h"
#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/uinput.h>
#include <linux/input-event-codes.h>
/* HID identity of the synthesized device (values preserved — behaviour unchanged). */
#define HWID_BUS 0x0003
#define HWID_VENDOR 0x046D
#define HWID_PRODUCT 0xC52B
#define HWID_VERSION 0x0111
#define HWID_NAME_A "VMInput-A"
#define HWID_NAME_B "VMInput-B"
/* Hotplug device ids for virtio-input-host-pci passthrough. */
#define PLUG_ID_A "vmctl-a"
#define PLUG_ID_B "vmctl-b"
/* Pointer-button index (0..7) -> evdev button code. The named constants are the eight
 * consecutive codes 0x110..0x117. */
static const uint16_t BTN_CODES[8] = {
    BTN_LEFT, BTN_RIGHT, BTN_MIDDLE, BTN_SIDE,
    BTN_EXTRA, BTN_FORWARD, BTN_BACK, BTN_TASK
};
/* Write one input_event (type, code, value; timestamp left zero) to `fd`. Best-effort: the
 * result of write() is deliberately ignored. */
static void emit(int fd, uint16_t type, uint16_t code, int32_t val) {
    struct input_event ev;
    memset(&ev, 0, sizeof ev);
    ev.type = type;
    ev.code = code;
    ev.value = val;
    ssize_t written = write(fd, &ev, sizeof ev);
    (void)written;
}

/* Terminate the current event group on `fd` with an EV_SYN / SYN_REPORT. */
static void syn(int fd) {
    emit(fd, EV_SYN, SYN_REPORT, 0);
}
/* Resolve the /dev/input node of a freshly created uinput device.
 * UI_GET_SYSNAME yields the name of the device's sysfs directory ("inputN"), not of its
 * event node; the node is the "eventM" child of /sys/devices/virtual/input/<sysname>.
 * Writes "/dev/input/eventM" to `evdev`. If sysfs cannot be read or holds no event child,
 * falls back to the previous "/dev/input/<sysname>" form so a caller that does not need
 * the path (orphaned device) is not failed by a missing sysfs. */
static void uinput_event_node(const char* sysname, char evdev[64]) {
    char sysdir[128];
    evdev[0] = '\0';
    int n = snprintf(sysdir, sizeof sysdir, "/sys/devices/virtual/input/%s", sysname);
    if (n > 0 && (size_t)n < sizeof sysdir) {
        DIR* dir = opendir(sysdir);
        if (dir) {
            struct dirent* de;
            while ((de = readdir(dir)) != NULL) {
                if (strncmp(de->d_name, "event", 5) == 0) {
                    snprintf(evdev, 64, "/dev/input/%s", de->d_name);
                    break;
                }
            }
            closedir(dir);
        }
    }
    if (!evdev[0])
        snprintf(evdev, 64, "/dev/input/%s", sysname); /* legacy fallback */
}

/* Create one uinput device and return its descriptor.
 *
 *   rel_motion  non-zero: relative pointer (REL_X/REL_Y); zero: absolute pointer
 *               (ABS_X/ABS_Y, range 0..VMCTL_ABS_MAX). Wheel axes, the eight pointer
 *               buttons and every key in VMCTL_KEYS are always enabled.
 *   id          bus/vendor/product/version reported by the device
 *   name        device name (truncated to the uinput name limit)
 *   evdev       out: path of the device's event node, see uinput_event_node()
 *
 * Returns the open /dev/uinput descriptor, or -1 if the device could not be opened, set up
 * or created, or its node name could not be determined (the device is destroyed again in
 * that case). The capability ioctls are best-effort; only setup/create are checked. */
static int uinput_create(int rel_motion, const vmctl_uinput_id* id, const char* name, char evdev[64]) {
    int fd = open("/dev/uinput", O_RDWR | O_CLOEXEC);
    if (fd < 0) return -1;
    ioctl(fd, UI_SET_EVBIT, EV_SYN);
    ioctl(fd, UI_SET_EVBIT, EV_KEY);
    /* Keyboard keybits come from the single source of truth: every key in
     * VMCTL_KEYS, so a key in the table always works through uinput too. */
    for (int i = 0; i < VMCTL_KEYS_LEN; i++)
        ioctl(fd, UI_SET_KEYBIT, VMCTL_KEYS[i].evdev);
    for (int b = 0; b < 8; b++)
        ioctl(fd, UI_SET_KEYBIT, (int)BTN_CODES[b]);
    ioctl(fd, UI_SET_EVBIT, EV_REL);
    ioctl(fd, UI_SET_RELBIT, REL_WHEEL);
    ioctl(fd, UI_SET_RELBIT, REL_HWHEEL);
    if (rel_motion) {
        ioctl(fd, UI_SET_RELBIT, REL_X);
        ioctl(fd, UI_SET_RELBIT, REL_Y);
    }
    if (!rel_motion) {
        ioctl(fd, UI_SET_EVBIT, EV_ABS);
        ioctl(fd, UI_SET_ABSBIT, ABS_X);
        ioctl(fd, UI_SET_ABSBIT, ABS_Y);
        struct uinput_abs_setup ax;
        memset(&ax, 0, sizeof ax);
        ax.code = ABS_X;
        ax.absinfo.minimum = 0;
        ax.absinfo.maximum = VMCTL_ABS_MAX;
        ioctl(fd, UI_ABS_SETUP, &ax);
        ax.code = ABS_Y;
        ioctl(fd, UI_ABS_SETUP, &ax);
    }
    struct uinput_setup us;
    memset(&us, 0, sizeof us);
    us.id.bustype = (uint16_t)id->bustype;
    us.id.vendor = (uint16_t)id->vendor;
    us.id.product = (uint16_t)id->product;
    us.id.version = (uint16_t)id->version;
    strncpy(us.name, name, sizeof us.name - 1); /* us was zeroed: always NUL-terminated */
    if (ioctl(fd, UI_DEV_SETUP, &us) < 0 || ioctl(fd, UI_DEV_CREATE) < 0) {
        close(fd);
        return -1;
    }
    char sysname[64] = {0};
    evdev[0] = '\0';
    if (ioctl(fd, UI_GET_SYSNAME(sizeof sysname), sysname) >= 0 && sysname[0])
        uinput_event_node(sysname, evdev);
    if (!evdev[0]) {
        ioctl(fd, UI_DEV_DESTROY);
        close(fd);
        return -1;
    }
    return fd;
}
/* ===== virtio-input-host-pci passthrough (layer 2, optional, QMP setup) ===== */
/* Hot-plug a virtio-input-host-pci device named `id` on `bus`, backed by the host event
 * node `evdev`. A device_del for the same id is issued first and its result ignored, so a
 * stale device left from an earlier run does not block the add. Returns the result of the
 * device_add exchange (0 = return, -1 = error). The arguments are inserted into the JSON
 * verbatim (no escaping). */
static int qmp_plug(qmp_conn* qmp, const char* bus, const char* evdev, const char* id) {
    char reply[1024];
    char request[512];

    /* best-effort removal of a leftover device with the same id */
    snprintf(request, sizeof request,
             "{\"execute\":\"device_del\",\"arguments\":{\"id\":\"%s\"}}", id);
    (void)qmp_exec(qmp, request, reply, sizeof reply);

    snprintf(request, sizeof request,
             "{\"execute\":\"device_add\",\"arguments\":{"
             "\"driver\":\"virtio-input-host-pci\","
             "\"id\":\"%s\","
             "\"evdev\":\"%s\","
             "\"bus\":\"%s\"}}",
             id, evdev, bus);
    const int rc = qmp_exec(qmp, request, reply, sizeof reply);
    return rc;
}
/* Remove the hot-plugged device `id` again (device_del). Best-effort: the reply is read
 * but the outcome is not reported. */
static void qmp_unplug(qmp_conn* qmp, const char* id) {
    char reply[1024];
    char request[256];
    snprintf(request, sizeof request,
             "{\"execute\":\"device_del\",\"arguments\":{\"id\":\"%s\"}}", id);
    (void)qmp_exec(qmp, request, reply, sizeof reply);
}
/* ===== hot path (layer 1, uinput write) ===== */
/* Deliver an input batch through uinput: each event is written to the matching device,
 * followed by a SYN_REPORT.
 *
 * Routing: ABS, BTN, KEY and SCROLL go to device A; REL goes to device B when it exists
 * (two-device mode), otherwise to device A.
 *
 * Returns 0 when every event was accepted, -1 at the first rejected event: ABS on a
 * REL-only handle, REL on a single ABS handle, a button index outside 0..7, a key code
 * outside 0..KEY_MAX, or an unknown kind. Events before the rejected one have already
 * been written. Key codes are range-checked because the code is narrowed to 16 bits for
 * the kernel: without the check an out-of-range value would silently alias another key.
 * Write failures inside emit() are not reported. */
static int uinput_driver_send(vmctl_t* v, const vmctl_batch* b) {
    int fd_a = v->ui_fd_a;
    int fd_b = v->ui_fd_b;
    int both = (fd_b >= 0);
    for (int i = 0; i < b->count; i++) {
        int code = b->ev[i].code;
        int value = b->ev[i].value;
        double scl = b->ev[i].scroll;
        switch ((vmctl_ev_kind)b->ev[i].kind) {
        case VMCTL_EV_ABS:
            if (v->ptr_mode == VMCTL_PTR_REL) return -1;
            emit(fd_a, EV_ABS, code == VMCTL_AXIS_X ? ABS_X : ABS_Y, value);
            syn(fd_a);
            break;
        case VMCTL_EV_REL: {
            if (!both && v->ptr_mode == VMCTL_PTR_ABS) return -1;
            int fd = both ? fd_b : fd_a;
            emit(fd, EV_REL, code == VMCTL_AXIS_X ? REL_X : REL_Y, value);
            syn(fd);
            break;
        }
        case VMCTL_EV_BTN:
            if (code < 0 || code >= 8) return -1;
            emit(fd_a, EV_KEY, BTN_CODES[code], value);
            syn(fd_a);
            break;
        case VMCTL_EV_KEY:
            if (code < 0 || code > KEY_MAX) return -1; /* would alias after the 16-bit cast */
            emit(fd_a, EV_KEY, (uint16_t)code, value);
            syn(fd_a);
            break;
        case VMCTL_EV_SCROLL:
            /* the fractional part of the scroll amount is dropped by the cast */
            emit(fd_a, EV_REL, code == VMCTL_SCROLL_V ? REL_WHEEL : REL_HWHEEL, (int32_t)scl);
            syn(fd_a);
            break;
        default:
            return -1;
        }
    }
    return 0;
}
/* Release everything the uinput driver holds. With a QMP connection, the
 * passthrough devices are unplugged first and the connection is closed; then
 * each open uinput device is destroyed and its fd closed. Does not free `v`. */
static void uinput_driver_close(vmctl_t* v) {
    const int have_b = (v->ui_fd_b >= 0);

    if (v->qmp != NULL) {
        qmp_unplug(v->qmp, PLUG_ID_A);
        if (have_b) {
            qmp_unplug(v->qmp, PLUG_ID_B);
        }
        qmp_disconnect(v->qmp);
    }
    if (v->ui_fd_a >= 0) {
        ioctl(v->ui_fd_a, UI_DEV_DESTROY);
        close(v->ui_fd_a);
    }
    if (have_b) {
        ioctl(v->ui_fd_b, UI_DEV_DESTROY);
        close(v->ui_fd_b);
    }
}
vmctl_t* vmctl_open_uinput_driver(const vmctl_config* cfg) {
vmctl_t* v = calloc(1, sizeof *v);
if (!v) return NULL;
v->driver = VMCTL_DRIVER_UINPUT;
v->ui_fd_a = -1;
v->ui_fd_b = -1;
/* HID identity: NULL config selects the built-in defaults verbatim; a
* non-NULL config supplies all numeric fields literally (zeros included). */
const vmctl_uinput_id DEFAULT_ID = {
HWID_BUS, HWID_VENDOR, HWID_PRODUCT, HWID_VERSION, HWID_NAME_A
};
const vmctl_uinput_id* id = cfg->uinput_id ? cfg->uinput_id : &DEFAULT_ID;
/* Base name: caller's non-empty name, else NULL = use default A/B names. */
const char* base = (cfg->uinput_id && cfg->uinput_id->name && cfg->uinput_id->name[0])
? cfg->uinput_id->name : NULL;
/* A/B suffix is added by the library only when two devices are created
* (VMCTL_PTR_BOTH) and only over a caller-supplied base name. */
char name_a[UINPUT_MAX_NAME_SIZE];
char name_b[UINPUT_MAX_NAME_SIZE];
const char* dev_a = base ? base : HWID_NAME_A;
const char* dev_b = HWID_NAME_B;
if (cfg->ptr_mode == VMCTL_PTR_BOTH && base) {
int base_max = (int)(sizeof name_a - 1 /*NUL*/ - 2 /*"-A"*/);
snprintf(name_a, sizeof name_a, "%.*s-A", base_max, base);
snprintf(name_b, sizeof name_b, "%.*s-B", base_max, base);
dev_a = name_a;
dev_b = name_b;
}
char evdev_a[64], evdev_b[64];
int rel_a = (cfg->ptr_mode == VMCTL_PTR_REL);
v->ui_fd_a = uinput_create(rel_a, id, dev_a, evdev_a);
if (v->ui_fd_a < 0) { free(v); return NULL; }
if (cfg->ptr_mode == VMCTL_PTR_BOTH) {
v->ui_fd_b = uinput_create(1, id, dev_b, evdev_b);
if (v->ui_fd_b < 0) {
ioctl(v->ui_fd_a, UI_DEV_DESTROY);
close(v->ui_fd_a);
free(v);
return NULL;
}
}
if (cfg->qmp_path) {
v->qmp = qmp_connect(cfg->qmp_path);
if (!v->qmp) {
if (v->ui_fd_b >= 0) { ioctl(v->ui_fd_b, UI_DEV_DESTROY); close(v->ui_fd_b); }
ioctl(v->ui_fd_a, UI_DEV_DESTROY);
close(v->ui_fd_a);
free(v);
return NULL;
}
if (cfg->input_bus && cfg->input_bus[0]) {
if (qmp_plug(v->qmp, cfg->input_bus, evdev_a, PLUG_ID_A) < 0) {
uinput_driver_close(v);
free(v);
return NULL;
}
if (cfg->ptr_mode == VMCTL_PTR_BOTH) {
if (qmp_plug(v->qmp, cfg->input_bus, evdev_b, PLUG_ID_B) < 0) {
qmp_unplug(v->qmp, PLUG_ID_A);
uinput_driver_close(v);
free(v);
return NULL;
}
}
}
}
v->ops.send = uinput_driver_send;
v->ops.close = uinput_driver_close;
v->ptr_mode = cfg->ptr_mode;
return v;
}
+156
View File
@@ -0,0 +1,156 @@
/* open.c — handle lifecycle and the input batch API. vmctl_open dispatches to a
* driver factory by cfg->driver; vmctl_close releases via ops.close. The batch
* builders set vmctl_event.kind (the single event-kind code that drivers read),
* and the single-event wrappers are thin batches of one. */
#include "driver.h"
#include <stdlib.h>
#include <string.h>
/* Open a handle by dispatching on cfg->driver to the matching driver factory.
 * Returns NULL for a NULL config, an unknown driver, or a factory failure. */
vmctl_t* vmctl_open(const vmctl_config* cfg) {
    if (cfg == NULL) {
        return NULL;
    }
    if (cfg->driver == VMCTL_DRIVER_QMP) {
        return vmctl_open_qmp_driver(cfg);
    }
    if (cfg->driver == VMCTL_DRIVER_UINPUT) {
        return vmctl_open_uinput_driver(cfg);
    }
    return NULL;
}
/* Release a handle: run the driver's close hook, then free the handle itself.
 * A NULL handle is a no-op. */
void vmctl_close(vmctl_t* v) {
    if (v != NULL) {
        v->ops.close(v);
        free(v);
    }
}
/* ===== Batch builders ===== */
void vmctl_batch_init(vmctl_batch* b) {
b->count = 0;
}
void vmctl_batch_abs(vmctl_batch* b, int axis, int value) {
if (b->count >= VMCTL_BATCH_MAX) return;
vmctl_event* e = &b->ev[b->count++];
e->kind = VMCTL_EV_ABS; e->code = axis; e->value = value; e->scroll = 0.0;
}
void vmctl_batch_rel(vmctl_batch* b, int axis, int delta) {
if (b->count >= VMCTL_BATCH_MAX) return;
vmctl_event* e = &b->ev[b->count++];
e->kind = VMCTL_EV_REL; e->code = axis; e->value = delta; e->scroll = 0.0;
}
void vmctl_batch_btn(vmctl_batch* b, int btn, int down) {
if (b->count >= VMCTL_BATCH_MAX) return;
vmctl_event* e = &b->ev[b->count++];
e->kind = VMCTL_EV_BTN; e->code = btn; e->value = down; e->scroll = 0.0;
}
void vmctl_batch_key(vmctl_batch* b, int evdev_code, int down) {
if (b->count >= VMCTL_BATCH_MAX) return;
vmctl_event* e = &b->ev[b->count++];
e->kind = VMCTL_EV_KEY; e->code = evdev_code; e->value = down; e->scroll = 0.0;
}
void vmctl_batch_scroll(vmctl_batch* b, int axis, double value) {
if (b->count >= VMCTL_BATCH_MAX) return;
vmctl_event* e = &b->ev[b->count++];
e->kind = VMCTL_EV_SCROLL; e->code = axis; e->value = 0; e->scroll = value;
}
/* Send a batch through the handle's driver and, on success, record the
 * resulting key/button held state.
 *
 * Returns 0 for an empty batch (nothing is sent), -1 for a NULL batch or a NULL
 * handle, otherwise the driver's send result. A non-zero driver result leaves
 * the held-state maps untouched: not sent = not recorded. */
int vmctl_batch_send(vmctl_t* v, vmctl_batch* b) {
    if (!b) return -1;
    if (b->count == 0) return 0;
    if (!v) return -1;

    int rc = v->ops.send(v, b);
    if (rc != 0) return rc; /* not sent = not recorded; never touch the receipt */

    /* Record the actuated key/btn down-bits (write-only; the send path above
     * never reads this map). abs/rel/scroll have no held state. */
    for (int i = 0; i < b->count; i++) {
        const vmctl_event* e = &b->ev[i];
        int down = e->value ? 1 : 0;
        switch (e->kind) {
        case VMCTL_EV_KEY: {
            int code = e->code;
            if (code < 0 || code > VMCTL_KEY_CODE_MAX) break; /* out of range: ignore */
            unsigned char mask = (unsigned char)(1u << (code & 7));
            if (down) v->keys_held[code >> 3] |= mask;
            else      v->keys_held[code >> 3] &= (unsigned char)~mask;
            break;
        }
        case VMCTL_EV_BTN: {
            int btn = e->code;
            if (btn < 0 || btn >= 8) break; /* out of range: ignore */
            unsigned mask = 1u << btn;
            if (down) v->btns_held |= mask;
            else      v->btns_held &= ~mask;
            break;
        }
        default:
            break; /* abs/rel/scroll: no-op for receipt */
        }
    }
    return rc;
}
/* ===== Single-event wrappers ===== */
/* Send one absolute-axis event as a batch of one. */
int vmctl_abs(vmctl_t* v, int axis, int value) {
    vmctl_batch one;

    vmctl_batch_init(&one);
    vmctl_batch_abs(&one, axis, value);
    return vmctl_batch_send(v, &one);
}

/* Send one relative-axis event as a batch of one. */
int vmctl_rel(vmctl_t* v, int axis, int delta) {
    vmctl_batch one;

    vmctl_batch_init(&one);
    vmctl_batch_rel(&one, axis, delta);
    return vmctl_batch_send(v, &one);
}

/* Send one button event as a batch of one. */
int vmctl_btn(vmctl_t* v, int btn, int down) {
    vmctl_batch one;

    vmctl_batch_init(&one);
    vmctl_batch_btn(&one, btn, down);
    return vmctl_batch_send(v, &one);
}

/* Send one key event as a batch of one. */
int vmctl_key(vmctl_t* v, int evdev_code, int down) {
    vmctl_batch one;

    vmctl_batch_init(&one);
    vmctl_batch_key(&one, evdev_code, down);
    return vmctl_batch_send(v, &one);
}

/* Send one scroll event as a batch of one. */
int vmctl_scroll(vmctl_t* v, int axis, double value) {
    vmctl_batch one;

    vmctl_batch_init(&one);
    vmctl_batch_scroll(&one, axis, value);
    return vmctl_batch_send(v, &one);
}
/* ===== Held-state receipt (read-only) =====
* Reads of the actuator's own last output; never mutate driver state. The
* in-range predicate matches the write path in vmctl_batch_send. */
/* Return 1 if the recorded state for `evdev_code` is "down", else 0. A NULL
 * handle or a code outside [0, VMCTL_KEY_CODE_MAX] reports 0. */
int vmctl_key_held(vmctl_t* v, int evdev_code) {
    if (v == NULL) return 0;
    if (evdev_code < 0 || evdev_code > VMCTL_KEY_CODE_MAX) return 0;

    const unsigned cell = v->keys_held[evdev_code >> 3]; /* byte holding the bit */
    return (int)((cell >> (evdev_code & 7)) & 1u);
}
/* Return 1 if button `btn` is recorded as held, else 0. A NULL handle or an
 * index outside [0, 8) reports 0. */
int vmctl_btn_held(vmctl_t* v, int btn) {
    if (v == NULL) return 0;
    if (btn < 0 || btn >= 8) return 0;

    const unsigned bit = 1u << btn;
    return (v->btns_held & bit) != 0u ? 1 : 0;
}
/* Copy the held-key bitmap into `bits`, at most `nbytes` and never more than
 * VMCTL_KEYS_SNAPSHOT_BYTES. Returns the number of bytes copied, or -1 when the
 * handle or the destination is NULL. */
int vmctl_keys_snapshot(vmctl_t* v, unsigned char* bits, size_t nbytes) {
    size_t ncopy = VMCTL_KEYS_SNAPSHOT_BYTES;

    if (v == NULL || bits == NULL) {
        return -1;
    }
    if (nbytes < ncopy) {
        ncopy = nbytes;
    }
    memcpy(bits, v->keys_held, ncopy);
    return (int)ncopy;
}
/* Return the held-button bitmask, or 0 for a NULL handle. */
unsigned vmctl_btns_snapshot(vmctl_t* v) {
    return (v != NULL) ? v->btns_held : 0;
}
+18
View File
@@ -0,0 +1,18 @@
/* power.c — QMP power/lifecycle actuation. This plane is orthogonal to the
* input driver and always rides the shared QMP channel; every entry returns -1
* when there is no connection. */
#include "driver.h"
/* QMP responses are small; a stack buffer suffices. */
/* Execute one argument-less QMP command on the handle's connection.
 * Returns -1 when the handle is NULL or has no QMP connection, otherwise the
 * qmp_exec result. The reply text is read into a local buffer and discarded. */
static int qmp_simple(vmctl_t* v, const char* cmd) {
    if (!v || !v->qmp) return -1;
    char resp[1024];
    return qmp_exec(v->qmp, cmd, resp, sizeof resp);
}
/* Issue the QMP "system_powerdown" command. */
int vmctl_powerdown(vmctl_t* v) {
    return qmp_simple(v, "{\"execute\":\"system_powerdown\"}");
}

/* Issue the QMP "system_reset" command. */
int vmctl_reset(vmctl_t* v) {
    return qmp_simple(v, "{\"execute\":\"system_reset\"}");
}

/* Issue the QMP "system_wakeup" command. */
int vmctl_wakeup(vmctl_t* v) {
    return qmp_simple(v, "{\"execute\":\"system_wakeup\"}");
}

/* Issue the QMP "stop" command. */
int vmctl_pause(vmctl_t* v) {
    return qmp_simple(v, "{\"execute\":\"stop\"}");
}

/* Issue the QMP "cont" command. */
int vmctl_resume(vmctl_t* v) {
    return qmp_simple(v, "{\"execute\":\"cont\"}");
}
+113
View File
@@ -0,0 +1,113 @@
/* qmp.c — AF_UNIX QMP client: connect + capability handshake, line-based recv
* with a poll timeout, and synchronous command execution. */
#include "qmp.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <poll.h>
#define QMP_TIMEOUT_MS 5000
#define QMP_BUF_SIZE 4096
/* One QMP connection: just the connected AF_UNIX stream socket. */
struct qmp_conn {
int fd; /* socket fd; closed by qmp_disconnect */
};
/* Read one '\n'-terminated line from `fd` into `buf` (always NUL-terminated).
 *
 * Reads a byte at a time, waiting up to QMP_TIMEOUT_MS for each byte. Stops
 * after the newline or once cap-1 bytes are stored, so an over-long line comes
 * back truncated and its remainder is seen as the next line.
 *
 * Returns the number of bytes stored, or -1 on timeout, EOF, read error, or an
 * unusable buffer (NULL / cap == 0). A poll or read interrupted by a signal
 * (EINTR) is retried rather than reported as failure; each retry restarts that
 * byte's timeout. */
static int recv_line(int fd, char* buf, size_t cap) {
    size_t n = 0;

    if (!buf || cap == 0) return -1; /* no room even for the terminator */

    while (n + 1 < cap) {
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        int ready = poll(&pfd, 1, QMP_TIMEOUT_MS);
        if (ready < 0 && errno == EINTR) continue;
        if (ready <= 0) return -1; /* timeout or poll error */

        char c;
        ssize_t got = read(fd, &c, 1);
        if (got < 0 && errno == EINTR) continue;
        if (got != 1) return -1; /* EOF or read error */

        buf[n++] = c;
        if (c == '\n') break;
    }
    buf[n] = '\0';
    return (int)n;
}
/* Send exactly `len` bytes of `s` on socket `fd`, looping over short writes.
 *
 * Uses send(MSG_NOSIGNAL) so a peer that has gone away yields -1 (EPIPE)
 * instead of raising SIGPIPE in the calling process, and retries a call
 * interrupted by a signal (EINTR). `fd` must be a socket.
 * Returns 0 when everything was sent, -1 on error. */
static int send_all(int fd, const char* s, size_t len) {
    while (len > 0) {
        ssize_t w = send(fd, s, len, MSG_NOSIGNAL);
        if (w < 0 && errno == EINTR) continue;
        if (w <= 0) return -1;
        s += w;
        len -= (size_t)w;
    }
    return 0;
}
/* Connect to the QMP AF_UNIX socket at `sock_path` and complete the
 * capabilities handshake: read the greeting line, send qmp_capabilities, read
 * the reply.
 *
 * Returns a heap-allocated connection (release with qmp_disconnect), or NULL
 * when: the path is NULL or does not fit sockaddr_un.sun_path (it is rejected,
 * never silently truncated to a different path), socket/connect fails, either
 * line cannot be read in time, the capabilities reply carries an "error"
 * member, or allocation fails. The socket is closed on every failure path. */
qmp_conn* qmp_connect(const char* sock_path) {
    static const char cap_cmd[] = "{\"execute\":\"qmp_capabilities\"}\r\n";
    struct sockaddr_un addr;
    char buf[QMP_BUF_SIZE];
    qmp_conn* c;
    size_t path_len;
    int fd;

    if (!sock_path) return NULL;
    path_len = strlen(sock_path);
    if (path_len >= sizeof addr.sun_path) return NULL; /* would be truncated */

    fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) return NULL;

    memset(&addr, 0, sizeof addr);
    addr.sun_family = AF_UNIX;
    memcpy(addr.sun_path, sock_path, path_len + 1);

    if (connect(fd, (struct sockaddr*)&addr, sizeof addr) < 0) goto fail;
    if (recv_line(fd, buf, sizeof buf) < 0) goto fail;             /* greeting */
    if (send_all(fd, cap_cmd, sizeof cap_cmd - 1) < 0) goto fail;
    if (recv_line(fd, buf, sizeof buf) < 0) goto fail;             /* reply */
    if (strstr(buf, "\"error\"")) goto fail;                       /* negotiation refused */

    c = malloc(sizeof *c);
    if (!c) goto fail;
    c->fd = fd;
    return c;

fail:
    close(fd);
    return NULL;
}
/* Close the connection's socket and free the connection. NULL is a no-op. */
void qmp_disconnect(qmp_conn* c) {
    if (c != NULL) {
        close(c->fd);
        free(c);
    }
}
/* Send `cmd` followed by "\r\n" and read reply lines until one contains a
 * "return" or an "error" member; lines with neither are skipped.
 *
 * The deciding line is copied into `resp` (truncated to cap-1 bytes and
 * NUL-terminated) when `resp` is non-NULL and cap > 0.
 * Returns 0 for a "return" line, -1 for an "error" line or any send/receive
 * failure. "return" takes precedence when a line contains both. */
int qmp_exec(qmp_conn* c, const char* cmd, char* resp, size_t cap) {
    char line[QMP_BUF_SIZE];
    int ok;

    if (send_all(c->fd, cmd, strlen(cmd)) < 0) return -1;
    if (send_all(c->fd, "\r\n", 2) < 0) return -1;

    for (;;) {
        if (recv_line(c->fd, line, sizeof line) < 0) return -1;

        ok = (strstr(line, "\"return\"") != NULL);
        if (!ok && strstr(line, "\"error\"") == NULL) {
            continue; /* neither a result nor an error: keep reading */
        }
        if (resp != NULL && cap > 0) {
            strncpy(resp, line, cap - 1);
            resp[cap - 1] = '\0';
        }
        return ok ? 0 : -1;
    }
}
+94
View File
@@ -0,0 +1,94 @@
/* qmp_driver.c — QMP input driver: serialises an input batch into a single
* input-send-event command and sends it in one round-trip. No guest driver is
* required. Switches on vmctl_ev_kind (never on magic numbers). */
#include "driver.h"
#include "keymap.h"
#include <stdlib.h>
#include <stdio.h>
/* Button index -> name placed in the "button" field of a QMP "btn" event.
 * The index is the vmctl button code; BTN_NAMES_LEN bounds valid codes.
 * NOTE(review): confirm every name here is accepted by the target QEMU's
 * input-send-event schema. */
static const char* btn_names[] = {
"left", "right", "middle", "side", "extra", "forward", "back", "task"
};
#define BTN_NAMES_LEN ((int)(sizeof btn_names / sizeof btn_names[0]))
/* Serialise the whole batch into one input-send-event command and execute it
 * in a single QMP round-trip.
 *
 * Returns the qmp_exec result, or -1 without sending anything when: a button
 * code is outside btn_names, a key code has no qcode mapping, an event kind is
 * unknown, or the command would not fit the local buffer. Every snprintf is
 * bounds-checked; previously a truncated write pushed `pos` past the buffer
 * size, which turned the remaining-size argument negative and allowed a stack
 * overflow.
 *
 * NOTE(review): confirm the target QEMU accepts the "scl" event type used for
 * scroll; also "%g" is locale-dependent and prints inf/nan for non-finite
 * values, neither of which is valid JSON. */
static int qmp_driver_send(vmctl_t* v, const vmctl_batch* b) {
    char json[8192];
    char resp[4096];
    size_t pos = 0;
    int n;

    n = snprintf(json, sizeof json,
                 "{\"execute\":\"input-send-event\",\"arguments\":{\"events\":[");
    if (n < 0 || (size_t)n >= sizeof json) return -1;
    pos = (size_t)n;

    for (int i = 0; i < b->count; i++) {
        const size_t room  = sizeof json - pos;      /* bytes left incl. NUL */
        const char*  sep   = (i > 0) ? "," : "";
        const int    code  = b->ev[i].code;
        const int    value = b->ev[i].value;

        switch ((vmctl_ev_kind)b->ev[i].kind) {
        case VMCTL_EV_ABS:
            n = snprintf(json + pos, room,
                         "%s{\"type\":\"abs\",\"data\":{\"axis\":\"%s\",\"value\":%d}}",
                         sep, code == VMCTL_AXIS_X ? "x" : "y", value);
            break;
        case VMCTL_EV_REL:
            n = snprintf(json + pos, room,
                         "%s{\"type\":\"rel\",\"data\":{\"axis\":\"%s\",\"value\":%d}}",
                         sep, code == VMCTL_AXIS_X ? "x" : "y", value);
            break;
        case VMCTL_EV_BTN:
            if (code < 0 || code >= BTN_NAMES_LEN) return -1;
            n = snprintf(json + pos, room,
                         "%s{\"type\":\"btn\",\"data\":{\"button\":\"%s\",\"down\":%s}}",
                         sep, btn_names[code], value ? "true" : "false");
            break;
        case VMCTL_EV_KEY: {
            const char* qcode = vmctl_evdev_to_qcode(code);
            if (!qcode) return -1;
            n = snprintf(json + pos, room,
                         "%s{\"type\":\"key\",\"data\":{\"key\":{\"type\":\"qcode\","
                         "\"data\":\"%s\"},\"down\":%s}}",
                         sep, qcode, value ? "true" : "false");
            break;
        }
        case VMCTL_EV_SCROLL:
            n = snprintf(json + pos, room,
                         "%s{\"type\":\"scl\",\"data\":{\"axis\":\"%s\",\"value\":%g}}",
                         sep, code == VMCTL_SCROLL_V ? "vertical" : "horizontal",
                         b->ev[i].scroll);
            break;
        default:
            return -1;
        }
        if (n < 0 || (size_t)n >= room) return -1; /* would overflow json[] */
        pos += (size_t)n;
    }

    n = snprintf(json + pos, sizeof json - pos, "]}}");
    if (n < 0 || (size_t)n >= sizeof json - pos) return -1;

    return qmp_exec(v->qmp, json, resp, sizeof resp);
}
/* Close hook of the QMP driver: drop the QMP connection. The handle itself is
 * not freed here. */
static void qmp_driver_close(vmctl_t* v) {
    qmp_conn* conn = v->qmp;

    qmp_disconnect(conn);
}
/* Create a QMP-backed handle: connect to cfg->qmp_path, then allocate and fill
 * the handle. Returns NULL when cfg or cfg->qmp_path is NULL, when the
 * connection cannot be established, or when allocation fails (the connection
 * is closed again in that case). */
vmctl_t* vmctl_open_qmp_driver(const vmctl_config* cfg) {
    if (!cfg || !cfg->qmp_path) return NULL;

    qmp_conn* qmp = qmp_connect(cfg->qmp_path);
    if (!qmp) return NULL;

    vmctl_t* v = calloc(1, sizeof *v);
    if (!v) {
        qmp_disconnect(qmp);
        return NULL;
    }
    v->driver    = VMCTL_DRIVER_QMP;
    v->qmp       = qmp;
    v->ui_fd_a   = -1; /* no uinput devices in this driver */
    v->ui_fd_b   = -1;
    v->ptr_mode  = 0;
    v->ops.send  = qmp_driver_send;
    v->ops.close = qmp_driver_close;
    return v;
}
+39
View File
@@ -0,0 +1,39 @@
/* control.c — control-write SEAM ONLY (this never writes guest memory).
*
* The actual write is performed elsewhere, by a component that holds read-write
* access to the region; this only builds the desired vgpu_control_t image from
* the intent and computes the GVA + offset/length of the significant field range
* for that atomic write under the ctrl_gen seqlock. There is no gva_write here
* and there must not be — the source is a RO fd that would fault on a store anyway.
*
* The reported out_ctrl_gva is a GVA in the PRODUCER's user address space
* (region base + VGPU_CONTROL_OFFSET, cached as r->ctrl_gva): the external write
* MUST be performed under r->proc_cr3, NOT the System kcr3.
*/
#include "perception-internal.h"
/* Build the desired control image for an external writer; nothing is written
 * to guest memory here.
 *
 *   r            - region handle (supplies the cached control-block GVA)
 *   in           - intent: desired_state, target_fps, draw_cursor, full_frame_req
 *   out_frame    - receives the image; zeroed first, so every field not taken
 *                  from the intent (ctrl_gen included) is 0
 *   out_ctrl_gva - receives r->ctrl_gva
 *   out_off/len  - receive the byte range of the image to write:
 *                  desired_state through the end of full_frame_req
 *
 * The range end is taken from the size of the full_frame_req member itself
 * rather than an assumed sizeof(uint32_t), so it stays correct if the ABI type
 * of that field changes. Returns 0, or -1 if any argument is NULL. */
int vgpup_build_control_write(vgpup_region* r, const vgpup_control_intent* in,
                              vgpu_control_t* out_frame, uint64_t* out_ctrl_gva,
                              uint32_t* out_off, uint32_t* out_len)
{
    const size_t first = offsetof(vgpu_control_t, desired_state);
    size_t end;

    if (!r || !in || !out_frame || !out_ctrl_gva || !out_off || !out_len) { return -1; }

    memset(out_frame, 0, sizeof *out_frame);
    out_frame->desired_state  = in->desired_state;
    out_frame->target_fps     = in->target_fps;
    out_frame->draw_cursor    = in->draw_cursor;
    out_frame->full_frame_req = in->full_frame_req;

    *out_ctrl_gva = r->ctrl_gva; /* region base + VGPU_CONTROL_OFFSET (cached) */

    end = offsetof(vgpu_control_t, full_frame_req) + sizeof out_frame->full_frame_req;
    *out_off = (uint32_t)first;
    *out_len = (uint32_t)(end - first);
    return 0;
}
+170
View File
@@ -0,0 +1,170 @@
/* discover.c — process discovery + user-AS region scan (NO magic) + handle.
*
* The region is a RW shared mapping projected into the USER address space of a
* producer PROCESS — NOT a kernel VA in the System address space. So discovery
* works by PROCESS: enumerate processes (proc_list) over the RO win32 context,
* and for each one scan its user-AS under process.cr3 in [USER_MIN, USER_MAX]
* for a contiguous RW run >= VGPU_REGION_BYTES, read the producer block at its
* base, and accept it iff the whole structural-invariant table holds. The System
* kcr3 is needed only to open the context and walk processes (the caller already
* baked it into v); the region itself is always read under the producer's cr3.
*
* There is NO magic field in the ABI and the owner forbids inventing one. The
* discriminator is the cheap RW-run filter + the invariant table + two-phase
* heartbeat liveness — and the inter-phase WAIT is the caller's (the core never
* sleeps). Discovery is STRUCTURAL: never filtered by process.name.
*
* Layering: the win32 dependency (proc_list, vmie_win32_mem) lives ONLY in this
* file, in the per-process loop. The per-cr3 scan (vgpup_scan_user_as_for_region)
* is pure gva_* so it stays win32-agnostic and unit-testable under a synthetic
* cr3. A <0 read after binding means the producer process may have restarted
* (its pages are gone); the core only reports it — re-discovery is the caller's.
*/
#include <stdlib.h>
#include "perception-internal.h"
/* How many region runs to ask for per process when probing its user-AS. A user
* address space has many runs; this is generous, and the scan early-exits on the
* first accepted candidate anyway. */
#define VGPUP_MAX_REGIONS 256
/* How many processes to enumerate. proc_list stops at this; raising it would see
* more, but a producer is an ordinary user process well within this bound. */
#define VGPUP_MAX_PROCS 512
/* Read the producer block at `region_gva` under `cr3` into *out (one gva_read of
* the whole block). 0 on success, <0 on read error. */
/* Fetch the whole producer block located at `region_gva` in the address space
 * selected by `cr3`, with a single gva_read. Returns 0 on success and -1 when
 * the read reports an error. */
static int read_producer_block(vmie_mem* m, uint64_t cr3, uint64_t region_gva,
                               vgpu_producer_t* out)
{
    if (gva_read(m, (uintptr_t)cr3, (uintptr_t)region_gva, out, sizeof *out) < 0) {
        return -1;
    }
    return 0;
}
/* Scan ONE process user-AS (steps 3-5) under `cr3`: walk the RW runs in
* [USER_MIN, USER_MAX] and, for each contiguous run >= VGPU_REGION_BYTES, test
* the producer block at the run base against the invariant table. On the first
* accepted candidate write its base GVA + heartbeat snapshot and return 0;
* <0 if none is found / a read fails. Pure gva_* — no proc_list, no win32.
*
* Adjacent same-protection runs are coalesced: gva_regions reports VA-contiguous
* runs, but a region can land as one run or as touching neighbours, so we extend
* a running span while the next run starts exactly where the current one ends.
* The window [USER_MIN, USER_MAX] lies in one canonical half, as gva_regions
* requires. The RW filter (VR_R|VR_W) matches the shared mapping's protection
* and is cheap — it reads region metadata, not the 98 MiB of region bytes. */
/* Look for the region inside one address space.
 *
 * Asks gva_regions for the readable+writable runs in [USER_MIN, USER_MAX] under
 * `cr3` (at most VGPUP_MAX_REGIONS are examined). Starting from each run, the
 * runs that follow back-to-back in VA are added up; when the total reaches
 * VGPU_REGION_BYTES the producer block at that run's base is read and checked
 * against the invariant table. The first base that passes is written to
 * *out_region_gva together with its heartbeat in *out_hb0.
 *
 * Returns 0 on a hit, -1 on bad arguments, a gva_regions error, or no hit. */
int vgpup_scan_user_as_for_region(vmie_mem* m, uint64_t cr3,
                                  uint64_t* out_region_gva, uint64_t* out_hb0)
{
    vregion runs[VGPUP_MAX_REGIONS];
    int count, first;

    if (m == NULL || out_region_gva == NULL || out_hb0 == NULL) { return -1; }

    count = gva_regions(m, (uintptr_t)cr3, USER_MIN, USER_MAX, VR_R | VR_W,
                        runs, VGPUP_MAX_REGIONS);
    if (count < 0) { return -1; }
    if (count > VGPUP_MAX_REGIONS) { count = VGPUP_MAX_REGIONS; } /* truncated list */

    for (first = 0; first < count; ++first) {
        const uint64_t base = runs[first].va;
        uint64_t total = runs[first].len;
        vgpu_producer_t blk;
        int last;

        /* extend over every run that begins exactly where the previous ends */
        for (last = first; last + 1 < count; ++last) {
            if (runs[last + 1].va != runs[last].va + runs[last].len) { break; }
            total += runs[last + 1].len;
        }

        if (total < VGPU_REGION_BYTES) { continue; }                     /* too small */
        if (read_producer_block(m, cr3, base, &blk) != 0) { continue; }  /* unreadable */
        if (!vgpup_invariants_hold(&blk)) { continue; }                  /* not a producer */

        *out_region_gva = base;
        *out_hb0        = blk.heartbeat;
        return 0;
    }
    return -1;
}
/* Phase 1: enumerate processes and scan each one's user-AS for the region. The
* win32 dependency is confined here: vmie_win32_mem(v) for the generic gva_*,
* proc_list(v, skip_system=1, ...) to drop PEB-less System/kernel-only entries
* (a producer is never one). On the first process that yields a candidate write
* its proc_cr3 + region base GVA + heartbeat snapshot and return 0; <0 if no
* process yields one or proc_list / the context is not ready. */
/* Phase 1 of discovery: list processes (proc_list with skip_system = 1, at
 * most VGPUP_MAX_PROCS examined) and scan each one's address space via
 * vgpup_scan_user_as_for_region. The first process that yields a candidate
 * determines the outputs: its cr3, the region base GVA and the heartbeat
 * snapshot.
 *
 * Returns 0 on a hit; -1 on bad arguments, when vmie_win32_mem or proc_list
 * fails, or when no process yields a candidate. */
int vgpup_discover_candidate(vmie_win32* v, uint64_t* out_proc_cr3,
                             uint64_t* out_region_gva, uint64_t* out_hb0)
{
    process procs[VGPUP_MAX_PROCS];
    vmie_mem* mem;
    int total, idx;

    if (v == NULL || out_proc_cr3 == NULL || out_region_gva == NULL || out_hb0 == NULL) {
        return -1;
    }

    mem = vmie_win32_mem(v);
    if (mem == NULL) { return -1; }

    total = proc_list(v, /*skip_system=*/1, procs, VGPUP_MAX_PROCS);
    if (total < 0) { return -1; }
    if (total > VGPUP_MAX_PROCS) { total = VGPUP_MAX_PROCS; } /* truncated list */

    for (idx = 0; idx < total; ++idx) {
        uint64_t gva = 0, hb = 0;

        if (vgpup_scan_user_as_for_region(mem, procs[idx].cr3, &gva, &hb) != 0) {
            continue; /* nothing in this process */
        }
        *out_proc_cr3   = procs[idx].cr3;
        *out_region_gva = gva;
        *out_hb0        = hb;
        return 0;
    }
    return -1;
}
/* Phase 2: re-read heartbeat at region_gva under proc_cr3 and report whether it
* advanced. The caller must have waited >= VGPU_HEARTBEAT_PERIOD_MS since phase
* 1. <0 here can also mean the producer process restarted (pages gone). */
/* Phase 2 of discovery: re-read the heartbeat at `region_gva` under `proc_cr3`
 * and report whether it moved FORWARD relative to the phase-1 snapshot `hb0`.
 *
 * Returns 1 if it advanced, 0 if it is unchanged or went backwards, -1 when
 * `m` is NULL or the read fails.
 *
 * The difference is evaluated as a signed 64-bit quantity, so a counter that
 * wrapped still counts as advanced while one that dropped below the snapshot
 * does not. (The previous unsigned `(hb_now - hb0) > 0u` was true for any
 * change at all, including a decrease.) */
int vgpup_confirm_alive(vmie_mem* m, uint64_t proc_cr3,
                        uint64_t region_gva, uint64_t hb0)
{
    uint64_t hb_now;

    if (!m) { return -1; }
    if (gva_read(m, (uintptr_t)proc_cr3,
                 (uintptr_t)region_gva + offsetof(vgpu_producer_t, heartbeat),
                 &hb_now, sizeof hb_now) < 0) {
        return -1;
    }
    return (int64_t)(hb_now - hb0) > 0 ? 1 : 0;
}
/* Run phase-1 discovery and wrap the first candidate in a freshly allocated
 * handle. The handle caches the producer cr3, the region base and the derived
 * control/ring addresses; the frame-id and run-epoch markers start at 0.
 * Returns NULL when discovery finds nothing or allocation fails. */
vgpup_region* vgpup_open(vmie_win32* v)
{
    uint64_t cr3 = 0, base = 0, hb = 0;
    vgpup_region* handle;

    if (vgpup_discover_candidate(v, &cr3, &base, &hb) != 0) {
        return NULL;
    }

    handle = (vgpup_region*)calloc(1, sizeof *handle);
    if (handle == NULL) {
        return NULL;
    }

    handle->proc_cr3      = cr3;
    handle->region_gva    = base;
    handle->ctrl_gva      = base + VGPU_CONTROL_OFFSET;
    handle->ring_gva      = base + VGPU_RING_OFFSET;
    handle->last_frame_id = 0;
    handle->run_epoch     = 0;
    return handle;
}
/* Free the handle. Only the handle's own memory is released here. */
void vgpup_close(vgpup_region* r)
{
    free(r);
}
/* Return the run_epoch stored in the handle, or 0 for a NULL handle. */
uint32_t vgpup_run_epoch(const vgpup_region* r)
{
    if (r == NULL) {
        return 0u;
    }
    return r->run_epoch;
}
@@ -0,0 +1,152 @@
#ifndef VGPU_PERCEPTION_INTERNAL_H
#define VGPU_PERCEPTION_INTERNAL_H
/* perception-internal.h — private consumer-side helpers (NOT a public surface).
*
* Holds the core's private state type, the consumer-side seqlock read discipline
* (the mirror of the producer's atomic-shim accessors, but an independent body —
* we read into local copies via gva_read, never sharing producer code), the
* structural-invariant validator table used by discovery, and the bit unpackers
* for the packed cursor fields. Included only by the perception TUs.
*
* Consumer seqlock discipline: every guest read goes through gva_read into a
* local copy, so the compiler cannot reorder a data read across the seq read —
* each gva_read is an opaque call. We still bump the seq read into its own
* gva_read and treat odd seq / changed seq as "writer in flight → retry".
*/
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "vgpu_stream.h"
#include "memmodel.h"
#include "vgpu_perception.h"
/* Bounded seqlock retry. Producer windows are short (a single slot publish), so
* a small count suffices; spinning longer would be a behavioural timing choice
* (control's job), which does not belong in the sensor. Exhausted → lossy skip. */
#define VGPUP_SEQLOCK_RETRIES 8u
/* Private core state. Owns nothing of the address space — only where the region
* lives (in the producer's user-AS, keyed by proc_cr3) and the last-seen
* monotonic markers for dedup / session-break. */
/* Handle state filled in by vgpup_open: where the region lives plus the
 * last-seen markers. The two derived addresses are computed once at open. */
struct vgpup_region {
uint64_t proc_cr3; /* producer process cr3 — key to its user-AS */
uint64_t region_gva; /* producer-block GVA == region base */
uint64_t ctrl_gva; /* region_gva + VGPU_CONTROL_OFFSET (cached) */
uint64_t ring_gva; /* region_gva + VGPU_RING_OFFSET (cached) */
uint64_t last_frame_id; /* dedup: only frames with a greater id are "fresh" */
uint32_t run_epoch; /* last run_epoch seen via vgpup_read_status */
};
/* Per-cr3 user-AS region scan (discovery steps 3-5 for ONE address space): scan
* gva_regions over [USER_MIN, USER_MAX] under `cr3` for a contiguous RW run of
* >= VGPU_REGION_BYTES, read the producer block at its base, and accept it iff
* the structural-invariant table holds. On the first hit writes the region base
* GVA to *out_region_gva and the heartbeat snapshot to *out_hb0 and returns 0;
* <0 if none is found / a read fails. Pure gva_* (no proc_list / win32) so it is
* testable under a synthetic cr3; vgpup_discover_candidate calls it per process. */
int vgpup_scan_user_as_for_region(vmie_mem* m, uint64_t cr3,
uint64_t* out_region_gva, uint64_t* out_hb0);
/* ---- seqlock primitives -------------------------------------------------- */
/* 1 when the sequence value is odd (a write is in progress), else 0. */
static inline int vgpup_seq_is_writing(uint32_t seq)
{
    return (int)(seq & 1u);
}
/* Read one 32-bit seq field at `gva` into *out under `cr3` (the producer's
* user-AS cr3). 0 on success, <0 on read error. */
/* Read the 32-bit sequence word at `gva` in the address space selected by
 * `cr3` into *out. Returns 0 on success, -1 when gva_read reports an error. */
static inline int vgpup_read_seq(vmie_mem* m, uintptr_t cr3, uint64_t gva,
                                 uint32_t* out)
{
    if (gva_read(m, cr3, (uintptr_t)gva, out, sizeof *out) < 0) {
        return -1;
    }
    return 0;
}
/* ---- packed-field unpackers (cursor line) -------------------------------- */
/* Low 32 bits of the packed cursor position, reinterpreted as signed x. */
static inline int32_t vgpup_cursor_x(uint64_t pos)
{
    const uint32_t low = (uint32_t)pos;
    return (int32_t)low;
}

/* High 32 bits of the packed cursor position, reinterpreted as signed y. */
static inline int32_t vgpup_cursor_y(uint64_t pos)
{
    const uint32_t high = (uint32_t)(pos >> 32);
    return (int32_t)high;
}

/* Low 16 bits of a packed 32-bit pair. */
static inline uint16_t vgpup_lo16(uint32_t v)
{
    return (uint16_t)v;
}

/* High 16 bits of a packed 32-bit pair. */
static inline uint16_t vgpup_hi16(uint32_t v)
{
    return (uint16_t)((v >> 16) & 0xFFFFu);
}
/* ---- structural-invariant validator (discovery, BY TABLE — no magic) ------
*
* Discovery has no magic field in the ABI (the owner forbids one). The
* discriminator is the conjunction of structural invariants derived from the
* ABI bounds in vgpu_stream.h, plus the two-phase heartbeat liveness handled by
* the caller. The predicates run cheap→costly with early exit; each takes a
* decoded producer-block snapshot and returns 1 (holds) / 0 (rejects). */
typedef int (*vgpup_inv_fn)(const vgpu_producer_t* p);
/* Is `latest` a valid slot index, or the legitimate "no frame yet" sentinel?
* latest == NONE is NOT a rejection (a freshly-started region has no frame). */
/* Holds when `latest` is the VGPU_LATEST_NONE sentinel or a slot index below
 * VGPU_SLOT_COUNT. */
static inline int vgpup_inv_latest_in_range(const vgpu_producer_t* p)
{
    if (p->latest == VGPU_LATEST_NONE) { return 1; }
    return p->latest < VGPU_SLOT_COUNT;
}
/* If a frame is published, its slot seq must be even (stable, not mid-write). */
static inline int vgpup_inv_latest_seq_stable(const vgpu_producer_t* p)
{
if (p->latest == VGPU_LATEST_NONE) { return 1; }
return !vgpup_seq_is_writing(p->seq[p->latest]);
}
/* If a frame is published, its descriptor must be a tight BGRA frame within the
* ABI dimension bounds. */
static inline int vgpup_inv_latest_desc_valid(const vgpu_producer_t* p)
{
const vgpu_desc_t* d;
if (p->latest == VGPU_LATEST_NONE) { return 1; }
d = &p->desc[p->latest];
if (d->format != VGPU_FMT_BGRA8888) { return 0; }
if (d->width == 0u || d->width > VGPU_MAX_WIDTH) { return 0; }
if (d->height == 0u || d->height > VGPU_MAX_HEIGHT) { return 0; }
if (d->stride != d->width * 4u) { return 0; }
return 1;
}
/* Cold-line status enum must be in the ABI range. */
/* Holds when the status value does not exceed VGPU_ST_ERROR. */
static inline int vgpup_inv_status_in_range(const vgpu_producer_t* p)
{
    if (p->status > VGPU_ST_ERROR) { return 0; }
    return 1;
}
/* Cold-line backend enum must be in the ABI range. */
/* Holds when the backend value does not exceed VGPU_BK_GDI. */
static inline int vgpup_inv_backend_in_range(const vgpu_producer_t* p)
{
    if (p->backend > VGPU_BK_GDI) { return 0; }
    return 1;
}
/* The producer must advertise the one wire format we consume. */
/* Holds when the BGRA8888 bit is set in the supported_formats mask. */
static inline int vgpup_inv_supports_bgra(const vgpu_producer_t* p)
{
    const unsigned wanted = 1u << VGPU_FMT_BGRA8888;

    return (p->supported_formats & wanted) ? 1 : 0;
}
/* The invariant table, cheap→costly. A candidate is accepted (phase 1) iff
* every predicate holds; the table is the single discriminator, no scattered
* ifs and no hardcoded numbers (all bounds come from vgpu_stream.h). */
/* Predicates evaluated in this order by vgpup_invariants_hold, which stops at
 * the first rejection. vgpup_inv_latest_in_range comes first, ahead of the two
 * predicates that index seq[] / desc[] with `latest`. */
static const vgpup_inv_fn VGPUP_INVARIANTS[] = {
vgpup_inv_latest_in_range,
vgpup_inv_status_in_range,
vgpup_inv_backend_in_range,
vgpup_inv_supports_bgra,
vgpup_inv_latest_seq_stable,
vgpup_inv_latest_desc_valid,
};
/* Number of entries in VGPUP_INVARIANTS. */
#define VGPUP_INVARIANT_COUNT (sizeof(VGPUP_INVARIANTS) / sizeof(VGPUP_INVARIANTS[0]))
/* Run the whole invariant table over a decoded producer-block snapshot.
* Returns 1 if every predicate holds, 0 on the first rejection. */
static inline int vgpup_invariants_hold(const vgpu_producer_t* p)
{
size_t i;
for (i = 0; i < VGPUP_INVARIANT_COUNT; ++i) {
if (!VGPUP_INVARIANTS[i](p)) { return 0; }
}
return 1;
}
#endif /* VGPU_PERCEPTION_INTERNAL_H */
+228
View File
@@ -0,0 +1,228 @@
/* sample.c — consumer seqlock reads: frame sampling, cursor, geometry, status.
*
* Every guest read goes through gva_read into a local copy; we never hold a
* gva_ptr across a seqlock window (it is borrowed and not atomic for re-check).
* The discipline is the mirror of the producer's publish order in atomic-shim.h,
* but an independent body — this is consumer code, not shared producer code.
*
* Lossy by contract: when a writer keeps a window busy past VGPUP_SEQLOCK_RETRIES
* we return 0 (skip), never block. Blocking longer would be behavioural timing
* (control's concern), which has no place in the sensor.
*
* All reads go under r->proc_cr3 (the producer's user-AS cr3, cached in the
* handle at discovery), NOT the System kcr3. A <0 from any gva_read means a page
* is gone — the producer process may have restarted; we propagate <0 and the
* caller re-discovers (see vgpu_perception.h "Two epochs + producer restart").
*/
#include "perception-internal.h"
#include <stdio.h> /* TEMP debug (revert): stderr skip-reason trace */
/* Read one cold-line / packed field at producer offset `off` into dst under the
* producer's user-AS cr3. */
/* Read `n` bytes located `off` bytes past `region_gva`, in the address space
 * selected by `cr3`, into `dst`. Returns 0 on success, -1 when gva_read
 * reports an error. */
static int read_field(vmie_mem* m, uintptr_t cr3, uint64_t region_gva,
                      size_t off, void* dst, size_t n)
{
    const uintptr_t field_gva = (uintptr_t)region_gva + off;

    if (gva_read(m, cr3, field_gva, dst, n) < 0) {
        return -1;
    }
    return 0;
}
/* Copy the most recently published frame into `dst` if it is newer than the
 * last one sampled through this handle.
 *
 *   r    - region handle; last_frame_id is updated on success
 *   m    - memory context used for every guest read
 *   dst  - destination buffer of `cap` bytes
 *   info - receives the frame descriptor and byte count on success
 *
 * Returns  1  a fresh frame was copied and *info filled
 *          0  nothing to deliver: no frame published, `latest` out of range,
 *             frame_id not newer than the last sample, frame larger than a slot
 *             or than `cap`, or all VGPUP_SEQLOCK_RETRIES attempts were spoiled
 *             by a concurrent writer
 *         -1  bad arguments or a guest read failed
 *
 * Each attempt reads `latest`, the slot's seq (must be even), the descriptor,
 * the pixel bytes, then the seq again; the attempt counts only if the seq is
 * unchanged and still even. All reads use r->proc_cr3.
 *
 * The temporary stderr tracing and its static call counter were removed: the
 * counter made the function non-reentrant and the output polluted the stderr
 * of whatever process links this code. */
int vgpup_sample_frame(vgpup_region* r, vmie_mem* m,
                       uint8_t* dst, size_t cap, vgpup_frame_info* info)
{
    unsigned attempt;

    if (!r || !m || !dst || !info) { return -1; }

    for (attempt = 0; attempt < VGPUP_SEQLOCK_RETRIES; ++attempt) {
        uint32_t latest = 0, seq_before = 0, seq_after = 0;
        vgpu_desc_t d;
        uint64_t slot_gva, seq_gva, desc_gva;
        size_t frame_bytes;

        /* latest gets its own read */
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, latest), &latest, sizeof latest) < 0) {
            return -1;
        }
        if (latest == VGPU_LATEST_NONE || latest >= VGPU_SLOT_COUNT) {
            return 0; /* nothing published (or an unusable index) */
        }

        seq_gva  = r->region_gva + offsetof(vgpu_producer_t, seq)
                 + (uint64_t)latest * sizeof(uint32_t);
        desc_gva = r->region_gva + offsetof(vgpu_producer_t, desc)
                 + (uint64_t)latest * sizeof(vgpu_desc_t);

        if (vgpup_read_seq(m, r->proc_cr3, seq_gva, &seq_before) < 0) { return -1; }
        if (vgpup_seq_is_writing(seq_before)) { continue; } /* writer in slot */

        if (gva_read(m, (uintptr_t)r->proc_cr3, (uintptr_t)desc_gva, &d, sizeof d) < 0) {
            return -1;
        }

        /* dedup by frame_id: nothing newer than what we already sampled */
        if (d.frame_id <= r->last_frame_id) { return 0; }

        /* descriptor sanity within the read window (tight BGRA, bounded dims) */
        if (d.format != VGPU_FMT_BGRA8888 || d.stride != d.width * 4u ||
            d.width == 0u || d.width > VGPU_MAX_WIDTH ||
            d.height == 0u || d.height > VGPU_MAX_HEIGHT) {
            continue; /* likely a torn read; retry */
        }

        frame_bytes = (size_t)d.height * d.stride;
        if (frame_bytes > VGPU_SLOT_STRIDE) { return 0; } /* larger than a slot: skip */
        if (frame_bytes > cap) { return 0; }              /* would not fit: lossy drop */

        slot_gva = r->ring_gva + (uint64_t)latest * VGPU_SLOT_STRIDE;
        if (gva_read(m, (uintptr_t)r->proc_cr3, (uintptr_t)slot_gva, dst, frame_bytes) < 0) {
            return -1;
        }

        /* re-check the slot seq: unchanged and still even means the copy is consistent */
        if (vgpup_read_seq(m, r->proc_cr3, seq_gva, &seq_after) < 0) { return -1; }
        if (seq_after != seq_before || vgpup_seq_is_writing(seq_after)) {
            continue; /* the slot was rewritten under us: retry */
        }

        info->desc.width        = d.width;
        info->desc.height       = d.height;
        info->desc.stride       = d.stride;
        info->desc.format       = d.format;
        info->desc.frame_id     = d.frame_id;
        info->desc.timestamp_ns = d.timestamp_ns;
        info->bytes             = frame_bytes;
        r->last_frame_id        = d.frame_id;
        return 1;
    }
    return 0; /* every attempt was spoiled by a writer: skip */
}
/* Take a consistent snapshot of the producer's cursor line.
 *
 * The cursor fields are bracketed by two reads of cursor_seq: the snapshot is
 * accepted only when both reads return the same value and neither is flagged
 * by vgpup_seq_is_writing(). Anything else discards the attempt and retries.
 *
 * Returns 1 with *out filled on a consistent snapshot, 0 when all
 * VGPUP_SEQLOCK_RETRIES attempts were rejected, and -1 on a NULL argument or a
 * failed guest read. */
int vgpup_read_cursor(vgpup_region* r, vmie_mem* m, vgpup_cursor* out)
{
    unsigned tries_left;

    if (!(r && m && out)) { return -1; }

    for (tries_left = VGPUP_SEQLOCK_RETRIES; tries_left > 0; --tries_left) {
        uint32_t seq_open = 0, seq_close = 0;
        uint32_t shown = 0, hot_packed = 0, glyph_packed = 0, cursor_id = 0;
        uint64_t pos_packed = 0;

        /* opening sequence read: a write in flight voids this attempt */
        if (vgpup_read_seq(m, r->proc_cr3,
                           r->region_gva + offsetof(vgpu_producer_t, cursor_seq),
                           &seq_open) < 0) {
            return -1;
        }
        if (vgpup_seq_is_writing(seq_open)) { continue; }

        /* field reads, one guest read each; the first failure aborts */
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, cursor_visible), &shown, sizeof shown) < 0) {
            return -1;
        }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, cursor_pos), &pos_packed, sizeof pos_packed) < 0) {
            return -1;
        }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, cursor_hotspot), &hot_packed, sizeof hot_packed) < 0) {
            return -1;
        }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, cursor_glyph), &glyph_packed, sizeof glyph_packed) < 0) {
            return -1;
        }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, cursor_id), &cursor_id, sizeof cursor_id) < 0) {
            return -1;
        }

        /* closing sequence read: must match the opening one and be stable */
        if (vgpup_read_seq(m, r->proc_cr3,
                           r->region_gva + offsetof(vgpu_producer_t, cursor_seq),
                           &seq_close) < 0) {
            return -1;
        }
        if (seq_close != seq_open || vgpup_seq_is_writing(seq_close)) { continue; }

        /* unpack the packed words into the caller's struct */
        out->seq     = seq_close;
        out->visible = shown;
        out->x       = vgpup_cursor_x(pos_packed);
        out->y       = vgpup_cursor_y(pos_packed);
        out->hot_x   = vgpup_lo16(hot_packed);
        out->hot_y   = vgpup_hi16(hot_packed);
        out->glyph_w = vgpup_lo16(glyph_packed);
        out->glyph_h = vgpup_hi16(glyph_packed);
        out->id      = cursor_id;
        return 1;
    }
    return 0;
}
/* Take a consistent snapshot of the producer's geometry fields.
 *
 * Same bracketed-read scheme as the cursor reader, keyed on geom_seq: the
 * sequence word is read before and after the eight field reads, and the
 * snapshot is kept only when both sequence reads agree and neither is flagged
 * by vgpup_seq_is_writing().
 *
 * Returns 1 with *out filled, 0 when all VGPUP_SEQLOCK_RETRIES attempts were
 * rejected, and -1 on a NULL argument or a failed guest read. */
int vgpup_read_geometry(vgpup_region* r, vmie_mem* m, vgpup_geometry* out)
{
    unsigned tries_left;

    if (!(r && m && out)) { return -1; }

    for (tries_left = VGPUP_SEQLOCK_RETRIES; tries_left > 0; --tries_left) {
        uint32_t seq_open = 0, seq_close = 0;
        int32_t  vx = 0, vy = 0, cx = 0, cy = 0;
        uint32_t vw = 0, vh = 0, dots = 0, rate_mhz = 0;

        if (vgpup_read_seq(m, r->proc_cr3,
                           r->region_gva + offsetof(vgpu_producer_t, geom_seq),
                           &seq_open) < 0) {
            return -1;
        }
        if (vgpup_seq_is_writing(seq_open)) { continue; }

        /* field reads, one guest read each; the first failure aborts */
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, virt_x), &vx, sizeof vx) < 0) { return -1; }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, virt_y), &vy, sizeof vy) < 0) { return -1; }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, virt_w), &vw, sizeof vw) < 0) { return -1; }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, virt_h), &vh, sizeof vh) < 0) { return -1; }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, cap_x), &cx, sizeof cx) < 0) { return -1; }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, cap_y), &cy, sizeof cy) < 0) { return -1; }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, dpi), &dots, sizeof dots) < 0) { return -1; }
        if (read_field(m, r->proc_cr3, r->region_gva,
                       offsetof(vgpu_producer_t, refresh_mhz), &rate_mhz, sizeof rate_mhz) < 0) { return -1; }

        if (vgpup_read_seq(m, r->proc_cr3,
                           r->region_gva + offsetof(vgpu_producer_t, geom_seq),
                           &seq_close) < 0) {
            return -1;
        }
        if (seq_close != seq_open || vgpup_seq_is_writing(seq_close)) { continue; }

        out->virt_x      = vx;
        out->virt_y      = vy;
        out->virt_w      = vw;
        out->virt_h      = vh;
        out->cap_x       = cx;
        out->cap_y       = cy;
        out->dpi         = dots;
        out->refresh_mhz = rate_mhz;
        return 1;
    }
    return 0;
}
/* Copy the producer's status fields into *out.
 *
 * These fields carry no sequence guard: the whole producer block is fetched
 * with a single gva_read and the status fields are picked out of that local
 * copy. The run_epoch seen is also stored back into the region handle.
 *
 * Returns 0 on success (note: not 1, unlike the cursor/geometry readers) and
 * -1 on a NULL argument or a failed guest read. */
int vgpup_read_status(vgpup_region* r, vmie_mem* m, vgpup_status* out)
{
    vgpu_producer_t snap;

    if (!(r && m && out)) { return -1; }

    if (gva_read(m, (uintptr_t)r->proc_cr3, (uintptr_t)r->region_gva,
                 &snap, sizeof snap) < 0) {
        return -1;
    }

    out->heartbeat         = snap.heartbeat;
    out->run_epoch         = snap.run_epoch;
    out->status            = snap.status;
    out->backend           = snap.backend;
    out->error_code        = snap.error_code;
    out->applied_fps       = snap.applied_fps;
    out->supported_formats = snap.supported_formats;
    out->ctrl_ack          = snap.ctrl_ack;
    out->full_frame_ack    = snap.full_frame_ack;
    out->content_change_ns = snap.content_change_ns;

    /* remember the epoch on the handle (feeds the session-break detector) */
    r->run_epoch = snap.run_epoch;
    return 0;
}
+121
View File
@@ -0,0 +1,121 @@
/* test_daemoncfg.c — vmsigd config parser + admission policy (WS4). Config parse is pure;
* admission is exercised against a live discovery (fake probe + recording sink) so the
* vmid->endpoint resolution at connect time is verified end-to-end without armed adapters. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "discovery.h"
#include "vmsigd.h"
#include "vmsigd_admission.h"
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
/* Sticky failure flag: set to 1 by any failed CHECK and turned into the exit status by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when cond is false, print " FAIL: <msg>" to stdout and set g_fail.
 * Execution continues after a failure, so later checks still run. */
#define CHECK(cond, msg) do { if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } } while (0)
/* Parse a representative config text and verify the result: the overridden
 * global, a default that the text does not mention, and every field of the two
 * grant stanzas (uid, vmid selection, capability mask, arbitration priority). */
static void test_config(void) {
    static const char text[] =
        "# vmsigd config\n"
        "socket = /run/foo.sock\n"
        "watch = /dev/shm/vmsig\n"
        "pve_conf = /etc/pve/qemu-server\n"
        "\n"
        "[grant uid=0]\n"
        "vmids = *\n"
        "caps = observe,input,memctx,roster\n"
        "arb_prio = 100\n"
        "[grant uid=1000]\n"
        "vmids = 101, 102\n"
        "caps = observe\n"
        "arb_prio = 50\n";
    vmsigd_config cfg;

    printf("test_config\n");
    vmsigd_config_defaults(&cfg);
    CHECK(vmsigd_config_parse_buf(&cfg, text) == 0, "parse ok");

    /* globals: one overridden, one left at its default */
    CHECK(strcmp(cfg.socket, "/run/foo.sock") == 0, "global socket override");
    CHECK(strcmp(cfg.qmp_dir, "/var/run/qemu-server") == 0, "default qmp_dir retained");
    CHECK(cfg.ngrants == 2, "two grant stanzas");

    /* first stanza: uid 0, wildcard vmids, all four caps */
    CHECK(cfg.grants[0].uid == 0 && cfg.grants[0].all_vms, "grant0 uid=0 vmids=*");
    CHECK(cfg.grants[0].cap_mask ==
              (VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT | VMSIG_CAP_MEMCTX | VMSIG_CAP_ROSTER),
          "grant0 caps parsed");
    CHECK(cfg.grants[0].arb_prio == 100, "grant0 arb_prio");

    /* second stanza: uid 1000, explicit vmid list, observe only */
    CHECK(cfg.grants[1].uid == 1000 && !cfg.grants[1].all_vms && cfg.grants[1].nvmids == 2 &&
              cfg.grants[1].vmids[0] == 101 && cfg.grants[1].vmids[1] == 102,
          "grant1 vmid list");
    CHECK(cfg.grants[1].cap_mask == VMSIG_CAP_OBSERVE, "grant1 caps");
    CHECK(cfg.grants[1].arb_prio == 50, "grant1 arb_prio");
}
/* ---- fake probe + recording sink (attach vmids to slots without armed adapters) ---- */
/* Placeholder probe state: fp_config/fp_live in this file ignore their probe argument,
 * so this only provides an object to point the probe's user-data slot at. */
typedef struct { int dummy; } fakeprobe;
/* Fake config probe: every vmid is reported as acceptable (ok and share_on set)
 * and named "win-<vmid>". The probe argument is unused. Always returns 0. */
static int fp_config(const vmsig_host_probe* p, uint32_t vmid, vmsig_host_facts* out) {
    (void)p;

    memset(out, 0, sizeof *out);
    snprintf(out->name, sizeof out->name, "win-%u", vmid);
    out->vmid     = vmid;
    out->ok       = 1;
    out->share_on = 1;
    return 0;
}
/* Fake live probe: always reports a running VM with no retry requested and a
 * fixed `low` value. The probe argument is unused. Always returns 0. */
static int fp_live(const vmsig_host_probe* p, vmsig_host_facts* io) {
    (void)p;

    io->low      = 0x80000000ull;
    io->vm_state = VMSIG_VM_RUNNING;
    io->ok       = 1;
    io->retry    = 0;
    return 0;
}
/* No-op attach sink: accepts every attach (returns 0) and records nothing. */
static int rs_attach(void* ud, vmsig_core* core, uint32_t vmid, uint32_t ep,
                     const vmsig_host_facts* f) {
    (void)f;
    (void)ep;
    (void)vmid;
    (void)core;
    (void)ud;
    return 0;
}
/* No-op detach sink: ignores every argument. */
static void rs_detach(void* ud, vmsig_core* core, uint32_t vmid, uint32_t ep) {
    (void)ep;
    (void)vmid;
    (void)core;
    (void)ud;
}
/* Admission policy against a live discovery instance.
 *
 * Two vmids are fed into discovery (expected to land on endpoints 0 and 1), a
 * three-grant config is parsed, and vmsigd_policy() is queried for four uids:
 *   - uid 0    (vmids=*)       -> full endpoint mask, all four caps
 *   - uid 1000 (vmids=101,102) -> endpoint bits 0 and 1, observe only
 *   - uid 1001 (vmids=999)     -> no endpoint bit (vmid not attached)
 *   - uid 4242 (no grant)      -> empty grant
 *
 * If the temp dir or the discovery instance cannot be created the test records
 * the failure and stops, rather than driving the remaining calls with a NULL
 * discovery handle. The config parse result is checked so a parse regression is
 * reported as such instead of as a cascade of policy mismatches. */
static void test_admission(void) {
    printf("test_admission\n");
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    fakeprobe fpd = { 0 };
    vmsig_host_probe probe = { fp_config, fp_live, &fpd };
    vmsig_discovery_sink sink = { rs_attach, rs_detach, NULL };

    char dir[] = "/tmp/vmsig_adm.XXXXXX";
    const int have_dir = (mkdtemp(dir) != NULL);
    CHECK(have_dir, "temp dir");

    vmsig_discovery* disc =
        have_dir ? vmsig_discovery_new(core, dir, NULL, NULL, NULL, &probe, &sink) : NULL;
    CHECK(disc != NULL, "discovery created");
    if (disc == NULL) {
        /* nothing below is meaningful without a discovery instance */
        vmsig_core_free(core);
        vmsig_ctx_free(ctx);
        if (have_dir) rmdir(dir);
        return;
    }

    vmsig_discovery_feed(disc, 101, 1); /* -> ep0 */
    vmsig_discovery_feed(disc, 102, 1); /* -> ep1 */

    vmsigd_config c; vmsigd_config_defaults(&c);
    CHECK(vmsigd_config_parse_buf(&c,
              "[grant uid=0]\nvmids=*\ncaps=observe,input,memctx,roster\narb_prio=100\n"
              "[grant uid=1000]\nvmids=101,102\ncaps=observe\narb_prio=50\n"
              "[grant uid=1001]\nvmids=999\ncaps=observe\narb_prio=10\n") == 0,
          "admission config parses");
    vmsigd_admission adm = { &c, disc };

    /* uid 0: all_vms => full mask */
    vmsig_grant g0 = vmsigd_policy(0, 0, &adm);
    CHECK(g0.endpoint_mask == ~0ull, "uid0 (vmids=*) covers all endpoints");
    CHECK(g0.cap_mask == (VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT | VMSIG_CAP_MEMCTX | VMSIG_CAP_ROSTER),
          "uid0 caps");
    CHECK(g0.arb_prio == 100 && g0.principal == 0, "uid0 prio/principal");

    /* uid 1000: vmids 101,102 attached at ep0,ep1 => bits 0,1 */
    vmsig_grant g1 = vmsigd_policy(1000, 0, &adm);
    CHECK(g1.endpoint_mask == ((1ull << 0) | (1ull << 1)), "uid1000 resolved to ep0,ep1 bits");
    CHECK(g1.cap_mask == VMSIG_CAP_OBSERVE && g1.arb_prio == 50, "uid1000 caps/prio");

    /* uid 1001: vmid 999 not attached => no bits (peer learns via roster / reconnect) */
    vmsig_grant g2 = vmsigd_policy(1001, 0, &adm);
    CHECK(g2.endpoint_mask == 0, "uid1001 unbound vmid => no endpoint bit yet");

    /* unknown uid: empty grant => reject */
    vmsig_grant g3 = vmsigd_policy(4242, 0, &adm);
    CHECK(g3.cap_mask == 0 && g3.endpoint_mask == 0, "unknown uid => empty grant (reject)");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
    rmdir(dir);
}
/* Run both daemon-config tests, print the overall verdict, and exit 1 if any CHECK failed. */
int main(void) {
    const char* verdict;

    test_config();
    test_admission();

    verdict = g_fail ? "FAIL" : "PASS";
    printf("daemoncfg tests: %s\n", verdict);
    return g_fail != 0;
}
+198
View File
@@ -0,0 +1,198 @@
/* test_discovery.c — discovery state machine (WS3), driven deterministically via the TEST
* hooks (no inotify/timer/threads). A fake host-probe controls config/live verdicts; a
* recording sink captures attach/detach; a CAP_ROSTER subscriber captures the published
* roster. Covers: appear->attach(slot+roster), duplicate, gone->detach(roster+free), bit
* reuse, config-fail drop, stale drop, and the retry-then-attach path. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "vmsig_roster.h"
#include "discovery.h" /* pulls host_probe.h */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
/* Sticky failure flag: set to 1 by any failed CHECK and turned into the exit status by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when cond is false, print " FAIL: <msg>" to stdout and set g_fail.
 * Execution continues after a failure, so later checks still run. */
#define CHECK(cond, msg) do { if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } } while (0)
/* ---- fake host-probe ---- */
/* Knobs and counters for the fake host probe; reached through the probe's `ud` pointer.
 *   config_ok  - copied into both `ok` and `share_on` by fp_config
 *   live_mode  - selects fp_live's verdict (values listed below)
 *   live_calls - incremented on every fp_live call */
typedef struct { int config_ok; int live_mode; int live_calls; } fakeprobe;
/* live_mode: 0=ok, 1=stale(dead, no retry), 2=retry-once-then-ok */
/* Fake config probe: fills *out with synthetic facts for `vmid` (name, RAM and
 * QMP paths, a 4 GiB configured RAM size) and mirrors the test's config_ok knob
 * into both `share_on` and `ok`. Always returns 0. */
static int fp_config(const vmsig_host_probe* p, uint32_t vmid, vmsig_host_facts* out) {
    const fakeprobe* knobs = p->ud;
    const int verdict = knobs->config_ok;

    memset(out, 0, sizeof *out);

    out->vmid          = vmid;
    out->cfg_ram_bytes = 4ull << 30;
    out->ok            = verdict;
    out->share_on      = verdict;

    snprintf(out->qmp_path, sizeof out->qmp_path, "/tmp/%u.qmp", vmid);
    snprintf(out->ram_path, sizeof out->ram_path, "/tmp/vm-%u-ram", vmid);
    snprintf(out->name, sizeof out->name, "win-%u", vmid);
    return 0;
}
/* Fake live probe. Counts the call, then answers according to live_mode:
 *   1 -> not ok, VM state SHUTDOWN, no retry
 *   2 -> on the first counted call only: not ok, retry requested
 *   otherwise -> ok, VM state RUNNING, fixed `low`
 * Always returns 0. */
static int fp_live(const vmsig_host_probe* p, vmsig_host_facts* io) {
    fakeprobe* knobs = p->ud;
    int nth_call;

    io->retry = 0;
    nth_call = ++knobs->live_calls;

    if (knobs->live_mode == 1) {
        io->ok = 0;
        io->vm_state = VMSIG_VM_SHUTDOWN;
        return 0;
    }
    if (knobs->live_mode == 2 && nth_call == 1) {
        io->retry = 1;
        io->ok = 0;
        return 0;
    }

    io->ok = 1;
    io->vm_state = VMSIG_VM_RUNNING;
    io->low = 0x80000000ull;
    return 0;
}
/* ---- recording sink ---- */
/* Recording sink state, filled in by rs_attach/rs_detach. */
typedef struct {
int n_attach, n_detach; /* number of attach / detach callbacks seen */
uint32_t la_vmid, la_ep, ld_vmid, ld_ep; /* vmid+endpoint of the last attach (la_*) and last detach (ld_*) */
} recsink;
/* Recording attach sink: counts the call and remembers which vmid landed on
 * which endpoint. Always accepts (returns 0). */
static int rs_attach(void* ud, vmsig_core* core, uint32_t vmid, uint32_t ep,
                     const vmsig_host_facts* f) {
    recsink* log = ud;

    (void)f;
    (void)core;

    log->la_ep = ep;
    log->la_vmid = vmid;
    log->n_attach += 1;
    return 0;
}
/* Recording detach sink: counts the call and remembers the vmid/endpoint detached. */
static void rs_detach(void* ud, vmsig_core* core, uint32_t vmid, uint32_t ep) {
    recsink* log = ud;

    (void)core;

    log->ld_ep = ep;
    log->ld_vmid = vmid;
    log->n_detach += 1;
}
/* ---- roster subscriber ---- */
/* Roster observer state, filled in by rob_on_ev: counts of ATTACH/DETACH roster events
 * plus the vmid, endpoint, action and VM name carried by the most recent roster event. */
typedef struct { int attach, detach; uint32_t last_vmid, last_ep, last_action; char last_name[32]; } robs;
/* Event callback for the roster subscriber. Non-roster events are ignored; for a
 * roster event the payload is read from the event's inline bytes, the "last seen"
 * fields are refreshed, and the matching ATTACH/DETACH counter is bumped.
 * Always returns 0. */
static int rob_on_ev(void* u, const vmsig_event* ev) {
    robs* seen = u;
    const vmsig_roster* entry;

    if (ev->kind != VMSIG_EV_ROSTER) {
        return 0;
    }
    entry = (const vmsig_roster*)ev->inln;

    if (entry->action == VMSIG_ROSTER_ATTACH) {
        seen->attach++;
    } else if (entry->action == VMSIG_ROSTER_DETACH) {
        seen->detach++;
    }

    snprintf(seen->last_name, sizeof seen->last_name, "%s", entry->name);
    seen->last_action = entry->action;
    seen->last_ep = ev->endpoint;
    seen->last_vmid = entry->vmid;
    return 0;
}
/* Discovery state machine, driven step by step through vmsig_discovery_feed()
 * and vmsig_discovery_tick().
 *
 * A roster subscriber (robs / rob_on_ev), a fake host probe (fakeprobe) and a
 * recording sink (recsink) observe the effects. Steps, in order:
 *   1) appear 101          -> attach on ep0 + roster ATTACH carrying the VM name
 *   2) appear 102          -> attach on ep1; a duplicate appear of 101 is ignored
 *   3) gone 101            -> detach + roster DETACH, slot lookup returns -1
 *   4) appear 103          -> reuses the freed ep0
 *   5) config probe fails  -> no attach
 *   6) live probe stale    -> no attach
 *   7) live probe retries  -> no attach on the first probe, attach after a tick
 *
 * If the temp dir or the discovery instance cannot be created, the failure is
 * recorded and the test stops; the steps are never run against a NULL handle. */
static void test_discovery(void) {
    printf("test_discovery\n");
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);

    /* roster subscriber: an in-process control holding only CAP_ROSTER */
    robs ro; memset(&ro, 0, sizeof ro);
    vmsig_inproc_cfg cfg; memset(&cfg, 0, sizeof cfg);
    cfg.on_event = rob_on_ev; cfg.user = &ro;
    void* ctl = vmsig_inproc_control_new(&cfg);
    vmsig_grant g; memset(&g, 0, sizeof g);
    g.principal = 1; g.endpoint_mask = ~0ull; g.source_mask = 0xFFFFFFFFu; g.cap_mask = VMSIG_CAP_ROSTER;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), ctl, &g);

    fakeprobe fp; memset(&fp, 0, sizeof fp); fp.config_ok = 1; fp.live_mode = 0;
    vmsig_host_probe probe = { fp_config, fp_live, &fp };
    recsink rs; memset(&rs, 0, sizeof rs);
    vmsig_discovery_sink sink = { rs_attach, rs_detach, &rs };

    char dir[] = "/tmp/vmsig_disc.XXXXXX";
    const int have_dir = (mkdtemp(dir) != NULL);
    CHECK(have_dir, "temp watch dir created");
    vmsig_discovery* d =
        have_dir ? vmsig_discovery_new(core, dir, NULL, NULL, NULL, &probe, &sink) : NULL;
    CHECK(d != NULL, "discovery created");
    if (d == NULL) {
        /* nothing below is meaningful without a discovery instance */
        vmsig_core_free(core);
        vmsig_ctx_free(ctx);
        if (have_dir) rmdir(dir);
        return;
    }

    /* 1) appear 101 -> attach ep0 + roster ATTACH */
    vmsig_discovery_feed(d, 101, 1);
    CHECK(rs.n_attach == 1 && rs.la_vmid == 101 && rs.la_ep == 0, "101 attached on ep0 (sink)");
    CHECK(ro.attach == 1 && ro.last_vmid == 101 && ro.last_ep == 0 &&
          ro.last_action == VMSIG_ROSTER_ATTACH, "roster ATTACH 101 ep0");
    CHECK(strcmp(ro.last_name, "win-101") == 0, "roster carried the VM name");
    CHECK(vmsig_discovery_slot_of_vmid(d, 101) == 0, "slot_of_vmid(101)==0");

    /* 2) appear 102 -> ep1 */
    vmsig_discovery_feed(d, 102, 1);
    CHECK(rs.n_attach == 2 && rs.la_vmid == 102 && rs.la_ep == 1, "102 attached on ep1");

    /* duplicate appear 101 -> ignored */
    vmsig_discovery_feed(d, 101, 1);
    CHECK(rs.n_attach == 2, "duplicate appear ignored");

    /* 3) gone 101 -> detach + roster DETACH + slot freed */
    vmsig_discovery_feed(d, 101, 0);
    CHECK(rs.n_detach == 1 && rs.ld_vmid == 101 && rs.ld_ep == 0, "101 detached (sink)");
    CHECK(ro.detach == 1 && ro.last_action == VMSIG_ROSTER_DETACH && ro.last_vmid == 101,
          "roster DETACH 101");
    CHECK(vmsig_discovery_slot_of_vmid(d, 101) == -1, "slot freed after detach");

    /* 4) appear 103 -> reuse freed ep0 */
    vmsig_discovery_feed(d, 103, 1);
    CHECK(rs.la_ep == 0 && rs.la_vmid == 103, "103 reuses freed ep0 (lowest free)");

    /* 5) config-fail -> drop */
    fp.config_ok = 0;
    int n = rs.n_attach;
    vmsig_discovery_feed(d, 999, 1);
    CHECK(rs.n_attach == n, "config-fail vmid dropped (no attach)");
    fp.config_ok = 1;

    /* 6) stale (file present, VM dead) -> drop */
    fp.live_mode = 1;
    n = rs.n_attach;
    vmsig_discovery_feed(d, 105, 1);
    CHECK(rs.n_attach == n, "stale VM dropped (no attach)");
    fp.live_mode = 0;

    /* 7) retry-then-ok: first probe retries, tick re-probes and attaches */
    fp.live_mode = 2; fp.live_calls = 0;
    n = rs.n_attach;
    vmsig_discovery_feed(d, 104, 1);
    CHECK(rs.n_attach == n, "retry: not attached on first probe");
    CHECK(vmsig_discovery_slot_of_vmid(d, 104) == -1, "retry: no slot yet");
    vmsig_discovery_tick(d);
    CHECK(rs.n_attach == n + 1 && rs.la_vmid == 104, "retry: attached after re-probe");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
    rmdir(dir);
}
/* Bootstrap path: files already present when discovery starts are picked up by the REAL
* readdir + parse_vmid scan (not the test feed hook); junk names are ignored. */
/* Create (or open, if it already exists) the file dir/name with mode 0600 and
 * close it again. Best effort: an open failure is silently ignored. */
static void touch(const char* dir, const char* name) {
    char full[512];
    int handle;

    snprintf(full, sizeof full, "%s/%s", dir, name);
    handle = open(full, O_CREAT | O_WRONLY | O_CLOEXEC, 0600);
    if (handle < 0) {
        return;
    }
    close(handle);
}
/* Unlink the file dir/name. Best effort: the unlink result is not inspected. */
static void rm(const char* dir, const char* name) {
    char full[512];

    snprintf(full, sizeof full, "%s/%s", dir, name);
    unlink(full);
}
/* Bootstrap scan: files that already exist in the watch directory when
 * discovery is created must be picked up without any feed call.
 *
 * Three files are planted: one valid trigger name ("vm-200-ram") and two names
 * that must be ignored. The test expects exactly one attach, for vmid 200, on
 * endpoint 0.
 *
 * If the temp dir cannot be created the test records the failure and stops
 * (there is no directory to plant files in or to remove). If discovery creation
 * fails, the slot lookup is skipped rather than called with a NULL handle; the
 * planted files and the directory are still cleaned up. */
static void test_bootstrap(void) {
    printf("test_bootstrap\n");
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);
    fakeprobe fp; memset(&fp, 0, sizeof fp); fp.config_ok = 1; fp.live_mode = 0;
    vmsig_host_probe probe = { fp_config, fp_live, &fp };
    recsink rs; memset(&rs, 0, sizeof rs);
    vmsig_discovery_sink sink = { rs_attach, rs_detach, &rs };

    char dir[] = "/tmp/vmsig_boot.XXXXXX";
    if (mkdtemp(dir) == NULL) {
        CHECK(0, "temp dir");
        vmsig_core_free(core);
        vmsig_ctx_free(ctx);
        return;
    }

    touch(dir, "vm-200-ram"); /* valid trigger */
    touch(dir, "notavm");     /* ignored */
    touch(dir, "vm-bad-ram"); /* non-numeric => ignored */

    vmsig_discovery* d = vmsig_discovery_new(core, dir, NULL, NULL, NULL, &probe, &sink);
    CHECK(d != NULL, "discovery created");
    CHECK(rs.n_attach == 1 && rs.la_vmid == 200, "bootstrap scan attached ONLY vm-200 (real parse)");
    if (d != NULL) {
        CHECK(vmsig_discovery_slot_of_vmid(d, 200) == 0, "200 pinned to ep0 via bootstrap");
    }

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
    rm(dir, "vm-200-ram"); rm(dir, "notavm"); rm(dir, "vm-bad-ram"); rmdir(dir);
}
/* Run both discovery tests, print the overall verdict, and exit 1 if any CHECK failed. */
int main(void) {
    const char* verdict;

    test_discovery();
    test_bootstrap();

    verdict = g_fail ? "FAIL" : "PASS";
    printf("discovery tests: %s\n", verdict);
    return g_fail != 0;
}
+100
View File
@@ -0,0 +1,100 @@
/* test_dynep.c — runtime hot-plug of a VM endpoint (WS1): a discovery-style consumer
* attaches an adapter trio, then detaches it and re-attaches it on the SAME endpoint
* while the loop is running. Proves:
* - vmsig_core_add_adapter works AFTER vmsig_core_run started (from a loop-thread cb);
* - vmsig_core_detach_endpoint tears the trio down (deferred reap) and bumps the epoch,
* broadcasting MEMCTX_INVALIDATED so a holder settles;
* - re-attaching the same endpoint publishes MEMCTX at the strictly-higher epoch.
* All driven from the holder callbacks, which run on the loop thread (single-threaded
* with the pumps), so attach/detach are issued mid-loop exactly as discovery will. */
#define _GNU_SOURCE
#include "vmsig.h"
#include <stdio.h>
#include <string.h>
#include <stdint.h>
/* Sticky failure flag: set to 1 by any failed CHECK and turned into the exit status by main(). */
static int g_fail = 0;
/* CHECK(cond, msg): when cond is false, print " FAIL: <msg>" to stdout and set g_fail.
 * Execution continues after a failure, so later checks still run. */
#define CHECK(cond, msg) do { \
if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } \
} while (0)
/* Shared state for the hot-plug test: written by the dyn_on_ev / dyn_on_memctx
 * callbacks and inspected by test_dynep after the loop returns. */
typedef struct {
vmsig_core* core; /* core the callbacks attach to / detach from / stop */
uint32_t ep; /* endpoint being detached and re-attached */
int memctx; /* MEMCTX received */
int invalidated; /* MEMCTX_INVALIDATED received */
uint32_t last_epoch; /* epoch of the last MEMCTX */
int phase; /* 0: pre-detach, 1: detached, 2: reattached */
int ticks; /* vmhost watchdog ticks (failsafe) */
} dyn;
/* Re-attach the trio (vmhost watchdog + memctx) on the same endpoint, mid-loop, from the
 * INVALIDATED delivery — exactly the discovery "file reappeared" path.
 *
 * Both adds are always attempted. A negative return from vmsig_core_add_adapter is
 * recorded as a test failure and the loop is stopped at once: the run cannot reach its
 * normal stop condition without the re-attached adapters, and the tick-count failsafe in
 * dyn_on_ev presumably depends on the vmhost adapter being present (NOTE(review):
 * confirm that VM_LIFECYCLE ticks come only from vmhost). */
static void reattach_trio(dyn* d) {
    const int host_ok = vmsig_core_add_adapter(d->core, vmsig_vmhost_ops(), NULL, d->ep) >= 0;
    const int mem_ok  = vmsig_core_add_adapter(d->core, vmsig_memctx_ops(), NULL, d->ep) >= 0;

    CHECK(host_ok, "re-add vmhost mid-loop");
    CHECK(mem_ok, "re-add memctx mid-loop");
    if (!host_ok || !mem_ok) {
        vmsig_core_stop(d->core);
    }
}
/* Generic event callback for the hot-plug test.
 *   VM_LIFECYCLE       -> count a tick
 *   MEMCTX_INVALIDATED -> count it; if a detach was pending (phase 1), move to
 *                         phase 2 and re-attach the trio on the spot
 * After handling the event, more than 60 ticks in total stops the loop (failsafe).
 * Always returns 0. */
static int dyn_on_ev(void* u, const vmsig_event* ev) {
    dyn* st = u;

    if (ev->kind == VMSIG_EV_VM_LIFECYCLE) {
        st->ticks++;
    } else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) {
        st->invalidated++;
        if (st->phase == 1) {
            st->phase = 2;
            reattach_trio(st);
        }
    }

    if (st->ticks > 60) {
        vmsig_core_stop(st->core); /* failsafe */
    }
    return 0;
}
/* MEMCTX callback for the hot-plug test. Counts the delivery and records its
 * epoch, then drives the phases:
 *   phase 0 and epoch 0   -> enter phase 1 and request the endpoint detach
 *   phase 2 and epoch >= 1 -> the re-attached context has been seen; stop the loop
 * The fd argument is not used here. Always returns 0. */
static int dyn_on_memctx(void* u, const vmsig_event* ev, int fd) {
    dyn* st = u;
    const vmsig_memctx* ctxinfo = (const vmsig_memctx*)ev->inln;

    (void)fd; /* core closes the borrowed RO-fd after this call */

    st->last_epoch = ctxinfo->epoch;
    st->memctx++;

    if (st->phase == 0 && ctxinfo->epoch == 0) {
        st->phase = 1;
        vmsig_core_detach_endpoint(st->core, st->ep); /* deferred reap -> bump -> INVALIDATED */
        return 0;
    }
    if (st->phase == 2 && ctxinfo->epoch >= 1) {
        vmsig_core_stop(st->core); /* re-attached context observed: done */
    }
    return 0;
}
/* Hot-plug scenario: attach vmhost + memctx on endpoint 0 before the loop
 * starts, run the loop (the callbacks detach and re-attach the endpoint from
 * inside it), then verify what the callbacks recorded: at least two MEMCTX
 * deliveries, at least one MEMCTX_INVALIDATED, a final epoch of 1 or more, and
 * the state machine ending in phase 2. */
static void test_dynep(void) {
    printf("test_dynep\n");
    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);

    dyn state;
    memset(&state, 0, sizeof state);
    state.ep = 0;
    state.core = core;

    /* in-process holder with both callbacks wired to the shared state */
    vmsig_inproc_cfg holder_cfg;
    memset(&holder_cfg, 0, sizeof holder_cfg);
    holder_cfg.user = &state;
    holder_cfg.on_memctx = dyn_on_memctx;
    holder_cfg.on_event = dyn_on_ev;
    void* holder = vmsig_inproc_control_new(&holder_cfg);

    vmsig_grant grant;
    memset(&grant, 0, sizeof grant);
    grant.cap_mask = VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE;
    grant.source_mask = 0xFFFFFFFFu;
    grant.endpoint_mask = 1ull << 0;
    grant.principal = 1;
    vmsig_core_add_control(core, vmsig_inproc_control_ops(), holder, &grant);

    /* initial trio on ep0, pre-run (vmhost watchdog ticks the loop + memctx publishes). */
    CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost ep0");
    CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), NULL, 0) >= 0, "add memctx ep0");

    vmsig_core_run(core);

    CHECK(state.memctx >= 2, "MEMCTX received before AND after re-attach");
    CHECK(state.invalidated >= 1, "MEMCTX_INVALIDATED delivered on detach");
    CHECK(state.last_epoch >= 1, "epoch advanced across detach/re-attach");
    CHECK(state.phase == 2, "reached the re-attached phase");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* Run the hot-plug test, print the overall verdict, and exit 1 if any CHECK failed. */
int main(void) {
    const char* verdict;

    test_dynep();

    verdict = g_fail ? "FAIL" : "PASS";
    printf("dynep tests: %s\n", verdict);
    return g_fail != 0;
}
+66
View File
@@ -16,6 +16,7 @@
#include "vmsig.h"
#include "vmsig_socket.h" /* vmsig_wire, vmsig_socket_attach */
#include "core_internal.h" /* core_emit_up (synthetic lifecycle injection) */
#include "memctx.h" /* vmsig_memctx_cfg (infra ro_fd ownership test) */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
@@ -328,12 +329,77 @@ static void test_socket(void) {
vmsig_ctx_free(ctx);
}
/* ---- 6. ro_fd ownership: an infra-supplied RO-fd is closed by the adapter --- *
* Regression for the latent leak: cfg.ro_fd ownership transfers to the adapter at
* open(); mc_close() must close it, so a re-grant (detach + re-attach with a fresh
* infra ro_fd) does not leak the prior one. Only DUPS leave outward (one per share),
* so the original stays open across the run and is reaped at adapter close. */
/* Fallback for headers that did not provide MFD_CLOEXEC: take the MFD_* flags from the
 * kernel header and define memfd_create() locally as a thin wrapper over the raw syscall. */
#ifndef MFD_CLOEXEC
#include <sys/syscall.h>
#include <linux/memfd.h>
static int memfd_create(const char* name, unsigned int flags) {
return (int)syscall(SYS_memfd_create, name, flags);
}
#endif
/* Each constant below is defined only when the included headers did not already supply it. */
#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif
/* fcntl seal command plus the two size seals used by make_ro_backing. */
#ifndef F_ADD_SEALS
#define F_ADD_SEALS (1024 + 9)
#define F_SEAL_SHRINK 0x0002
#define F_SEAL_GROW 0x0004
#endif
/* Guarded separately from the seals above, so it can be supplied on its own. */
#ifndef F_SEAL_FUTURE_WRITE
#define F_SEAL_FUTURE_WRITE 0x0010
#endif
/* Create an anonymous memory-backed fd of `size` bytes to serve as the RO backing.
 *
 * A sealable memfd is tried first; if that fails, a plain close-on-exec memfd is
 * tried instead. After sizing the file, shrink/grow/future-write seals are
 * requested on a best-effort basis (the fcntl result is deliberately ignored).
 *
 * Returns the fd, or -1 if the memfd could not be created or sized. */
static int make_ro_backing(uint32_t size) {
    int backing = memfd_create("vmsig_test_ro", MFD_CLOEXEC | MFD_ALLOW_SEALING);

    if (backing < 0) {
        backing = memfd_create("vmsig_test_ro", MFD_CLOEXEC);
        if (backing < 0) {
            return -1;
        }
    }

    if (ftruncate(backing, (off_t)size) != 0) {
        close(backing);
        return -1;
    }

    (void)fcntl(backing, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_FUTURE_WRITE);
    return backing;
}
/* Ownership of an infra-supplied RO fd.
 *
 * A backing fd is created and handed to the memctx adapter through
 * vmsig_memctx_cfg.ro_fd (stub mode, no RAM path). After the loop has run, the
 * holder must have received MEMCTX and mapped the shared fd, and the original
 * fd must still be open. After the core and context are freed, the original fd
 * number must no longer be a valid descriptor. If that last check fails, the fd
 * is closed here so the test itself does not leak it. */
static void test_ro_fd_ownership(void) {
    printf("test_ro_fd_ownership\n");

    const int backing = make_ro_backing(0x10000u); /* >= the stub low so the holder can mmap */
    CHECK(backing >= 0, "created an RO backing fd");
    if (backing < 0) {
        return;
    }

    vmsig_ctx* ctx = vmsig_ctx_new();
    vmsig_core* core = vmsig_core_new(ctx);

    holder h;
    memset(&h, 0, sizeof h);
    h.stop_epoch = -1;
    h.expect_ep = 0;
    h.is_driver = 1;
    h.core = core;
    add_holder(core, &h, VMSIG_CAP_MEMCTX, 0xFFFFFFFFu, 1ull << 0);

    /* stub kcr3 (no VM) but a REAL infra ro_fd handed in for the RO share path. */
    vmsig_memctx_cfg mc;
    memset(&mc, 0, sizeof mc);
    mc.ro_fd = backing;
    mc.low = 0;
    mc.ram_path = NULL;
    mc.stub = 1;
    CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx (infra ro_fd)");

    vmsig_core_run(core);

    CHECK(h.memctx >= 1, "holder received MEMCTX over the infra ro_fd");
    CHECK(h.ro_ok, "infra ro_fd re-shared and mmaps PROT_READ");
    CHECK(fcntl(backing, F_GETFD) >= 0, "infra ro_fd still open before close (no premature close)");

    vmsig_core_free(core); /* mc_close closes the owned cfg_ro_fd */
    vmsig_ctx_free(ctx);

    CHECK(fcntl(backing, F_GETFD) == -1, "infra ro_fd closed by mc_close after free (no leak)");
    if (fcntl(backing, F_GETFD) >= 0) {
        close(backing); /* belt-and-braces if the assert failed */
    }
}
/* Run every memctx test in order, print the overall verdict, and exit 1 if any CHECK failed. */
int main(void) {
    const char* verdict;

    test_multicast();
    test_epoch();
    test_retain();
    test_multivm();
    test_socket();
    test_ro_fd_ownership();

    verdict = g_fail ? "FAIL" : "PASS";
    printf("memctx tests: %s\n", verdict);
    return g_fail != 0;
}
+279
View File
@@ -0,0 +1,279 @@
/* test_perception.c — table-driven invariant predicates + per-cr3 user-AS scan.
*
* Two layers (no proc_list / win32 — that path needs a real Windows kernel
* bring-up and is covered by an out-of-tree integration run, not this unit):
* 1) Invariant predicates as a TABLE of cases over a synthesized producer
* block (pure, no vmie): valid / latest==NONE / torn odd seq / non-BGRA /
* stride!=width*4 / dims out of range — each asserts accept-vs-reject.
* 2) Per-cr3 user-AS scan + sampling under a SYNTHETIC cr3: lay out a real
* region per vgpu_stream.h in a memfd, build a minimal x86-64 identity page
* table (2 MiB large pages) that maps the region at a USER VA (the region
* really lives in a producer's user-AS), open it RO via vmie_mem_from_ro_fd,
* and run vgpup_scan_user_as_for_region + a two-phase heartbeat liveness
* check, then construct a handle (proc_cr3 = synth cr3) and run the real
* frame/cursor/geometry/status reads and the control-write seam under it.
* (cr3 0 over a flat image cannot translate — gva_* needs real page tables —
* so we synthesize them; this exercises the actual translation path the
* caller will use.) The win32 proc_list wrapper is deliberately NOT exercised
* here: vgpup_scan_user_as_for_region is the pure per-cr3 core it calls.
*
* Exit 0 on all-pass; nonzero on the first failure.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include "perception-internal.h"
/* Count of failed CHECKs so far (zero-initialized as a file-scope static). */
static int g_fail;
/* CHECK(cond, msg): when cond is false, print "FAIL: <msg> (<file>:<line>)" to stderr and
 * increment g_fail. Execution continues after a failure. */
#define CHECK(cond, msg) do { \
if (!(cond)) { fprintf(stderr, "FAIL: %s (%s:%d)\n", (msg), __FILE__, __LINE__); ++g_fail; } \
} while (0)
/* ---- layer 1: invariant predicate table ---------------------------------- */
/* Build a baseline VALID producer block (one published BGRA frame in slot 0). */
/* Reset *p and fill it in as the baseline block the invariant table mutates:
 * slot 0 is the latest slot, its sequence word is even, and its descriptor is a
 * 1920x1080 BGRA frame with stride = width * 4 and frame_id 1. Status, backend,
 * supported-format mask and heartbeat are set to fixed in-range values. */
static void make_valid_producer(vgpu_producer_t* p)
{
    enum { BASE_W = 1920, BASE_H = 1080 };

    memset(p, 0, sizeof *p);

    /* slot 0 descriptor */
    p->desc[0].format   = VGPU_FMT_BGRA8888;
    p->desc[0].width    = BASE_W;
    p->desc[0].height   = BASE_H;
    p->desc[0].stride   = BASE_W * 4;
    p->desc[0].frame_id = 1;

    /* publication state */
    p->seq[0]   = 2; /* even = stable */
    p->latest   = 0;
    p->frame_id = 1;

    /* status fields */
    p->heartbeat         = 42;
    p->supported_formats = (1u << VGPU_FMT_BGRA8888);
    p->backend           = VGPU_BK_DDA;
    p->status            = VGPU_ST_CAPTURING;
}
/* One row of the invariant table: a label, a mutation applied to the baseline
 * producer block, and the verdict expected from vgpup_invariants_hold afterwards. */
typedef struct {
const char* name; /* label passed to CHECK as the failure message */
void (*mutate)(vgpu_producer_t*); /* edits the baseline block in place */
int expect; /* expected vgpup_invariants_hold result */
} inv_case;
/* Mutators for the invariant table. Each one takes the baseline block produced by
 * make_valid_producer and breaks (or, for mut_none / mut_latest_none, does not
 * break) exactly one property.
 *
 * The two width mutators also rewrite the stride to width * 4. The baseline
 * stride is 1920 * 4, so changing the width alone would additionally violate the
 * "stride == width * 4" rule that the table tests separately, and the case would
 * be rejected by that rule no matter whether the width bound is enforced. Keeping
 * stride consistent makes those cases depend on the width bound itself. */
static void mut_none(vgpu_producer_t* p)            { (void)p; }
static void mut_latest_none(vgpu_producer_t* p)     { p->latest = VGPU_LATEST_NONE; }
static void mut_latest_oob(vgpu_producer_t* p)      { p->latest = VGPU_SLOT_COUNT; }
static void mut_seq_odd(vgpu_producer_t* p)         { p->seq[0] = 3; }
static void mut_not_bgra(vgpu_producer_t* p)        { p->desc[0].format = 7; }
static void mut_bad_stride(vgpu_producer_t* p)      { p->desc[0].stride = 1920 * 4 + 1; }
static void mut_width_zero(vgpu_producer_t* p)
{
    p->desc[0].width  = 0;
    p->desc[0].stride = p->desc[0].width * 4u; /* keep stride consistent: isolate the width rule */
}
static void mut_width_huge(vgpu_producer_t* p)
{
    p->desc[0].width  = VGPU_MAX_WIDTH + 1;
    p->desc[0].stride = p->desc[0].width * 4u; /* keep stride consistent: isolate the width rule */
}
static void mut_height_huge(vgpu_producer_t* p)     { p->desc[0].height = VGPU_MAX_HEIGHT + 1; }
static void mut_status_oob(vgpu_producer_t* p)      { p->status = VGPU_ST_ERROR + 1; }
static void mut_backend_oob(vgpu_producer_t* p)     { p->backend = VGPU_BK_GDI + 1; }
static void mut_no_bgra_support(vgpu_producer_t* p) { p->supported_formats = 0; }
/* The invariant table: each row is applied to a fresh baseline block by
 * run_invariant_table. Only the unmodified block and the "no frame published yet"
 * block are expected to be accepted (1); every other row expects rejection (0). */
static const inv_case INV_CASES[] = {
{ "valid", mut_none, 1 },
{ "latest==NONE", mut_latest_none, 1 }, /* no frame yet, still valid */
{ "latest out of range", mut_latest_oob, 0 },
{ "torn odd seq", mut_seq_odd, 0 },
{ "non-BGRA format", mut_not_bgra, 0 },
{ "stride != width*4", mut_bad_stride, 0 },
{ "width == 0", mut_width_zero, 0 },
{ "width too large", mut_width_huge, 0 },
{ "height too large", mut_height_huge, 0 },
{ "status out of range", mut_status_oob, 0 },
{ "backend out of range", mut_backend_oob, 0 },
{ "BGRA not supported", mut_no_bgra_support, 0 },
};
static void run_invariant_table(void)
{
size_t i;
for (i = 0; i < sizeof(INV_CASES) / sizeof(INV_CASES[0]); ++i) {
vgpu_producer_t p;
int got;
make_valid_producer(&p);
INV_CASES[i].mutate(&p);
got = vgpup_invariants_hold(&p);
CHECK(got == INV_CASES[i].expect, INV_CASES[i].name);
}
}
/* ---- layer 2: per-cr3 user-AS scan + sampling over a real RO vmie_mem ------ */
/* x86-64 paging entry flags for the synthetic identity table. */
#define PTE_P 0x1u /* present */
#define PTE_RW 0x2u /* writable */
#define PTE_US 0x4u /* user-accessible (the region is in a user-AS) */
#define PTE_PS 0x80u /* page size (2 MiB leaf at PD level) */
#define LARGE_PAGE (2ull * 1024 * 1024)

/* Build a minimal page table inside `img` that maps image offsets [0, span) at
 * virtual address `base` using 2 MiB large pages.
 *
 * Layout: the PML4, PDPT and PD pages are placed at image offsets
 * region_bytes, region_bytes + 0x1000 and region_bytes + 0x2000. One PML4 entry
 * and one PDPT entry (selected by bits 39..47 and 30..38 of `base`) point down
 * the chain; PD entries starting at the index from bits 21..29 of `base` map
 * consecutive 2 MiB chunks starting at offset 0. Every entry carries
 * present | writable | user.
 *
 * Only ONE PD page exists, so the mapping stops at PD index 511: a base/span
 * combination that would need more entries is truncated instead of writing past
 * the PD page (which is the last table page laid out here). Callers are expected
 * to pick `base` and `span` so that the range fits in one PD.
 *
 * The table pages are assumed to be zero on entry (entries not written here are
 * left untouched). Returns the cr3 value to use, i.e. the PML4 offset. */
static uint64_t build_identity_table(uint8_t* img, uint64_t region_bytes,
                                     uint64_t base, uint64_t span)
{
    const unsigned pd_entries = 512u; /* entries in one 4 KiB page-directory page */
    const uint64_t pml4_gpa = region_bytes; /* one page each, after region */
    const uint64_t pdpt_gpa = region_bytes + 0x1000;
    const uint64_t pd_gpa   = region_bytes + 0x2000;
    uint64_t* pml4 = (uint64_t*)(img + pml4_gpa);
    uint64_t* pdpt = (uint64_t*)(img + pdpt_gpa);
    uint64_t* pd   = (uint64_t*)(img + pd_gpa);
    const unsigned pml4i = (unsigned)((base >> 39) & 0x1ffu);
    const unsigned pdpti = (unsigned)((base >> 30) & 0x1ffu);
    const unsigned pdi0  = (unsigned)((base >> 21) & 0x1ffu);
    uint64_t mapped = 0;
    unsigned k = 0;

    pml4[pml4i] = pdpt_gpa | PTE_P | PTE_RW | PTE_US;
    pdpt[pdpti] = pd_gpa | PTE_P | PTE_RW | PTE_US;

    /* stop at the end of the span OR at the end of the single PD page */
    while (mapped < span && pdi0 + k < pd_entries) {
        pd[pdi0 + k] = mapped | PTE_P | PTE_RW | PTE_US | PTE_PS; /* VA base+k*2M → GPA mapped */
        mapped += LARGE_PAGE;
        ++k;
    }
    return pml4_gpa;
}
static void run_flat_smoke(void)
{
const uint64_t region_bytes = VGPU_REGION_BYTES;
/* region rounded up to a 2 MiB boundary for the large-page identity map */
const uint64_t mapped_span = (region_bytes + LARGE_PAGE - 1) & ~(LARGE_PAGE - 1);
const size_t total_bytes = (size_t)region_bytes + 0x3000; /* + PML4/PDPT/PD */
/* a USER VA, 2 MiB-aligned, within [USER_MIN, USER_MAX] — the region lives in
* a producer's user address space, so we map it there (not at a kernel VA). */
const uint64_t base_va = 0x0000000010000000ull;
const uint32_t w = 64, h = 32;
const size_t frame_bytes = (size_t)w * h * 4u;
int fd;
uint8_t* img;
uint64_t cr3;
vmie_mem* m;
vgpu_producer_t p;
uint8_t marker;
fd = memfd_create("vgpu-region", 0);
CHECK(fd >= 0, "memfd_create");
if (fd < 0) { return; }
if (ftruncate(fd, (off_t)total_bytes) != 0) { CHECK(0, "ftruncate"); close(fd); return; }
img = mmap(NULL, total_bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
CHECK(img != MAP_FAILED, "mmap");
if (img == MAP_FAILED) { close(fd); return; }
/* lay out a valid producer block with one BGRA frame in slot 0 (at GPA 0) */
make_valid_producer(&p);
p.desc[0].width = w;
p.desc[0].height = h;
p.desc[0].stride = w * 4u;
memcpy(img + VGPU_PRODUCER_OFFSET, &p, sizeof p);
/* fill the slot-0 frame bytes in the RING with a recognizable marker */
marker = 0xA5;
memset(img + VGPU_RING_OFFSET + 0 * VGPU_SLOT_STRIDE, marker, frame_bytes);
/* synthesize an identity table mapping the region at base_va, then open RO */
cr3 = build_identity_table(img, region_bytes, base_va, mapped_span);
m = vmie_mem_from_ro_fd(fd, total_bytes);
CHECK(m != NULL, "vmie_mem_from_ro_fd");
if (!m) { munmap(img, total_bytes); close(fd); return; }
/* per-cr3 user-AS scan: candidate found at the user VA with hb0 == 42 */
{
uint64_t rgva = 0xdead, hb0 = 0;
int rc = vgpup_scan_user_as_for_region(m, cr3, &rgva, &hb0);
CHECK(rc == 0, "scan_user_as rc");
CHECK(rgva == base_va, "scan_user_as region gva");
CHECK(hb0 == 42, "scan_user_as hb0");
/* two-phase liveness: not alive until heartbeat advances */
CHECK(vgpup_confirm_alive(m, cr3, rgva, hb0) == 0, "confirm not-yet-alive");
{ uint64_t hb = 43; memcpy(img + offsetof(vgpu_producer_t, heartbeat), &hb, sizeof hb); }
CHECK(vgpup_confirm_alive(m, cr3, rgva, hb0) == 1, "confirm alive after tick");
}
/* construct a handle directly (the proc_list/win32 path is not unit-testable;
* proc_cr3 is the synthetic cr3 here) and exercise the read API + control seam */
{
vgpup_region rr;
vgpup_region* r = &rr;
uint8_t* dst = malloc(frame_bytes);
vgpup_frame_info fi;
vgpup_cursor cur;
vgpup_geometry geo;
vgpup_status st;
int rc;
memset(&rr, 0, sizeof rr);
rr.proc_cr3 = cr3;
rr.region_gva = base_va;
rr.ctrl_gva = base_va + VGPU_CONTROL_OFFSET;
rr.ring_gva = base_va + VGPU_RING_OFFSET;
CHECK(dst != NULL, "malloc dst");
rc = vgpup_sample_frame(r, m, dst, frame_bytes, &fi);
CHECK(rc == 1, "sample_frame fresh");
if (rc == 1) {
CHECK(fi.desc.width == w && fi.desc.height == h, "sample dims");
CHECK(fi.bytes == frame_bytes, "sample bytes");
CHECK(dst[0] == marker && dst[frame_bytes - 1] == marker, "sample content");
}
/* same frame_id → no fresh frame (dedup) */
CHECK(vgpup_sample_frame(r, m, dst, frame_bytes, &fi) == 0, "sample dedup");
/* too-small buffer → lossy drop (0), not error */
CHECK(vgpup_sample_frame(r, m, dst, 1, &fi) == 0, "sample tiny-cap");
CHECK(vgpup_read_cursor(r, m, &cur) == 1, "read_cursor");
CHECK(vgpup_read_geometry(r, m, &geo) == 1, "read_geometry");
CHECK(vgpup_read_status(r, m, &st) == 0, "read_status");
CHECK(st.status == VGPU_ST_CAPTURING, "status value");
CHECK(st.heartbeat == 43, "status heartbeat");
CHECK(vgpup_run_epoch(r) == st.run_epoch, "run_epoch accessor");
/* control-write seam: builds frame + offsets, writes nothing */
{
vgpup_control_intent in = { VGPU_CMD_RUN, 60, 1, 7 };
vgpu_control_t frame;
uint64_t ctrl_gva = 0;
uint32_t off = 0, len = 0;
int crc = vgpup_build_control_write(r, &in, &frame, &ctrl_gva, &off, &len);
CHECK(crc == 0, "build_control_write rc");
CHECK(frame.desired_state == VGPU_CMD_RUN, "control desired_state");
CHECK(frame.target_fps == 60, "control target_fps");
CHECK(frame.full_frame_req == 7, "control full_frame_req");
CHECK(frame.ctrl_gen == 0, "control ctrl_gen untouched");
CHECK(ctrl_gva == base_va + VGPU_CONTROL_OFFSET, "control gva");
CHECK(off == offsetof(vgpu_control_t, desired_state), "control off");
CHECK(len == offsetof(vgpu_control_t, full_frame_req) + sizeof(uint32_t)
- offsetof(vgpu_control_t, desired_state), "control len");
}
free(dst);
}
vmie_mem_close(m); /* the TEST owns vmie_mem here (it is the caller) */
munmap(img, total_bytes);
close(fd);
}
/* Test entry point: runs the invariant table and the flat smoke test, then reports.
 * Exit status is 0 when g_fail is zero, 1 otherwise. */
int main(void)
{
    int status = 0;

    run_invariant_table();
    run_flat_smoke();

    if (g_fail == 0) {
        printf("all checks passed\n");
    } else {
        /* failures are reported on stderr together with the g_fail value */
        fprintf(stderr, "%d check(s) failed\n", g_fail);
        status = 1;
    }
    return status;
}
+96
View File
@@ -0,0 +1,96 @@
/* test_roster.c — VM roster inventory coherence (WS2): VMSIG_EV_ROSTER publish, CAP_ROSTER
* gating, endpoint_mask scoping, retained-replay to a late subscriber, and DETACH clearing
* the retained datum. Publish/replay are synchronous (no fd), so the loop is not run: the
* inproc deliver fires the subscriber callback inline. */
#define _GNU_SOURCE
#include "vmsig.h"
#include "core_internal.h" /* core_roster_publish */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
/* Sticky failure flag for the whole run: latched to 1 by any failing CHECK. */
static int g_fail = 0;

/* Non-fatal assertion. When `cond` is false, prints `msg` and sets g_fail;
 * execution continues so the remaining checks still run. */
#define CHECK(cond, msg)                   \
    do {                                   \
        if (!(cond)) {                     \
            printf(" FAIL: %s\n", (msg));  \
            g_fail = 1;                    \
        }                                  \
    } while (0)
/* Roster observer record, filled in by rob_on_ev: a running count of the
 * VMSIG_EV_ROSTER events delivered, plus the fields of the most recent one. */
typedef struct {
    int      count;                        /* roster events seen so far      */
    uint32_t ep;                           /* endpoint of the latest event   */
    uint32_t vmid;                         /* vmid from the latest entry     */
    uint32_t state;                        /* state from the latest entry    */
    uint32_t action;                       /* action from the latest entry   */
    char     name[VMSIG_ROSTER_NAME_MAX];  /* name bytes of the latest entry */
} robs;
/* In-process event callback. `u` is the robs record to update. Events whose kind
 * is not VMSIG_EV_ROSTER are ignored; roster events bump the counter and overwrite
 * the "latest" fields from the inline payload. Always returns 0. */
static int rob_on_ev(void* u, const vmsig_event* ev) {
    if (ev->kind == VMSIG_EV_ROSTER) {
        robs* seen = u;
        /* the roster entry is carried in the event's inline payload */
        const vmsig_roster* entry = (const vmsig_roster*)ev->inln;

        seen->count++;
        seen->ep     = ev->endpoint;
        seen->vmid   = entry->vmid;
        seen->state  = entry->state;
        seen->action = entry->action;
        memcpy(seen->name, entry->name, sizeof seen->name);
    }
    return 0;
}
/* Attach an in-process control to `core` that reports into `r` through rob_on_ev.
 * The grant uses principal 9, every source bit set, capability mask `cap` and
 * endpoint mask `epmask`. Returns the result of vmsig_core_add_control. */
static int add_robs(vmsig_core* core, robs* r, uint32_t cap, uint64_t epmask) {
    vmsig_inproc_cfg inproc_cfg;
    vmsig_grant grant;
    void* control;

    memset(&inproc_cfg, 0, sizeof inproc_cfg);
    inproc_cfg.user = r;
    inproc_cfg.on_event = rob_on_ev;
    control = vmsig_inproc_control_new(&inproc_cfg);

    memset(&grant, 0, sizeof grant);
    grant.principal = 9;
    grant.cap_mask = cap;
    grant.endpoint_mask = epmask;
    grant.source_mask = 0xFFFFFFFFu;

    return vmsig_core_add_control(core, vmsig_inproc_control_ops(), control, &grant);
}
/* Build a zeroed roster entry from the given vmid/state/action/name and publish it
 * for endpoint `ep` through core_roster_publish. `name` is copied with snprintf, so
 * it is truncated to fit the entry's name field. */
static void publish(vmsig_core* core, uint32_t ep, uint32_t vmid, uint32_t state,
                    uint32_t action, const char* name) {
    vmsig_roster entry;

    memset(&entry, 0, sizeof entry);
    entry.action = action;
    entry.state = state;
    entry.vmid = vmid;
    snprintf(entry.name, sizeof entry.name, "%s", name);

    core_roster_publish(core, ep, &entry);
}
/* Roster scenario: three initial subscribers (A: roster cap on all endpoints,
 * B: observe cap only, C: roster cap scoped to endpoint 0), two ATTACH publishes,
 * a late subscriber D, a DETACH of endpoint 0, and a second late subscriber E.
 * Each step checks what the observers recorded. */
static void test_roster(void) {
    /* every observer starts zeroed; d and e are only subscribed further down */
    robs a = {0}, b = {0}, cc = {0}, d = {0}, e = {0};
    vmsig_ctx* ctx;
    vmsig_core* core;

    printf("test_roster\n");
    ctx = vmsig_ctx_new();
    core = vmsig_core_new(ctx);

    add_robs(core, &a, VMSIG_CAP_ROSTER, ~0ull);       /* all endpoints, can see roster */
    add_robs(core, &b, VMSIG_CAP_OBSERVE, ~0ull);      /* no CAP_ROSTER -> denied */
    add_robs(core, &cc, VMSIG_CAP_ROSTER, 1ull << 0);  /* scoped to ep0 only */

    /* ATTACH ep0 */
    publish(core, 0, 1001, VMSIG_VM_RUNNING, VMSIG_ROSTER_ATTACH, "win-1001");
    CHECK(a.count == 1 && a.ep == 0 && a.vmid == 1001 && a.action == VMSIG_ROSTER_ATTACH,
          "A (CAP_ROSTER) received ATTACH ep0");
    CHECK(strcmp(a.name, "win-1001") == 0, "A: name carried inline");
    CHECK(b.count == 0, "B without CAP_ROSTER does NOT receive roster");
    CHECK(cc.count == 1, "C scoped to ep0 received ep0 ATTACH");

    /* ATTACH ep1 */
    publish(core, 1, 1002, VMSIG_VM_RUNNING, VMSIG_ROSTER_ATTACH, "win-1002");
    CHECK(a.count == 2 && a.ep == 1 && a.vmid == 1002, "A received ATTACH ep1");
    CHECK(cc.count == 1, "C scoped to ep0 does NOT receive ep1 (endpoint_mask filter)");

    /* late subscriber D: expects the retained ep0 + ep1 entries replayed on add_control */
    add_robs(core, &d, VMSIG_CAP_ROSTER, ~0ull);
    CHECK(d.count == 2, "late subscriber D replayed BOTH retained roster entries");

    /* DETACH ep0: already-subscribed observers see the event */
    publish(core, 0, 1001, VMSIG_VM_SHUTDOWN, VMSIG_ROSTER_DETACH, "win-1001");
    CHECK(a.count == 3 && a.ep == 0 && a.action == VMSIG_ROSTER_DETACH, "A received DETACH ep0");

    /* late subscriber E after the DETACH: only ep1 is expected in the replay */
    add_robs(core, &e, VMSIG_CAP_ROSTER, ~0ull);
    CHECK(e.count == 1 && e.ep == 1 && e.vmid == 1002,
          "late subscriber E replayed only the live ep1 (detached ep0 not retained)");

    vmsig_core_free(core);
    vmsig_ctx_free(ctx);
}
/* Runs the roster test and prints a one-line verdict; exit status is 1 when any
 * CHECK failed, 0 otherwise. */
int main(void) {
    const char* verdict;

    test_roster();

    verdict = g_fail ? "FAIL" : "PASS";
    printf("roster tests: %s\n", verdict);
    if (g_fail) {
        return 1;
    }
    return 0;
}
+79
View File
@@ -0,0 +1,79 @@
/* test_slot.c — vmid<->endpoint allocator (WS3): pin/idempotence, lowest-free-bit, free +
* reuse, full-table, and persistence round-trip. */
#define _GNU_SOURCE
#include "slot.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
/* Sticky failure flag for the whole run: latched to 1 by any failing CHECK. */
static int g_fail = 0;

/* Non-fatal assertion. When `cond` is false, prints `msg` and sets g_fail;
 * execution continues so the remaining checks still run. */
#define CHECK(cond, msg)                   \
    do {                                   \
        if (!(cond)) {                     \
            printf(" FAIL: %s\n", (msg));  \
            g_fail = 1;                    \
        }                                  \
    } while (0)
/* Allocation basics on a fresh table: lookup of an unbound vmid, first/second
 * allocation results, repeat allocation of a bound vmid, free followed by reuse of
 * the freed slot, and rejection of vmid 0. */
static void test_alloc(void) {
    slot_table tbl;

    printf("test_slot_alloc\n");
    slot_init(&tbl);

    CHECK(slot_lookup(&tbl, 1001) == -1, "unbound vmid => -1");

    CHECK(slot_alloc(&tbl, 1001) == 0, "first alloc => lowest bit 0");
    CHECK(slot_alloc(&tbl, 1001) == 0, "alloc is idempotent (pin same slot)");
    CHECK(slot_lookup(&tbl, 1001) == 0, "lookup returns the pinned slot");

    CHECK(slot_alloc(&tbl, 1002) == 1, "second vmid => next free bit 1");

    slot_free(&tbl, 1001);
    CHECK(slot_lookup(&tbl, 1001) == -1, "freed vmid => -1");

    CHECK(slot_alloc(&tbl, 1003) == 0, "freed bit 0 reused by a new vmid (lowest free)");
    CHECK(slot_alloc(&tbl, 1002) == 1, "the other binding survived the free/reuse");

    CHECK(slot_alloc(&tbl, 0) == -1, "vmid 0 rejected");
}
/* Full-table behaviour: bind vmids 1000.. to every one of the VMSIG_SLOT_COUNT
 * slots, check that one more vmid is refused while a bound one still resolves,
 * then free vmid 1030 and check that its slot (30) is handed to the next vmid. */
static void test_full(void) {
    slot_table tbl;
    uint32_t idx = 0;

    printf("test_slot_full\n");
    slot_init(&tbl);

    while (idx < VMSIG_SLOT_COUNT) {
        CHECK(slot_alloc(&tbl, 1000 + idx) == (int)idx, "fill all 64 slots in order");
        idx++;
    }

    CHECK(slot_alloc(&tbl, 9999) == -1, "65th vmid => -1 (ceiling)");
    CHECK(slot_alloc(&tbl, 1000) == 0, "an already-bound vmid still resolves when full");

    slot_free(&tbl, 1030);
    CHECK(slot_alloc(&tbl, 9999) == 30, "after a free, the freed bit is available");
}
/* Persistence round-trip: save a table that has seen an alloc/free/realloc cycle,
 * load it into a fresh table, and check that the bindings (and the absence of the
 * freed vmid) survive. Then check that loading a path with no file behind it
 * succeeds and leaves an empty table.
 *
 * The test bails out early if the temp file cannot be created, instead of
 * continuing with an unusable path. The "missing file" case reuses the unique
 * temp path after unlinking it, so it does not depend on some fixed name in /tmp
 * happening to be absent. */
static void test_persist(void) {
    printf("test_slot_persist\n");

    char path[] = "/tmp/vmsig_slot_test.XXXXXX";
    int fd = mkstemp(path);
    CHECK(fd >= 0, "mkstemp");
    if (fd < 0) return;   /* no usable temp path: nothing below would be meaningful */
    close(fd);            /* only the name is needed; slot_save/slot_load take a path */

    slot_table t; slot_init(&t);
    slot_alloc(&t, 1001);          /* bit 0 */
    slot_alloc(&t, 700);           /* bit 1 */
    slot_free(&t, 1001);
    slot_alloc(&t, 900);           /* reuses bit 0 */
    CHECK(slot_save(&t, path) == 0, "save ok");

    slot_table u; slot_init(&u);
    CHECK(slot_load(&u, path) == 0, "load ok");
    CHECK(slot_lookup(&u, 900) == 0, "persisted: 900 on bit 0");
    CHECK(slot_lookup(&u, 700) == 1, "persisted: 700 on bit 1");
    CHECK(slot_lookup(&u, 1001) == -1, "persisted: freed 1001 absent");
    /* a new alloc on the loaded table must avoid the occupied bits */
    CHECK(slot_alloc(&u, 111) == 2, "loaded table: next free bit is 2");

    /* remove the saved file; its unique name now serves as the missing-file path */
    CHECK(unlink(path) == 0, "unlink temp file");

    slot_table v; slot_init(&v);
    CHECK(slot_load(&v, path) == 0, "missing file => fresh start (0)");
    CHECK(slot_alloc(&v, 1) == 0, "fresh table allocates bit 0");
}
/* Runs the three slot-allocator tests in order and prints a one-line verdict;
 * exit status is 1 when any CHECK failed, 0 otherwise. */
int main(void) {
    const char* verdict;

    test_alloc();
    test_full();
    test_persist();

    verdict = g_fail ? "FAIL" : "PASS";
    printf("slot tests: %s\n", verdict);
    if (g_fail) {
        return 1;
    }
    return 0;
}