3 Commits

Author SHA1 Message Date
lirent 26e5ab4709 fix(memctx): retry cold bootstrap with backoff while guest boots
The cold address-space bootstrap (host_bootstrap -> System DTB) ran once and was
terminal: when the adapter attached before the guest finished booting, no System
process was found, the adapter emitted a single ERROR and never retried, so the
memctx datum was never published.

Make it self-healing: on bootstrap failure arm a one-shot backoff timerfd (a
second adapter fd, demuxed by cookie) that re-kicks the bootstrap until it
succeeds; reset and disarm on success. Drop the per-failure URGENT ERROR (a
still-booting guest is transient, not a fault) for a single diagnostic line on
the first failure. Add a stub fail-injection (cfg fail_boots) and test_retry.

Bump 0.3.5.
2026-06-24 11:20:32 +03:00
lirent 6fea392d46 build: derive the project version from VMSIG_VERSION so one tag drives every package + the .so version 2026-06-22 20:35:10 +03:00
lirent 0289817821 packaging: split vgpu perception into separate libvgpu-perception0 + -dev packages
- the vmsig package no longer ships the gpu lib; it is a Sensor lib for the control, not the daemon
- vgpu-perception gets SOVERSION; runtime (libvgpu-perception0) and dev (-dev) packages, like the vmie split
- per-component install + a 3-package make deb; fix a stale comment (the windows producer is in-tree)
2026-06-22 20:32:21 +03:00
12 changed files with 268 additions and 71 deletions
+31 -21
View File
@@ -1,5 +1,8 @@
cmake_minimum_required(VERSION 3.16)
project(vmsig VERSION 0.3.3 LANGUAGES C)
# Single source of truth for the version: CI passes -DVMSIG_VERSION=${TAG#v}, so the project
# version (-> libvgpu-perception SONAME/.so version) and the .deb version come from one tag.
set(VMSIG_VERSION "0.3.5" CACHE STRING "Release version (MAJOR.MINOR.PATCH); CI passes the tag")
project(vmsig VERSION ${VMSIG_VERSION} LANGUAGES C)
set(CMAKE_C_STANDARD 17)
set(CMAKE_C_STANDARD_REQUIRED ON)
@@ -117,16 +120,18 @@ add_executable(vmsig_cli src/cli.c)
target_link_libraries(vmsig_cli PRIVATE vmsig)
target_compile_options(vmsig_cli PRIVATE -Wall -Wextra)
# ---- vgpu-perception: host-side vgpu Sensor S-lib (absorbed in-tree) ---------
# A SEPARATE shipped library (NOT fused into libvmsig — it is consumed by the shell, not the
# signaling core). Host-only: reads the vgpu shared region from its own RO vmie_mem. Built
# only when armed (needs vmie). The in-guest Windows producer (vgpu-streamer.exe) stays in a
# separate repo and is NOT part of this delivery.
# ---- vgpu-perception: host-side vgpu Sensor S-lib ---------------------------
# Packaged SEPARATELY from the daemon (libvgpu-perception0 + -dev), NOT fused into libvmsig —
# a Sensor lib consumed by a control/shell, not the signaling core. Host-only: reads the vgpu
# shared region from its own RO vmie_mem. Built only when armed (needs vmie). The in-guest
# Windows producer is the vgpu-streamer cross-target above (same tree, shared ABI vgpu_stream.h).
if(VMSIG_WITH_VMIE)
add_library(vgpu-perception SHARED
src/si/vgpu-perception/discover.c
src/si/vgpu-perception/sample.c
src/si/vgpu-perception/control.c)
set_target_properties(vgpu-perception PROPERTIES
VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) # libvgpu-perception.so.0
target_include_directories(vgpu-perception
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/si/vgpu-perception/include)
@@ -265,25 +270,30 @@ add_test(NAME memwrite COMMAND vmsig_memwritetest)
add_test(NAME cli COMMAND vmsig_cli)
# ---- install rules (for the .deb stage) -------------------------------------
option(VMSIG_INSTALL "Generate install() rules (daemon/lib/headers/unit/config)" OFF)
option(VMSIG_INSTALL "Generate install() rules (per-component, for the .deb stages)" OFF)
if(VMSIG_INSTALL)
include(GNUInstallDirs)
install(TARGETS vmsigd RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR})
install(TARGETS vmsig LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
if(TARGET vgpu-perception) # armed builds ship the host vgpu S-lib alongside
install(TARGETS vgpu-perception LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
# public contracts (signaling + absorbed SI host headers) under include/vmsig/
# --- component `daemon`: the signaling delivery (package: vmsig). NO gpu lib here. ---
install(TARGETS vmsigd RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR} COMPONENT daemon)
install(TARGETS vmsig LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT daemon)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vmsig
FILES_MATCHING PATTERN "vmsig*.h"
PATTERN "vmctl.h"
PATTERN "vgpu_stream.h"
PATTERN "vgpu_perception.h")
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vmsig COMPONENT daemon
FILES_MATCHING PATTERN "vmsig*.h" PATTERN "vmctl.h")
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/systemd/vmsigd.service
DESTINATION lib/systemd/system)
DESTINATION lib/systemd/system COMPONENT daemon)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/tmpfiles/vmsig.conf
DESTINATION lib/tmpfiles.d)
DESTINATION lib/tmpfiles.d COMPONENT daemon)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/config/vmsigd.conf
DESTINATION /etc/vmsig)
DESTINATION /etc/vmsig COMPONENT daemon)
# --- the host vgpu perception S-lib, SEPARATE from the daemon: runtime (versioned .so,
# package libvgpu-perception0) vs dev (namelink + headers, package libvgpu-perception-dev) ---
if(TARGET vgpu-perception)
install(TARGETS vgpu-perception
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT vgpu_runtime
NAMELINK_COMPONENT vgpu_dev)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/vgpu_perception.h
${CMAKE_CURRENT_SOURCE_DIR}/include/vgpu_stream.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vmsig COMPONENT vgpu_dev)
endif()
endif()
+46 -32
View File
@@ -1,46 +1,60 @@
# vmsig packaging — `make deb` builds the .deb over a `cmake --install` stage.
# Private values are NOT baked into the tree: pass them via the variables below (the
# defaults are neutral placeholders; CI overrides them from vars/secrets).
# vmsig packaging — `make deb` builds THREE Debian packages from per-component install stages:
# vmsig — the signaling daemon + library + headers + systemd unit
# libvgpu-perception0 — the host-side vgpu perception S-lib runtime, versioned .so (SEPARATE: not in vmsig)
# libvgpu-perception-dev — its development files (linker namelink + headers)
# Private values are NOT baked in: pass them via the variables below (CI overrides them).
#
# make deb LIBVMIE_PATH=/path/to/vmie VERSION=1.2.3 \
# MAINTAINER="Name <addr>" DEPENDS="libc6, libvmie0"
# make deb LIBVMIE_PATH=/path/to/vmie VERSION=1.2.3 MAINTAINER="Name <addr>"
VERSION ?= 0.0.0
MAINTAINER ?= vmsig packaging <root@localhost>
# libvmie0 is vmie's own runtime package (SONAME libvmie.so.0): libvmsig.so and
# libvgpu-perception.so dynamically link it, so it is a HARD runtime dependency.
DEPENDS ?= libc6, libvmie0
ARCH ?= amd64
VERSION ?= 0.0.0
MAINTAINER ?= vmsig packaging <root@localhost>
# libvmie0 is vmie's runtime package (SONAME libvmie.so.0): both libvmsig.so and
# libvgpu-perception.so dynamically link it — a hard runtime dependency of each package.
DEPENDS ?= libc6, libvmie0
ARCH ?= amd64
LIBVMIE_PATH ?=
BUILD_DIR ?= .build-pkg
STAGE ?= $(CURDIR)/dist/stage
DIST ?= $(CURDIR)/dist
BUILD_DIR ?= .build-pkg
DIST ?= $(CURDIR)/dist
.PHONY: deb clean
# Armed package: the shipped daemon needs vmie for memctx. vmie stays an external dependency
# (package Depends on its runtime). vmie is found from a source tree (LIBVMIE_PATH) or, when
# that is empty, from the installed libvmie-dev (system / CMAKE_PREFIX_PATH) — the CI path.
# vmie is found from a source tree (LIBVMIE_PATH) or, when empty, the installed libvmie-dev
# (system / CMAKE_PREFIX_PATH) — the CI path.
deb:
rm -rf $(STAGE)
cmake -S . -B $(BUILD_DIR) -DCMAKE_BUILD_TYPE=Release -DVMSIG_INSTALL=ON -DVMSIG_WITH_VMIE=ON \
$(if $(LIBVMIE_PATH),-DLIBVMIE_PATH=$(LIBVMIE_PATH),)
-DVMSIG_VERSION=$(VERSION) $(if $(LIBVMIE_PATH),-DLIBVMIE_PATH=$(LIBVMIE_PATH),)
cmake --build $(BUILD_DIR) -j
DESTDIR=$(STAGE) cmake --install $(BUILD_DIR) --prefix /usr
mkdir -p $(STAGE)/DEBIAN
sed -e 's/@VERSION@/$(VERSION)/' \
-e 's|@MAINTAINER@|$(MAINTAINER)|' \
-e 's/@DEPENDS@/$(DEPENDS)/' \
packaging/deb/control.in > $(STAGE)/DEBIAN/control
cp packaging/deb/conffiles $(STAGE)/DEBIAN/conffiles
install -m 0755 packaging/deb/postinst $(STAGE)/DEBIAN/postinst
install -m 0755 packaging/deb/prerm $(STAGE)/DEBIAN/prerm
# strip inherited setgid from staged dirs (a setgid build tree => dpkg-deb rejects DEBIAN)
find $(STAGE) -type d -exec chmod g-s {} +
mkdir -p $(DIST)
dpkg-deb --root-owner-group --build $(STAGE) $(DIST)/vmsig_$(VERSION)_$(ARCH).deb
@echo "built: $(DIST)/vmsig_$(VERSION)_$(ARCH).deb"
# ---- package: vmsig (component `daemon`) ----
rm -rf $(DIST)/stage-daemon
DESTDIR=$(DIST)/stage-daemon cmake --install $(BUILD_DIR) --prefix /usr --component daemon
mkdir -p $(DIST)/stage-daemon/DEBIAN
sed -e 's/@VERSION@/$(VERSION)/' -e 's|@MAINTAINER@|$(MAINTAINER)|' -e 's/@DEPENDS@/$(DEPENDS)/' \
packaging/deb/vmsig/control.in > $(DIST)/stage-daemon/DEBIAN/control
cp packaging/deb/vmsig/conffiles $(DIST)/stage-daemon/DEBIAN/conffiles
install -m 0755 packaging/deb/vmsig/postinst $(DIST)/stage-daemon/DEBIAN/postinst
install -m 0755 packaging/deb/vmsig/prerm $(DIST)/stage-daemon/DEBIAN/prerm
find $(DIST)/stage-daemon -type d -exec chmod g-s {} +
dpkg-deb --root-owner-group --build $(DIST)/stage-daemon $(DIST)/vmsig_$(VERSION)_$(ARCH).deb
# ---- package: libvgpu-perception0 (component `vgpu_runtime` — versioned .so) ----
rm -rf $(DIST)/stage-vgpu0
DESTDIR=$(DIST)/stage-vgpu0 cmake --install $(BUILD_DIR) --prefix /usr --component vgpu_runtime
mkdir -p $(DIST)/stage-vgpu0/DEBIAN
sed -e 's/@VERSION@/$(VERSION)/' -e 's|@MAINTAINER@|$(MAINTAINER)|' -e 's/@DEPENDS@/$(DEPENDS)/' \
packaging/deb/vgpu0/control.in > $(DIST)/stage-vgpu0/DEBIAN/control
install -m 0755 packaging/deb/vgpu0/postinst $(DIST)/stage-vgpu0/DEBIAN/postinst
find $(DIST)/stage-vgpu0 -type d -exec chmod g-s {} +
dpkg-deb --root-owner-group --build $(DIST)/stage-vgpu0 $(DIST)/libvgpu-perception0_$(VERSION)_$(ARCH).deb
# ---- package: libvgpu-perception-dev (component `vgpu_dev` — namelink + headers) ----
rm -rf $(DIST)/stage-vgpu-dev
DESTDIR=$(DIST)/stage-vgpu-dev cmake --install $(BUILD_DIR) --prefix /usr --component vgpu_dev
mkdir -p $(DIST)/stage-vgpu-dev/DEBIAN
sed -e 's/@VERSION@/$(VERSION)/' -e 's|@MAINTAINER@|$(MAINTAINER)|' \
-e 's/@DEPENDS@/libvgpu-perception0 (= $(VERSION))/' \
packaging/deb/vgpu-dev/control.in > $(DIST)/stage-vgpu-dev/DEBIAN/control
find $(DIST)/stage-vgpu-dev -type d -exec chmod g-s {} +
dpkg-deb --root-owner-group --build $(DIST)/stage-vgpu-dev $(DIST)/libvgpu-perception-dev_$(VERSION)_$(ARCH).deb
@echo "built: vmsig + libvgpu-perception0 + libvgpu-perception-dev ($(VERSION))"
clean:
rm -rf $(BUILD_DIR) $(DIST)
+10
View File
@@ -0,0 +1,10 @@
Package: libvgpu-perception-dev
Version: @VERSION@
Section: libdevel
Priority: optional
Architecture: amd64
Depends: @DEPENDS@
Maintainer: @MAINTAINER@
Description: Host-side vgpu perception library (development files)
Headers (vgpu_perception.h, vgpu_stream.h) and the linker namelink for
libvgpu-perception. Install this to build a control/shell against the perception API.
+12
View File
@@ -0,0 +1,12 @@
Package: libvgpu-perception0
Version: @VERSION@
Section: libs
Priority: optional
Architecture: amd64
Depends: @DEPENDS@
Maintainer: @MAINTAINER@
Description: Host-side vgpu perception library
Reads the in-guest vgpu shared region (frames, cursor, geometry) from the host over a
read-only guest-RAM handle and exposes a perception API. A Sensor-layer library consumed
by a control/shell, independent of the signaling daemon. This package ships the runtime
shared object (libvgpu-perception.so.0).
+10
View File
@@ -0,0 +1,10 @@
#!/bin/sh
# Debian maintainer script (postinst). dpkg passes the action as $1.
# Only `configure` does any work: it refreshes the dynamic-linker cache so a newly
# installed shared object becomes resolvable. The refresh is best-effort: a failing
# ldconfig must not fail the package configuration.
set -e

if [ "$1" = "configure" ]; then
    ldconfig || true
fi

# abort-upgrade / abort-remove / abort-deconfigure (and any other action): nothing to undo.
exit 0
@@ -5,9 +5,9 @@ Priority: optional
Architecture: amd64
Depends: @DEPENDS@
Maintainer: @MAINTAINER@
Description: VM signaling coherence daemon and host SI libraries
Description: VM signaling coherence daemon
vmsig serves a unix-socket control plane over the signaling layer for the VMs it
discovers: lifecycle/state, coherent guest address-space context handoff, and arbitrated
input and memory-write actuation. Ships the daemon (vmsigd), the signaling library, the
host-side vgpu perception library, and a systemd unit. Configured via
/etc/vmsig/vmsigd.conf.
input and memory-write actuation. Ships the daemon (vmsigd), the signaling library, and a
systemd unit. Configured via /etc/vmsig/vmsigd.conf. The host-side vgpu perception library
is packaged separately (libvgpu-perception0; development files in libvgpu-perception-dev).
+3
View File
@@ -12,6 +12,9 @@ typedef struct {
/* TRANSFERS to the adapter (closed in close()) — the */
/* caller dups first if it must keep its own copy. */
/* <0 => default: open(ram_path, O_RDONLY) / stub-memfd */
uint32_t fail_boots; /* test-only: fail the first N stub bootstraps before */
/* succeeding (drives the retry/backoff path deterministically */
/* without timing dependence); 0 in production. stub path only. */
} vmsig_memctx_cfg;
/* Max SRC bytes per atomic gva_write (bounds the worker POD slot; mc_req header + src
+115 -13
View File
@@ -25,6 +25,7 @@
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/epoll.h>
#include <sys/timerfd.h> /* one-shot backoff timer for cold-bootstrap retry */
#ifdef VMSIG_WITH_VMIE
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
@@ -54,6 +55,18 @@ static int memfd_create(const char* name, unsigned int flags) {
#define MC_MAX_SEG 8
#define MC_WORKER_DEPTH 16 /* one off-loop thread: rare bootstrap + writes */
/* Cold-bootstrap retry backoff (guest may still be booting when discovery attaches us;
* host_bootstrap then finds no System process). Mirror of the discovery backoff so the
* adapter stays decoupled from the discovery layer (Rule-of-three not reached): 50ms base,
* exponential with the shift capped at 6, ceiling 2s steady-state. One-shot timerfd: armed
* on failure, disarmed on success — no it_interval, no busy-wait. */
#define MC_BOOT_BACKOFF_BASE 50000000ull /* 50 ms */
#define MC_BOOT_BACKOFF_CAP 2000000000ull /* 2 s */
/* Adapter readiness fds are demuxed by per-slot cookie: slot 0 is the worker completion
* eventfd, slot 1 is the one-shot backoff timerfd that re-kicks the bootstrap. */
enum { MC_COOKIE_WORKER = 0, MC_COOKIE_RETRY = 1 };
enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
@@ -63,7 +76,10 @@ enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
* target cr3 (0 => System DTB; resolved on the worker against a->kcr3). */
typedef struct {
uint32_t op; /* MC_JOB_* */
uint32_t boot_count; /* MC_JOB_BOOTSTRAP */
uint32_t boot_count; /* MC_JOB_BOOTSTRAP: drives the stub kcr3 per epoch */
uint32_t attempt; /* MC_JOB_BOOTSTRAP: consecutive-failure index of THIS */
/* kick (copy of a->boot_attempts); stub fails while */
/* attempt < a->fail_boots. NOT the epoch counter. */
/* --- MC_JOB_WRITE --- */
uint64_t cr3; /* target AS root; 0 => a->kcr3 (kernel AS), resolved on worker */
uint64_t gva;
@@ -89,7 +105,12 @@ struct vmsig_adapter {
vmsig_emit emit;
int registered; /* register_memctx already called */
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
uint32_t boot_count; /* incremented on each (re-)bootstrap */
uint32_t boot_count; /* incremented on each (re-)bootstrap (epoch tag) */
/* cold-bootstrap retry — loop-thread-only (attach/on_ready/invalidate/close). */
int retry_fd; /* one-shot backoff timerfd (-1 when none) */
uint32_t boot_attempts; /* consecutive bootstrap failures this cycle (0 = none); reset on success/epoch */
uint32_t fail_boots; /* test-only: fail the first N stub bootstraps (cfg); set once in mc_open, then read-only (worker reads it) */
#ifdef VMSIG_WITH_VMIE
vmie_win32* win; /* held RW handle across the epoch (kcr3 source + gva_write target) */
@@ -109,6 +130,35 @@ struct vmsig_adapter {
/* fwd: MEMWRITE completion ACK (defined below mc_submit; used in mc_on_ready demux). */
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin);
/* Delay before the next cold-bootstrap retry, in nanoseconds. Starts from
 * MC_BOOT_BACKOFF_BASE and doubles per consecutive failure; the shift saturates at 6 and the
 * result is clamped to MC_BOOT_BACKOFF_CAP. A deliberate local mirror of the discovery
 * backoff, kept here so the adapter stays decoupled from the discovery layer
 * (Rule-of-three not reached). */
static uint64_t mc_boot_backoff(uint32_t attempts) {
    const uint32_t shift = attempts > 6 ? 6u : attempts;
    const uint64_t delay = MC_BOOT_BACKOFF_BASE << shift;
    if (delay > MC_BOOT_BACKOFF_CAP)
        return MC_BOOT_BACKOFF_CAP;
    return delay;
}
/* Schedule the next bootstrap retry: program retry_fd as a one-shot relative timer
 * (it_value only, it_interval stays zero) that expires after
 * mc_boot_backoff(a->boot_attempts). Loop-thread-only. Does nothing when the timerfd was
 * never created. Best-effort: a timerfd_settime failure is logged, not fatal (matches
 * discovery rearm). */
static void mc_arm_retry(struct vmsig_adapter* a) {
    if (a->retry_fd >= 0) {
        const uint64_t ns_per_sec = 1000000000ull;
        const uint64_t delay_ns = mc_boot_backoff(a->boot_attempts);

        struct itimerspec spec;
        memset(&spec, 0, sizeof spec);
        spec.it_value.tv_sec = (time_t)(delay_ns / ns_per_sec);
        spec.it_value.tv_nsec = (long)(delay_ns % ns_per_sec);

        const int rc = timerfd_settime(a->retry_fd, 0, &spec, NULL);
        if (rc != 0)
            fprintf(stderr, "vmsig memctx: endpoint %u retry timer arm failed\n", a->endpoint);
    }
}
/* Cancel a pending retry by loading an all-zero itimerspec into retry_fd (a zero it_value
 * disarms a timerfd). Loop-thread-only. Called on bootstrap success and at epoch change so
 * a stale arm from an earlier failure cannot fire over a fresh cycle. The settime result is
 * deliberately ignored. Does nothing when the timerfd was never created. */
static void mc_disarm_retry(struct vmsig_adapter* a) {
    if (a->retry_fd >= 0) {
        struct itimerspec off = { .it_value = { .tv_sec = 0, .tv_nsec = 0 } };
        (void)timerfd_settime(a->retry_fd, 0, &off, NULL);
    }
}
/* ---- stub RO-fd: memfd + deterministic contents + seal of future writes ---- */
static int mc_make_stub_fd(uint32_t size) {
int fd = memfd_create("vmsig_memctx", MFD_CLOEXEC | MFD_ALLOW_SEALING);
@@ -184,6 +234,9 @@ static int mc_job(void* user, const void* req, void* res) {
/* MC_JOB_BOOTSTRAP */
if (a->stub) {
/* test-only: fail the first fail_boots attempts to exercise the retry path
* deterministically (a->fail_boots is set once in open, read-only here). */
if (rq->attempt < a->fail_boots) return -1;
rs->kcr3 = 0xC0DE0000ull + (uint64_t)rq->boot_count * 0x1000ull; /* changes per epoch */
return 0;
}
@@ -202,6 +255,7 @@ static void mc_kick_bootstrap(struct vmsig_adapter* a) {
mc_req rq;
memset(&rq, 0, sizeof rq);
rq.op = MC_JOB_BOOTSTRAP; rq.boot_count = a->boot_count;
rq.attempt = a->boot_attempts; /* failure index of this kick (loop-thread snapshot) */
(void)vmsig_worker_submit(a->worker, &rq, sizeof rq); /* full => drop (rare) */
}
@@ -232,6 +286,10 @@ static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
struct vmsig_adapter* a = ctx;
(void)epoch; /* the core owns the epoch; the adapter must re-bootstrap */
a->have_ctx = 0; /* the previous context is invalid */
/* new cycle: drop a stale arm from the previous cycle and restart the failure counter at
* zero so this bootstrap's backoff starts fresh (and the first-failure diagnostic re-arms). */
a->boot_attempts = 0;
mc_disarm_retry(a);
mc_kick_bootstrap(a); /* off-loop; on_ready re-emits MEMCTX (new epoch) */
}
@@ -247,11 +305,13 @@ static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
a->cfg_ro_fd = (c && c->ro_fd >= 0) ? c->ro_fd : -1;
if (!a->ram_path && a->cfg_ro_fd < 0) a->stub = 1; /* no path/fd => stub */
a->stub_fd = -1;
a->retry_fd = -1;
a->fail_boots = c ? c->fail_boots : 0; /* set once; read-only afterwards (worker reads) */
return a;
}
static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
if (cap < 1) return -1;
if (cap < 2) return -1; /* worker eventfd + one-shot backoff timerfd */
a->emit = *emit;
a->worker = vmsig_worker_new(mc_job, a, 1, MC_WORKER_DEPTH);
@@ -262,11 +322,27 @@ static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg
if (a->stub_fd < 0) { vmsig_worker_free(a->worker); a->worker = NULL; return -1; }
}
/* worker completion-eventfd as the readiness source (cookie=0). */
/* one-shot backoff timerfd: re-kicks the cold bootstrap when the guest is still booting.
* Created here (loop-thread-only fd); armed on failure, disarmed on success. Rollback the
* worker + stub_fd on failure, symmetric to mc_make_stub_fd above. */
a->retry_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
if (a->retry_fd < 0) {
if (a->stub_fd >= 0) { close(a->stub_fd); a->stub_fd = -1; }
vmsig_worker_free(a->worker); a->worker = NULL;
return -1;
}
/* worker completion-eventfd as the readiness source (cookie=worker). */
reg[0].fd = vmsig_worker_evfd(a->worker);
reg[0].epoll_events = EPOLLIN;
reg[0].shape = VMSIG_RDY_EVENTFD;
reg[0].cookie = 0;
reg[0].cookie = MC_COOKIE_WORKER;
/* backoff timerfd as the second readiness source (cookie=retry). */
reg[1].fd = a->retry_fd;
reg[1].epoll_events = EPOLLIN;
reg[1].shape = VMSIG_RDY_TIMERFD;
reg[1].cookie = MC_COOKIE_RETRY;
/* register the reg BEFORE the first bootstrap: the core slot gets the hooks. describe
* is not called until the slot is valid (which only happens after the first MEMCTX). */
@@ -289,11 +365,23 @@ static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg
a->emit.emit(a->emit.token, &up);
mc_kick_bootstrap(a); /* first bootstrap off-loop; assemble the locator on completion */
return 1;
return 2; /* worker eventfd + backoff timerfd */
}
static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
(void)cookie; (void)events;
(void)events; /* epoll flags carry nothing we need; the cookie selects the source */
/* retry timerfd fired: the guest was still booting; drain and re-kick the bootstrap.
* Re-kick is a fresh MC_JOB_BOOTSTRAP into the SAME FIFO worker queue, so it serializes
* behind any in-flight write — nothing extra to synchronize. */
if (cookie == MC_COOKIE_RETRY) {
uint64_t v;
while (read(a->retry_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain to EAGAIN */ }
mc_kick_bootstrap(a);
return 0;
}
/* cookie == MC_COOKIE_WORKER: worker completion. */
vmsig_worker_ack(a->worker);
mc_res rs;
int rc;
@@ -304,18 +392,29 @@ static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
continue;
}
if (rc != 0) {
/* bootstrap failed: ERROR (source MEMCTX); do NOT publish an invalid kcr3. */
vmsig_event er;
memset(&er, 0, sizeof er);
er.kind = VMSIG_EV_ERROR; er.source = VMSIG_SRC_MEMCTX; er.dir = VMSIG_DIR_UP;
er.prio = VMSIG_PRIO_URGENT; er.endpoint = a->endpoint;
a->emit.emit(a->emit.token, &er);
/* bootstrap failed: the guest is likely still booting (host_bootstrap found no
* System process). This is NOT a control-level error — do NOT emit VMSIG_EV_ERROR
* (it would spam URGENT during a normal multi-second guest boot). Instead schedule a
* backoff retry; the context simply stays unpublished until a kick succeeds. One
* diagnostic line on the FIRST failure of the cycle (symmetric to the discovery
* "never came up" note), not on every attempt. */
if (a->boot_attempts == 0)
fprintf(stderr, "vmsig memctx: endpoint %u bootstrap not ready yet, retrying\n",
a->endpoint);
a->boot_attempts++;
mc_arm_retry(a); /* one-shot timer at mc_boot_backoff(boot_attempts) */
continue;
}
/* assemble the locator on the loop thread from rs.kcr3. a->kcr3 is the gva_write
* TARGET and is owned SOLELY by the worker thread (set in mc_bootstrap_armed, read by
* MC_JOB_WRITE — same thread, FIFO happens-before); the loop must NOT also write it, or
* an in-flight write at line ~170 would race it. cur_pod.kcr3 is loop-only (delivery). */
/* bootstrap succeeded: cancel any pending retry and reset the failure counter BEFORE
* publishing, so a stale timer armed by a prior failure cannot fire over a live context. */
a->boot_attempts = 0;
mc_disarm_retry(a);
memset(&a->cur_pod, 0, sizeof a->cur_pod);
a->cur_pod.kcr3 = rs.kcr3;
a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE;
@@ -398,6 +497,9 @@ static void mc_close(vmsig_adapter* a) {
if (a->win) vmie_win32_close(a->win); /* AFTER worker join: no in-flight gva_write */
#endif
if (a->stub_fd >= 0) close(a->stub_fd);
/* one-shot backoff timerfd: never spawns a worker job, so its close is independent of the
* worker join — same contract as stub_fd. The core already epoll_ctl(DEL)'d the slot. */
if (a->retry_fd >= 0) close(a->retry_fd);
/* ro_fd ownership transferred to the adapter at open(): close it here so a re-grant
* (detach + re-attach with a fresh infra ro_fd) does not leak the prior one. Infra
* that must keep its own copy dups before handing it in — symmetric to the holder
+37 -1
View File
@@ -42,7 +42,7 @@ struct holder {
holder* peer; /* multi-VM: stop when both are ready (or NULL) */
int is_driver; /* stops the loop on a condition */
uint32_t expect_ep;
int memctx, invalidated, ticks, bad_ep;
int memctx, invalidated, ticks, bad_ep, errors;
uint64_t last_kcr3, kcr3_e0;
uint32_t last_epoch, last_nseg;
int ro_ok, rw_eacces, seg0_ok;
@@ -66,6 +66,7 @@ static int h_on_ev(void* u, const vmsig_event* ev) {
holder* h = u;
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) h->ticks++;
else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) h->invalidated++;
else if (ev->kind == VMSIG_EV_ERROR) h->errors++; /* no boot-retry ERROR spam */
maybe_stop(h);
return 0;
}
@@ -393,6 +394,40 @@ static void test_ro_fd_ownership(void) {
if (fcntl(ro, F_GETFD) >= 0) close(ro); /* belt-and-braces if the assert failed */
}
/* ---- 7. cold-bootstrap retry: stub fails N times, then publishes via backoff ----- *
 * Regression for the cold-bootstrap-while-guest-boots bug: a failed bootstrap must NOT be
 * terminal nor emit URGENT ERROR — it arms a one-shot backoff timerfd that re-kicks the
 * bootstrap until it succeeds. fail_boots=3 makes the first three stub bootstraps fail
 * deterministically (no timing dependence). The adapter increments its failure counter
 * BEFORE arming the timer, so the real timerfd fires after ~100/200/400ms (the 50ms base
 * shifted by 1/2/3) and the 4th kick succeeds after roughly 0.7s — still sub-second.
 * vmhost is added (as test_multicast) for the ticks failsafe and a realistic loop; stop on
 * memctx>=1 (stop_epoch=-1). */
static void test_retry(void) {
printf("test_retry\n");
vmsig_ctx* ctx = vmsig_ctx_new();
vmsig_core* core = vmsig_core_new(ctx);
/* single driver holder for endpoint 0; stop_epoch=-1 means "stop on the first MEMCTX". */
holder h; memset(&h, 0, sizeof h);
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.stop_epoch = -1;
/* OBSERVE so vmhost lifecycle ticks reach maybe_stop (ticks>30 failsafe) and ERROR
 * (if any) is counted; MEMCTX cap to receive the published context. */
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
/* stub memctx adapter (no RAM path, no caller-supplied ro_fd) whose first three
 * bootstraps are forced to fail, driving the retry/backoff path. */
vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = -1; mc.fail_boots = 3;
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx (fail_boots=3)");
/* presumably returns once maybe_stop() ends the loop (MEMCTX seen, or the ticks failsafe). */
vmsig_core_run(core);
/* the context was eventually published with a non-zero kcr3, and no failed attempt
 * surfaced as a VMSIG_EV_ERROR (h.errors is counted in h_on_ev). */
CHECK(h.memctx >= 1, "MEMCTX published after a series of bootstrap failures (retry worked)");
CHECK(h.last_kcr3 != 0, "valid kcr3 after the successful retry");
CHECK(h.errors == 0, "no ERROR spam during boot retries");
vmsig_core_free(core);
vmsig_ctx_free(ctx);
}
int main(void) {
test_multicast();
test_epoch();
@@ -400,6 +435,7 @@ int main(void) {
test_multivm();
test_socket();
test_ro_fd_ownership();
test_retry();
printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS");
return g_fail ? 1 : 0;
}