mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-25 20:36:36 +03:00
Compare commits
5 Commits
d1aa09ecac
...
v0.3.7
| Author | SHA1 | Date | |
|---|---|---|---|
|
929bcf0e74
|
|||
|
3142337e62
|
|||
|
26e5ab4709
|
|||
|
6fea392d46
|
|||
|
0289817821
|
+41
-21
@@ -1,5 +1,8 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
project(vmsig VERSION 0.3.3 LANGUAGES C)
|
||||
# Single source of truth for the version: CI passes -DVMSIG_VERSION=${TAG#v}, so the project
|
||||
# version (-> libvgpu-perception SONAME/.so version) and the .deb version come from one tag.
|
||||
set(VMSIG_VERSION "0.3.7" CACHE STRING "Release version (MAJOR.MINOR.PATCH); CI passes the tag")
|
||||
project(vmsig VERSION ${VMSIG_VERSION} LANGUAGES C)
|
||||
|
||||
set(CMAKE_C_STANDARD 17)
|
||||
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||
@@ -62,6 +65,7 @@ add_library(vmsig SHARED
|
||||
src/control/socket.c
|
||||
src/discovery/slot.c
|
||||
src/discovery/linux/host_probe.c
|
||||
src/discovery/linux/mtree.c
|
||||
src/discovery/discovery.c
|
||||
# SI input driver (vmctl), absorbed in-tree (host-only: QMP + uinput)
|
||||
src/si/input/open.c
|
||||
@@ -117,16 +121,18 @@ add_executable(vmsig_cli src/cli.c)
|
||||
target_link_libraries(vmsig_cli PRIVATE vmsig)
|
||||
target_compile_options(vmsig_cli PRIVATE -Wall -Wextra)
|
||||
|
||||
# ---- vgpu-perception: host-side vgpu Sensor S-lib (absorbed in-tree) ---------
|
||||
# A SEPARATE shipped library (NOT fused into libvmsig — it is consumed by the shell, not the
|
||||
# signaling core). Host-only: reads the vgpu shared region from its own RO vmie_mem. Built
|
||||
# only when armed (needs vmie). The in-guest Windows producer (vgpu-streamer.exe) stays in a
|
||||
# separate repo and is NOT part of this delivery.
|
||||
# ---- vgpu-perception: host-side vgpu Sensor S-lib ---------------------------
|
||||
# Packaged SEPARATELY from the daemon (libvgpu-perception0 + -dev), NOT fused into libvmsig —
|
||||
# a Sensor lib consumed by a control/shell, not the signaling core. Host-only: reads the vgpu
|
||||
# shared region from its own RO vmie_mem. Built only when armed (needs vmie). The in-guest
|
||||
# Windows producer is the vgpu-streamer cross-target above (same tree, shared ABI vgpu_stream.h).
|
||||
if(VMSIG_WITH_VMIE)
|
||||
add_library(vgpu-perception SHARED
|
||||
src/si/vgpu-perception/discover.c
|
||||
src/si/vgpu-perception/sample.c
|
||||
src/si/vgpu-perception/control.c)
|
||||
set_target_properties(vgpu-perception PROPERTIES
|
||||
VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) # libvgpu-perception.so.0
|
||||
target_include_directories(vgpu-perception
|
||||
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/si/vgpu-perception/include)
|
||||
@@ -211,6 +217,15 @@ target_include_directories(vmsig_discoverytest PRIVATE
|
||||
target_compile_options(vmsig_discoverytest PRIVATE -Wall -Wextra)
|
||||
add_test(NAME discovery COMMAND vmsig_discoverytest)
|
||||
|
||||
add_executable(vmsig_mtreetest src/test/test_mtree.c)
|
||||
target_link_libraries(vmsig_mtreetest PRIVATE vmsig)
|
||||
target_include_directories(vmsig_mtreetest PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/discovery/include)
|
||||
target_compile_definitions(vmsig_mtreetest PRIVATE
|
||||
FIXTURE_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/test/fixtures")
|
||||
target_compile_options(vmsig_mtreetest PRIVATE -Wall -Wextra)
|
||||
add_test(NAME mtree COMMAND vmsig_mtreetest)
|
||||
|
||||
add_executable(vmsig_daemoncfgtest
|
||||
src/test/test_daemoncfg.c
|
||||
src/daemon/config.c
|
||||
@@ -265,25 +280,30 @@ add_test(NAME memwrite COMMAND vmsig_memwritetest)
|
||||
add_test(NAME cli COMMAND vmsig_cli)
|
||||
|
||||
# ---- install rules (for the .deb stage) -------------------------------------
|
||||
option(VMSIG_INSTALL "Generate install() rules (daemon/lib/headers/unit/config)" OFF)
|
||||
option(VMSIG_INSTALL "Generate install() rules (per-component, for the .deb stages)" OFF)
|
||||
if(VMSIG_INSTALL)
|
||||
include(GNUInstallDirs)
|
||||
install(TARGETS vmsigd RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR})
|
||||
install(TARGETS vmsig LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
if(TARGET vgpu-perception) # armed builds ship the host vgpu S-lib alongside
|
||||
install(TARGETS vgpu-perception LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
endif()
|
||||
# public contracts (signaling + absorbed SI host headers) under include/vmsig/
|
||||
# --- component `daemon`: the signaling delivery (package: vmsig). NO gpu lib here. ---
|
||||
install(TARGETS vmsigd RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR} COMPONENT daemon)
|
||||
install(TARGETS vmsig LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT daemon)
|
||||
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vmsig
|
||||
FILES_MATCHING PATTERN "vmsig*.h"
|
||||
PATTERN "vmctl.h"
|
||||
PATTERN "vgpu_stream.h"
|
||||
PATTERN "vgpu_perception.h")
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vmsig COMPONENT daemon
|
||||
FILES_MATCHING PATTERN "vmsig*.h" PATTERN "vmctl.h")
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/systemd/vmsigd.service
|
||||
DESTINATION lib/systemd/system)
|
||||
DESTINATION lib/systemd/system COMPONENT daemon)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/tmpfiles/vmsig.conf
|
||||
DESTINATION lib/tmpfiles.d)
|
||||
DESTINATION lib/tmpfiles.d COMPONENT daemon)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/config/vmsigd.conf
|
||||
DESTINATION /etc/vmsig)
|
||||
DESTINATION /etc/vmsig COMPONENT daemon)
|
||||
# --- the host vgpu perception S-lib, SEPARATE from the daemon: runtime (versioned .so,
|
||||
# package libvgpu-perception0) vs dev (namelink + headers, package libvgpu-perception-dev) ---
|
||||
if(TARGET vgpu-perception)
|
||||
install(TARGETS vgpu-perception
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT vgpu_runtime
|
||||
NAMELINK_COMPONENT vgpu_dev)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/vgpu_perception.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/vgpu_stream.h
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vmsig COMPONENT vgpu_dev)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -1,46 +1,60 @@
|
||||
# vmsig packaging — `make deb` builds the .deb over a `cmake --install` stage.
|
||||
# Private values are NOT baked into the tree: pass them via the variables below (the
|
||||
# defaults are neutral placeholders; CI overrides them from vars/secrets).
|
||||
# vmsig packaging — `make deb` builds TWO Debian packages from per-component install stages:
|
||||
# vmsig — the signaling daemon + library + headers + systemd unit
|
||||
# libvgpu-perception — the host-side vgpu perception S-lib (SEPARATE: not in vmsig)
|
||||
# Private values are NOT baked in: pass them via the variables below (CI overrides them).
|
||||
#
|
||||
# make deb LIBVMIE_PATH=/path/to/vmie VERSION=1.2.3 \
|
||||
# MAINTAINER="Name <addr>" DEPENDS="libc6, libvmie0"
|
||||
# make deb LIBVMIE_PATH=/path/to/vmie VERSION=1.2.3 MAINTAINER="Name <addr>"
|
||||
|
||||
VERSION ?= 0.0.0
|
||||
MAINTAINER ?= vmsig packaging <root@localhost>
|
||||
# libvmie0 is vmie's own runtime package (SONAME libvmie.so.0): libvmsig.so and
|
||||
# libvgpu-perception.so dynamically link it, so it is a HARD runtime dependency.
|
||||
DEPENDS ?= libc6, libvmie0
|
||||
ARCH ?= amd64
|
||||
VERSION ?= 0.0.0
|
||||
MAINTAINER ?= vmsig packaging <root@localhost>
|
||||
# libvmie0 is vmie's runtime package (SONAME libvmie.so.0): both libvmsig.so and
|
||||
# libvgpu-perception.so dynamically link it — a hard runtime dependency of each package.
|
||||
DEPENDS ?= libc6, libvmie0
|
||||
ARCH ?= amd64
|
||||
LIBVMIE_PATH ?=
|
||||
|
||||
BUILD_DIR ?= .build-pkg
|
||||
STAGE ?= $(CURDIR)/dist/stage
|
||||
DIST ?= $(CURDIR)/dist
|
||||
BUILD_DIR ?= .build-pkg
|
||||
DIST ?= $(CURDIR)/dist
|
||||
|
||||
.PHONY: deb clean
|
||||
|
||||
# Armed package: the shipped daemon needs vmie for memctx. vmie stays an external dependency
|
||||
# (package Depends on its runtime). vmie is found from a source tree (LIBVMIE_PATH) or, when
|
||||
# that is empty, from the installed libvmie-dev (system / CMAKE_PREFIX_PATH) — the CI path.
|
||||
# vmie is found from a source tree (LIBVMIE_PATH) or, when empty, the installed libvmie-dev
|
||||
# (system / CMAKE_PREFIX_PATH) — the CI path.
|
||||
deb:
|
||||
rm -rf $(STAGE)
|
||||
cmake -S . -B $(BUILD_DIR) -DCMAKE_BUILD_TYPE=Release -DVMSIG_INSTALL=ON -DVMSIG_WITH_VMIE=ON \
|
||||
$(if $(LIBVMIE_PATH),-DLIBVMIE_PATH=$(LIBVMIE_PATH),)
|
||||
-DVMSIG_VERSION=$(VERSION) $(if $(LIBVMIE_PATH),-DLIBVMIE_PATH=$(LIBVMIE_PATH),)
|
||||
cmake --build $(BUILD_DIR) -j
|
||||
DESTDIR=$(STAGE) cmake --install $(BUILD_DIR) --prefix /usr
|
||||
mkdir -p $(STAGE)/DEBIAN
|
||||
sed -e 's/@VERSION@/$(VERSION)/' \
|
||||
-e 's|@MAINTAINER@|$(MAINTAINER)|' \
|
||||
-e 's/@DEPENDS@/$(DEPENDS)/' \
|
||||
packaging/deb/control.in > $(STAGE)/DEBIAN/control
|
||||
cp packaging/deb/conffiles $(STAGE)/DEBIAN/conffiles
|
||||
install -m 0755 packaging/deb/postinst $(STAGE)/DEBIAN/postinst
|
||||
install -m 0755 packaging/deb/prerm $(STAGE)/DEBIAN/prerm
|
||||
# strip inherited setgid from staged dirs (a setgid build tree => dpkg-deb rejects DEBIAN)
|
||||
find $(STAGE) -type d -exec chmod g-s {} +
|
||||
mkdir -p $(DIST)
|
||||
dpkg-deb --root-owner-group --build $(STAGE) $(DIST)/vmsig_$(VERSION)_$(ARCH).deb
|
||||
@echo "built: $(DIST)/vmsig_$(VERSION)_$(ARCH).deb"
|
||||
# ---- package: vmsig (component `daemon`) ----
|
||||
rm -rf $(DIST)/stage-daemon
|
||||
DESTDIR=$(DIST)/stage-daemon cmake --install $(BUILD_DIR) --prefix /usr --component daemon
|
||||
mkdir -p $(DIST)/stage-daemon/DEBIAN
|
||||
sed -e 's/@VERSION@/$(VERSION)/' -e 's|@MAINTAINER@|$(MAINTAINER)|' -e 's/@DEPENDS@/$(DEPENDS)/' \
|
||||
packaging/deb/vmsig/control.in > $(DIST)/stage-daemon/DEBIAN/control
|
||||
cp packaging/deb/vmsig/conffiles $(DIST)/stage-daemon/DEBIAN/conffiles
|
||||
install -m 0755 packaging/deb/vmsig/postinst $(DIST)/stage-daemon/DEBIAN/postinst
|
||||
install -m 0755 packaging/deb/vmsig/prerm $(DIST)/stage-daemon/DEBIAN/prerm
|
||||
find $(DIST)/stage-daemon -type d -exec chmod g-s {} +
|
||||
dpkg-deb --root-owner-group --build $(DIST)/stage-daemon $(DIST)/vmsig_$(VERSION)_$(ARCH).deb
|
||||
# ---- package: libvgpu-perception0 (component `vgpu_runtime` — versioned .so) ----
|
||||
rm -rf $(DIST)/stage-vgpu0
|
||||
DESTDIR=$(DIST)/stage-vgpu0 cmake --install $(BUILD_DIR) --prefix /usr --component vgpu_runtime
|
||||
mkdir -p $(DIST)/stage-vgpu0/DEBIAN
|
||||
sed -e 's/@VERSION@/$(VERSION)/' -e 's|@MAINTAINER@|$(MAINTAINER)|' -e 's/@DEPENDS@/$(DEPENDS)/' \
|
||||
packaging/deb/vgpu0/control.in > $(DIST)/stage-vgpu0/DEBIAN/control
|
||||
install -m 0755 packaging/deb/vgpu0/postinst $(DIST)/stage-vgpu0/DEBIAN/postinst
|
||||
find $(DIST)/stage-vgpu0 -type d -exec chmod g-s {} +
|
||||
dpkg-deb --root-owner-group --build $(DIST)/stage-vgpu0 $(DIST)/libvgpu-perception0_$(VERSION)_$(ARCH).deb
|
||||
# ---- package: libvgpu-perception-dev (component `vgpu_dev` — namelink + headers) ----
|
||||
rm -rf $(DIST)/stage-vgpu-dev
|
||||
DESTDIR=$(DIST)/stage-vgpu-dev cmake --install $(BUILD_DIR) --prefix /usr --component vgpu_dev
|
||||
mkdir -p $(DIST)/stage-vgpu-dev/DEBIAN
|
||||
sed -e 's/@VERSION@/$(VERSION)/' -e 's|@MAINTAINER@|$(MAINTAINER)|' \
|
||||
-e 's/@DEPENDS@/libvgpu-perception0 (= $(VERSION))/' \
|
||||
packaging/deb/vgpu-dev/control.in > $(DIST)/stage-vgpu-dev/DEBIAN/control
|
||||
find $(DIST)/stage-vgpu-dev -type d -exec chmod g-s {} +
|
||||
dpkg-deb --root-owner-group --build $(DIST)/stage-vgpu-dev $(DIST)/libvgpu-perception-dev_$(VERSION)_$(ARCH).deb
|
||||
@echo "built: vmsig + libvgpu-perception0 + libvgpu-perception-dev ($(VERSION))"
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILD_DIR) $(DIST)
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
case "$1" in
|
||||
configure)
|
||||
ldconfig || true
|
||||
mkdir -p /etc/vmsig
|
||||
chmod 0640 /etc/vmsig/vmsigd.conf 2>/dev/null || true # carries the uid->grant policy
|
||||
mkdir -p /dev/shm/vmsig && chmod 0755 /dev/shm/vmsig # also (re)created at boot via tmpfiles
|
||||
if [ -d /run/systemd/system ]; then
|
||||
systemctl daemon-reload || true
|
||||
systemd-tmpfiles --create /usr/lib/tmpfiles.d/vmsig.conf || true
|
||||
systemctl enable vmsigd.service || true # enable, but do NOT start
|
||||
fi
|
||||
echo "vmsig: review the [grant] policy in /etc/vmsig/vmsigd.conf, then: systemctl start vmsigd" >&2
|
||||
;;
|
||||
abort-upgrade|abort-remove|abort-deconfigure)
|
||||
;;
|
||||
esac
|
||||
exit 0
|
||||
@@ -0,0 +1,10 @@
|
||||
Package: libvgpu-perception-dev
|
||||
Version: @VERSION@
|
||||
Section: libdevel
|
||||
Priority: optional
|
||||
Architecture: amd64
|
||||
Depends: @DEPENDS@
|
||||
Maintainer: @MAINTAINER@
|
||||
Description: Host-side vgpu perception library (development files)
|
||||
Headers (vgpu_perception.h, vgpu_stream.h) and the linker namelink for
|
||||
libvgpu-perception. Install this to build a control/shell against the perception API.
|
||||
@@ -0,0 +1,12 @@
|
||||
Package: libvgpu-perception0
|
||||
Version: @VERSION@
|
||||
Section: libs
|
||||
Priority: optional
|
||||
Architecture: amd64
|
||||
Depends: @DEPENDS@
|
||||
Maintainer: @MAINTAINER@
|
||||
Description: Host-side vgpu perception library
|
||||
Reads the in-guest vgpu shared region (frames, cursor, geometry) from the host over a
|
||||
read-only guest-RAM handle and exposes a perception API. A Sensor-layer library consumed
|
||||
by a control/shell, independent of the signaling daemon. This package ships the runtime
|
||||
shared object (libvgpu-perception.so.0).
|
||||
Executable
+10
@@ -0,0 +1,10 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
case "$1" in
|
||||
configure)
|
||||
ldconfig || true
|
||||
;;
|
||||
abort-upgrade|abort-remove|abort-deconfigure)
|
||||
;;
|
||||
esac
|
||||
exit 0
|
||||
@@ -5,9 +5,9 @@ Priority: optional
|
||||
Architecture: amd64
|
||||
Depends: @DEPENDS@
|
||||
Maintainer: @MAINTAINER@
|
||||
Description: VM signaling coherence daemon and host SI libraries
|
||||
Description: VM signaling coherence daemon
|
||||
vmsig serves a unix-socket control plane over the signaling layer for the VMs it
|
||||
discovers: lifecycle/state, coherent guest address-space context handoff, and arbitrated
|
||||
input and memory-write actuation. Ships the daemon (vmsigd), the signaling library, the
|
||||
host-side vgpu perception library, and a systemd unit. Configured via
|
||||
/etc/vmsig/vmsigd.conf.
|
||||
input and memory-write actuation. Ships the daemon (vmsigd), the signaling library, and a
|
||||
systemd unit. Configured via /etc/vmsig/vmsigd.conf. The host-side vgpu perception library
|
||||
is a separate package (libvgpu-perception).
|
||||
Executable
+29
@@ -0,0 +1,29 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
case "$1" in
|
||||
configure)
|
||||
ldconfig || true
|
||||
mkdir -p /etc/vmsig
|
||||
chmod 0640 /etc/vmsig/vmsigd.conf 2>/dev/null || true # carries the uid->grant policy
|
||||
mkdir -p /dev/shm/vmsig && chmod 0755 /dev/shm/vmsig # also (re)created at boot via tmpfiles
|
||||
if [ -d /run/systemd/system ]; then
|
||||
systemctl daemon-reload || true
|
||||
systemd-tmpfiles --create /usr/lib/tmpfiles.d/vmsig.conf || true
|
||||
systemctl enable vmsigd.service || true # enable, but do NOT start
|
||||
fi
|
||||
if [ -z "$2" ]; then
|
||||
# fresh install ($2 empty): enabled but NOT started — the operator reviews the
|
||||
# grant policy before the first start.
|
||||
echo "vmsig: review the [grant] policy in /etc/vmsig/vmsigd.conf, then: systemctl start vmsigd" >&2
|
||||
else
|
||||
# upgrade ($2 = old version): a running daemon keeps the OLD in-memory image until
|
||||
# restarted — the new build is not applied automatically. Not auto-restarted here:
|
||||
# the start is gated on the grant policy, so the operator owns the moment. try-restart
|
||||
# touches the daemon only if it is currently running (leaves a stopped one alone).
|
||||
echo "vmsig: upgraded from $2 — a running daemon still runs the old build; apply with: systemctl try-restart vmsigd" >&2
|
||||
fi
|
||||
;;
|
||||
abort-upgrade|abort-remove|abort-deconfigure)
|
||||
;;
|
||||
esac
|
||||
exit 0
|
||||
@@ -12,6 +12,9 @@ typedef struct {
|
||||
/* TRANSFERS to the adapter (closed in close()) — the */
|
||||
/* caller dups first if it must keep its own copy. */
|
||||
/* <0 => default: open(ram_path, O_RDONLY) / stub-memfd */
|
||||
uint32_t fail_boots; /* test-only: fail the first N stub bootstraps before */
|
||||
/* succeeding (drives the retry/backoff path deterministically */
|
||||
/* without timing dependence); 0 in production. stub path only. */
|
||||
} vmsig_memctx_cfg;
|
||||
|
||||
/* Max SRC bytes per atomic gva_write (bounds the worker POD slot; mc_req header + src
|
||||
|
||||
+115
-13
@@ -25,6 +25,7 @@
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/timerfd.h> /* one-shot backoff timer for cold-bootstrap retry */
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
#include "win32.h" /* vmie_win32_open/host_bootstrap/proc_list/close */
|
||||
@@ -54,6 +55,18 @@ static int memfd_create(const char* name, unsigned int flags) {
|
||||
#define MC_MAX_SEG 8
|
||||
#define MC_WORKER_DEPTH 16 /* one off-loop thread: rare bootstrap + writes */
|
||||
|
||||
/* Cold-bootstrap retry backoff (guest may still be booting when discovery attaches us;
|
||||
* host_bootstrap then finds no System process). Mirror of the discovery backoff so the
|
||||
* adapter stays decoupled from the discovery layer (Rule-of-three not reached): 50ms base,
|
||||
* exponential with the shift capped at 6, ceiling 2s steady-state. One-shot timerfd: armed
|
||||
* on failure, disarmed on success — no it_interval, no busy-wait. */
|
||||
#define MC_BOOT_BACKOFF_BASE 50000000ull /* 50 ms */
|
||||
#define MC_BOOT_BACKOFF_CAP 2000000000ull /* 2 s */
|
||||
|
||||
/* Adapter readiness fds are demuxed by per-slot cookie: slot 0 is the worker completion
|
||||
* eventfd, slot 1 is the one-shot backoff timerfd that re-kicks the bootstrap. */
|
||||
enum { MC_COOKIE_WORKER = 0, MC_COOKIE_RETRY = 1 };
|
||||
|
||||
enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
|
||||
|
||||
/* worker req/res (POD <= VMSIG_WORK_SLOT). One off-loop worker runs BOTH the cold
|
||||
@@ -63,7 +76,10 @@ enum { MC_JOB_BOOTSTRAP = 0, MC_JOB_WRITE = 1 };
|
||||
* target cr3 (0 => System DTB; resolved on the worker against a->kcr3). */
|
||||
typedef struct {
|
||||
uint32_t op; /* MC_JOB_* */
|
||||
uint32_t boot_count; /* MC_JOB_BOOTSTRAP */
|
||||
uint32_t boot_count; /* MC_JOB_BOOTSTRAP: drives the stub kcr3 per epoch */
|
||||
uint32_t attempt; /* MC_JOB_BOOTSTRAP: consecutive-failure index of THIS */
|
||||
/* kick (copy of a->boot_attempts); stub fails while */
|
||||
/* attempt < a->fail_boots. NOT the epoch counter. */
|
||||
/* --- MC_JOB_WRITE --- */
|
||||
uint64_t cr3; /* target AS root; 0 => a->kcr3 (kernel AS), resolved on worker */
|
||||
uint64_t gva;
|
||||
@@ -89,7 +105,12 @@ struct vmsig_adapter {
|
||||
vmsig_emit emit;
|
||||
int registered; /* register_memctx already called */
|
||||
vmsig_worker* worker; /* off-loop bootstrap + atomic writes */
|
||||
uint32_t boot_count; /* incremented on each (re-)bootstrap */
|
||||
uint32_t boot_count; /* incremented on each (re-)bootstrap (epoch tag) */
|
||||
|
||||
/* cold-bootstrap retry — loop-thread-only (attach/on_ready/invalidate/close). */
|
||||
int retry_fd; /* one-shot backoff timerfd (-1 when none) */
|
||||
uint32_t boot_attempts; /* consecutive bootstrap failures this cycle (0 = none); reset on success/epoch */
|
||||
uint32_t fail_boots; /* test-only: fail the first N stub bootstraps (cfg); set once in mc_open, then read-only (worker reads it) */
|
||||
|
||||
#ifdef VMSIG_WITH_VMIE
|
||||
vmie_win32* win; /* held RW handle across the epoch (kcr3 source + gva_write target) */
|
||||
@@ -109,6 +130,35 @@ struct vmsig_adapter {
|
||||
/* fwd: MEMWRITE completion ACK (defined below mc_submit; used in mc_on_ready demux). */
|
||||
static void mc_memwrite_ack(struct vmsig_adapter* a, int ok, uint32_t corr, uint32_t origin);
|
||||
|
||||
/* mirror of the discovery backoff; kept in this adapter to stay decoupled from the discovery
|
||||
* layer (Rule-of-three not reached). Exponential with a shift capped at 6, clamped to CAP. */
|
||||
static uint64_t mc_boot_backoff(uint32_t attempts) {
|
||||
uint64_t b = MC_BOOT_BACKOFF_BASE << (attempts < 6 ? attempts : 6);
|
||||
return b > MC_BOOT_BACKOFF_CAP ? MC_BOOT_BACKOFF_CAP : b;
|
||||
}
|
||||
|
||||
/* Arm the one-shot backoff timer (it_value only — no it_interval). Loop-thread-only.
|
||||
* Best-effort: a settime failure is logged, not fatal (matches discovery rearm). */
|
||||
static void mc_arm_retry(struct vmsig_adapter* a) {
|
||||
if (a->retry_fd < 0) return;
|
||||
uint64_t dt = mc_boot_backoff(a->boot_attempts);
|
||||
struct itimerspec its;
|
||||
memset(&its, 0, sizeof its);
|
||||
its.it_value.tv_sec = (time_t)(dt / 1000000000ull);
|
||||
its.it_value.tv_nsec = (long)(dt % 1000000000ull);
|
||||
if (timerfd_settime(a->retry_fd, 0, &its, NULL) != 0)
|
||||
fprintf(stderr, "vmsig memctx: endpoint %u retry timer arm failed\n", a->endpoint);
|
||||
}
|
||||
|
||||
/* Disarm the backoff timer (zero itimerspec). Loop-thread-only. Used on bootstrap success
|
||||
* and at epoch change so a stale arm from a prior failure cannot fire over a fresh cycle. */
|
||||
static void mc_disarm_retry(struct vmsig_adapter* a) {
|
||||
if (a->retry_fd < 0) return;
|
||||
struct itimerspec its;
|
||||
memset(&its, 0, sizeof its);
|
||||
(void)timerfd_settime(a->retry_fd, 0, &its, NULL);
|
||||
}
|
||||
|
||||
/* ---- stub RO-fd: memfd + deterministic contents + seal of future writes ---- */
|
||||
static int mc_make_stub_fd(uint32_t size) {
|
||||
int fd = memfd_create("vmsig_memctx", MFD_CLOEXEC | MFD_ALLOW_SEALING);
|
||||
@@ -184,6 +234,9 @@ static int mc_job(void* user, const void* req, void* res) {
|
||||
|
||||
/* MC_JOB_BOOTSTRAP */
|
||||
if (a->stub) {
|
||||
/* test-only: fail the first fail_boots attempts to exercise the retry path
|
||||
* deterministically (a->fail_boots is set once in open, read-only here). */
|
||||
if (rq->attempt < a->fail_boots) return -1;
|
||||
rs->kcr3 = 0xC0DE0000ull + (uint64_t)rq->boot_count * 0x1000ull; /* changes per epoch */
|
||||
return 0;
|
||||
}
|
||||
@@ -202,6 +255,7 @@ static void mc_kick_bootstrap(struct vmsig_adapter* a) {
|
||||
mc_req rq;
|
||||
memset(&rq, 0, sizeof rq);
|
||||
rq.op = MC_JOB_BOOTSTRAP; rq.boot_count = a->boot_count;
|
||||
rq.attempt = a->boot_attempts; /* failure index of this kick (loop-thread snapshot) */
|
||||
(void)vmsig_worker_submit(a->worker, &rq, sizeof rq); /* full => drop (rare) */
|
||||
}
|
||||
|
||||
@@ -232,6 +286,10 @@ static void mc_reg_invalidate(void* ctx, uint32_t epoch) {
|
||||
struct vmsig_adapter* a = ctx;
|
||||
(void)epoch; /* the core owns the epoch; the adapter must re-bootstrap */
|
||||
a->have_ctx = 0; /* the previous context is invalid */
|
||||
/* new cycle: drop a stale arm from the previous cycle and restart the failure counter at
|
||||
* zero so this bootstrap's backoff starts fresh (and the first-failure diagnostic re-arms). */
|
||||
a->boot_attempts = 0;
|
||||
mc_disarm_retry(a);
|
||||
mc_kick_bootstrap(a); /* off-loop; on_ready re-emits MEMCTX (new epoch) */
|
||||
}
|
||||
|
||||
@@ -247,11 +305,13 @@ static vmsig_adapter* mc_open(const void* cfg, uint32_t endpoint) {
|
||||
a->cfg_ro_fd = (c && c->ro_fd >= 0) ? c->ro_fd : -1;
|
||||
if (!a->ram_path && a->cfg_ro_fd < 0) a->stub = 1; /* no path/fd => stub */
|
||||
a->stub_fd = -1;
|
||||
a->retry_fd = -1;
|
||||
a->fail_boots = c ? c->fail_boots : 0; /* set once; read-only afterwards (worker reads) */
|
||||
return a;
|
||||
}
|
||||
|
||||
static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg, int cap) {
|
||||
if (cap < 1) return -1;
|
||||
if (cap < 2) return -1; /* worker eventfd + one-shot backoff timerfd */
|
||||
a->emit = *emit;
|
||||
|
||||
a->worker = vmsig_worker_new(mc_job, a, 1, MC_WORKER_DEPTH);
|
||||
@@ -262,11 +322,27 @@ static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg
|
||||
if (a->stub_fd < 0) { vmsig_worker_free(a->worker); a->worker = NULL; return -1; }
|
||||
}
|
||||
|
||||
/* worker completion-eventfd as the readiness source (cookie=0). */
|
||||
/* one-shot backoff timerfd: re-kicks the cold bootstrap when the guest is still booting.
|
||||
* Created here (loop-thread-only fd); armed on failure, disarmed on success. Rollback the
|
||||
* worker + stub_fd on failure, symmetric to mc_make_stub_fd above. */
|
||||
a->retry_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
|
||||
if (a->retry_fd < 0) {
|
||||
if (a->stub_fd >= 0) { close(a->stub_fd); a->stub_fd = -1; }
|
||||
vmsig_worker_free(a->worker); a->worker = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* worker completion-eventfd as the readiness source (cookie=worker). */
|
||||
reg[0].fd = vmsig_worker_evfd(a->worker);
|
||||
reg[0].epoll_events = EPOLLIN;
|
||||
reg[0].shape = VMSIG_RDY_EVENTFD;
|
||||
reg[0].cookie = 0;
|
||||
reg[0].cookie = MC_COOKIE_WORKER;
|
||||
|
||||
/* backoff timerfd as the second readiness source (cookie=retry). */
|
||||
reg[1].fd = a->retry_fd;
|
||||
reg[1].epoll_events = EPOLLIN;
|
||||
reg[1].shape = VMSIG_RDY_TIMERFD;
|
||||
reg[1].cookie = MC_COOKIE_RETRY;
|
||||
|
||||
/* register the reg BEFORE the first bootstrap: the core slot gets the hooks. describe
|
||||
* is not called until the slot is valid (which only happens after the first MEMCTX). */
|
||||
@@ -289,11 +365,23 @@ static int mc_attach(vmsig_adapter* a, const vmsig_emit* emit, vmsig_fd_reg* reg
|
||||
a->emit.emit(a->emit.token, &up);
|
||||
|
||||
mc_kick_bootstrap(a); /* first bootstrap off-loop; assemble the locator on completion */
|
||||
return 1;
|
||||
return 2; /* worker eventfd + backoff timerfd */
|
||||
}
|
||||
|
||||
static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
|
||||
(void)cookie; (void)events;
|
||||
(void)events; /* epoll flags carry nothing we need; the cookie selects the source */
|
||||
|
||||
/* retry timerfd fired: the guest was still booting; drain and re-kick the bootstrap.
|
||||
* Re-kick is a fresh MC_JOB_BOOTSTRAP into the SAME FIFO worker queue, so it serializes
|
||||
* behind any in-flight write — nothing extra to synchronize. */
|
||||
if (cookie == MC_COOKIE_RETRY) {
|
||||
uint64_t v;
|
||||
while (read(a->retry_fd, &v, sizeof v) == (ssize_t)sizeof v) { /* drain to EAGAIN */ }
|
||||
mc_kick_bootstrap(a);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* cookie == MC_COOKIE_WORKER: worker completion. */
|
||||
vmsig_worker_ack(a->worker);
|
||||
mc_res rs;
|
||||
int rc;
|
||||
@@ -304,18 +392,29 @@ static int mc_on_ready(vmsig_adapter* a, uint32_t cookie, uint32_t events) {
|
||||
continue;
|
||||
}
|
||||
if (rc != 0) {
|
||||
/* bootstrap failed: ERROR (source MEMCTX); do NOT publish an invalid kcr3. */
|
||||
vmsig_event er;
|
||||
memset(&er, 0, sizeof er);
|
||||
er.kind = VMSIG_EV_ERROR; er.source = VMSIG_SRC_MEMCTX; er.dir = VMSIG_DIR_UP;
|
||||
er.prio = VMSIG_PRIO_URGENT; er.endpoint = a->endpoint;
|
||||
a->emit.emit(a->emit.token, &er);
|
||||
/* bootstrap failed: the guest is likely still booting (host_bootstrap found no
|
||||
* System process). This is NOT a control-level error — do NOT emit VMSIG_EV_ERROR
|
||||
* (it would spam URGENT during a normal multi-second guest boot). Instead schedule a
|
||||
* backoff retry; the context simply stays unpublished until a kick succeeds. One
|
||||
* diagnostic line on the FIRST failure of the cycle (symmetric to the discovery
|
||||
* "never came up" note), not on every attempt. */
|
||||
if (a->boot_attempts == 0)
|
||||
fprintf(stderr, "vmsig memctx: endpoint %u bootstrap not ready yet, retrying\n",
|
||||
a->endpoint);
|
||||
a->boot_attempts++;
|
||||
mc_arm_retry(a); /* one-shot timer at mc_boot_backoff(boot_attempts) */
|
||||
continue;
|
||||
}
|
||||
/* assemble the locator on the loop thread from rs.kcr3. a->kcr3 is the gva_write
|
||||
* TARGET and is owned SOLELY by the worker thread (set in mc_bootstrap_armed, read by
|
||||
* MC_JOB_WRITE — same thread, FIFO happens-before); the loop must NOT also write it, or
|
||||
* an in-flight write at line ~170 would race it. cur_pod.kcr3 is loop-only (delivery). */
|
||||
|
||||
/* bootstrap succeeded: cancel any pending retry and reset the failure counter BEFORE
|
||||
* publishing, so a stale timer armed by a prior failure cannot fire over a live context. */
|
||||
a->boot_attempts = 0;
|
||||
mc_disarm_retry(a);
|
||||
|
||||
memset(&a->cur_pod, 0, sizeof a->cur_pod);
|
||||
a->cur_pod.kcr3 = rs.kcr3;
|
||||
a->cur_pod.low = a->low ? a->low : MC_STUB_SIZE;
|
||||
@@ -398,6 +497,9 @@ static void mc_close(vmsig_adapter* a) {
|
||||
if (a->win) vmie_win32_close(a->win); /* AFTER worker join: no in-flight gva_write */
|
||||
#endif
|
||||
if (a->stub_fd >= 0) close(a->stub_fd);
|
||||
/* one-shot backoff timerfd: never spawns a worker job, so its close is independent of the
|
||||
* worker join — same contract as stub_fd. The core already epoll_ctl(DEL)'d the slot. */
|
||||
if (a->retry_fd >= 0) close(a->retry_fd);
|
||||
/* ro_fd ownership transferred to the adapter at open(): close it here so a re-grant
|
||||
* (detach + re-attach with a fresh infra ro_fd) does not leak the prior one. Infra
|
||||
* that must keep its own copy dups before handing it in — symmetric to the holder
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
#ifndef VMSIG_MTREE_H
|
||||
#define VMSIG_MTREE_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Derive the below-4G split (== vmie `low`: low-RAM GPA bound AND high-RAM file offset)
|
||||
* from `info mtree -f` text. Operates on the system flatview only. FAIL-CLOSED: 0 if the
|
||||
* split cannot be derived with confidence. `text` is plain UTF-8 with real '\n'
|
||||
* (the caller un-escapes the QMP JSON string first). */
|
||||
uint64_t mtree_low_split(const char* text);
|
||||
|
||||
#endif /* VMSIG_MTREE_H */
|
||||
@@ -5,6 +5,7 @@
|
||||
* leaves ok=0 (the VM is not brought up rather than guessed). */
|
||||
#define _GNU_SOURCE
|
||||
#include "host_probe.h"
|
||||
#include "mtree.h" /* mtree_low_split */
|
||||
#include "vmsig_event.h" /* VMSIG_VM_* */
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@@ -174,29 +175,39 @@ static int qmp_status_word(const char* buf) {
|
||||
return VMSIG_VM_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Derive the below-4G split from `info mtree` text: the size of the RAM region whose guest
|
||||
* physical range starts at address 0. Standard QEMU split-RAM layout puts low RAM at
|
||||
* [0, low) and high RAM above 4G at file offset @low. FAIL-CLOSED: 0 if not found.
|
||||
* NOTE: parses HMP text (not a stable QMP schema) — verify against real `info mtree` output. */
|
||||
static uint64_t mtree_low(const char* ret) {
|
||||
/* The return is a JSON string; lines inside are escaped "\n". Scan for the GPA-0 ram run:
|
||||
* " 0000000000000000-<end16> (prio N, ram): ..." */
|
||||
const char* p = ret;
|
||||
while ((p = strstr(p, "0000000000000000-")) != NULL) {
|
||||
const char* end_hex = p + 17; /* 16 zeros + '-' */
|
||||
char* stop = NULL;
|
||||
unsigned long long end = strtoull(end_hex, &stop, 16);
|
||||
/* the descriptor after the range must mark it RAM (not the i/o "system" root) */
|
||||
const char* tail = stop ? stop : end_hex;
|
||||
const char* nl = strstr(tail, "\\n");
|
||||
const char* lim = nl ? nl : (tail + 64);
|
||||
int is_ram = 0;
|
||||
for (const char* q = tail; q < lim && *q; q++)
|
||||
if (!strncmp(q, "ram)", 4)) { is_ram = 1; break; }
|
||||
if (is_ram && end > 0 && end != ~0ull) return end + 1ull; /* [0, end] => low=end+1 */
|
||||
p = end_hex;
|
||||
/* Extract the JSON string value of "return" from an HMP-over-QMP reply and decode its
 * transport escapes (\n \t \r \" \\ \/) into a NUL-terminated plain-text buffer. The
 * `info mtree -f` output is one JSON string with embedded escaped newlines; un-escaping is
 * a transport detail of HMP-over-QMP and belongs here (next to the QMP code), so the split
 * parser (mtree_low_split) can work on human-readable text with real '\n'.
 *
 *   buf  NUL-terminated QMP reply text.
 *   out  destination buffer. It may alias `buf` (in-place decode): the value starts after
 *        the `"return":"` prefix and every escape shortens the text, so the write cursor
 *        never overtakes the read cursor.
 *   cap  capacity of `out` in bytes; at most cap-1 decoded bytes are stored, then a NUL.
 *
 * Decoding stops at the closing quote, at the end of `buf`, or when `out` is full; in all
 * three cases whatever was decoded so far is NUL-terminated and reported as success.
 * Unknown escapes are taken literally (the backslash is dropped).
 *
 * Returns 1 on success, 0 if no "return" string is present or the arguments are unusable
 * (NULL pointer, zero capacity). */
static int qmp_return_plain(const char* buf, char* out, size_t cap) {
    if (!buf || !out || cap == 0) return 0;   /* nothing to read / nowhere to put the NUL */

    const char* r = strstr(buf, "\"return\"");
    if (!r) return 0;
    r = strchr(r, ':'); if (!r) return 0;
    r = strchr(r, '"'); if (!r) return 0;     /* opening quote of the string value */
    r++;

    size_t o = 0;
    for (; *r && o + 1 < cap; r++) {
        char c = *r;
        if (c == '"') break;                  /* closing quote */
        if (c == '\\' && r[1]) {
            r++;
            switch (*r) {
            case 'n':  c = '\n'; break;
            case 't':  c = '\t'; break;
            case 'r':  c = '\r'; break;
            case '"':  c = '"';  break;
            case '\\': c = '\\'; break;
            case '/':  c = '/';  break;
            default:   c = *r;   break;       /* unknown escape: take it literally */
            }
        }
        out[o++] = c;
    }
    /* Terminate and report success. (A stray early `return 0;` here used to make the
     * function always fail and leave `out` unterminated.) */
    out[o] = 0;
    return 1;
}
|
||||
|
||||
static int hp_live(const struct vmsig_host_probe* p, vmsig_host_facts* io) {
|
||||
@@ -221,7 +232,9 @@ static int hp_live(const struct vmsig_host_probe* p, vmsig_host_facts* io) {
|
||||
if (qmp_cmd(fd,
|
||||
"{\"execute\":\"human-monitor-command\","
|
||||
"\"arguments\":{\"command-line\":\"info mtree -f\"}}\n", buf, 256 * 1024) == 1) {
|
||||
io->low = mtree_low(buf);
|
||||
/* un-escape the HMP string in place (it only shrinks), then parse the split */
|
||||
if (qmp_return_plain(buf, buf, 256 * 1024))
|
||||
io->low = mtree_low_split(buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
/* mtree.c — derive the below-4G split (vmie `low`) from `info mtree -f` text.
|
||||
*
|
||||
* `low` is one number with two meanings (see vmie low_segs): the GPA bound of low-RAM
|
||||
* ([0,low) maps 1:1 to file[0,low)) AND the file offset at which RAM resumes above 4 GiB
|
||||
* (GPA 4GiB -> file[low]). The robust signal for it is therefore the `@<file_off>` suffix
|
||||
* of the high-RAM ram region (GPA >= 4 GiB): that offset IS `low` by construction.
|
||||
*
|
||||
* Low-RAM below 4 GiB is fragmented (Hyper-V synic overlays, smbase/tseg blackhole i/o
|
||||
* holes, rom holes), so "end of the first contiguous ram run" is NOT a reliable split.
|
||||
* We never trust it. Primary signal: high-RAM `@offset`. Cross-validator / fallback:
|
||||
* the start GPA of the first non-ram region at or above the standard PCI-hole base
|
||||
* (0x80000000) — the bottom of the 4 GiB PCI hole, which equals `low` for the classic
|
||||
* single-`low` layout. The two must agree when both are present; otherwise fail-closed.
|
||||
*
|
||||
* Pure text, line by line, no allocation beyond the input, no I/O. FAIL-CLOSED: any
|
||||
* unexpected/incomplete input yields 0 ("not found"); 0 is reserved for that. */
|
||||
#include "mtree.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Standard QEMU/i440fx/q35 PCI-hole base (bottom of the 4 GiB hole). Used ONLY as the
|
||||
* lower cutoff for the cross-validator/fallback, never hardcoded as the answer. */
|
||||
#define PCI_HOLE_BASE 0x80000000ull
|
||||
/* 4 GiB: high-RAM (the ram region carrying `@low`) starts at or above this GPA. */
|
||||
#define RAM_HIGH_BASE 0x100000000ull
|
||||
|
||||
/* Value of one hex digit (either case), or -1 if `c` is not a hex digit. */
static int hex_digit(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

/* Parse exactly `n` hex digits at p into *out. Returns the char past the last digit, or
 * NULL if there are not n hex digits (no partial consume: *out is untouched on failure).
 * Scanning stops at the first non-hex byte, so it never runs past a NUL or a newline. */
static const char* parse_hexn(const char* p, int n, uint64_t* out) {
    uint64_t v = 0;
    for (int i = 0; i < n; i++) {
        int d = hex_digit(p[i]);
        if (d < 0) return NULL;
        v = (v << 4) | (uint64_t)d;
    }
    *out = v;
    return p + n;
}

/* One region line of a flatview body, e.g.
 *   "  0000000100000000-000000027fffffff (prio 0, ram): ram0 @0000000080000000 KVM"
 * Two leading spaces, 16-hex start, '-', 16-hex end, " (prio <N>, <flag>): <rest>". */
typedef struct {
    uint64_t start_gpa;
    int      is_ram;    /* flag is exactly "ram" (not ramd/romd/rom/i/o/container) */
    int      has_off;   /* a "@<hex>" suffix was present in the descriptor */
    uint64_t file_off;  /* value of that suffix */
} region_line;

/* Parse the region line occupying [line, nl), where `nl` points at the line terminator
 * ('\n', or the end of the body). Fills out->start_gpa, out->is_ram and, when a "@<hex>"
 * suffix follows the "(prio N, flag)" descriptor, out->file_off with out->has_off = 1.
 *
 * The "@<hex>" suffix is decoded strictly inside the line: the hex digits must follow the
 * '@' immediately and must end before `nl`. (strtoull is deliberately NOT used: it skips
 * leading whitespace including '\n', so a dangling '@' at the end of a line would pick up
 * the NEXT line's start address as the file offset, and it also accepts sign and "0x"
 * prefixes.) A value that does not fit in 64 bits saturates to UINT64_MAX, which the
 * caller treats as degenerate.
 *
 * Returns 1 on a well-formed region line, 0 otherwise (not a region line for us); `*out`
 * is only meaningful when 1 is returned. */
static int parse_region_line(const char* line, const char* nl, region_line* out) {
    if (!line || !nl || !out || nl - line < 2) return 0;

    /* leading "  " then 16 hex, '-', 16 hex */
    if (line[0] != ' ' || line[1] != ' ') return 0;
    const char* p = line + 2;
    uint64_t start, end;
    p = parse_hexn(p, 16, &start);
    if (!p || *p != '-') return 0;
    p++;
    p = parse_hexn(p, 16, &end);
    if (!p || p > nl) return 0;               /* range must lie inside this line */

    /* " (prio <N>, <flag>):" — find the flag between ", " and ")". */
    if (strncmp(p, " (prio ", 7) != 0) return 0;
    const char* comma = memchr(p, ',', (size_t)(nl - p));
    if (!comma) return 0;
    const char* flag = comma + 1;
    while (flag < nl && *flag == ' ') flag++;
    const char* rparen = memchr(flag, ')', (size_t)(nl - flag));
    if (!rparen) return 0;
    size_t flen = (size_t)(rparen - flag);

    out->start_gpa = start;
    out->is_ram    = (flen == 3 && strncmp(flag, "ram", 3) == 0) ? 1 : 0;

    /* optional "@<hex>" in the descriptor tail (after "): "), bounded by the line end */
    out->has_off  = 0;
    out->file_off = 0;
    const char* at = memchr(rparen, '@', (size_t)(nl - rparen));
    if (at) {
        uint64_t v = 0;
        int ndigits = 0, overflow = 0;
        for (const char* d = at + 1; d < nl; d++) {
            int h = hex_digit(*d);
            if (h < 0) break;
            if (v >> 60) overflow = 1;        /* the next shift would drop set bits */
            v = (v << 4) | (uint64_t)h;
            ndigits++;
        }
        if (ndigits > 0) {
            out->has_off  = 1;
            out->file_off = overflow ? UINT64_MAX : v;
        }
    }
    return 1;
}
|
||||
|
||||
/* Find the body of the system flatview: everything after the line that carries
 * "Root memory region: system", up to the start of the next "FlatView #" line (or the end
 * of the text). The root name must be followed by CR, LF, a space or the end of the
 * string, so longer names such as "system.flash0" are not mistaken for it (QEMU's HMP
 * output is CRLF, hence '\r' is accepted alongside '\n').
 *
 * Returns a pointer to the first body line and stores the exclusive end in *body_end;
 * returns NULL when no such root line exists or it has no following line. */
static const char* find_system_flatview(const char* text, const char** body_end) {
    static const char k_root[] = "Root memory region: system";
    const size_t root_len = sizeof k_root - 1;

    for (const char* hit = strstr(text, k_root);
         hit != NULL;
         hit = strstr(hit + root_len, k_root)) {
        const char term = hit[root_len];
        if (term != '\0' && term != '\n' && term != '\r' && term != ' ')
            continue;                         /* e.g. "system.flash0": keep looking */

        const char* eol = strchr(hit, '\n');
        if (eol == NULL) return NULL;         /* root line is the last line: no body */

        const char* first = eol + 1;          /* first region line */
        const char* next_view = strstr(first, "\nFlatView #");
        if (next_view != NULL)
            *body_end = next_view + 1;
        else
            *body_end = first + strlen(first);
        return first;
    }
    return NULL;
}
|
||||
|
||||
/* Primary signal: file offset (`@hex`) of the first ram region whose start GPA >= 4 GiB.
|
||||
* Returns 1 and sets *off when found, 0 otherwise. */
|
||||
static int high_ram_offset(const char* body, const char* end, uint64_t* off) {
|
||||
const char* p = body;
|
||||
while (p < end) {
|
||||
const char* nl = memchr(p, '\n', (size_t)(end - p));
|
||||
const char* line_end = nl ? nl : end;
|
||||
region_line r;
|
||||
if (parse_region_line(p, line_end, &r) &&
|
||||
r.is_ram && r.start_gpa >= RAM_HIGH_BASE && r.has_off) {
|
||||
*off = r.file_off;
|
||||
return 1;
|
||||
}
|
||||
if (!nl) break;
|
||||
p = nl + 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Cross-validator / fallback: start GPA of the first non-ram region at or above the
|
||||
* PCI-hole base (the bottom of the 4 GiB hole == low for the classic layout). Returns 1
|
||||
* and sets *base when found, 0 otherwise. Blackhole holes below 0x80000000 are skipped
|
||||
* by the lower cutoff. */
|
||||
static int pci_hole_start(const char* body, const char* end, uint64_t* base) {
|
||||
const char* p = body;
|
||||
while (p < end) {
|
||||
const char* nl = memchr(p, '\n', (size_t)(end - p));
|
||||
const char* line_end = nl ? nl : end;
|
||||
region_line r;
|
||||
if (parse_region_line(p, line_end, &r) &&
|
||||
!r.is_ram && r.start_gpa >= PCI_HOLE_BASE && r.start_gpa < RAM_HIGH_BASE) {
|
||||
*base = r.start_gpa;
|
||||
return 1;
|
||||
}
|
||||
if (!nl) break;
|
||||
p = nl + 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t mtree_low_split(const char* text) {
|
||||
if (!text) return 0;
|
||||
|
||||
const char* body_end = NULL;
|
||||
const char* body = find_system_flatview(text, &body_end);
|
||||
if (!body) return 0; /* no system AS => fail-closed */
|
||||
|
||||
uint64_t off = 0, base = 0;
|
||||
int have_off = high_ram_offset(body, body_end, &off);
|
||||
int have_base = pci_hole_start(body, body_end, &base);
|
||||
|
||||
if (have_off) {
|
||||
if (off == 0 || off == ~0ull) return 0; /* degenerate offset */
|
||||
/* cross-validate against the PCI-hole base when we have one */
|
||||
if (have_base && base != off) return 0; /* layout anomaly => fail-closed */
|
||||
return off; /* primary signal */
|
||||
}
|
||||
|
||||
/* No high-RAM (guest RAM all below 4 GiB): fall back to the PCI-hole base, but only
|
||||
* at or above the standard base so blackhole holes can never be mistaken for it. */
|
||||
if (have_base && base >= PCI_HOLE_BASE) return base;
|
||||
|
||||
return 0; /* nothing trustworthy */
|
||||
}
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
FlatView #0
|
||||
AS "cpu-smm-0", root: mem-container-smram
|
||||
Root memory region: mem-container-smram
|
||||
0000000000000000-0000000000017fff (prio 0, ram): ram0
|
||||
0000000000018000-0000000000018fff (prio 0, ram): synic-0-msg-page
|
||||
000000000001c000-000000007fffffff (prio 0, ram): ram0 @000000000001c000
|
||||
0000000080000000-0000000081ffffff (prio 0, i/o): vfio-pci-bar3
|
||||
0000000100000000-000000017fffffff (prio 0, ram): ram0 @0000000040000000
|
||||
|
||||
FlatView #1
|
||||
AS "I/O", root: io
|
||||
Root memory region: io
|
||||
0000000000000000-0000000000000007 (prio 0, i/o): dma-chan
|
||||
0000000000000060-0000000000000060 (prio 0, i/o): i8042-data
|
||||
0000000000000064-0000000000000064 (prio 0, i/o): i8042-cmd
|
||||
|
||||
FlatView #2
|
||||
AS "memory", root: system
|
||||
AS "cpu-memory-0", root: system
|
||||
Root memory region: system
|
||||
0000000000000000-0000000000017fff (prio 0, ram): ram0
|
||||
0000000000018000-0000000000018fff (prio 0, ram): synic-0-msg-page
|
||||
0000000000019000-0000000000019fff (prio 0, ram): synic-1-msg-page
|
||||
000000000001a000-000000000001afff (prio 0, ram): synic-2-msg-page
|
||||
000000000001b000-000000000001bfff (prio 0, ram): synic-3-msg-page
|
||||
000000000001c000-000000000002ffff (prio 0, ram): ram0 @000000000001c000
|
||||
0000000000030000-000000000004ffff (prio 1, i/o): smbase-blackhole
|
||||
0000000000050000-00000000000bffff (prio 0, ram): ram0 @0000000000050000
|
||||
00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
|
||||
00000000000e0000-00000000000fffff (prio 0, rom): system.flash0 @000000000035c000
|
||||
0000000000100000-000000007bffffff (prio 0, ram): ram0 @0000000000100000
|
||||
000000007c000000-000000007fffffff (prio 1, i/o): tseg-blackhole
|
||||
0000000080000000-0000000081ffffff (prio 0, i/o): vfio-pci-bar3
|
||||
0000000082000000-0000000082087fff (prio 0, i/o): vfio-pci-bar0
|
||||
00000000e0000000-00000000efffffff (prio 0, i/o): pcie-mmcfg-mmio
|
||||
00000000fec00000-00000000fec00fff (prio 0, i/o): kvm-ioapic
|
||||
00000000ffc00000-00000000ffc83fff (prio 0, romd): system.flash1
|
||||
0000000100000000-000000027fffffff (prio 0, ram): ram0 @0000000080000000
|
||||
|
||||
FlatView #3
|
||||
AS "pci_bridge_io", root: pci_bridge_io
|
||||
Root memory region: pci_bridge_io
|
||||
+37
-1
@@ -42,7 +42,7 @@ struct holder {
|
||||
holder* peer; /* multi-VM: stop when both are ready (or NULL) */
|
||||
int is_driver; /* stops the loop on a condition */
|
||||
uint32_t expect_ep;
|
||||
int memctx, invalidated, ticks, bad_ep;
|
||||
int memctx, invalidated, ticks, bad_ep, errors;
|
||||
uint64_t last_kcr3, kcr3_e0;
|
||||
uint32_t last_epoch, last_nseg;
|
||||
int ro_ok, rw_eacces, seg0_ok;
|
||||
@@ -66,6 +66,7 @@ static int h_on_ev(void* u, const vmsig_event* ev) {
|
||||
holder* h = u;
|
||||
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) h->ticks++;
|
||||
else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) h->invalidated++;
|
||||
else if (ev->kind == VMSIG_EV_ERROR) h->errors++; /* no boot-retry ERROR spam */
|
||||
maybe_stop(h);
|
||||
return 0;
|
||||
}
|
||||
@@ -393,6 +394,40 @@ static void test_ro_fd_ownership(void) {
|
||||
if (fcntl(ro, F_GETFD) >= 0) close(ro); /* belt-and-braces if the assert failed */
|
||||
}
|
||||
|
||||
/* ---- 7. cold-bootstrap retry: stub fails N times, then publishes via backoff ----- *
|
||||
* Regression for the cold-bootstrap-while-guest-boots bug: a failed bootstrap must NOT be
|
||||
* terminal nor emit URGENT ERROR — it arms a one-shot backoff timerfd that re-kicks the
|
||||
* bootstrap until it succeeds. fail_boots=3 makes the first three stub bootstraps fail
|
||||
* deterministically (no timing dependence); the real timerfd fires at ~50/100/200ms, so the
|
||||
* 4th kick succeeds sub-second. vmhost is added (as test_multicast) for the ticks failsafe
|
||||
* and a realistic loop; stop on memctx>=1 (stop_epoch=-1). */
|
||||
static void test_retry(void) {
|
||||
printf("test_retry\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder h; memset(&h, 0, sizeof h);
|
||||
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.stop_epoch = -1;
|
||||
/* OBSERVE so vmhost lifecycle ticks reach maybe_stop (ticks>30 failsafe) and ERROR
|
||||
* (if any) is counted; MEMCTX cap to receive the published context. */
|
||||
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||
|
||||
vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
|
||||
mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = -1; mc.fail_boots = 3;
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx (fail_boots=3)");
|
||||
|
||||
vmsig_core_run(core);
|
||||
|
||||
CHECK(h.memctx >= 1, "MEMCTX published after a series of bootstrap failures (retry worked)");
|
||||
CHECK(h.last_kcr3 != 0, "valid kcr3 after the successful retry");
|
||||
CHECK(h.errors == 0, "no ERROR spam during boot retries");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
test_multicast();
|
||||
test_epoch();
|
||||
@@ -400,6 +435,7 @@ int main(void) {
|
||||
test_multivm();
|
||||
test_socket();
|
||||
test_ro_fd_ownership();
|
||||
test_retry();
|
||||
printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
/* test_mtree.c — unit tests for mtree_low_split (the below-4G split parser). Pure text in,
|
||||
* number out; no QMP/transport. The fragmented fixture reproduces the structural traps the
|
||||
* old heuristic tripped on (Hyper-V synic overlays, smbase/tseg blackhole holes, rom holes)
|
||||
* plus a decoy non-system flatview that carries its OWN GPA-0 stub and a DIFFERENT @offset,
|
||||
* proving the system address space is selected (not "first match in the text"). */
|
||||
#define _GNU_SOURCE
|
||||
#include "mtree.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef FIXTURE_DIR
|
||||
#define FIXTURE_DIR "."
|
||||
#endif
|
||||
|
||||
static int g_fail = 0;
|
||||
#define CHECK(cond, msg) do { if (!(cond)) { printf(" FAIL: %s\n", (msg)); g_fail = 1; } } while (0)
|
||||
|
||||
/* Read the whole file at `path` into a freshly malloc'ed, NUL-terminated buffer.
 * The size is taken from ftell() at end-of-file; if fread() delivers fewer bytes, the
 * terminator is placed after what was actually read. Caller frees the result.
 * Returns NULL if the file cannot be opened, sized, or the buffer cannot be allocated. */
static char* slurp(const char* path) {
    FILE* fp = fopen(path, "rb");
    if (fp == NULL) return NULL;

    char* data = NULL;
    if (fseek(fp, 0, SEEK_END) == 0) {
        const long len = ftell(fp);
        if (len >= 0) {
            rewind(fp);
            data = malloc((size_t)len + 1);
            if (data != NULL) {
                const size_t nread = fread(data, 1, (size_t)len, fp);
                data[nread] = 0;
            }
        }
    }

    fclose(fp);   /* single exit: the stream is closed on every path */
    return data;
}
|
||||
|
||||
/* Return a heap copy of `lf` in which every '\n' is preceded by an inserted '\r'
 * (QEMU's HMP output is CRLF). Existing '\r' bytes are copied through unchanged.
 * Caller frees the result; NULL on allocation failure. */
static char* to_crlf(const char* lf) {
    /* First pass: measure the input and count the line feeds that will grow by one. */
    size_t len = 0, newlines = 0;
    while (lf[len] != '\0') {
        if (lf[len] == '\n') newlines++;
        len++;
    }

    char* out = malloc(len + newlines + 1);
    if (out == NULL) return NULL;

    /* Second pass: copy, inserting '\r' in front of each '\n'. */
    size_t w = 0;
    for (size_t i = 0; i < len; i++) {
        if (lf[i] == '\n') out[w++] = '\r';
        out[w++] = lf[i];
    }
    out[w] = '\0';
    return out;
}
|
||||
|
||||
/* Case B: a minimal, NON-fragmented system flatview — one big GPA-0 ram run plus high-RAM
|
||||
* carrying @<low>. Must not be broken by the new parser. */
|
||||
static const char* k_happy =
|
||||
"FlatView #0\n"
|
||||
" AS \"memory\", root: system\n"
|
||||
" Root memory region: system\n"
|
||||
" 0000000000000000-000000007fffffff (prio 0, ram): ram0\n"
|
||||
" 0000000080000000-0000000081ffffff (prio 0, i/o): vfio-pci-bar3\n"
|
||||
" 0000000100000000-000000017fffffff (prio 0, ram): ram0 @0000000080000000\n";
|
||||
|
||||
/* Case C: text without any system flatview => fail-closed. */
|
||||
static const char* k_no_system =
|
||||
"FlatView #0\n"
|
||||
" AS \"I/O\", root: io\n"
|
||||
" Root memory region: io\n"
|
||||
" 0000000000000000-0000000000000007 (prio 0, i/o): dma-chan\n";
|
||||
|
||||
int main(void) {
|
||||
printf("test_mtree\n");
|
||||
|
||||
/* Cases A and E: the fragmented fixture (decoy first, system second). */
|
||||
char path[1024];
|
||||
snprintf(path, sizeof path, "%s/mtree_split_fragmented.txt", FIXTURE_DIR);
|
||||
char* frag = slurp(path);
|
||||
CHECK(frag != NULL, "fragmented fixture loaded");
|
||||
if (frag) {
|
||||
uint64_t low = mtree_low_split(frag);
|
||||
/* A: fragmented low-RAM must NOT yield the GPA-0 stub end (0x18000) — the bug. */
|
||||
CHECK(low == 0x80000000ull, "A: fragmented split == 0x80000000");
|
||||
CHECK(low != 0x18000ull, "A: not the GPA-0 stub end (0x18000)");
|
||||
/* E: the decoy (non-system) flatview comes FIRST and carries @0x40000000; the
|
||||
* function must select the SYSTEM flatview (@0x80000000), not the decoy. */
|
||||
CHECK(low != 0x40000000ull, "E: decoy flatview @offset rejected (system AS chosen)");
|
||||
/* F: real QEMU HMP output is CRLF. The parser MUST tolerate '\r' — a synthetic
|
||||
* LF-only fixture hid this, so the shipped parser returned 0 on the real VM mtree
|
||||
* (-> low=0 -> VM never attached). Regression guard, independent of how git stores
|
||||
* the fixture's line endings. */
|
||||
char* frag_crlf = to_crlf(frag);
|
||||
CHECK(frag_crlf != NULL, "F: CRLF copy allocated");
|
||||
if (frag_crlf) {
|
||||
CHECK(mtree_low_split(frag_crlf) == 0x80000000ull, "F: CRLF fragmented split == 0x80000000");
|
||||
free(frag_crlf);
|
||||
}
|
||||
free(frag);
|
||||
}
|
||||
|
||||
/* Case B: happy path (non-fragmented) still resolves to the high-RAM @offset. */
|
||||
CHECK(mtree_low_split(k_happy) == 0x80000000ull, "B: non-fragmented happy path == 0x80000000");
|
||||
|
||||
/* Case C: no system flatview => 0. */
|
||||
CHECK(mtree_low_split(k_no_system) == 0, "C: no system flatview => fail-closed 0");
|
||||
|
||||
/* Case D: garbage / empty => 0. */
|
||||
CHECK(mtree_low_split("") == 0, "D: empty text => 0");
|
||||
CHECK(mtree_low_split("not an mtree at all\n") == 0, "D: junk text => 0");
|
||||
|
||||
printf("mtree tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
Reference in New Issue
Block a user