Files
vatrog-vm-signaling/src/si/vgpu-perception/discover.c
T
lirent 9bde398b6c vmsig: management daemon, runtime endpoint lifecycle, roster, discovery, in-tree drivers, packaging
- core: runtime attach/detach of a per-endpoint adapter trio (runtime-safe add_adapter + vmsig_core_detach_endpoint, deferred reap)
- roster: VMSIG_EV_ROSTER + CAP_ROSTER, retained per-endpoint and replayed to late subscribers
- discovery: inotify trigger dir, vmid/endpoint slot allocator, host probe; vmsigd daemon with config + per-uid admission
- input driver and vgpu perception built in-tree; vgpu perception as a separate library
- memctx: own the supplied ro_fd (closed at detach)
- deb packaging: install rules, systemd unit, tmpfiles, default config
2026-06-22 17:25:06 +03:00

171 lines
6.9 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* discover.c — process discovery + user-AS region scan (NO magic) + handle.
*
* The region is a RW shared mapping projected into the USER address space of a
* producer PROCESS — NOT a kernel VA in the System address space. So discovery
* works by PROCESS: enumerate processes (proc_list) over the RO win32 context,
* and for each one scan its user-AS under process.cr3 in [USER_MIN, USER_MAX]
* for a contiguous RW run >= VGPU_REGION_BYTES, read the producer block at its
* base, and accept it iff the whole structural-invariant table holds. The System
* kcr3 is needed only to open the context and walk processes (the caller already
* baked it into v); the region itself is always read under the producer's cr3.
*
* There is NO magic field in the ABI and the owner forbids inventing one. The
* discriminator is the cheap RW-run filter + the invariant table + two-phase
* heartbeat liveness — and the inter-phase WAIT is the caller's (the core never
* sleeps). Discovery is STRUCTURAL: never filtered by process.name.
*
* Layering: the win32 dependency (proc_list, vmie_win32_mem) lives ONLY in this
* file, in the per-process loop. The per-cr3 scan (vgpup_scan_user_as_for_region)
* is pure gva_* so it stays win32-agnostic and unit-testable under a synthetic
* cr3. A <0 read after binding means the producer process may have restarted
* (its pages are gone); the core only reports it — re-discovery is the caller's.
*/
#include <stdlib.h>
#include "perception-internal.h"
/* Capacity of the run buffer handed to gva_regions when probing one process's
 * user-AS. A user address space has many runs; this is generous, and the scan
 * early-exits on the first accepted candidate anyway. If gva_regions reports
 * more runs than this, the scan clamps the count and probes only the runs that
 * fit in the buffer. */
#define VGPUP_MAX_REGIONS 256
/* Capacity of the process buffer handed to proc_list. proc_list stops at this;
 * raising it would see more, but a producer is an ordinary user process well
 * within this bound. A returned count above this bound is clamped, so only the
 * processes that fit in the buffer are probed. */
#define VGPUP_MAX_PROCS 512
/* Fetch the whole producer block located at `region_gva` in the address space
 * selected by `cr3`, with a single gva_read of sizeof *out bytes into *out.
 *
 * Returns 0 when the read succeeds, -1 when gva_read reports a negative
 * result. The contents of *out are only meaningful on a 0 return. */
static int read_producer_block(vmie_mem* m, uint64_t cr3, uint64_t region_gva,
                               vgpu_producer_t* out)
{
    if (gva_read(m, (uintptr_t)cr3, (uintptr_t)region_gva, out, sizeof *out) < 0) {
        return -1;
    }
    return 0;
}
/* Scan ONE process user-AS under `cr3` for the producer region (discovery
 * steps 3-5 — NOTE(review): the original text read "steps 35", presumably a
 * lost dash; confirm the range).
 *
 * Asks gva_regions for the RW runs (VR_R|VR_W) in [USER_MIN, USER_MAX] and
 * treats the base of every returned run as a possible region base. Starting
 * from that run, touching neighbours (next run begins exactly where the
 * current one ends) are merged until the merged span reaches
 * VGPU_REGION_BYTES. When it does, the producer block is read at the span base
 * and checked with vgpup_invariants_hold.
 *
 * Parameters:
 *   m              memory context used for gva_regions / gva_read; must be
 *                  non-NULL.
 *   cr3            address-space root the scan runs under.
 *   out_region_gva receives the base GVA of the first accepted candidate.
 *   out_hb0        receives the heartbeat value read from that candidate's
 *                  producer block.
 *
 * Returns 0 on the first accepted candidate (both outputs written). Returns
 * -1 on a NULL argument, on a gva_regions failure, or when no span passes; the
 * outputs are left untouched in those cases. A failed producer-block read on
 * one span does not abort the scan; the next run is tried.
 *
 * Pure gva_* — no proc_list, no win32. The RW filter is applied by gva_regions
 * on region metadata, so non-candidate spans never cost a read of the region
 * bytes.
 *
 * If gva_regions reports more runs than VGPUP_MAX_REGIONS, only the runs that
 * fit in the buffer are probed. */
int vgpup_scan_user_as_for_region(vmie_mem* m, uint64_t cr3,
                                  uint64_t* out_region_gva, uint64_t* out_hb0)
{
    vregion runs[VGPUP_MAX_REGIONS];
    int n, i;

    if (!m || !out_region_gva || !out_hb0) { return -1; }

    n = gva_regions(m, (uintptr_t)cr3, USER_MIN, USER_MAX, VR_R | VR_W, runs, VGPUP_MAX_REGIONS);
    if (n < 0) { return -1; }
    if (n > VGPUP_MAX_REGIONS) { n = VGPUP_MAX_REGIONS; } /* truncated; probe what we got */

    for (i = 0; i < n; ++i) {
        uint64_t span_base = runs[i].va;
        uint64_t span_len = runs[i].len;
        int j = i;
        vgpu_producer_t p;

        /* Merge touching RW runs, but stop as soon as the span is large
         * enough: only the ">= VGPU_REGION_BYTES" outcome is used below, so
         * walking the rest of a long chain for every start index would be
         * wasted (quadratic) work with no effect on the result. */
        while (span_len < VGPU_REGION_BYTES && j + 1 < n &&
               runs[j + 1].va == runs[j].va + runs[j].len) {
            span_len += runs[j + 1].len;
            ++j;
        }
        if (span_len < VGPU_REGION_BYTES) { continue; }

        /* Large enough: accept only if the block at the base satisfies the
         * whole invariant table. */
        if (read_producer_block(m, cr3, span_base, &p) == 0 &&
            vgpup_invariants_hold(&p)) {
            *out_region_gva = span_base;
            *out_hb0 = p.heartbeat;
            return 0;
        }
    }
    return -1;
}
/* Phase 1 of discovery: list processes and probe each one's user-AS for the
 * producer region.
 *
 * This is where the win32 dependency lives: vmie_win32_mem(v) supplies the
 * generic memory context for the gva_* scan, and proc_list is called with
 * skip_system=1 to drop PEB-less System/kernel-only entries (a producer is
 * never one). Processes are probed in the order proc_list returned them and
 * the first one whose scan succeeds wins.
 *
 * Parameters:
 *   v              win32 context; must be non-NULL.
 *   out_proc_cr3   receives the cr3 of the process that yielded a candidate.
 *   out_region_gva receives the candidate region's base GVA.
 *   out_hb0        receives the heartbeat snapshot taken during the scan.
 *
 * Returns 0 with all three outputs written on success. Returns -1 on a NULL
 * argument, when vmie_win32_mem yields no memory context, when proc_list
 * fails, or when no process yields a candidate; outputs are left untouched.
 *
 * If proc_list reports more than VGPUP_MAX_PROCS entries, only the ones that
 * fit in the buffer are probed.
 *
 * NOTE(review): procs[] lives on the stack; its footprint depends on
 * sizeof(process), which is not visible here — confirm it is acceptable for
 * the calling thread's stack. */
int vgpup_discover_candidate(vmie_win32* v, uint64_t* out_proc_cr3,
                             uint64_t* out_region_gva, uint64_t* out_hb0)
{
    process procs[VGPUP_MAX_PROCS];
    vmie_mem* mem;
    int count;
    int idx;

    if (v == NULL || out_proc_cr3 == NULL || out_region_gva == NULL || out_hb0 == NULL) {
        return -1;
    }

    mem = vmie_win32_mem(v);
    if (mem == NULL) {
        return -1;
    }

    count = proc_list(v, /*skip_system=*/1, procs, VGPUP_MAX_PROCS);
    if (count < 0) {
        return -1;
    }
    if (count > VGPUP_MAX_PROCS) {
        count = VGPUP_MAX_PROCS; /* truncated; probe what we got */
    }

    for (idx = 0; idx < count; ++idx) {
        uint64_t cand_gva = 0;
        uint64_t cand_hb = 0;

        if (vgpup_scan_user_as_for_region(mem, procs[idx].cr3, &cand_gva, &cand_hb) != 0) {
            continue; /* nothing acceptable in this process */
        }

        *out_proc_cr3 = procs[idx].cr3;
        *out_region_gva = cand_gva;
        *out_hb0 = cand_hb;
        return 0;
    }
    return -1;
}
/* Phase 2 of discovery: re-read the heartbeat field of the producer block at
 * `region_gva` under `proc_cr3` and compare it with the phase-1 snapshot
 * `hb0`. This function never waits; the caller must have let at least
 * VGPU_HEARTBEAT_PERIOD_MS elapse since phase 1.
 *
 * Returns 1 when the heartbeat differs from `hb0`, 0 when it is unchanged, and
 * -1 when `m` is NULL or the read fails. A failed read can also mean the
 * producer process restarted and its pages are gone.
 *
 * Note: the comparison is on unsigned 64-bit values, so "advanced" here means
 * "changed". A counter that wrapped, or one that now reads lower than `hb0`,
 * is reported as alive as well. */
int vgpup_confirm_alive(vmie_mem* m, uint64_t proc_cr3,
                        uint64_t region_gva, uint64_t hb0)
{
    uint64_t hb_now;

    if (m == NULL) {
        return -1;
    }

    /* Read only the heartbeat field, not the whole producer block. */
    if (gva_read(m, (uintptr_t)proc_cr3,
                 (uintptr_t)region_gva + offsetof(vgpu_producer_t, heartbeat),
                 &hb_now, sizeof hb_now) < 0) {
        return -1;
    }

    /* Modular (hb_now - hb0) is non-zero exactly when the two values differ. */
    if (hb_now != hb0) {
        return 1;
    }
    return 0;
}
/* Run phase-1 discovery on `v` and, if a candidate region is found, allocate a
 * handle bound to it.
 *
 * The handle records the producer's cr3, the region base GVA, and the control
 * and ring GVAs derived from the base via VGPU_CONTROL_OFFSET and
 * VGPU_RING_OFFSET. last_frame_id and run_epoch start at 0. The heartbeat
 * snapshot produced by discovery is not stored, and no liveness confirmation
 * (vgpup_confirm_alive) is performed here.
 *
 * Returns the new handle, or NULL when discovery finds no candidate or the
 * allocation fails. The caller owns the handle and releases it with
 * vgpup_close. */
vgpup_region* vgpup_open(vmie_win32* v)
{
    uint64_t cr3 = 0;
    uint64_t base = 0;
    uint64_t hb_snapshot = 0; /* required by discovery; not kept in the handle */
    vgpup_region* region;

    if (vgpup_discover_candidate(v, &cr3, &base, &hb_snapshot) != 0) {
        return NULL;
    }

    region = calloc(1, sizeof *region);
    if (region == NULL) {
        return NULL;
    }

    region->proc_cr3 = cr3;
    region->region_gva = base;
    region->ctrl_gva = base + VGPU_CONTROL_OFFSET;
    region->ring_gva = base + VGPU_RING_OFFSET;
    region->last_frame_id = 0;
    region->run_epoch = 0;
    return region;
}
/* Release a handle obtained from vgpup_open. Only the handle's own allocation
 * is freed; nothing else is closed or released here. Passing NULL is harmless
 * because free(NULL) is a no-op. The handle must not be used afterwards. */
void vgpup_close(vgpup_region* r)
{
free(r); /* core state only; v / m belong to the caller */
}
/* Return the run_epoch stored in handle `r`, or 0 when `r` is NULL. */
uint32_t vgpup_run_epoch(const vgpup_region* r)
{
    if (r == NULL) {
        return 0u;
    }
    return r->run_epoch;
}