Files
vatrog-vm-signaling/src/discovery/linux/mtree.c
T
lirent 929bcf0e74 fix(discovery): tolerate CRLF line endings in mtree parsing
mtree_low_split anchored the system flatview on "Root memory region: system"
followed by LF/space/EOF, but QEMU's HMP `info mtree -f` output is CRLF, so the
byte after "system" is '\r'. The anchor was rejected, the parser returned 0
(fail-closed), and on a real guest the daemon never attached the VM (low=0 =>
ok=0). The synthetic LF-only fixture hid this; the fix is verified against the
real CRLF output.

Accept '\r' in the anchor check (LF-only input still works) and add a regression
test that re-encodes the fixture as CRLF in memory.

Bump 0.3.7.
2026-06-24 15:08:07 +03:00

179 lines
7.7 KiB
C

/* mtree.c — derive the below-4G split (vmie `low`) from `info mtree -f` text.
*
* `low` is one number with two meanings (see vmie low_segs): the GPA bound of low-RAM
* ([0,low) maps 1:1 to file[0,low)) AND the file offset at which RAM resumes above 4 GiB
* (GPA 4GiB -> file[low]). The robust signal for it is therefore the `@<file_off>` suffix
* of the high-RAM ram region (GPA >= 4 GiB): that offset IS `low` by construction.
*
* Low-RAM below 4 GiB is fragmented (Hyper-V synic overlays, smbase/tseg blackhole i/o
* holes, rom holes), so "end of the first contiguous ram run" is NOT a reliable split.
* We never trust it. Primary signal: high-RAM `@offset`. Cross-validator / fallback:
* the start GPA of the first non-ram region at or above the standard PCI-hole base
* (0x80000000) — the bottom of the 4 GiB PCI hole, which equals `low` for the classic
* single-`low` layout. The two must agree when both are present; otherwise fail-closed.
*
* Pure text, line by line, no allocation beyond the input, no I/O. FAIL-CLOSED: any
* unexpected/incomplete input yields 0 ("not found"); 0 is reserved for that. */
#include "mtree.h"
#include <string.h>
#include <stdlib.h>
/* Standard QEMU/i440fx/q35 PCI-hole base (bottom of the 4 GiB hole). Used ONLY as the
 * lower cutoff for the cross-validator/fallback (pci_hole_start and the final fallback
 * in mtree_low_split), never hardcoded as the answer. */
#define PCI_HOLE_BASE 0x80000000ull
/* 4 GiB: high-RAM (the ram region carrying `@low`) starts at or above this GPA. Also the
 * exclusive upper bound for PCI-hole candidates in pci_hole_start. */
#define RAM_HIGH_BASE 0x100000000ull
/* Decode exactly `n` hexadecimal digits (either case) starting at `p`.
 *
 * On success the value is stored in *out and the returned pointer addresses the
 * character right after the last digit. If any of the n characters is not a hex digit
 * (a NUL terminator included) the result is NULL and *out is left untouched, so a
 * failed parse never consumes anything. */
static const char* parse_hexn(const char* p, int n, uint64_t* out) {
    uint64_t acc = 0;
    int idx = 0;

    while (idx < n) {
        const char ch = p[idx++];
        unsigned nibble;

        if (ch >= '0' && ch <= '9') {
            nibble = (unsigned)(ch - '0');
        } else if (ch >= 'a' && ch <= 'f') {
            nibble = (unsigned)(ch - 'a' + 10);
        } else if (ch >= 'A' && ch <= 'F') {
            nibble = (unsigned)(ch - 'A' + 10);
        } else {
            return NULL; /* not a hex digit: reject the whole field */
        }
        acc = (acc << 4) | nibble;
    }

    *out = acc;
    return p + n;
}
/* Parsed form of one region line of a flatview body, e.g.
 *   "  0000000100000000-000000027fffffff (prio 0, ram): ram0 @0000000080000000 KVM"
 * i.e. two leading spaces, 16-hex start, '-', 16-hex end, " (prio <N>, <flag>): <rest>".
 * parse_region_line() fills start_gpa and is_ram and, when an "@<hex>" suffix is present
 * in <rest>, file_off (with has_off=1). It returns 1 on a well-formed region line and
 * 0 otherwise (not a region line for us). The end GPA is validated but not stored. */
typedef struct {
uint64_t start_gpa; /* region start GPA (the first 16-hex field of the line) */
int is_ram; /* flag is exactly "ram" (not ramd/romd/rom/i/o/container) */
int has_off; /* a "@<hex>" suffix was present in the descriptor */
uint64_t file_off; /* value of that suffix; 0 when has_off is 0 */
} region_line;
/* Parse one flatview region line occupying [line, nl), e.g.
 *   "  0000000100000000-000000027fffffff (prio 0, ram): ram0 @0000000080000000 KVM"
 * Layout: two leading spaces, 16-hex start, '-', 16-hex end, " (prio <N>, <flag>): <rest>".
 *
 *   line  first byte of the line.
 *   nl    one past the last byte of the line (its '\n', or the end of the body).
 *   out   written only on success: start_gpa, is_ram (flag is exactly "ram"), and
 *         has_off/file_off taken from the first '@' after the flag when that '@' is
 *         DIRECTLY followed by at least one hex digit.
 *
 * Returns 1 on a well-formed region line, 0 otherwise (not a region line for us).
 *
 * FAIL-CLOSED details: nothing at or beyond `nl` is ever interpreted as part of this
 * line. In particular the "@<hex>" value is decoded by hand rather than with strtoull(),
 * because strtoull() skips leading whitespace (CR/LF included) and accepts a sign, so a
 * bare '@' at the end of a line would otherwise pick up the start GPA of the NEXT line
 * as this region's file offset. */
static int parse_region_line(const char* line, const char* nl, region_line* out) {
    /* Fixed-width prefix: "  " + 16 hex + '-' + 16 hex + " (prio ". A shorter span
     * cannot be a region line; rejecting it up front keeps every read below inside
     * [line, nl) and keeps the (nl - p) lengths passed to memchr non-negative. */
    enum { PREFIX_LEN = 2 + 16 + 1 + 16 + 7 };
    if (nl - line < PREFIX_LEN) return 0;

    /* leading "  " then 16 hex, '-', 16 hex */
    if (line[0] != ' ' || line[1] != ' ') return 0;
    const char* p = line + 2;
    uint64_t start, end; /* end is checked for shape only; it is not reported */
    p = parse_hexn(p, 16, &start);
    if (!p || *p != '-') return 0;
    p = parse_hexn(p + 1, 16, &end);
    if (!p) return 0;

    /* " (prio <N>, <flag>):" — the flag sits between ", " and ")". */
    if (strncmp(p, " (prio ", 7) != 0) return 0;
    const char* comma = memchr(p, ',', (size_t)(nl - p));
    if (!comma) return 0;
    const char* flag = comma + 1;
    while (flag < nl && *flag == ' ') flag++;
    const char* rparen = memchr(flag, ')', (size_t)(nl - flag));
    if (!rparen) return 0;
    size_t flen = (size_t)(rparen - flag);

    out->start_gpa = start;
    out->is_ram = (flen == 3 && memcmp(flag, "ram", 3) == 0) ? 1 : 0;

    /* optional "@<hex>" in the descriptor tail (after "): "). */
    out->has_off = 0;
    out->file_off = 0;
    const char* at = memchr(rparen, '@', (size_t)(nl - rparen));
    if (at) {
        uint64_t v = 0;
        int ndigits = 0;
        int overflow = 0;
        for (const char* q = at + 1; q < nl; q++) {
            const char c = *q;
            unsigned d;
            if (c >= '0' && c <= '9') d = (unsigned)(c - '0');
            else if (c >= 'a' && c <= 'f') d = (unsigned)(c - 'a' + 10);
            else if (c >= 'A' && c <= 'F') d = (unsigned)(c - 'A' + 10);
            else break; /* first non-hex byte ends the value (space, CR, ...) */
            if (v >> 60) overflow = 1; /* a 17th significant digit would not fit */
            v = (v << 4) | d;
            ndigits++;
        }
        if (ndigits > 0) {
            out->has_off = 1;
            /* Saturate on overflow (as strtoull did) so the caller's degenerate-offset
             * check still rejects the value instead of trusting a wrapped number. */
            out->file_off = overflow ? ~(uint64_t)0 : v;
        }
    }
    return 1;
}
/* Locate the system flatview body: the lines AFTER " Root memory region: system" up to
 * the next "FlatView #" header (or the end of the text).
 *
 *   text      NUL-terminated `info mtree -f` output.
 *   body_end  set, on success only, to one past the last byte of the body (the 'F' of
 *             the next "FlatView #" header, or the terminating NUL).
 *
 * Returns the first byte of the body, or NULL if there is no system root or its anchor
 * line is not newline-terminated. The returned body may be empty (return value ==
 * *body_end) when the anchor line is immediately followed by the next flat view. */
static const char* find_system_flatview(const char* text, const char** body_end) {
    static const char anchor[] = "Root memory region: system";
    const size_t anchor_len = sizeof anchor - 1;
    const char* p = text;
    while ((p = strstr(p, anchor)) != NULL) {
        /* The root name must end the token (CR/LF/space/EOF) — reject "system.flash0" etc.,
         * and reject roots that merely contain the word elsewhere. QEMU's HMP output is
         * CRLF, so the byte after "system" is '\r'; accept it (LF-only input also works). */
        const char* after = p + anchor_len;
        if (*after == '\n' || *after == '\0' || *after == ' ' || *after == '\r') {
            const char* eol = strchr(after, '\n');
            if (!eol) return NULL;
            const char* body = eol + 1; /* first region line */
            /* Search from the anchor line's own '\n', not from `body`: when the next
             * header follows directly, the '\n' that introduces it IS that newline, and
             * starting at `body` would miss it and let the system body run into the
             * following flat view. */
            const char* fv = strstr(eol, "\nFlatView #");
            *body_end = fv ? fv + 1 : (body + strlen(body));
            return body;
        }
        p = after;
    }
    return NULL;
}
/* Primary signal: walk the body [body, end) line by line and report the file offset
 * (`@hex`) of the first well-formed ram region that starts at or above 4 GiB and
 * carries such an offset.
 * Returns 1 and stores the offset in *off when found; returns 0 (leaving *off alone)
 * otherwise. */
static int high_ram_offset(const char* body, const char* end, uint64_t* off) {
    for (const char* cur = body; cur < end; ) {
        const char* eol = memchr(cur, '\n', (size_t)(end - cur));
        const char* stop = (eol != NULL) ? eol : end; /* last line may lack a '\n' */
        region_line reg;

        /* reg is only meaningful when the line parsed, so test that first. */
        if (parse_region_line(cur, stop, &reg)) {
            const int qualifies =
                reg.is_ram && reg.start_gpa >= RAM_HIGH_BASE && reg.has_off;
            if (qualifies) {
                *off = reg.file_off;
                return 1;
            }
        }

        if (eol == NULL) {
            break;
        }
        cur = eol + 1;
    }
    return 0;
}
/* Cross-validator / fallback: scan the body [body, end) for the first well-formed
 * non-ram region whose start GPA lies in [PCI_HOLE_BASE, RAM_HIGH_BASE) — the bottom of
 * the 4 GiB hole, which equals low for the classic layout. The lower cutoff is what
 * skips blackhole holes below 0x80000000.
 * Returns 1 and stores that start GPA in *base when found; returns 0 (leaving *base
 * alone) otherwise. */
static int pci_hole_start(const char* body, const char* end, uint64_t* base) {
    const char* cursor = body;

    while (cursor < end) {
        const char* newline = memchr(cursor, '\n', (size_t)(end - cursor));
        region_line entry;
        const int parsed =
            parse_region_line(cursor, newline != NULL ? newline : end, &entry);

        /* entry is read only after a successful parse. */
        if (parsed && !entry.is_ram) {
            if (entry.start_gpa >= PCI_HOLE_BASE && entry.start_gpa < RAM_HIGH_BASE) {
                *base = entry.start_gpa;
                return 1;
            }
        }

        if (newline == NULL) {
            return 0; /* that was the final, unterminated line */
        }
        cursor = newline + 1;
    }
    return 0;
}
/* Derive the below-4G split (`low`) from NUL-terminated `info mtree -f` text.
 *
 * Uses the high-RAM `@offset` from the system flatview as the primary signal and the
 * PCI-hole base as cross-validator / fallback. Returns the split, or 0 when nothing
 * trustworthy was found (NULL text, no system flatview, degenerate offset, or the two
 * signals disagree). 0 is reserved for "not found". */
uint64_t mtree_low_split(const char* text) {
    if (text == NULL) {
        return 0;
    }

    const char* limit = NULL;
    const char* start = find_system_flatview(text, &limit);
    if (start == NULL) {
        return 0; /* no system AS => fail-closed */
    }

    uint64_t file_off = 0;
    uint64_t hole_base = 0;
    const int got_off = high_ram_offset(start, limit, &file_off);
    const int got_base = pci_hole_start(start, limit, &hole_base);

    if (!got_off) {
        /* No high-RAM (guest RAM all below 4 GiB): fall back to the PCI-hole base, but
         * only at or above the standard base so blackhole holes can never be mistaken
         * for it. Otherwise there is nothing trustworthy. */
        return (got_base && hole_base >= PCI_HOLE_BASE) ? hole_base : 0;
    }

    if (file_off == 0 || file_off == ~0ull) {
        return 0; /* degenerate offset */
    }
    if (got_base && hole_base != file_off) {
        return 0; /* signals disagree: layout anomaly => fail-closed */
    }
    return file_off; /* primary signal */
}