mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-25 20:36:36 +03:00
fix(discovery): derive the below-4G split robustly from fragmented mtree
host_probe derived the guest's below-4G split (vmie `low`) by taking the first GPA-0 RAM run in `info mtree -f`. When low RAM is fragmented by overlay pages (Hyper-V SynIC) and blackhole holes (smbase/tseg), that first run is a tiny fragment, so the split came out far too small and host_bootstrap could never recover the System DTB — the memctx context was never published. Extract a pure parser, mtree_low_split(): anchor on the system flatview, take `low` from the @file-offset of the high-RAM region at GPA >= 4 GiB (which equals the split by construction), cross-validate against the PCI-hole base, and fail closed when it can't be derived. QMP-reply un-escaping moves to the transport boundary so the parser works on plain text. Unit-tested against a synthetic fragmented flatview including a decoy non-system address space. postinst also hints to restart the daemon after an upgrade (a running instance keeps the old build until restarted). Bump 0.3.6.
This commit is contained in:
@@ -0,0 +1,177 @@
|
||||
/* mtree.c — derive the below-4G split (vmie `low`) from `info mtree -f` text.
 *
 * `low` is one number with two meanings (see vmie low_segs): the GPA bound of low-RAM
 * ([0,low) maps 1:1 to file[0,low)) AND the file offset at which RAM resumes above 4 GiB
 * (GPA 4GiB -> file[low]). The robust signal for it is therefore the `@<file_off>` suffix
 * of the high-RAM ram region (GPA >= 4 GiB): that offset IS `low` by construction.
 *
 * Low-RAM below 4 GiB is fragmented (Hyper-V synic overlays, smbase/tseg blackhole i/o
 * holes, rom holes), so "end of the first contiguous ram run" is NOT a reliable split.
 * We never trust it. Primary signal: high-RAM `@offset`. Cross-validator / fallback:
 * the start GPA of the first non-ram region at or above the standard PCI-hole base
 * (0x80000000) — the bottom of the 4 GiB PCI hole, which equals `low` for the classic
 * single-`low` layout. The two must agree when both are present; otherwise fail-closed.
 *
 * Pure text, line by line, no allocation beyond the input, no I/O. FAIL-CLOSED: any
 * unexpected/incomplete input yields 0 ("not found"); 0 is reserved for that. */
|
||||
#include "mtree.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Standard QEMU/i440fx/q35 PCI-hole base (bottom of the 4 GiB hole). Used ONLY as the
 * lower cutoff for the cross-validator/fallback, never hardcoded as the answer. */
#define PCI_HOLE_BASE 0x80000000ull

/* 4 GiB: high-RAM (the ram region carrying `@low`) starts at or above this GPA. */
#define RAM_HIGH_BASE 0x100000000ull
|
||||
|
||||
/* Parse exactly `n` hex digits (either case) at p into *out.
 *
 * Returns the char past the last digit, or NULL if there are not n hex digits. On
 * failure nothing is consumed and *out is left untouched. A NUL or newline inside the
 * window counts as a non-digit, so the scan stops there instead of running past the
 * end of a NUL-terminated string. Digits beyond the 16th would be shifted out of the
 * 64-bit accumulator; n is expected to be at most 16. */
static const char* parse_hexn(const char* p, int n, uint64_t* out) {
    uint64_t v = 0;
    for (int i = 0; i < n; i++) {
        char c = p[i];
        unsigned d;
        if (c >= '0' && c <= '9') {
            d = (unsigned)(c - '0');
        } else if (c >= 'a' && c <= 'f') {
            d = (unsigned)(c - 'a' + 10);
        } else if (c >= 'A' && c <= 'F') {
            d = (unsigned)(c - 'A' + 10);
        } else {
            return NULL;
        }
        v = (v << 4) | d;
    }
    *out = v;
    return p + n;
}
|
||||
|
||||
/* One region line of a flatview body, e.g.
 *   "  0000000100000000-000000027fffffff (prio 0, ram): ram0 @0000000080000000 KVM"
 * Two leading spaces, 16-hex start, '-', 16-hex end, " (prio <N>, <flag>): <rest>".
 * This struct holds the fields parse_region_line() extracts from such a line. */
typedef struct {
    uint64_t start_gpa; /* the 16-hex start address of the region */
    int is_ram;         /* flag is exactly "ram" (not ramd/romd/rom/i/o/container) */
    int has_off;        /* a "@<hex>" suffix was present in the descriptor */
    uint64_t file_off;  /* value of that suffix (0 when has_off is 0) */
} region_line;
|
||||
|
||||
/* Parse one flatview region line occupying [line, nl) into *out.
 *
 * line: first character of the line; the buffer must be NUL-terminated somewhere at or
 *       after nl (the prefix checks stop at the first mismatching character).
 * nl:   one past the last character of the line (its '\n', or the end of the body).
 * out:  receives start_gpa, is_ram, has_off and file_off; written only on success.
 *
 * Returns 1 on a well-formed region line, 0 otherwise (not a region line for us). */
static int parse_region_line(const char* line, const char* nl, region_line* out) {
    /* leading "  " then 16 hex, '-', 16 hex */
    if (line[0] != ' ' || line[1] != ' ') {
        return 0;
    }
    uint64_t start = 0;
    uint64_t end = 0; /* parsed only to validate the shape of the line */
    const char* p = parse_hexn(line + 2, 16, &start);
    if (!p || *p != '-') {
        return 0;
    }
    p = parse_hexn(p + 1, 16, &end);
    if (!p || p > nl) {
        return 0;
    }

    /* " (prio <N>, <flag>):" — find the flag between ", " and ")". Every scan below is
     * bounded by nl so nothing is ever taken from a following line. */
    if ((size_t)(nl - p) < 7 || memcmp(p, " (prio ", 7) != 0) {
        return 0;
    }
    const char* comma = memchr(p, ',', (size_t)(nl - p));
    if (!comma) {
        return 0;
    }
    const char* flag = comma + 1;
    while (flag < nl && *flag == ' ') {
        flag++;
    }
    const char* rparen = memchr(flag, ')', (size_t)(nl - flag));
    if (!rparen) {
        return 0;
    }
    size_t flen = (size_t)(rparen - flag);

    out->start_gpa = start;
    out->is_ram = (flen == 3 && memcmp(flag, "ram", 3) == 0) ? 1 : 0;

    /* Optional "@<hex>" anywhere after the ')'. The digits are scanned by hand rather
     * than with strtoull(): strtoull skips leading whitespace (newlines included) and
     * accepts a sign, so a bare '@' at the end of a line would pick up the next line's
     * start address as the offset. */
    out->has_off = 0;
    out->file_off = 0;
    const char* at = memchr(rparen, '@', (size_t)(nl - rparen));
    if (at) {
        const char* q = at + 1;
        uint64_t d = 0;
        /* Still accept a "0x"/"0X" prefix, but only when a hex digit follows it. */
        if (nl - q >= 3 && q[0] == '0' && (q[1] == 'x' || q[1] == 'X') &&
            parse_hexn(q + 2, 1, &d)) {
            q += 2;
        }
        const char* digits = q;
        uint64_t v = 0;
        int overflow = 0;
        while (q < nl && parse_hexn(q, 1, &d)) {
            if (v >> 60) {
                overflow = 1; /* another digit would not fit in 64 bits */
            }
            v = (v << 4) | d;
            q++;
        }
        if (q != digits) {
            out->has_off = 1;
            /* Saturate on overflow, as strtoull did, so the caller can recognise the
             * all-ones value as a degenerate offset. */
            out->file_off = overflow ? ~0ull : v;
        }
    }
    return 1;
}
|
||||
|
||||
/* Locate the system flatview body: the lines AFTER " Root memory region: system" up to
 * the next "FlatView #" line (or the end of the text).
 *
 * text:     NUL-terminated `info mtree -f` output.
 * body_end: set, on success only, to one past the last character of the body.
 *
 * Returns the start of the body, or NULL if no such root line exists or the root line
 * has no terminating newline. The body may be empty (return value == *body_end) when
 * the root line is immediately followed by the next "FlatView #" header. */
static const char* find_system_flatview(const char* text, const char** body_end) {
    static const char anchor[] = "Root memory region: system";
    const size_t anchor_len = sizeof anchor - 1;
    const char* p = text;
    while ((p = strstr(p, anchor)) != NULL) {
        /* The root name must end the token — reject "system.flash0" etc. A trailing
         * space is tolerated, and so is '\r' so CRLF-terminated monitor output is not
         * rejected outright. */
        const char* after = p + anchor_len;
        if (*after == '\n' || *after == '\r' || *after == '\0' || *after == ' ') {
            const char* eol = strchr(after, '\n');
            if (!eol) {
                return NULL;
            }
            const char* body = eol + 1; /* first region line */
            /* Search from the root line's own newline, not from the body: otherwise a
             * "FlatView #" header directly after the root line is missed and the body
             * would swallow the following (non-system) flatview. */
            const char* fv = strstr(eol, "\nFlatView #");
            *body_end = fv ? fv + 1 : (body + strlen(body));
            return body;
        }
        p = after;
    }
    return NULL;
}
|
||||
|
||||
/* Primary signal: file offset (`@hex`) of the first ram region whose start GPA >= 4 GiB.
|
||||
* Returns 1 and sets *off when found, 0 otherwise. */
|
||||
static int high_ram_offset(const char* body, const char* end, uint64_t* off) {
|
||||
const char* p = body;
|
||||
while (p < end) {
|
||||
const char* nl = memchr(p, '\n', (size_t)(end - p));
|
||||
const char* line_end = nl ? nl : end;
|
||||
region_line r;
|
||||
if (parse_region_line(p, line_end, &r) &&
|
||||
r.is_ram && r.start_gpa >= RAM_HIGH_BASE && r.has_off) {
|
||||
*off = r.file_off;
|
||||
return 1;
|
||||
}
|
||||
if (!nl) break;
|
||||
p = nl + 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Cross-validator / fallback: start GPA of the first non-ram region at or above the
|
||||
* PCI-hole base (the bottom of the 4 GiB hole == low for the classic layout). Returns 1
|
||||
* and sets *base when found, 0 otherwise. Blackhole holes below 0x80000000 are skipped
|
||||
* by the lower cutoff. */
|
||||
static int pci_hole_start(const char* body, const char* end, uint64_t* base) {
|
||||
const char* p = body;
|
||||
while (p < end) {
|
||||
const char* nl = memchr(p, '\n', (size_t)(end - p));
|
||||
const char* line_end = nl ? nl : end;
|
||||
region_line r;
|
||||
if (parse_region_line(p, line_end, &r) &&
|
||||
!r.is_ram && r.start_gpa >= PCI_HOLE_BASE && r.start_gpa < RAM_HIGH_BASE) {
|
||||
*base = r.start_gpa;
|
||||
return 1;
|
||||
}
|
||||
if (!nl) break;
|
||||
p = nl + 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t mtree_low_split(const char* text) {
|
||||
if (!text) return 0;
|
||||
|
||||
const char* body_end = NULL;
|
||||
const char* body = find_system_flatview(text, &body_end);
|
||||
if (!body) return 0; /* no system AS => fail-closed */
|
||||
|
||||
uint64_t off = 0, base = 0;
|
||||
int have_off = high_ram_offset(body, body_end, &off);
|
||||
int have_base = pci_hole_start(body, body_end, &base);
|
||||
|
||||
if (have_off) {
|
||||
if (off == 0 || off == ~0ull) return 0; /* degenerate offset */
|
||||
/* cross-validate against the PCI-hole base when we have one */
|
||||
if (have_base && base != off) return 0; /* layout anomaly => fail-closed */
|
||||
return off; /* primary signal */
|
||||
}
|
||||
|
||||
/* No high-RAM (guest RAM all below 4 GiB): fall back to the PCI-hole base, but only
|
||||
* at or above the standard base so blackhole holes can never be mistaken for it. */
|
||||
if (have_base && base >= PCI_HOLE_BASE) return base;
|
||||
|
||||
return 0; /* nothing trustworthy */
|
||||
}
|
||||
Reference in New Issue
Block a user