Files
vatrog-vm-introspection-engine/src/engine/gva.c
T
lirent dc09d7f2a4 Keep the arch layer's prose consumer-agnostic; note the x86-64 binding
The generic address-space layer no longer names win32 in its comments: the
khalf_score and gva_translate doc-comments described themselves in terms of
their current Windows consumer, a downward coupling from the stable layer to a
specific, swappable one. Reworded to describe what each primitive does, not who
calls it. Also drop a dangling reference to the renamed engine handle.

State the contract's real scope in memmodel.h: OS-agnostic but architecture-
bound. The address-space key is the x86-64 CR3 (the PML4 base), shared by any
guest OS on x86-64 - CR3 is an ISA register, not a Windows concept; only its
per-process storage (DirectoryTableBase) is win32-specific and stays in the
win32 engine.
2026-06-15 12:07:43 +03:00

272 lines
11 KiB
C

#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "engine-arch.h"
/* Page-table-entry permission bits carried down the walk:
 * bit 1 = writable, bit 2 = user-accessible, bit 63 = no-execute. */
#define PTE_RW 0x0000000000000002ull
#define PTE_US 0x0000000000000004ull
#define PTE_NX 0x8000000000000000ull
/* ---- single-address translation (hot) ----------------------------------- *
 * Walk the four table levels rooted at `cr3` and resolve `va` to a GPA.
 * Returns 0 on success with *gpa = GPA of `va` and, when `leaf` is non-NULL,
 * *leaf = number of bytes from `va` to the end of the leaf page mapping it.
 * Returns -1 as soon as a table entry cannot be obtained through gpa_ptr or
 * lacks PG_P; nothing is written through `gpa` or `leaf` in that case. */
__attribute__((hot))
static int gva_gpa(vmie_mem* m, uintptr_t cr3, uintptr_t va,
                   uintptr_t* gpa, size_t* leaf) {
    const uint64_t* slot;
    uint64_t entry;
    uint64_t table = cr3 & PFN_MASK;

    /* Top level (VA bits 47:39): PG_PS is not consulted here. */
    slot = gpa_ptr(m, table + ((va >> 39) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;
    table = entry & PFN_MASK;

    /* Third level (VA bits 38:30): PG_PS marks a 1 GiB leaf. */
    slot = gpa_ptr(m, table + ((va >> 30) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;
    if (entry & PG_PS) {
        const uint64_t within = va & 0x3FFFFFFF;
        *gpa = (entry & PFN_MASK & ~0x3FFFFFFFull) + within;
        if (leaf) *leaf = (1u << 30) - within;
        return 0;
    }
    table = entry & PFN_MASK;

    /* Second level (VA bits 29:21): PG_PS marks a 2 MiB leaf. */
    slot = gpa_ptr(m, table + ((va >> 21) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;
    if (entry & PG_PS) {
        const uint64_t within = va & 0x1FFFFF;
        *gpa = (entry & PFN_MASK & ~0x1FFFFFull) + within;
        if (leaf) *leaf = (1u << 21) - within;
        return 0;
    }
    table = entry & PFN_MASK;

    /* Last level (VA bits 20:12): every present entry is a 4 KiB leaf. */
    slot = gpa_ptr(m, table + ((va >> 12) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;

    const uint64_t within = va & 0xFFF;
    *gpa = (entry & PFN_MASK) + within;
    if (leaf) *leaf = 0x1000 - within;
    return 0;
}
/* Cold, externally visible translate: resolves `va` under `cr3` and stores the
 * result through `gpa`. Returns gva_gpa's status unchanged (0 on success, -1
 * on failure). Exists so cold callers outside this TU can translate without
 * the inlinable hot static primitive being exposed. Declared in engine-arch.h. */
__attribute__((cold))
int gva_translate(vmie_mem* m, uintptr_t cr3, uintptr_t va, uintptr_t* gpa) {
    /* The caller has no use for the leaf extent, so none is requested. */
    const int status = gva_gpa(m, cr3, va, gpa, NULL);
    return status;
}
/* Zero-copy borrowed read (see memmodel.h): returns a host pointer to the
 * guest bytes at `va`, bounded by the leaf page that maps `va`.
 * On a successful translation *avail receives the number of bytes from `va`
 * to the end of that leaf; it is stored before gpa_ptr is consulted, so it is
 * set even when gpa_ptr then yields NULL. `avail` must be non-NULL.
 * Returns NULL (leaving *avail untouched) when `va` does not translate. */
__attribute__((hot))
const void* gva_ptr(vmie_mem* m, uintptr_t cr3, uintptr_t va, size_t* avail) {
    uintptr_t phys;
    size_t span;
    const int failed = gva_gpa(m, cr3, va, &phys, &span);
    if (failed) return NULL;
    *avail = span;
    return gpa_ptr(m, phys, span);
}
/* Copy `nmemb` bytes of guest memory starting at `va` (under `cr3`) into
 * `dst`. The range is translated leaf by leaf, so it may cross page
 * boundaries. Returns 0 when every byte was copied, -1 as soon as a
 * translation or gpa_read fails; bytes copied before the failure stay in
 * `dst`. A zero `nmemb` succeeds without touching anything. */
__attribute__((hot))
int gva_read(vmie_mem* m, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb) {
    uint8_t* out = dst;
    size_t left = nmemb;
    while (left != 0) {
        uintptr_t phys;
        size_t span;
        if (gva_gpa(m, cr3, va, &phys, &span) != 0) return -1;
        /* Never read past the end of the current leaf in one step. */
        size_t chunk = left;
        if (span < chunk) chunk = span;
        if (gpa_read(m, phys, out, chunk) != 0) return -1;
        va += chunk;
        out += chunk;
        left -= chunk;
    }
    return 0;
}
/* Copy `nmemb` bytes from `src` into guest memory starting at `va` (under
 * `cr3`). The range is translated leaf by leaf, so it may cross page
 * boundaries. Returns 0 when every byte was written, -1 as soon as a
 * translation or gpa_write fails; bytes written before the failure are not
 * rolled back. A zero `nmemb` succeeds without touching anything. */
__attribute__((hot))
int gva_write(vmie_mem* m, uintptr_t cr3, uintptr_t va, const void* src, size_t nmemb) {
    const uint8_t* in = src;
    size_t left = nmemb;
    while (left != 0) {
        uintptr_t phys;
        size_t span;
        if (gva_gpa(m, cr3, va, &phys, &span) != 0) return -1;
        /* Never write past the end of the current leaf in one step. */
        size_t chunk = left;
        if (span < chunk) chunk = span;
        if (gpa_write(m, phys, in, chunk) != 0) return -1;
        va += chunk;
        in += chunk;
        left -= chunk;
    }
    return 0;
}
/* ---- bootstrap helpers (cold) -------------------------------------------- */
/* Count the present entries in the upper half (indices 256..511) of the
 * top-level table whose base is `pml4`. Entries that gpa_read cannot fetch
 * are counted as absent. Returns a value in 0..256. */
__attribute__((cold))
int khalf_score(const vmie_mem* m, uint64_t pml4) {
    const uint64_t table = pml4 & PFN_MASK;
    int present = 0;
    for (unsigned slot = 256; slot < 512; slot++) {
        uint64_t entry;
        /* gpa_read takes a non-const handle, hence the cast. */
        if (gpa_read((vmie_mem*)m, table + (uint64_t)slot * 8, &entry, 8))
            continue;
        if (entry & PG_P)
            present++;
    }
    return present;
}
/* ---- region enumeration -------------------------------------------------- */
/* Accumulator used while enumerating regions: holds the output array, the
 * filter/window parameters, and the run currently being coalesced.
 * NOTE: gva_regions initializes this struct positionally, so the member order
 * below must not change. */
struct rgn_acc {
/* out: caller's result array; nmax: its capacity; n: number of runs accepted
 * so far (keeps counting past nmax, but only the first nmax are stored). */
vregion* out; int nmax; int n;
/* Protection filter: 0 accepts every run, otherwise a run is accepted when
 * its prot shares at least one bit with this mask. */
uint32_t prot_any;
/* Inclusive VA window; leaves are clamped to it. */
uint64_t lo, hi;
/* Pending run: have != 0 when va/len/prot describe a run not yet flushed. */
int have; uint64_t va, len; uint32_t prot;
};
static void rgn_flush(struct rgn_acc* a) {
if (!a->have) return;
if (a->prot_any == 0 || (a->prot & a->prot_any)) {
if (a->n < a->nmax) {
a->out[a->n].va = a->va; a->out[a->n].len = a->len; a->out[a->n].prot = a->prot;
}
a->n++;
}
a->have = 0;
}
/* Feed one present leaf [va, va+size) into the accumulator: clip it to the
 * inclusive window [lo,hi], then either extend the pending run (same prot and
 * exactly adjacent) or flush the pending run and start a new one. Leaves that
 * fall entirely outside the window are ignored. */
static void rgn_leaf(struct rgn_acc* a, uint64_t va, uint64_t size, uint32_t prot) {
    uint64_t first = va;
    uint64_t last = va + size - 1;          /* inclusive last byte */

    if (last < a->lo || first > a->hi) return;
    if (first < a->lo) first = a->lo;
    if (last > a->hi) last = a->hi;

    const uint64_t span = last - first + 1;
    const int adjoins = a->have && a->prot == prot && a->va + a->len == first;
    if (adjoins) {
        a->len += span;
        return;
    }
    rgn_flush(a);
    a->va = first;
    a->len = span;
    a->prot = prot;
    a->have = 1;
}
/* Build the VR_* protection mask for a leaf from its effective permission
 * flags: VR_R is always set, VR_W when `rw`, VR_U when `us`, and VR_X unless
 * `nx` is set. */
static uint32_t rgn_prot(int rw, int us, int nx) {
    uint32_t prot = VR_R;
    if (rw) prot |= VR_W;
    if (!nx) prot |= VR_X;
    if (us) prot |= VR_U;
    return prot;
}
/* Whole-subtree window test: returns 1 when [base, base+span) shares at least
 * one byte with the inclusive window [lo,hi], 0 otherwise. `span` is expected
 * to be non-zero. */
static int rgn_hit(uint64_t base, uint64_t span, uint64_t lo, uint64_t hi) {
    const uint64_t last = base + span - 1;  /* inclusive last byte */
    return last >= lo && base <= hi;
}
/* Enumerate the mapped virtual-address runs under `cr3` that intersect the
 * inclusive window [lo,hi]. Adjacent leaves with identical protection are
 * coalesced into one run; each run is clipped to the window. A run is kept
 * when `prot_any` is 0 or its protection shares a bit with `prot_any`.
 * Effective permissions combine all levels of the walk: writable and user
 * must be set at every level, no-execute at any level disables execute.
 * Up to `nmax` runs are stored in `out`. Returns the number of runs that
 * passed the filter, which can exceed `nmax` (the surplus is counted but not
 * stored). Returns 0 when `nmax` <= 0 or the top-level table is unavailable.
 * Tables that gpa_ptr cannot provide are skipped. */
__attribute__((hot))
int gva_regions(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
                uint32_t prot_any, vregion* out, int nmax) {
    if (nmax <= 0) return 0;

    struct rgn_acc acc = {
        .out = out, .nmax = nmax, .prot_any = prot_any, .lo = lo, .hi = hi,
    };

    const uint64_t* l4 = gpa_ptr(m, cr3 & PFN_MASK, 4096);
    if (!l4) return 0;

    for (int i4 = 0; i4 < 512; i4++) {
        const uint64_t e4 = l4[i4];
        if (!(e4 & PG_P)) continue;
        const uint64_t raw4 = (uint64_t)i4 << 39;
        const uint64_t va4 = VA_CANON(raw4);
        if (!rgn_hit(va4, 1ull << 39, lo, hi)) continue;  /* prune subtree */
        const int w4 = (e4 & (1ull << 1)) != 0;
        const int u4 = (e4 & (1ull << 2)) != 0;
        const int x4 = (e4 & (1ull << 63)) != 0;          /* no-execute */
        const uint64_t* l3 = gpa_ptr(m, e4 & PFN_MASK, 4096);
        if (!l3) continue;

        for (int i3 = 0; i3 < 512; i3++) {
            const uint64_t e3 = l3[i3];
            if (!(e3 & PG_P)) continue;
            const uint64_t raw3 = raw4 | ((uint64_t)i3 << 30);
            const uint64_t va3 = VA_CANON(raw3);
            if (!rgn_hit(va3, 1ull << 30, lo, hi)) continue;
            const int w3 = w4 && (e3 & (1ull << 1));
            const int u3 = u4 && (e3 & (1ull << 2));
            const int x3 = x4 || (e3 & (1ull << 63));
            if (e3 & PG_PS) {                             /* 1 GiB leaf */
                rgn_leaf(&acc, va3, 1ull << 30, rgn_prot(w3, u3, x3));
                continue;
            }
            const uint64_t* l2 = gpa_ptr(m, e3 & PFN_MASK, 4096);
            if (!l2) continue;

            for (int i2 = 0; i2 < 512; i2++) {
                const uint64_t e2 = l2[i2];
                if (!(e2 & PG_P)) continue;
                const uint64_t raw2 = raw3 | ((uint64_t)i2 << 21);
                const uint64_t va2 = VA_CANON(raw2);
                if (!rgn_hit(va2, 1ull << 21, lo, hi)) continue;
                const int w2 = w3 && (e2 & (1ull << 1));
                const int u2 = u3 && (e2 & (1ull << 2));
                const int x2 = x3 || (e2 & (1ull << 63));
                if (e2 & PG_PS) {                         /* 2 MiB leaf */
                    rgn_leaf(&acc, va2, 1ull << 21, rgn_prot(w2, u2, x2));
                    continue;
                }
                const uint64_t* l1 = gpa_ptr(m, e2 & PFN_MASK, 4096);
                if (!l1) continue;

                for (int i1 = 0; i1 < 512; i1++) {
                    const uint64_t e1 = l1[i1];
                    if (!(e1 & PG_P)) continue;
                    const uint64_t raw1 = raw2 | ((uint64_t)i1 << 12);
                    const uint64_t va1 = VA_CANON(raw1);
                    if (!rgn_hit(va1, 1ull << 12, lo, hi)) continue;
                    const int w1 = w2 && (e1 & (1ull << 1));
                    const int u1 = u2 && (e1 & (1ull << 2));
                    const int x1 = x2 || (e1 & (1ull << 63));
                    rgn_leaf(&acc, va1, 1ull << 12, rgn_prot(w1, u1, x1));
                }
            }
        }
    }

    rgn_flush(&acc);   /* emit the run still pending after the last leaf */
    return acc.n;
}
/* ---- windowed sweep engine ----------------------------------------------- */
#define SWEEP_WIN (1024u * 1024u) /* 1 MiB window (multiple of 8) */
#define SWEEP_RMAX (64u * 1024u) /* max runs enumerated per sweep */
/* Sweep the mapped runs of [lo,hi] under `cr3` (as enumerated by gva_regions
 * with filter `prot_any`) and hand their bytes to `cb` in ascending VA order.
 *
 * Each call is cb(user, data, len, base, ov, end):
 *   data/len  the chunk; base is the VA of data[0];
 *   ov        `overlap` for buffered windows, 0 for chunks lent directly
 *             from a large page;
 *   end       1 when the chunk finishes the current run (it reaches the
 *             run's last byte, or the run is cut short by an unmapped gap),
 *             otherwise 0.
 * A non-zero return from `cb` stops the sweep.
 *
 * Small pages are gathered into a SWEEP_WIN-byte buffer; when the buffer is
 * empty and the current leaf is larger than 4 KiB the guest memory is lent
 * to `cb` without copying. Between consecutive chunks of one run the trailing
 * `overlap` bytes are carried to the front of the next buffered window.
 * At most SWEEP_RMAX runs are swept; further runs are silently ignored.
 *
 * Returns 0 when the sweep completed, 1 when `cb` stopped it, and -1 when
 * `overlap` >= SWEEP_WIN or a working buffer could not be allocated. */
__attribute__((hot))
int gva_sweep(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
              uint32_t prot_any, size_t overlap, gva_sweep_cb cb, void* user) {
    if (overlap >= SWEEP_WIN) return -1;

    vregion* rg = malloc((size_t)SWEEP_RMAX * sizeof *rg);
    uint8_t* buf = malloc(SWEEP_WIN);
    if (!rg || !buf) { free(rg); free(buf); return -1; }

    /* gva_regions reports how many runs it found, which may exceed the
     * capacity passed in; only the first SWEEP_RMAX were stored. */
    int nr = gva_regions(m, cr3, lo, hi, prot_any, rg, SWEEP_RMAX);
    if (nr > (int)SWEEP_RMAX) nr = (int)SWEEP_RMAX;

    int rc = 0;
    for (int r = 0; r < nr && !rc; r++) {
        uint64_t base = rg[r].va;                           /* VA of buf[0] */
        uint64_t va = rg[r].va;
        const uint64_t vend = rg[r].va + (rg[r].len - 1);   /* inclusive last */
        size_t fill = 0;

        while (va <= vend) {
            size_t avail;
            const uint8_t* p = gva_ptr(m, cr3, va, &avail);

            if (!p) {                                       /* gap: flush + skip */
                if (fill) {
                    if (cb(user, buf, fill, base, overlap, 1)) { rc = 1; break; }
                    /* Delivered. Clear it now so that leaving the loop below
                     * cannot hand the same bytes to cb a second time. */
                    fill = 0;
                }
                const uint64_t skip = 0x1000 - (va & 0xFFF);
                if (vend - va < skip) break;                /* skip past top: done */
                va += skip;
                base = va;
                continue;
            }

            size_t n = avail;                               /* leaf-contiguous */
            if (n > (size_t)(vend - va + 1)) n = (size_t)(vend - va + 1);
            if (n > SWEEP_WIN - fill) n = SWEEP_WIN - fill;
            const int end = (n == (size_t)(vend - va + 1)); /* chunk hits vend */

            if (fill == 0 && avail > 0x1000) {              /* large-page lend */
                if (cb(user, p, n, va, 0, end)) { rc = 1; break; }
                if (end) break;                             /* avoid va wrap */
                va += n;
                /* Carry at most what was just lent: a larger overlap would
                 * make p + n - overlap point before the lent span. */
                const size_t carry = overlap < n ? overlap : n;
                if (carry == 0) {
                    base = va;
                } else {
                    memcpy(buf, p + n - carry, carry);
                    base = va - carry;
                    fill = carry;
                }
                continue;
            }

            memcpy(buf + fill, p, n);                       /* buffered window */
            fill += n;
            va += n;
            if (end || fill == SWEEP_WIN) {
                if (cb(user, buf, fill, base, overlap, end)) { rc = 1; break; }
                if (end) { fill = 0; break; }               /* avoid va wrap */
                if (overlap == 0 || overlap >= fill) {
                    base = va;
                    fill = 0;
                } else {                                    /* carry overlap */
                    memmove(buf, buf + fill - overlap, overlap);
                    base = va - overlap;
                    fill = overlap;
                }
            }
        }

        /* Safety net: every exit above either delivered the buffered bytes
         * (and cleared fill) or set rc, so this normally does nothing. */
        if (!rc && fill && cb(user, buf, fill, base, overlap, 1)) rc = 1;
    }

    free(rg);
    free(buf);
    return rc;
}