Zero-copy hot path, correctness hardening

gva_ptr: leaf-bounded zero-copy guest reads. gva_sweep is redesigned to build
on it — large-page leaves are lent to the callback while 4K runs stay
buffered, and the run loop is guarded against wrap at the top of the address
space. gva_gpa fetches PTEs zero-copy; optional W32MS_LTO build option folds
the per-fetch call boundary (shipped -O2 default unchanged).

Correctness: subtract-form bounds check (no add overflow), memcpy decode in
place of type-punned wide loads, zero-init PDB name before compare,
PCI-hole-crossing range rejection, single-sourced VA_CANON and USER bounds.
hot/cold attributes audited across the translation and scan path.
This commit is contained in:
2026-06-15 00:58:27 +03:00
parent 1ec70b7ede
commit 4015e839eb
9 changed files with 84 additions and 39 deletions
+43 -19
View File
@@ -5,9 +5,6 @@
#include "include/memory.h"
#include "../include/include.h"
/* sign-extend a 48-bit canonical VA */
#define VA_CANON(v) (((v) & (1ull << 47)) ? ((v) | 0xFFFF000000000000ull) : (v))
/* PTE permission bits we propagate down the walk. */
#define PTE_RW (1ull << 1)
#define PTE_US (1ull << 2)
@@ -16,15 +13,17 @@
/* ---- single-address translation (hot) ----------------------------------- *
* Translate `va` under `cr3` to a GPA. On success: *gpa = GPA of `va`, and
* *leaf (if non-NULL) = bytes from `va` to the end of the containing leaf. */
__attribute__((hot))
static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
uintptr_t* gpa, size_t* leaf) {
uint64_t t = cr3 & PFN_MASK, e;
const uint64_t* pe;
const unsigned i4 = (va >> 39) & 0x1ff, i3 = (va >> 30) & 0x1ff,
i2 = (va >> 21) & 0x1ff, i1 = (va >> 12) & 0x1ff;
if (gpa_read(&p_(ctx), t + i4 * 8, &e, 8) || !(e & PG_P)) return -1;
if (!(pe = gpa_ptr(&p_(ctx), t + i4 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
t = e & PFN_MASK;
if (gpa_read(&p_(ctx), t + i3 * 8, &e, 8) || !(e & PG_P)) return -1;
if (!(pe = gpa_ptr(&p_(ctx), t + i3 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
if (e & PG_PS) { /* 1 GiB leaf */
const uint64_t off = va & 0x3FFFFFFF;
*gpa = (e & PFN_MASK & ~0x3FFFFFFFull) + off;
@@ -32,7 +31,7 @@ static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
return 0;
}
t = e & PFN_MASK;
if (gpa_read(&p_(ctx), t + i2 * 8, &e, 8) || !(e & PG_P)) return -1;
if (!(pe = gpa_ptr(&p_(ctx), t + i2 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
if (e & PG_PS) { /* 2 MiB leaf */
const uint64_t off = va & 0x1FFFFF;
*gpa = (e & PFN_MASK & ~0x1FFFFFull) + off;
@@ -40,13 +39,22 @@ static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
return 0;
}
t = e & PFN_MASK;
if (gpa_read(&p_(ctx), t + i1 * 8, &e, 8) || !(e & PG_P)) return -1;
if (!(pe = gpa_ptr(&p_(ctx), t + i1 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
const uint64_t off = va & 0xFFF; /* 4 KiB leaf */
*gpa = (e & PFN_MASK) + off;
if (leaf) *leaf = 0x1000 - off;
return 0;
}
/* Borrowed (zero-copy) view of guest memory at `va`, bounded by its leaf.
 *
 * Translates `va` under `cr3` with gva_gpa(), then asks gpa_ptr() for a host
 * pointer covering the rest of the containing leaf.
 *
 *   avail  out, must be non-NULL: receives the byte count from `va` to the
 *          end of the containing leaf. It is written as soon as translation
 *          succeeds, so it is set even when the gpa_ptr() lookup then fails.
 *
 * Returns NULL when translation fails, otherwise whatever gpa_ptr() returns
 * for the leaf remainder.
 * NOTE(review): the whole leaf remainder is requested in a single gpa_ptr()
 * call; presumably a large leaf that is only partly backed yields NULL here
 * -- confirm against gpa_ptr() in memory.h. */
__attribute__((hot))
const void* gva_ptr(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, size_t* avail) {
    uintptr_t phys;
    size_t span;

    const int walk_failed = gva_gpa(ctx, cr3, va, &phys, &span);
    if (walk_failed) {
        return NULL;
    }

    *avail = span;
    return gpa_ptr(&p_(ctx), phys, span);
}
__attribute__((hot))
int gva_read(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb) {
uint8_t* d = dst;
@@ -169,6 +177,7 @@ static int rgn_hit(uint64_t base, uint64_t span, uint64_t lo, uint64_t hi) {
return !(end < lo || base > hi);
}
__attribute__((hot))
int gva_regions(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
uint32_t prot_any, vregion* out, int nmax) {
if (nmax <= 0) return 0;
@@ -230,6 +239,7 @@ int gva_regions(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
#define SWEEP_WIN (1u << 20) /* 1 MiB window (multiple of 8) */
#define SWEEP_RMAX (1u << 16) /* max runs enumerated per sweep */
__attribute__((hot))
int gva_sweep(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
uint32_t prot_any, size_t overlap, gva_sweep_cb cb, void* user) {
if (overlap >= SWEEP_WIN) return -1;
@@ -245,26 +255,40 @@ int gva_sweep(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
for (int r = 0; r < nr && !rc; r++) {
uint64_t base = rg[r].va; /* VA of buf[0] */
uint64_t va = rg[r].va;
const uint64_t vend = rg[r].va + rg[r].len;
const uint64_t vend = rg[r].va + (rg[r].len - 1); /* inclusive last */
size_t fill = 0;
while (va < vend) {
size_t pg = 0x1000 - (size_t)(va & 0xFFF); /* to page edge */
if (pg > (size_t)(vend - va)) pg = (size_t)(vend - va);
if (pg > SWEEP_WIN - fill) pg = SWEEP_WIN - fill;
if (gva_read(ctx, cr3, va, buf + fill, pg)) { /* gap: flush+skip */
while (va <= vend) {
size_t avail;
const uint8_t* p = gva_ptr(ctx, cr3, va, &avail);
if (!p) { /* gap: flush+skip */
if (fill && cb(user, buf, fill, base, overlap, 1)) { rc = 1; break; }
if (vend - va < 0x1000 - (va & 0xFFF)) break; /* skip past top: done */
va += 0x1000 - (va & 0xFFF);
base = va; fill = 0;
continue;
}
fill += pg; va += pg;
size_t n = avail; /* leaf-contiguous */
if (n > (size_t)(vend - va + 1)) n = (size_t)(vend - va + 1);
if (n > SWEEP_WIN - fill) n = SWEEP_WIN - fill;
const int end = (n == (size_t)(vend - va + 1)); /* chunk hits vend */
if (fill == SWEEP_WIN) {
const int last = (va >= vend);
if (cb(user, buf, fill, base, overlap, last)) { rc = 1; break; }
if (last || overlap == 0 || overlap >= fill) {
if (fill == 0 && avail > 0x1000) { /* large-page lend */
if (cb(user, p, n, va, 0, end)) { rc = 1; break; }
if (end) break; /* avoid va wrap */
va += n;
if (overlap == 0) base = va;
else { memcpy(buf, p + n - overlap, overlap); base = va - overlap; fill = overlap; }
continue;
}
memcpy(buf + fill, p, n); /* buffered window */
fill += n; va += n;
if (end || fill == SWEEP_WIN) {
if (cb(user, buf, fill, base, overlap, end)) { rc = 1; break; }
if (end) { fill = 0; break; } /* avoid va wrap */
if (overlap == 0 || overlap >= fill) {
base = va; fill = 0;
} else { /* carry overlap */
memmove(buf, buf + fill - overlap, overlap);