Zero-copy hot path, correctness hardening

gva_ptr: leaf-bounded zero-copy guest reads. gva_sweep is redesigned to build on it — large-page leaves are lent directly to the callback while 4K runs stay buffered, and the run loop is guarded against wrap-around at the top of the address space. gva_gpa fetches PTEs zero-copy; the optional W32MS_LTO build setting folds the per-fetch call boundary (the shipped -O2 default is unchanged). Correctness: subtract-form bounds check (no add overflow), memcpy decode in place of type-punned wide loads, zero-init of the PDB name before compare, rejection of ranges that cross the PCI hole, single-sourced VA_CANON and USER bounds. Hot/cold attributes audited across the translation and scan paths.
2026-06-18 06:36:37 +03:00 · 2026-06-15 00:58:27 +03:00
parent 1ec70b7ede
commit 4015e839eb
9 changed files with 84 additions and 39 deletions
@@ -5,9 +5,6 @@
 #include "include/memory.h"
 #include "../include/include.h"

-/* sign-extend a 48-bit canonical VA */
-#define VA_CANON(v) (((v) & (1ull << 47)) ? ((v) | 0xFFFF000000000000ull) : (v))
-
 /* PTE permission bits we propagate down the walk. */
 #define PTE_RW (1ull << 1)
 #define PTE_US (1ull << 2)
@@ -16,15 +13,17 @@
 /* ---- single-address translation (hot) ----------------------------------- *
 * Translate `va` under `cr3` to a GPA. On success: *gpa = GPA of `va`, and
 * *leaf (if non-NULL) = bytes from `va` to the end of the containing leaf. */
+__attribute__((hot))
 static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
                   uintptr_t* gpa, size_t* leaf) {
    uint64_t t = cr3 & PFN_MASK, e;
+    const uint64_t* pe;
    const unsigned i4 = (va >> 39) & 0x1ff, i3 = (va >> 30) & 0x1ff,
                   i2 = (va >> 21) & 0x1ff, i1 = (va >> 12) & 0x1ff;

-    if (gpa_read(&p_(ctx), t + i4 * 8, &e, 8) || !(e & PG_P)) return -1;
+    if (!(pe = gpa_ptr(&p_(ctx), t + i4 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
    t = e & PFN_MASK;
-    if (gpa_read(&p_(ctx), t + i3 * 8, &e, 8) || !(e & PG_P)) return -1;
+    if (!(pe = gpa_ptr(&p_(ctx), t + i3 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
    if (e & PG_PS) {                                   /* 1 GiB leaf */
        const uint64_t off = va & 0x3FFFFFFF;
        *gpa = (e & PFN_MASK & ~0x3FFFFFFFull) + off;
@@ -32,7 +31,7 @@ static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
        return 0;
    }
    t = e & PFN_MASK;
-    if (gpa_read(&p_(ctx), t + i2 * 8, &e, 8) || !(e & PG_P)) return -1;
+    if (!(pe = gpa_ptr(&p_(ctx), t + i2 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
    if (e & PG_PS) {                                   /* 2 MiB leaf */
        const uint64_t off = va & 0x1FFFFF;
        *gpa = (e & PFN_MASK & ~0x1FFFFFull) + off;
@@ -40,13 +39,22 @@ static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
        return 0;
    }
    t = e & PFN_MASK;
-    if (gpa_read(&p_(ctx), t + i1 * 8, &e, 8) || !(e & PG_P)) return -1;
+    if (!(pe = gpa_ptr(&p_(ctx), t + i1 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
    const uint64_t off = va & 0xFFF;                   /* 4 KiB leaf */
    *gpa = (e & PFN_MASK) + off;
    if (leaf) *leaf = 0x1000 - off;
    return 0;
 }

+/* zero-copy borrowed read: leaf-bounded host pointer at `va` (see memory.h). */
+__attribute__((hot))
+const void* gva_ptr(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, size_t* avail) {
+    uintptr_t gpa; size_t leaf;
+    if (gva_gpa(ctx, cr3, va, &gpa, &leaf)) return NULL;
+    *avail = leaf;
+    return gpa_ptr(&p_(ctx), gpa, leaf);
+}
+
 __attribute__((hot))
 int gva_read(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb) {
    uint8_t* d = dst;
@@ -169,6 +177,7 @@ static int rgn_hit(uint64_t base, uint64_t span, uint64_t lo, uint64_t hi) {
    return !(end < lo || base > hi);
 }

+__attribute__((hot))
 int gva_regions(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
                uint32_t prot_any, vregion* out, int nmax) {
    if (nmax <= 0) return 0;
@@ -230,6 +239,7 @@ int gva_regions(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
 #define SWEEP_WIN  (1u << 20)        /* 1 MiB window (multiple of 8)          */
 #define SWEEP_RMAX (1u << 16)        /* max runs enumerated per sweep         */

+__attribute__((hot))
 int gva_sweep(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
              uint32_t prot_any, size_t overlap, gva_sweep_cb cb, void* user) {
    if (overlap >= SWEEP_WIN) return -1;
@@ -245,26 +255,40 @@ int gva_sweep(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
    for (int r = 0; r < nr && !rc; r++) {
        uint64_t base = rg[r].va;                 /* VA of buf[0]            */
        uint64_t va   = rg[r].va;
-        const uint64_t vend = rg[r].va + rg[r].len;
+        const uint64_t vend = rg[r].va + (rg[r].len - 1);  /* inclusive last */
        size_t fill = 0;

-        while (va < vend) {
-            size_t pg = 0x1000 - (size_t)(va & 0xFFF);     /* to page edge   */
-            if (pg > (size_t)(vend - va)) pg = (size_t)(vend - va);
-            if (pg > SWEEP_WIN - fill)    pg = SWEEP_WIN - fill;
-
-            if (gva_read(ctx, cr3, va, buf + fill, pg)) {  /* gap: flush+skip */
+        while (va <= vend) {
+            size_t avail;
+            const uint8_t* p = gva_ptr(ctx, cr3, va, &avail);
+            if (!p) {                                      /* gap: flush+skip */
                if (fill && cb(user, buf, fill, base, overlap, 1)) { rc = 1; break; }
+                if (vend - va < 0x1000 - (va & 0xFFF)) break;  /* skip past top: done */
                va  += 0x1000 - (va & 0xFFF);
                base = va; fill = 0;
                continue;
            }
-            fill += pg; va += pg;
+            size_t n = avail;                              /* leaf-contiguous */
+            if (n > (size_t)(vend - va + 1)) n = (size_t)(vend - va + 1);
+            if (n > SWEEP_WIN - fill)        n = SWEEP_WIN - fill;
+            const int end = (n == (size_t)(vend - va + 1)); /* chunk hits vend */

-            if (fill == SWEEP_WIN) {
-                const int last = (va >= vend);
-                if (cb(user, buf, fill, base, overlap, last)) { rc = 1; break; }
-                if (last || overlap == 0 || overlap >= fill) {
+            if (fill == 0 && avail > 0x1000) {             /* large-page lend */
+                if (cb(user, p, n, va, 0, end)) { rc = 1; break; }
+                if (end) break;                            /* avoid va wrap   */
+                va += n;
+                if (overlap == 0) base = va;
+                else { memcpy(buf, p + n - overlap, overlap); base = va - overlap; fill = overlap; }
+                continue;
+            }
+
+            memcpy(buf + fill, p, n);                      /* buffered window */
+            fill += n; va += n;
+
+            if (end || fill == SWEEP_WIN) {
+                if (cb(user, buf, fill, base, overlap, end)) { rc = 1; break; }
+                if (end) { fill = 0; break; }              /* avoid va wrap   */
+                if (overlap == 0 || overlap >= fill) {
                    base = va; fill = 0;
                } else {                                   /* carry overlap   */
                    memmove(buf, buf + fill - overlap, overlap);