#include #include #include #include #include "engine-arch.h" /* PTE permission bits we propagate down the walk. */ #define PTE_RW (1ull << 1) #define PTE_US (1ull << 2) #define PTE_NX (1ull << 63) /* ---- single-address translation (hot) ----------------------------------- * * Translate `va` under `cr3` to a GPA. On success: *gpa = GPA of `va`, and * *leaf (if non-NULL) = bytes from `va` to the end of the containing leaf. */ __attribute__((hot)) static int gva_gpa(vmie_mem* m, uintptr_t cr3, uintptr_t va, uintptr_t* gpa, size_t* leaf) { uint64_t t = cr3 & PFN_MASK, e; const uint64_t* pe; const unsigned i4 = (va >> 39) & 0x1ff, i3 = (va >> 30) & 0x1ff, i2 = (va >> 21) & 0x1ff, i1 = (va >> 12) & 0x1ff; if (!(pe = gpa_ptr(m, t + i4 * 8, 8)) || !((e = *pe) & PG_P)) return -1; t = e & PFN_MASK; if (!(pe = gpa_ptr(m, t + i3 * 8, 8)) || !((e = *pe) & PG_P)) return -1; if (e & PG_PS) { /* 1 GiB leaf */ const uint64_t off = va & 0x3FFFFFFF; *gpa = (e & PFN_MASK & ~0x3FFFFFFFull) + off; if (leaf) *leaf = (1u << 30) - off; return 0; } t = e & PFN_MASK; if (!(pe = gpa_ptr(m, t + i2 * 8, 8)) || !((e = *pe) & PG_P)) return -1; if (e & PG_PS) { /* 2 MiB leaf */ const uint64_t off = va & 0x1FFFFF; *gpa = (e & PFN_MASK & ~0x1FFFFFull) + off; if (leaf) *leaf = (1u << 21) - off; return 0; } t = e & PFN_MASK; if (!(pe = gpa_ptr(m, t + i1 * 8, 8)) || !((e = *pe) & PG_P)) return -1; const uint64_t off = va & 0xFFF; /* 4 KiB leaf */ *gpa = (e & PFN_MASK) + off; if (leaf) *leaf = 0x1000 - off; return 0; } /* cold extern translate: GPA of `va` under `cr3`, or -1. Wraps the hot static * gva_gpa for cold callers outside this TU without exposing the inlinable hot * primitive. Declared in engine-arch.h. */ __attribute__((cold)) int gva_translate(vmie_mem* m, uintptr_t cr3, uintptr_t va, uintptr_t* gpa) { return gva_gpa(m, cr3, va, gpa, NULL); } /* zero-copy borrowed read: leaf-bounded host pointer at `va` (see memmodel.h). */ __attribute__((hot)) const void* gva_ptr(vmie_mem* m, uintptr_t cr3, uintptr_t va, size_t* avail) { uintptr_t gpa; size_t leaf; if (gva_gpa(m, cr3, va, &gpa, &leaf)) return NULL; *avail = leaf; return gpa_ptr(m, gpa, leaf); } __attribute__((hot)) int gva_read(vmie_mem* m, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb) { uint8_t* d = dst; while (nmemb) { uintptr_t gpa; size_t leaf; if (gva_gpa(m, cr3, va, &gpa, &leaf)) return -1; const size_t n = leaf < nmemb ? leaf : nmemb; if (gpa_read(m, gpa, d, n)) return -1; va += n; d += n; nmemb -= n; } return 0; } __attribute__((hot)) int gva_write(vmie_mem* m, uintptr_t cr3, uintptr_t va, const void* src, size_t nmemb) { const uint8_t* s = src; while (nmemb) { uintptr_t gpa; size_t leaf; if (gva_gpa(m, cr3, va, &gpa, &leaf)) return -1; const size_t n = leaf < nmemb ? leaf : nmemb; if (gpa_write(m, gpa, s, n)) return -1; va += n; s += n; nmemb -= n; } return 0; } /* ---- bootstrap helpers (cold) -------------------------------------------- */ __attribute__((cold)) int khalf_score(const vmie_mem* m, uint64_t pml4) { const uint64_t t = pml4 & PFN_MASK; int n = 0; uint64_t e; for (int i = 256; i < 512; i++) if (!gpa_read((vmie_mem*)m, t + i * 8, &e, 8) && (e & PG_P)) n++; return n; } /* ---- region enumeration -------------------------------------------------- */ struct rgn_acc { vregion* out; int nmax; int n; uint32_t prot_any; uint64_t lo, hi; int have; uint64_t va, len; uint32_t prot; }; static void rgn_flush(struct rgn_acc* a) { if (!a->have) return; if (a->prot_any == 0 || (a->prot & a->prot_any)) { if (a->n < a->nmax) { a->out[a->n].va = a->va; a->out[a->n].len = a->len; a->out[a->n].prot = a->prot; } a->n++; } a->have = 0; } /* Clamp a present leaf to [lo,hi] and coalesce it onto the current run. */ static void rgn_leaf(struct rgn_acc* a, uint64_t va, uint64_t size, uint32_t prot) { uint64_t vend = va + size - 1; /* inclusive last byte */ if (vend < a->lo || va > a->hi) return; /* outside window */ if (va < a->lo) va = a->lo; if (vend > a->hi) vend = a->hi; const uint64_t len = vend - va + 1; if (a->have && prot == a->prot && va == a->va + a->len) { a->len += len; /* extend current run */ } else { rgn_flush(a); a->have = 1; a->va = va; a->len = len; a->prot = prot; } } static uint32_t rgn_prot(int rw, int us, int nx) { return VR_R | (rw ? VR_W : 0) | (nx ? 0 : VR_X) | (us ? VR_U : 0); } /* whole-subtree window test: does [base, base+span) intersect [lo,hi]? */ static int rgn_hit(uint64_t base, uint64_t span, uint64_t lo, uint64_t hi) { const uint64_t end = base + (span - 1); /* inclusive */ return !(end < lo || base > hi); } __attribute__((hot)) int gva_regions(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any, vregion* out, int nmax) { if (nmax <= 0) return 0; struct rgn_acc a = { out, nmax, 0, prot_any, lo, hi, 0, 0, 0, 0 }; const uint64_t* t4 = gpa_ptr(m, cr3 & PFN_MASK, 4096); if (!t4) return 0; for (int i4 = 0; i4 < 512; i4++) { const uint64_t e4 = t4[i4]; if (!(e4 & PG_P)) continue; const uint64_t b4 = VA_CANON((uint64_t)i4 << 39); if (!rgn_hit(b4, 1ull << 39, lo, hi)) continue; const int rw4 = (e4 >> 1) & 1, us4 = (e4 >> 2) & 1, nx4 = (int)(e4 >> 63) & 1; const uint64_t* t3 = gpa_ptr(m, e4 & PFN_MASK, 4096); if (!t3) continue; for (int i3 = 0; i3 < 512; i3++) { const uint64_t e3 = t3[i3]; if (!(e3 & PG_P)) continue; const uint64_t b3 = VA_CANON(((uint64_t)i4 << 39) | ((uint64_t)i3 << 30)); if (!rgn_hit(b3, 1ull << 30, lo, hi)) continue; const int rw3 = rw4 & ((e3 >> 1) & 1), us3 = us4 & ((e3 >> 2) & 1), nx3 = nx4 | ((int)(e3 >> 63) & 1); if (e3 & PG_PS) { rgn_leaf(&a, b3, 1ull << 30, rgn_prot(rw3, us3, nx3)); continue; } const uint64_t* t2 = gpa_ptr(m, e3 & PFN_MASK, 4096); if (!t2) continue; for (int i2 = 0; i2 < 512; i2++) { const uint64_t e2 = t2[i2]; if (!(e2 & PG_P)) continue; const uint64_t b2 = VA_CANON(((uint64_t)i4 << 39) | ((uint64_t)i3 << 30) | ((uint64_t)i2 << 21)); if (!rgn_hit(b2, 1ull << 21, lo, hi)) continue; const int rw2 = rw3 & ((e2 >> 1) & 1), us2 = us3 & ((e2 >> 2) & 1), nx2 = nx3 | ((int)(e2 >> 63) & 1); if (e2 & PG_PS) { rgn_leaf(&a, b2, 1ull << 21, rgn_prot(rw2, us2, nx2)); continue; } const uint64_t* t1 = gpa_ptr(m, e2 & PFN_MASK, 4096); if (!t1) continue; for (int i1 = 0; i1 < 512; i1++) { const uint64_t e1 = t1[i1]; if (!(e1 & PG_P)) continue; const uint64_t b1 = VA_CANON(((uint64_t)i4 << 39) | ((uint64_t)i3 << 30) | ((uint64_t)i2 << 21) | ((uint64_t)i1 << 12)); if (!rgn_hit(b1, 1ull << 12, lo, hi)) continue; const int rw1 = rw2 & ((e1 >> 1) & 1), us1 = us2 & ((e1 >> 2) & 1), nx1 = nx2 | ((int)(e1 >> 63) & 1); rgn_leaf(&a, b1, 1ull << 12, rgn_prot(rw1, us1, nx1)); } } } } rgn_flush(&a); return a.n; } /* ---- windowed sweep engine ----------------------------------------------- */ #define SWEEP_WIN (1u << 20) /* 1 MiB window (multiple of 8) */ #define SWEEP_RMAX (1u << 16) /* max runs enumerated per sweep */ __attribute__((hot)) int gva_sweep(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any, size_t overlap, gva_sweep_cb cb, void* user) { if (overlap >= SWEEP_WIN) return -1; vregion* rg = malloc((size_t)SWEEP_RMAX * sizeof *rg); uint8_t* buf = malloc(SWEEP_WIN); if (!rg || !buf) { free(rg); free(buf); return -1; } int nr = gva_regions(m, cr3, lo, hi, prot_any, rg, SWEEP_RMAX); if (nr > (int)SWEEP_RMAX) nr = (int)SWEEP_RMAX; int rc = 0; for (int r = 0; r < nr && !rc; r++) { uint64_t base = rg[r].va; /* VA of buf[0] */ uint64_t va = rg[r].va; const uint64_t vend = rg[r].va + (rg[r].len - 1); /* inclusive last */ size_t fill = 0; while (va <= vend) { size_t avail; const uint8_t* p = gva_ptr(m, cr3, va, &avail); if (!p) { /* gap: flush+skip */ if (fill && cb(user, buf, fill, base, overlap, 1)) { rc = 1; break; } if (vend - va < 0x1000 - (va & 0xFFF)) break; /* skip past top: done */ va += 0x1000 - (va & 0xFFF); base = va; fill = 0; continue; } size_t n = avail; /* leaf-contiguous */ if (n > (size_t)(vend - va + 1)) n = (size_t)(vend - va + 1); if (n > SWEEP_WIN - fill) n = SWEEP_WIN - fill; const int end = (n == (size_t)(vend - va + 1)); /* chunk hits vend */ if (fill == 0 && avail > 0x1000) { /* large-page lend */ if (cb(user, p, n, va, 0, end)) { rc = 1; break; } if (end) break; /* avoid va wrap */ va += n; if (overlap == 0) base = va; else { memcpy(buf, p + n - overlap, overlap); base = va - overlap; fill = overlap; } continue; } memcpy(buf + fill, p, n); /* buffered window */ fill += n; va += n; if (end || fill == SWEEP_WIN) { if (cb(user, buf, fill, base, overlap, end)) { rc = 1; break; } if (end) { fill = 0; break; } /* avoid va wrap */ if (overlap == 0 || overlap >= fill) { base = va; fill = 0; } else { /* carry overlap */ memmove(buf, buf + fill - overlap, overlap); base = va - overlap; fill = overlap; } } } if (!rc && fill && cb(user, buf, fill, base, overlap, 1)) rc = 1; } free(rg); free(buf); return rc; }