/* vatrog-vm-introspection-engine/src/engine/gva.c */

#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "engine-arch.h"

/* PTE permission bits we propagate down the walk. The bit positions are the
 * same ones gva_regions extracts with raw shifts (>> 1, >> 2, >> 63) from
 * every table entry it visits. */
#define PTE_RW (1ull << 1)   /* bit 1: mapped to VR_W by rgn_prot when set    */
#define PTE_US (1ull << 2)   /* bit 2: mapped to VR_U by rgn_prot when set    */
#define PTE_NX (1ull << 63)  /* bit 63: rgn_prot drops VR_X when set          */

/* ---- single-address translation (hot) ----------------------------------- *
 * Resolve `va` to a GPA by walking the four table levels rooted at `cr3`.
 *
 * Returns 0 on success and -1 when a table entry cannot be obtained through
 * gpa_ptr or does not have PG_P set. On success *gpa holds the GPA of `va`
 * and, when `leaf` is non-NULL, *leaf holds the number of bytes from `va` to
 * the end of the page that maps it (1 GiB, 2 MiB or 4 KiB). */
__attribute__((hot))
static int gva_gpa(vmie_mem* m, uintptr_t cr3, uintptr_t va,
                   uintptr_t* gpa, size_t* leaf) {
    const uint64_t* slot;
    uint64_t entry;
    uint64_t table = cr3 & PFN_MASK;

    /* Level 4: VA bits 47..39 select the entry. */
    slot = gpa_ptr(m, table + ((va >> 39) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;
    table = entry & PFN_MASK;

    /* Level 3: VA bits 38..30; PG_PS here means a 1 GiB page. */
    slot = gpa_ptr(m, table + ((va >> 30) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;
    if (entry & PG_PS) {
        const uint64_t within = va & 0x3FFFFFFF;
        *gpa = (entry & PFN_MASK & ~0x3FFFFFFFull) + within;
        if (leaf) *leaf = 0x40000000ull - within;
        return 0;
    }
    table = entry & PFN_MASK;

    /* Level 2: VA bits 29..21; PG_PS here means a 2 MiB page. */
    slot = gpa_ptr(m, table + ((va >> 21) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;
    if (entry & PG_PS) {
        const uint64_t within = va & 0x1FFFFF;
        *gpa = (entry & PFN_MASK & ~0x1FFFFFull) + within;
        if (leaf) *leaf = 0x200000ull - within;
        return 0;
    }
    table = entry & PFN_MASK;

    /* Level 1: VA bits 20..12; always a 4 KiB page. */
    slot = gpa_ptr(m, table + ((va >> 12) & 0x1ff) * 8, 8);
    if (!slot) return -1;
    entry = *slot;
    if (!(entry & PG_P)) return -1;

    const uint64_t within = va & 0xFFF;
    *gpa = (entry & PFN_MASK) + within;
    if (leaf) *leaf = 0x1000ull - within;
    return 0;
}

/* Cold, externally visible translation entry point. Forwards to the static
 * hot walker gva_gpa without requesting the leaf-remainder size, so callers
 * outside this TU do not need the hot primitive itself.
 * Returns gva_gpa's result: 0 with *gpa set to the GPA of `va` under `cr3`,
 * or -1 when the walk fails. Declared in engine-arch.h. */
__attribute__((cold))
int gva_translate(vmie_mem* m, uintptr_t cr3, uintptr_t va, uintptr_t* gpa) {
    const int status = gva_gpa(m, cr3, va, gpa, NULL);
    return status;
}

/* Zero-copy borrowed read (see memmodel.h): translate `va` under `cr3` and
 * hand back whatever gpa_ptr returns for the rest of the containing leaf.
 * Returns NULL when translation fails; in that case *avail is left untouched.
 * Otherwise *avail is set to the byte count from `va` to the end of its leaf
 * before gpa_ptr is consulted, so it is written even if gpa_ptr itself
 * returns NULL. `avail` must not be NULL. */
__attribute__((hot))
const void* gva_ptr(vmie_mem* m, uintptr_t cr3, uintptr_t va, size_t* avail) {
    uintptr_t phys;
    size_t span;

    if (gva_gpa(m, cr3, va, &phys, &span) != 0)
        return NULL;

    *avail = span;
    return gpa_ptr(m, phys, span);
}

/* Copy `nmemb` bytes starting at guest virtual address `va` (under `cr3`)
 * into `dst`. The range is handled one leaf at a time: each piece is
 * translated with gva_gpa and fetched with gpa_read. Returns 0 on success
 * (including nmemb == 0) and -1 as soon as a translation or gpa_read fails;
 * earlier pieces have already been copied into `dst` by then. */
__attribute__((hot))
int gva_read(vmie_mem* m, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb) {
    uint8_t* out = dst;

    for (size_t left = nmemb; left != 0; ) {
        uintptr_t phys;
        size_t span;

        if (gva_gpa(m, cr3, va, &phys, &span) != 0)
            return -1;
        if (span > left)
            span = left;                     /* never read past the request */
        if (gpa_read(m, phys, out, span) != 0)
            return -1;

        va   += span;
        out  += span;
        left -= span;
    }
    return 0;
}

/* Store `nmemb` bytes from `src` at guest virtual address `va` (under `cr3`).
 * The range is handled one leaf at a time: each piece is translated with
 * gva_gpa and stored with gpa_write. Returns 0 on success (including
 * nmemb == 0) and -1 as soon as a translation or gpa_write fails; earlier
 * pieces have already been written by then. */
__attribute__((hot))
int gva_write(vmie_mem* m, uintptr_t cr3, uintptr_t va, const void* src, size_t nmemb) {
    const uint8_t* in = src;

    for (size_t left = nmemb; left != 0; ) {
        uintptr_t phys;
        size_t span;

        if (gva_gpa(m, cr3, va, &phys, &span) != 0)
            return -1;
        if (span > left)
            span = left;                     /* never write past the request */
        if (gpa_write(m, phys, in, span) != 0)
            return -1;

        va   += span;
        in   += span;
        left -= span;
    }
    return 0;
}

/* ---- bootstrap helpers (cold) -------------------------------------------- */

/* Count the present entries in slots 256..511 of the table at `pml4`.
 * Each 8-byte entry is fetched with gpa_read; an entry whose read fails is
 * not counted. Returns a value in 0..256. */
__attribute__((cold))
int khalf_score(const vmie_mem* m, uint64_t pml4) {
    const uint64_t table = pml4 & PFN_MASK;
    int present = 0;

    for (int slot = 256; slot < 512; slot++) {
        uint64_t entry;
        /* gpa_read takes a non-const handle, hence the cast. */
        if (gpa_read((vmie_mem*)m, table + slot * 8, &entry, 8) != 0)
            continue;
        if (entry & PG_P)
            present++;
    }
    return present;
}

/* ---- region enumeration -------------------------------------------------- */

/* Accumulator threaded through the region walk. Field order matters: the
 * walker fills it with a positional initializer. */
struct rgn_acc {
    /* Output array, its capacity, and the number of runs accepted so far.
     * rgn_flush keeps incrementing n past nmax without storing, so n can
     * exceed the capacity. */
    vregion* out; int nmax; int n;
    /* Protection filter: 0 accepts every run, otherwise a run is accepted
     * when it shares at least one bit with this mask. */
    uint32_t prot_any;
    /* Inclusive VA window that leaves are clamped to. */
    uint64_t lo, hi;
    /* Pending (not yet flushed) run: valid flag, start VA, length in bytes,
     * and protection bits. */
    int      have; uint64_t va, len; uint32_t prot;
};

static void rgn_flush(struct rgn_acc* a) {
    if (!a->have) return;
    if (a->prot_any == 0 || (a->prot & a->prot_any)) {
        if (a->n < a->nmax) {
            a->out[a->n].va = a->va; a->out[a->n].len = a->len; a->out[a->n].prot = a->prot;
        }
        a->n++;
    }
    a->have = 0;
}

/* Feed one present leaf [va, va+size) into the accumulator.
 * The leaf is dropped if it lies wholly outside the inclusive window
 * [lo,hi]; otherwise it is clamped to the window. A clamped leaf that starts
 * exactly where the pending run ends and has the same protection extends
 * that run; anything else flushes the pending run and starts a new one. */
static void rgn_leaf(struct rgn_acc* a, uint64_t va, uint64_t size, uint32_t prot) {
    uint64_t first = va;
    uint64_t last  = va + size - 1;                /* inclusive last byte */

    if (last < a->lo || first > a->hi)
        return;                                    /* no overlap with window */
    if (first < a->lo) first = a->lo;
    if (last  > a->hi) last  = a->hi;

    const uint64_t span = last - first + 1;
    const int extends = a->have && prot == a->prot && first == a->va + a->len;

    if (!extends) {
        rgn_flush(a);
        a->have = 1;
        a->va   = first;
        a->len  = span;
        a->prot = prot;
        return;
    }
    a->len += span;                                /* grow the pending run */
}

/* Build VR_* protection bits from effective page-table permissions:
 * VR_R is always set, VR_W when `rw`, VR_U when `us`, VR_X unless `nx`. */
static uint32_t rgn_prot(int rw, int us, int nx) {
    uint32_t prot = VR_R;
    if (rw)  prot |= VR_W;
    if (!nx) prot |= VR_X;
    if (us)  prot |= VR_U;
    return prot;
}

/* Whole-subtree window test: returns 1 when [base, base+span) shares at
 * least one byte with the inclusive window [lo,hi], else 0. The last byte is
 * computed as base + (span - 1) so a subtree ending at the very top of the
 * address space does not wrap. `span` must be non-zero. */
static int rgn_hit(uint64_t base, uint64_t span, uint64_t lo, uint64_t hi) {
    const uint64_t last = base + (span - 1);       /* inclusive */
    return last >= lo && base <= hi;
}

/* Enumerate the mapped virtual ranges under `cr3` that intersect the
 * inclusive window [lo,hi].
 *
 * All four table levels are walked in ascending VA order. Each present leaf
 * (1 GiB, 2 MiB or 4 KiB) is passed to rgn_leaf, which clamps it to the
 * window and merges adjacent leaves with equal protection into one run.
 * Effective protection combines every level on the path: writable and user
 * bits are ANDed, the no-execute bit is ORed. Subtrees that miss the window,
 * or whose table cannot be obtained through gpa_ptr, are skipped.
 *
 * Runs are filtered by `prot_any` (0 = keep all) and stored in out[0..nmax).
 * Returns the number of accepted runs, which may exceed `nmax`; returns 0
 * when nmax <= 0 or the top-level table is unavailable. */
__attribute__((hot))
int gva_regions(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
                uint32_t prot_any, vregion* out, int nmax) {
    if (nmax <= 0) return 0;

    struct rgn_acc acc = {
        .out = out, .nmax = nmax, .n = 0,
        .prot_any = prot_any, .lo = lo, .hi = hi,
        .have = 0, .va = 0, .len = 0, .prot = 0,
    };

    const uint64_t* pml4 = gpa_ptr(m, cr3 & PFN_MASK, 4096);
    if (!pml4) return 0;

    for (int a = 0; a < 512; a++) {
        const uint64_t pml4e = pml4[a];
        if (!(pml4e & PG_P)) continue;

        const uint64_t raw4 = (uint64_t)a << 39;
        const uint64_t va4  = VA_CANON(raw4);
        if (!rgn_hit(va4, 1ull << 39, lo, hi)) continue;

        const int w4 = (pml4e >> 1) & 1;
        const int u4 = (pml4e >> 2) & 1;
        const int x4 = (int)(pml4e >> 63) & 1;

        const uint64_t* pdpt = gpa_ptr(m, pml4e & PFN_MASK, 4096);
        if (!pdpt) continue;

        for (int b = 0; b < 512; b++) {
            const uint64_t pdpte = pdpt[b];
            if (!(pdpte & PG_P)) continue;

            const uint64_t raw3 = raw4 | ((uint64_t)b << 30);
            const uint64_t va3  = VA_CANON(raw3);
            if (!rgn_hit(va3, 1ull << 30, lo, hi)) continue;

            const int w3 = w4 & ((pdpte >> 1) & 1);
            const int u3 = u4 & ((pdpte >> 2) & 1);
            const int x3 = x4 | ((int)(pdpte >> 63) & 1);

            if (pdpte & PG_PS) {                       /* 1 GiB leaf */
                rgn_leaf(&acc, va3, 1ull << 30, rgn_prot(w3, u3, x3));
                continue;
            }

            const uint64_t* pd = gpa_ptr(m, pdpte & PFN_MASK, 4096);
            if (!pd) continue;

            for (int c = 0; c < 512; c++) {
                const uint64_t pde = pd[c];
                if (!(pde & PG_P)) continue;

                const uint64_t raw2 = raw3 | ((uint64_t)c << 21);
                const uint64_t va2  = VA_CANON(raw2);
                if (!rgn_hit(va2, 1ull << 21, lo, hi)) continue;

                const int w2 = w3 & ((pde >> 1) & 1);
                const int u2 = u3 & ((pde >> 2) & 1);
                const int x2 = x3 | ((int)(pde >> 63) & 1);

                if (pde & PG_PS) {                     /* 2 MiB leaf */
                    rgn_leaf(&acc, va2, 1ull << 21, rgn_prot(w2, u2, x2));
                    continue;
                }

                const uint64_t* pt = gpa_ptr(m, pde & PFN_MASK, 4096);
                if (!pt) continue;

                for (int d = 0; d < 512; d++) {
                    const uint64_t pte = pt[d];
                    if (!(pte & PG_P)) continue;

                    const uint64_t raw1 = raw2 | ((uint64_t)d << 12);
                    const uint64_t va1  = VA_CANON(raw1);
                    if (!rgn_hit(va1, 1ull << 12, lo, hi)) continue;

                    const int w1 = w2 & ((pte >> 1) & 1);
                    const int u1 = u2 & ((pte >> 2) & 1);
                    const int x1 = x2 | ((int)(pte >> 63) & 1);

                    rgn_leaf(&acc, va1, 1ull << 12, rgn_prot(w1, u1, x1));
                }
            }
        }
    }

    rgn_flush(&acc);                                   /* emit the last run */
    return acc.n;
}

/* ---- windowed sweep engine ----------------------------------------------- */

#define SWEEP_WIN  (1u << 20)        /* 1 MiB window (multiple of 8)          */
#define SWEEP_RMAX (1u << 16)        /* max runs enumerated per sweep         */

/* Stream the mapped memory of [lo,hi] under `cr3` to `cb`, window by window.
 *
 * The runs are enumerated once with gva_regions (at most SWEEP_RMAX are
 * visited) and then walked leaf by leaf through gva_ptr:
 *   - With an empty buffer and a leaf remainder larger than 4 KiB, up to
 *     SWEEP_WIN bytes of guest memory are lent to `cb` directly, with 0 as
 *     the fifth argument.
 *   - Otherwise bytes are copied into a SWEEP_WIN-byte buffer, which is
 *     delivered with `overlap` as the fifth argument once it is full or the
 *     run ends.
 *   - Between consecutive windows of one contiguous stretch, the last
 *     `overlap` bytes are carried to the front of the buffer.
 *   - When gva_ptr fails, buffered bytes are delivered and the walk resumes
 *     at the next 4 KiB boundary with an empty buffer.
 *
 * cb receives (user, data, length, VA of data[0], 0-or-overlap, end), where
 * `end` is non-zero when the chunk closes a contiguous stretch (run end or
 * just before a gap). A non-zero return from cb stops the sweep.
 *
 * Returns -1 if overlap >= SWEEP_WIN or an allocation fails, 1 if cb asked
 * to stop, 0 otherwise. */
__attribute__((hot))
int gva_sweep(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
              uint32_t prot_any, size_t overlap, gva_sweep_cb cb, void* user) {
    if (overlap >= SWEEP_WIN) return -1;

    vregion* rg = malloc((size_t)SWEEP_RMAX * sizeof *rg);
    uint8_t* buf = malloc(SWEEP_WIN);
    if (!rg || !buf) { free(rg); free(buf); return -1; }

    /* gva_regions reports the total match count; only the stored ones exist. */
    int nr = gva_regions(m, cr3, lo, hi, prot_any, rg, SWEEP_RMAX);
    if (nr > (int)SWEEP_RMAX) nr = (int)SWEEP_RMAX;

    int rc = 0;
    for (int r = 0; r < nr && !rc; r++) {
        uint64_t base = rg[r].va;                 /* VA of buf[0]            */
        uint64_t va   = rg[r].va;
        const uint64_t vend = rg[r].va + (rg[r].len - 1);  /* inclusive last */
        size_t fill = 0;

        while (va <= vend) {
            size_t avail;
            const uint8_t* p = gva_ptr(m, cr3, va, &avail);
            if (!p) {                                      /* gap: flush+skip */
                if (fill && cb(user, buf, fill, base, overlap, 1)) { rc = 1; break; }
                /* The buffer has been delivered. Clear it before any exit
                 * from this branch so the tail flush after the loop does not
                 * hand the same bytes to cb a second time. */
                fill = 0;
                if (vend - va < 0x1000 - (va & 0xFFF)) break;  /* skip past top: done */
                va  += 0x1000 - (va & 0xFFF);
                base = va;
                continue;
            }
            size_t n = avail;                              /* leaf-contiguous */
            if (n > (size_t)(vend - va + 1)) n = (size_t)(vend - va + 1);
            if (n > SWEEP_WIN - fill)        n = SWEEP_WIN - fill;
            const int end = (n == (size_t)(vend - va + 1)); /* chunk hits vend */

            if (fill == 0 && avail > 0x1000) {             /* large-page lend */
                if (cb(user, p, n, va, 0, end)) { rc = 1; break; }
                if (end) break;                            /* avoid va wrap   */
                va += n;
                /* Carry the overlap only when the lent chunk actually holds
                 * that many bytes. A shorter chunk (a run or post-gap resume
                 * that starts near the end of a large page) would make
                 * p + n - overlap point before p, outside the range gva_ptr
                 * validated, and would put base below the start of the
                 * stretch. In that case start the next window fresh. */
                if (overlap == 0 || overlap > n) base = va;
                else { memcpy(buf, p + n - overlap, overlap); base = va - overlap; fill = overlap; }
                continue;
            }

            memcpy(buf + fill, p, n);                      /* buffered window */
            fill += n; va += n;

            if (end || fill == SWEEP_WIN) {
                if (cb(user, buf, fill, base, overlap, end)) { rc = 1; break; }
                if (end) { fill = 0; break; }              /* avoid va wrap   */
                if (overlap == 0 || overlap >= fill) {
                    base = va; fill = 0;
                } else {                                   /* carry overlap   */
                    memmove(buf, buf + fill - overlap, overlap);
                    base = va - overlap; fill = overlap;
                }
            }
        }
        /* Tail flush: delivers carried overlap bytes left in the buffer when
         * the loop exits without having flushed them. */
        if (!rc && fill && cb(user, buf, fill, base, overlap, 1)) rc = 1;
    }

    free(rg); free(buf);
    return rc;
}