From c36ffe295d3559f50d910f196ef1872b2b8ca961 Mon Sep 17 00:00:00 2001 From: Gregory Lirent Date: Tue, 16 Jun 2026 17:38:10 +0300 Subject: [PATCH] Add process-scoped scanning algorithms: multi-pattern, code-xref, pointer-map, dissection, snapshot diff All are OS-agnostic handlers keyed by vmie_mem* + cr3, built on the windowed sweep / region walk / matcher; none names a Windows concept and each compiles against include/ alone. Scanning: a compiled multi-pattern automaton (Aho-Corasick over each pattern's longest literal anchor, then a masked verify) finds N signatures in one sweep pass (sigscan.h sigset; scan.h gva_sig_scan_multi). gva_code_xref decodes rel32 call/jmp and RIP-relative lea/mov to find every instruction targeting a given VA. Pointer graph (pmap.h): one sweep indexes every qword whose value lands in a mapped region into reverse + forward edges. pmap_referrers is the keystone - it answers who-points-here, class-instance enumeration (referrers of a vtable VA), and string xref (referrers of a string VA) from the same index; pmap_paths is the indexed counterpart to scan_pointer's one-shot DFS; struct_dissect classifies the qwords of an instance (pointer/vtable/float/ int/string) into a field map. Temporal (snapdiff.h): snap_take captures a window's bytes, snap_diff reports the changed runs against a later read. 
--- CMakeLists.txt | 6 +- include/pmap.h | 82 ++++++++ include/scan.h | 28 +++ include/sigscan.h | 30 +++ include/snapdiff.h | 34 ++++ src/handlers/codescan.c | 130 +++++++++++++ src/handlers/pmap.c | 411 ++++++++++++++++++++++++++++++++++++++++ src/handlers/sigset.c | 253 +++++++++++++++++++++++++ src/handlers/snapdiff.c | 187 ++++++++++++++++++ 9 files changed, 1160 insertions(+), 1 deletion(-) create mode 100644 include/pmap.h create mode 100644 include/snapdiff.h create mode 100644 src/handlers/codescan.c create mode 100644 src/handlers/pmap.c create mode 100644 src/handlers/sigset.c create mode 100644 src/handlers/snapdiff.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 406250a..3bd20fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,11 @@ add_library(vmie STATIC src/engine/win32/profile.c src/engine/win32/text.c src/handlers/scan.c - src/handlers/sigscan.c) + src/handlers/sigscan.c + src/handlers/sigset.c + src/handlers/codescan.c + src/handlers/pmap.c + src/handlers/snapdiff.c) target_include_directories(vmie PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include # public API: include/*.h PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/core/include # private: core.h diff --git a/include/pmap.h b/include/pmap.h new file mode 100644 index 0000000..f03e2ee --- /dev/null +++ b/include/pmap.h @@ -0,0 +1,82 @@ +/* pmap.h - pointer-graph index and structure analysis (OS-agnostic handler). + * + * Layered above the memory-model contract (memmodel.h) and the scanning surface + * (scan.h, for `range` and `scan_ptr_path`). A `pmap` is a one-pass reverse + + * forward index of every intra-address-space pointer under a `cr3`: for each + * 8-byte-aligned qword whose VALUE lands inside a mapped region, it records the + * edge `referrer_va -> target_va`. Two sorted views answer the keystone queries + * in O(log n): who-points-here (referrers) and what-does-this-point-to + * (targets). Everything is keyed by `vmie_mem* + cr3`; it names no Windows + * object. 
+ * + * Ownership: pmap_build / pmap_free (create/destroy). All queries are read-only + * and re-entrant against a built pmap; pmap_free is safe on NULL. + */ +#ifndef VMIE_PMAP_H +#define VMIE_PMAP_H +#include +#include +#include "memmodel.h" /* vmie_mem, vregion, range */ +#include "scan.h" /* scan_ptr_path, SCAN_PTR_MAXDEPTH */ + +typedef struct pmap pmap; /* reverse + forward index (opaque) */ + +/* One gva_sweep over [lo,hi] (prot filter): for every 8-byte-aligned qword whose + * VALUE lands inside a mapped region (membership tested against a gva_regions + * set), record the edge referrer_va -> target_va. Stores two sorted views (by + * target, by referrer) for O(log n) queries. Returns NULL on OOM or bad input. */ +pmap* pmap_build(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any); + +/* Release a pmap from pmap_build. Safe on NULL. */ +void pmap_free(pmap* pm); + +/* All VAs holding a pointer whose value == target_va. THE keystone query: + * who-points-here; vtable-instance enumeration (target = a vtable VA, since an + * object's first qword is its vtable); string-xref target. Writes up to `max` + * referrer VAs to `out` (NULL to count only) and returns the TOTAL. */ +int pmap_referrers(const pmap* pm, uint64_t target_va, uint64_t* out, int max); + +/* Forward edges: pointer VALUES stored at/around referrer_va (what this region + * points to). For path walking / dissection. Writes up to `max` target VAs to + * `out` (NULL to count only) and returns the TOTAL. */ +int pmap_targets(const pmap* pm, uint64_t referrer_va, uint64_t* out, int max); + +/* Map-accelerated pointer paths to `target`, anchored on module ranges `mods`. + * Same result type as scan_pointer. Cost profile differs deliberately: + * scan_pointer is a ONE-SHOT live DFS that builds no index (low memory for a + * single query); pmap_paths runs over the ALREADY-BUILT index (one sweep amort- + * ized across many cheap queries). Not a duplicate path - a different trade-off. 
+ * Writes up to `max` paths to `out` (which must be non-NULL) and returns the + * number written (<= max; the walk stops once `out` is full), or -1 on bad + * input. */ +int pmap_paths(const pmap* pm, uint64_t target, const range* mods, int nmods, + int max_depth, uint32_t max_off, scan_ptr_path* out, int max); + +/* string-xref: find the needle bytes anywhere in the AS (matcher over the + * sweep), then pmap_referrers for each occurrence's VA. The caller pre-encodes + * the byte image (e.g. a UTF-16 string), so `needle`/`nlen` are matched as raw + * bytes. Writes up to `max` referrer VAs to `out` (NULL to count only) and + * returns the TOTAL number of referrers, or -1 on bad input. */ +int xref_string(vmie_mem* m, uintptr_t cr3, const pmap* pm, + const void* needle, size_t nlen, uint64_t* out, int max); + +/* ---- structure dissection ------------------------------------------------ * + * Classify each 8-byte slot in [va, va+nbytes). */ +typedef enum { + FK_UNKNOWN, FK_PTR, FK_VTABLE, FK_F32, FK_F64, FK_I32, FK_I64, FK_ASCII, FK_UTF16 +} field_kind; + +typedef struct { + uint32_t off; /* byte offset from va */ + field_kind kind; + uint64_t raw; /* the raw 8 bytes at off */ + uint64_t target; /* pointee VA for FK_PTR/FK_VTABLE, else 0 */ +} field_desc; + +/* Classify each slot: PTR = value lands in a mapped region. VTABLE = value + * points into a non-writable region whose first qword is itself a pointer + * into an X-region. F32/F64 = finite, sane magnitude. ASCII/UTF16 = printable run + * >= 4. Else I64 (nonzero high 32 bits) or I32. Reuses gva_read + gva_regions + * (+ pm if given, may be NULL). Slots that cannot be read are skipped. Writes + * slots to `out` and returns the number written (<= max). 
*/ +int struct_dissect(vmie_mem* m, uintptr_t cr3, uint64_t va, size_t nbytes, + const pmap* pm, field_desc* out, int max); + +#endif /* VMIE_PMAP_H */ diff --git a/include/scan.h b/include/scan.h index ffd56d9..30c00f1 100644 --- a/include/scan.h +++ b/include/scan.h @@ -53,6 +53,34 @@ int scan_pointer(vmie_mem* m, uintptr_t cr3, const range* mods, int nmods, uint64_t target, int max_depth, uint32_t max_off, scan_ptr_path* out, int max); +/* ---- multi-pattern + code-xref bridges (over sigscan.h / gva_sweep) ------ * + * Same windowed-seam discipline as gva_sig_scan, but for a compiled sigset and + * a heuristic rel32 decoder. Both stream guest memory through gva_sweep and + * report VAs in the guest's own coordinate space. */ + +/* One attributed multi-pattern hit: which compiled pattern, and where. */ +typedef struct { int pattern; uint64_t va; } sig_multi_hit; + +/* Windowed multi-pattern scan over [lo,hi]: drives sig_set_each on each window, + * seam-deduped like gva_sig_scan. The sweep overlap is (longest pattern len - 1) + * = sigset_maxlen(s) - 1, so no full pattern is split at a window boundary. + * Writes up to `max` hits to `out` (NULL to count only) and returns the TOTAL + * number of hits, or -1 on a NULL/empty sigset. */ +int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, + uint32_t prot_any, const sigset* s, + sig_multi_hit* out, int max); + +/* code-xref: every instruction in the X-regions of [lo,hi] whose rel32 operand + * targets `target_va`. Heuristic decoder (NOT a full disassembler): recognizes + * E8 call / E9 jmp (next_rip + disp32) and the RIP-relative ModRM forms + * (mod=00, rm=101) of lea/mov (REX.W 8D / 8B) where target = next_rip + + * (int32)disp. Records each matching instruction-start VA. The sweep forces + * VR_X and carries a >=15-byte overlap (max x86 instruction length) so no + * instruction is cut at a window seam. 
Writes up to `max` VAs to `out` (NULL to + * count only) and returns the TOTAL number of matches, or -1 on bad input. */ +int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, + uint64_t target_va, uint64_t* out, int max); + /* gva bridges to the signature matcher: build mem_view from guest memory and feed sigscan.h */ int gva_sig_scan (vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any, const sig_pattern_t* p, uint64_t* out, int max); diff --git a/include/sigscan.h b/include/sigscan.h index 3115a37..ac6aaef 100644 --- a/include/sigscan.h +++ b/include/sigscan.h @@ -79,4 +79,34 @@ uint64_t sig_rip(mem_view_t v, uint64_t hit_va, size_t disp_off, size_t instr_le * is actually available. Useful for narrowing a scan to a [start,end] window. */ mem_view_t mem_sub(mem_view_t v, uint64_t start_va, size_t size); +/* ---- compiled multi-pattern matcher (Aho-Corasick anchors) --------------- * + * A sigset compiles N patterns into one automaton scanned in a single pass. It + * is still PURE (only mem_view_t, no vmie_mem). Each pattern contributes its + * longest contiguous non-wildcard run as a literal anchor; an Aho-Corasick goto + * over those anchors finds candidate sites, and on an anchor hit the FULL masked + * pattern is verified (mem_sub + mask compare) before the match is reported. + * This is the building block under gva_sig_scan_multi (see scan.h). */ +typedef struct sigset sigset; /* compiled automaton (opaque) */ + +/* Compile `n` patterns into a sigset. The patterns are borrowed for the call + * only (their bytes are copied into the automaton). Returns NULL on OOM, on + * n <= 0, or if any pattern is empty / all-wildcard (no literal anchor). Release + * with sigset_free(). */ +sigset* sigset_compile(const sig_pattern_t* pats, int n); + +/* Release a sigset produced by sigset_compile. Safe on NULL. 
*/ +void sigset_free(sigset* s); + +/* Invoke cb(user, pat_index, match_va) for every full-pattern match of any + * compiled pattern in `v`, anchor-driven (not necessarily in ascending order + * across patterns). `cb` returns nonzero to stop early. A windowed caller + * carries sigset_maxlen(s) - 1 bytes of overlap (longest FULL pattern, not the + * longest anchor) to de-dup across seams. */ +void sig_set_each(const sigset* s, mem_view_t v, + int (*cb)(void* user, int pat, uint64_t va), void* user); + +/* Longest compiled pattern length, in bytes. A windowed sweep carries + * (this - 1) leading-overlap bytes so no full pattern is split at a seam (the + * gva_sig_scan_multi overlap contract). 0 on NULL. */ +size_t sigset_maxlen(const sigset* s); + #endif /* VMIE_SIGSCAN_H */ \ No newline at end of file diff --git a/include/snapdiff.h b/include/snapdiff.h new file mode 100644 index 0000000..e871d4e --- /dev/null +++ b/include/snapdiff.h @@ -0,0 +1,34 @@ +/* snapdiff.h - per-process temporal snapshot + diff (OS-agnostic handler). + * + * A `snapshot` captures the bytes of every mapped run in a VA window under a + * `cr3` at time T0. snap_diff re-reads the same window now and emits the runs + * whose bytes changed (coalesced VA-contiguous diffs), including runs that + * appeared or disappeared since T0. Keyed by `vmie_mem* + cr3`; it names no + * Windows object. + * + * Ownership: snap_take / snap_free (create/destroy). snap_free is safe on NULL. + */ +#ifndef VMIE_SNAPDIFF_H +#define VMIE_SNAPDIFF_H +#include +#include +#include "memmodel.h" /* vmie_mem, vregion */ + +typedef struct snapshot snapshot; + +/* Capture the bytes of every mapped run in [lo,hi] (prot filter) under `cr3` at + * T0. Returns a heap-owned snapshot, or NULL on OOM / bad input. */ +snapshot* snap_take(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any); + +/* Release a snapshot from snap_take. Safe on NULL. 
*/ +void snap_free(snapshot* s); + +/* Re-read the window now, compare to the snapshot, and emit changed runs as + * vregion {va, len, prot = current} - coalescing VA-contiguous changed bytes + * into one run. Runs that appeared or disappeared since T0 count as changed. + * Writes up to `max` runs to `changed` (NULL to count only) and returns the + * TOTAL number of changed runs, or -1 on bad input. */ +int snap_diff(const snapshot* s, vmie_mem* m, uintptr_t cr3, + vregion* changed, int max); + +#endif /* VMIE_SNAPDIFF_H */ diff --git a/src/handlers/codescan.c b/src/handlers/codescan.c new file mode 100644 index 0000000..a9db7c5 --- /dev/null +++ b/src/handlers/codescan.c @@ -0,0 +1,130 @@ +/* codescan.c - windowed multi-pattern scan + heuristic rel32 code-xref. + * + * Both bridges stream guest memory through gva_sweep and report guest VAs: + * gva_sig_scan_multi - drives a compiled sigset over each window, seam-deduped + * (overlap = longest pattern len - 1). + * gva_code_xref - heuristic decode of the rel32 instruction forms in + * X-regions; records instruction starts whose computed + * target equals target_va. Overlap >= 15 (max x86 insn + * length) keeps an instruction whole across a seam. + * + * Handler boundary: only memmodel.h / scan.h / sigscan.h. + */ +#include +#include +#include +#include "memmodel.h" +#include "sigscan.h" +#include "scan.h" + +/* x86-64 maximum instruction length; the code-xref sweep overlap. A decoded + * instruction may be up to this long, so a window must carry this many leading + * bytes to re-present an instruction split at the previous seam. 
*/ +#define X86_MAX_INSN 15 + +/* ---- multi-pattern scan -------------------------------------------------- */ + +struct multi_cb { + const sigset* s; + sig_multi_hit* out; int max, n; + uint64_t win_base; size_t win_len, win_ov; int win_last; +}; + +__attribute__((hot)) +static int multi_hit(void* u, int pat, uint64_t va) { + struct multi_cb* c = u; + const size_t off = (size_t)(va - c->win_base); + if (!c->win_last && c->win_len > c->win_ov && off >= c->win_len - c->win_ov) { + return 0; /* trailing overlap: next window owns it */ + } + if (c->out && c->n < c->max) { c->out[c->n].pattern = pat; c->out[c->n].va = va; } + c->n++; + return 0; +} + +__attribute__((hot)) +static int multi_sweep_cb(void* u, const uint8_t* data, size_t len, + uint64_t base, size_t ov, int last) { + struct multi_cb* c = u; + c->win_base = base; c->win_len = len; c->win_ov = ov; c->win_last = last; + const mem_view_t v = { data, len, base }; + sig_set_each(c->s, v, multi_hit, c); + return 0; +} + +int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, + uint32_t prot_any, const sigset* s, + sig_multi_hit* out, int max) { + const size_t maxlen = sigset_maxlen(s); + if (maxlen == 0) { return -1; } + struct multi_cb c; memset(&c, 0, sizeof c); + c.s = s; c.out = out; c.max = max; + if (gva_sweep(m, cr3, lo, hi, prot_any, maxlen - 1, multi_sweep_cb, &c) < 0) { + return -1; + } + return c.n; +} + +/* ---- heuristic rel32 code-xref ------------------------------------------- * + * Decode just enough to recover a rel32 target. Two recognized shapes: + * E8/E9 disp32 (call/jmp) : start+5 + disp + * REX.W 8D|8B modrm(00,*,101) disp32 (lea/mov rip) : start+7 + disp + * The lea/mov form REQUIRES the REX.W prefix (0x48..0x4F with W set), per the + * 64-bit operand RIP-relative encoding; a bare 8D/8B is not accepted (it would + * also let the decoder re-recognize the same instruction one byte past its REX + * prefix). 
Returns the encoded length (>=5) and writes the target via *target, + * or 0 if `p[0..avail)` is not one of the forms. */ +__attribute__((hot)) +static size_t decode_rel32(const uint8_t* p, size_t avail, + uint64_t start_va, uint64_t* target) { + if (avail >= 5 && (p[0] == 0xE8 || p[0] == 0xE9)) { + int32_t disp; memcpy(&disp, p + 1, 4); + *target = start_va + 5 + (int64_t)disp; + return 5; + } + /* REX.W prefix (0x48..0x4F: bit 3 = W), then 8D/8B with RIP-rel ModRM */ + if (avail >= 7 && (p[0] & 0xF8) == 0x48 && (p[1] == 0x8D || p[1] == 0x8B)) { + const uint8_t modrm = p[2]; + if ((modrm & 0xC0) == 0x00 && (modrm & 0x07) == 0x05) { /* mod=00 rm=101 */ + int32_t disp; memcpy(&disp, p + 3, 4); + *target = start_va + 7 + (int64_t)disp; /* rex op modrm disp32 */ + return 7; + } + } + return 0; +} + +struct xref_cb { + uint64_t target; + uint64_t* out; int max, n; +}; + +__attribute__((hot)) +static int xref_sweep_cb(void* u, const uint8_t* data, size_t len, + uint64_t base, size_t ov, int last) { + struct xref_cb* c = u; + /* Decode at every byte offset (heuristic, overlapping). A match that STARTS + * in the trailing overlap of a non-last window is dropped: the next window + * re-presents that instruction whole in its leading overlap. */ + const size_t limit = last ? len : (len > ov ? 
len - ov : 0); + for (size_t off = 0; off < len; off++) { + if (!last && off >= limit) { break; } + uint64_t tgt = 0; + const size_t ilen = decode_rel32(data + off, len - off, base + off, &tgt); + if (ilen && tgt == c->target) { + if (c->out && c->n < c->max) { c->out[c->n] = base + off; } + c->n++; + } + } + return 0; +} + +int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, + uint64_t target_va, uint64_t* out, int max) { + struct xref_cb c; memset(&c, 0, sizeof c); + c.target = target_va; c.out = out; c.max = max; + if (gva_sweep(m, cr3, lo, hi, VR_X, X86_MAX_INSN, xref_sweep_cb, &c) < 0) { + return -1; + } + return c.n; +} diff --git a/src/handlers/pmap.c b/src/handlers/pmap.c new file mode 100644 index 0000000..69883fd --- /dev/null +++ b/src/handlers/pmap.c @@ -0,0 +1,411 @@ +/* pmap.c - pointer-graph index + structure dissection (OS-agnostic handler). + * + * pmap_build runs one gva_sweep, collecting every 8-aligned qword whose value + * lands in a mapped region as an edge referrer_va -> target_va, then sorts two + * index views (by target, by referrer) for O(log n) queries. pmap_referrers / + * pmap_targets are binary-search range scans over those views. pmap_paths walks + * the reverse index DFS (indexed analogue of scan_pointer's one-shot live DFS). + * xref_string finds a needle then resolves its referrers. struct_dissect + * classifies fixed-size slots by re-reading memory and testing membership. + * + * Handler boundary: only memmodel.h / scan.h / sigscan.h. + */ +#include +#include +#include +#include +#include "memmodel.h" +#include "sigscan.h" +#include "scan.h" +#include "pmap.h" + +#define PM_REG_CAP (1 << 16) + +/* A directed pointer edge: a qword at `ref` holding the value `tgt`. 
*/ +typedef struct { uint64_t tgt, ref; } pedge; + +struct pmap { + pedge* edges; /* all edges (insertion order) */ + size_t nedge; + uint32_t* by_tgt; /* edge indices sorted by (tgt, ref) */ + uint32_t* by_ref; /* edge indices sorted by (ref, tgt) */ + + vregion* regs; /* mapped region set used for membership tests */ + int nregs; +}; + +/* ---- mapped-region membership (sorted regs, binary search) --------------- */ + +static int reg_cmp(const void* a, const void* b) { + const uint64_t x = ((const vregion*)a)->va, y = ((const vregion*)b)->va; + return (x > y) - (x < y); +} + +__attribute__((hot)) +static int in_mapped(const vregion* regs, int n, uint64_t v) { + int lo = 0, hi = n; /* find last region with va <= v */ + while (lo < hi) { + const int mid = (lo + hi) / 2; + if (regs[mid].va <= v) { lo = mid + 1; } else { hi = mid; } + } + if (lo == 0) { return 0; } + const vregion* r = ®s[lo - 1]; + return v >= r->va && v < r->va + r->len; +} + +/* ---- index sort (data-oriented: sort packed keys, emit edge indices) ----- * + * qsort takes no user pointer, so rather than a context-keyed comparator the + * keys are packed into a self-contained array (primary, secondary, edge index) + * sorted by value; the sorted index column is then peeled off. */ +typedef struct { uint64_t a, b; uint32_t idx; } skey; +static int skey_cmp(const void* x, const void* y) { + const skey* p = x; const skey* q = y; + if (p->a != q->a) { return p->a < q->a ? -1 : 1; } + if (p->b != q->b) { return p->b < q->b ? -1 : 1; } + return 0; +} + +/* Build a sorted index `out` (len nedge) from edges, keyed (primary,secondary). + * primary_is_tgt selects (tgt,ref) vs (ref,tgt). Returns 0 / -1 on OOM. 
*/ +__attribute__((cold)) +static int build_index(const pedge* edges, size_t nedge, int primary_is_tgt, + uint32_t** out) { + if (nedge == 0) { *out = NULL; return 0; } + skey* k = malloc(nedge * sizeof *k); + uint32_t* idx = malloc(nedge * sizeof *idx); + if (!k || !idx) { free(k); free(idx); return -1; } + for (size_t i = 0; i < nedge; i++) { + k[i].a = primary_is_tgt ? edges[i].tgt : edges[i].ref; + k[i].b = primary_is_tgt ? edges[i].ref : edges[i].tgt; + k[i].idx = (uint32_t)i; + } + qsort(k, nedge, sizeof *k, skey_cmp); + for (size_t i = 0; i < nedge; i++) { idx[i] = k[i].idx; } + free(k); + *out = idx; + return 0; +} + +/* ---- sweep collector ----------------------------------------------------- */ + +struct pm_cb { + pedge* edges; size_t nedge, capedge; + const vregion* regs; int nregs; + int oom; + uint64_t win_base; size_t win_len, win_ov; int win_last; +}; + +static int edge_push(struct pm_cb* c, uint64_t ref, uint64_t tgt) { + if (c->nedge == c->capedge) { + const size_t nc = c->capedge ? c->capedge * 2 : 65536; + pedge* ne = realloc(c->edges, nc * sizeof *ne); + if (!ne) { c->oom = 1; return -1; } + c->edges = ne; c->capedge = nc; + } + c->edges[c->nedge].ref = ref; + c->edges[c->nedge].tgt = tgt; + c->nedge++; + return 0; +} + +__attribute__((hot)) +static int pm_sweep_cb(void* u, const uint8_t* data, size_t len, + uint64_t base, size_t ov, int last) { + struct pm_cb* c = u; + /* qword-aligned scan: start at the first 8-aligned VA in this window. Seam- + * dedup: an 8-aligned qword whose VA is in the trailing overlap of a + * non-last window is dropped (the next window re-presents it 8-aligned). */ + size_t off = 0; + const size_t m = (size_t)(base & 7u); + if (m) { off = 8 - m; } + const size_t limit = last ? len : (len > ov ? 
len - ov : 0); + for (; off + 8 <= len; off += 8) { + if (!last && off >= limit) { break; } + uint64_t v; memcpy(&v, data + off, 8); + if (in_mapped(c->regs, c->nregs, v)) { + if (edge_push(c, base + off, v)) { return 1; } + } + } + return 0; +} + +/* ---- build / free -------------------------------------------------------- */ + +__attribute__((cold)) +pmap* pmap_build(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any) { + pmap* pm = calloc(1, sizeof *pm); + if (!pm) { return NULL; } + + pm->regs = malloc((size_t)PM_REG_CAP * sizeof *pm->regs); + if (!pm->regs) { free(pm); return NULL; } + pm->nregs = gva_regions(m, cr3, lo, hi, prot_any, pm->regs, PM_REG_CAP); + if (pm->nregs < 0) { pm->nregs = 0; } + if (pm->nregs > PM_REG_CAP) { pm->nregs = PM_REG_CAP; } + qsort(pm->regs, (size_t)pm->nregs, sizeof *pm->regs, reg_cmp); + + struct pm_cb c; memset(&c, 0, sizeof c); + c.regs = pm->regs; c.nregs = pm->nregs; + const int sw = gva_sweep(m, cr3, lo, hi, prot_any, 8, pm_sweep_cb, &c); + if (sw < 0 || c.oom) { free(c.edges); pmap_free(pm); return NULL; } + + pm->edges = c.edges; pm->nedge = c.nedge; + if (build_index(pm->edges, pm->nedge, 1, &pm->by_tgt) || + build_index(pm->edges, pm->nedge, 0, &pm->by_ref)) { + pmap_free(pm); + return NULL; + } + return pm; +} + +__attribute__((cold)) +void pmap_free(pmap* pm) { + if (!pm) { return; } + free(pm->edges); + free(pm->by_tgt); + free(pm->by_ref); + free(pm->regs); + free(pm); +} + +/* ---- queries ------------------------------------------------------------- */ + +/* Lower bound of `key` in a sorted index `idx` over edges, keying on `field` + * (offsetof tgt or ref via a selector). Returns the first index position with + * key value >= key. */ +__attribute__((hot)) +static size_t idx_lb(const pedge* edges, const uint32_t* idx, size_t n, + int by_tgt, uint64_t key) { + size_t lo = 0, hi = n; + while (lo < hi) { + const size_t mid = (lo + hi) / 2; + const uint64_t kv = by_tgt ? 
edges[idx[mid]].tgt : edges[idx[mid]].ref; + if (kv < key) { lo = mid + 1; } else { hi = mid; } + } + return lo; +} + +int pmap_referrers(const pmap* pm, uint64_t target_va, uint64_t* out, int max) { + if (!pm) { return -1; } + int total = 0; + for (size_t i = idx_lb(pm->edges, pm->by_tgt, pm->nedge, 1, target_va); + i < pm->nedge && pm->edges[pm->by_tgt[i]].tgt == target_va; i++) { + if (out && total < max) { out[total] = pm->edges[pm->by_tgt[i]].ref; } + total++; + } + return total; +} + +int pmap_targets(const pmap* pm, uint64_t referrer_va, uint64_t* out, int max) { + if (!pm) { return -1; } + int total = 0; + for (size_t i = idx_lb(pm->edges, pm->by_ref, pm->nedge, 0, referrer_va); + i < pm->nedge && pm->edges[pm->by_ref[i]].ref == referrer_va; i++) { + if (out && total < max) { out[total] = pm->edges[pm->by_ref[i]].tgt; } + total++; + } + return total; +} + +/* ---- indexed pointer paths (analogue of scan_pointer's one-shot DFS) ------ * + * scan_pointer builds no index and walks live memory for a single query; this + * walks the already-built reverse index, so a built pmap amortizes one sweep + * across many path queries. 
*/ +struct ppaths { + const pmap* pm; + const range* mods; int nmods; + uint32_t max_off; int max_depth; + scan_ptr_path* out; int max, n; + int32_t disc[SCAN_PTR_MAXDEPTH]; +}; + +__attribute__((hot)) +static int pp_in_module(const struct ppaths* P, uint64_t a) { + for (int i = 0; i < P->nmods; i++) { + if (a >= P->mods[i].base && a < P->mods[i].base + P->mods[i].size) { return 1; } + } + return 0; +} + +static void pp_dfs(struct ppaths* P, uint64_t need, int hops) { + if (hops > 0 && pp_in_module(P, need) && P->n < P->max) { + scan_ptr_path* o = &P->out[P->n++]; + o->base = need; o->depth = hops; + for (int k = 0; k < hops; k++) { o->off[k] = P->disc[hops - 1 - k]; } + } + if (hops >= P->max_depth || P->n >= P->max) { return; } + /* edges with tgt in [need - max_off, need]: each is a referrer location + * whose stored pointer lands `off` bytes below `need`. */ + const uint64_t loV = need > P->max_off ? need - P->max_off : 0; + const pmap* pm = P->pm; + for (size_t i = idx_lb(pm->edges, pm->by_tgt, pm->nedge, 1, loV); + i < pm->nedge && pm->edges[pm->by_tgt[i]].tgt <= need; i++) { + P->disc[hops] = (int32_t)(need - pm->edges[pm->by_tgt[i]].tgt); + pp_dfs(P, pm->edges[pm->by_tgt[i]].ref, hops + 1); + if (P->n >= P->max) { return; } + } +} + +int pmap_paths(const pmap* pm, uint64_t target, const range* mods, int nmods, + int max_depth, uint32_t max_off, scan_ptr_path* out, int max) { + if (!pm || max_depth < 1 || max < 1) { return -1; } + if (max_depth > SCAN_PTR_MAXDEPTH) { max_depth = SCAN_PTR_MAXDEPTH; } + if (nmods < 0) { nmods = 0; } + + struct ppaths P; memset(&P, 0, sizeof P); + P.pm = pm; P.mods = mods; P.nmods = nmods; + P.max_off = max_off; P.max_depth = max_depth; P.out = out; P.max = max; + pp_dfs(&P, target, 0); + return P.n; +} + +/* ---- string-xref: locate needle bytes, then resolve referrers ------------ */ + +struct strx { + const pmap* pm; + uint64_t* out; int max, n; +}; + +/* For one needle occurrence at `va`, append its referrers. 
Writes into the + * remaining output room (index-backed batch query) and always advances the + * running total so truncation past `max` is observable to the caller. */ +static void strx_occurrence(struct strx* sx, uint64_t va) { + uint64_t* dst = (sx->out && sx->n < sx->max) ? sx->out + sx->n : NULL; + const int room = (sx->out && sx->n < sx->max) ? sx->max - sx->n : 0; + const int total = pmap_referrers(sx->pm, va, dst, room); + sx->n += total; /* total >= written: count is exact */ +} + +struct strx_sweep { + struct strx* sx; + const sig_pattern_t* p; + uint64_t win_base; size_t win_len, win_ov; int win_last; +}; + +static int strx_hit(void* u, uint64_t va) { + struct strx_sweep* w = u; + const size_t off = (size_t)(va - w->win_base); + if (!w->win_last && w->win_len > w->win_ov && off >= w->win_len - w->win_ov) { + return 0; /* trailing overlap: next window owns it */ + } + strx_occurrence(w->sx, va); + return 0; +} + +static int strx_sweep_cb(void* u, const uint8_t* data, size_t len, + uint64_t base, size_t ov, int last) { + struct strx_sweep* w = u; + w->win_base = base; w->win_len = len; w->win_ov = ov; w->win_last = last; + const mem_view_t v = { data, len, base }; + sig_each(v, w->p, strx_hit, w); + return 0; +} + +int xref_string(vmie_mem* m, uintptr_t cr3, const pmap* pm, + const void* needle, size_t nlen, uint64_t* out, int max) { + if (!pm || !needle || nlen == 0) { return -1; } + sig_pattern_t p; + if (!sig_from_bytes((const uint8_t*)needle, nlen, &p)) { return -1; } + + struct strx sx; memset(&sx, 0, sizeof sx); + sx.pm = pm; sx.out = out; sx.max = max; + struct strx_sweep w; memset(&w, 0, sizeof w); + w.sx = &sx; w.p = &p; + /* the AS spans two canonical halves; gva_sweep requires a window within one, + * so sweep each half (a needle cannot straddle the non-canonical gap). 
*/ + int sw = gva_sweep(m, cr3, USER_MIN, USER_MAX, 0, nlen - 1, strx_sweep_cb, &w); + if (sw >= 0) { + sw = gva_sweep(m, cr3, KERN_MIN, ~0ull, 0, nlen - 1, strx_sweep_cb, &w); + } + sig_free(&p); + if (sw < 0) { return -1; } + return sx.n; +} + +/* ---- structure dissection ------------------------------------------------ */ + +/* Membership test: prefer the pmap's region set; else query gva_regions live. */ +__attribute__((hot)) +static int sd_mapped(vmie_mem* m, uintptr_t cr3, const pmap* pm, uint64_t v, + uint32_t* prot_out) { + if (pm) { + if (!in_mapped(pm->regs, pm->nregs, v)) { return 0; } + /* recover prot from the region set */ + int lo = 0, hi = pm->nregs; + while (lo < hi) { const int md = (lo + hi) / 2; + if (pm->regs[md].va <= v) { lo = md + 1; } else { hi = md; } } + if (prot_out && lo > 0) { *prot_out = pm->regs[lo - 1].prot; } + return 1; + } + vregion rg[8]; + const int n = gva_regions(m, cr3, v, v, 0, rg, 8); + if (n <= 0) { return 0; } + if (prot_out) { *prot_out = rg[0].prot; } + return 1; +} + +/* A VTABLE points into a non-writable region whose first qwords are themselves + * pointers into X-regions. */ +__attribute__((hot)) +static int sd_is_vtable(vmie_mem* m, uintptr_t cr3, const pmap* pm, + uint64_t v, uint32_t vprot) { + if (vprot & VR_W) { return 0; } /* vtables live in read-only memory */ + uint64_t slot0 = 0; + if (gva_read(m, cr3, v, &slot0, 8)) { return 0; } + uint32_t sprot = 0; + if (!sd_mapped(m, cr3, pm, slot0, &sprot)) { return 0; } + return (sprot & VR_X) != 0; /* first entry points at code */ +} + +/* Classify a slot as text on a printable RUN of >= 4 from the start: a leading + * ASCII run (NUL/end terminates), or a leading UTF-16 run (printable low byte, + * zero high byte). ASCII is preferred when both qualify. 
*/ +#define SD_TEXT_MIN 4 +__attribute__((hot)) +static field_kind sd_text(const uint8_t* b, size_t len) { + size_t arun = 0; /* leading printable-ASCII run */ + while (arun < len && b[arun] >= 0x20 && b[arun] < 0x7f) { arun++; } + if (arun >= SD_TEXT_MIN) { return FK_ASCII; } + size_t urun = 0; /* leading printable-UTF16 run (chars) */ + while ((urun * 2 + 1) < len && + b[urun * 2] >= 0x20 && b[urun * 2] < 0x7f && b[urun * 2 + 1] == 0) { + urun++; + } + return urun >= SD_TEXT_MIN ? FK_UTF16 : FK_UNKNOWN; +} +/* Classify each 8-byte slot of [va, va + nbytes) into out[] (at most `max` entries), trying in order: mapped pointer / vtable, text, float, integer. Unreadable slots produce no entry. Returns the number of entries written; 0 when `out` is NULL or max <= 0. */ +int struct_dissect(vmie_mem* m, uintptr_t cr3, uint64_t va, size_t nbytes, + const pmap* pm, field_desc* out, int max) { + if (!out || max <= 0) { return 0; } + int w = 0; + for (size_t off = 0; off + 8 <= nbytes && w < max; off += 8) { + uint8_t raw8[8]; + if (gva_read(m, cr3, va + off, raw8, 8)) { continue; } /* unreadable slot: skipped, no entry emitted */ + uint64_t raw; memcpy(&raw, raw8, 8); + + field_desc* d = &out[w++]; + d->off = (uint32_t)off; + d->raw = raw; + d->target = 0; + d->kind = FK_UNKNOWN; + + uint32_t vprot = 0; + if (sd_mapped(m, cr3, pm, raw, &vprot)) { /* value is a mapped VA: pointer-like */ + d->target = raw; + d->kind = sd_is_vtable(m, cr3, pm, raw, vprot) ? FK_VTABLE : FK_PTR; + continue; + } + /* text run over the raw bytes */ + const field_kind t = sd_text(raw8, 8); + if (t != FK_UNKNOWN) { d->kind = t; continue; } + /* float sanity: finite and sane magnitude */ + float f32; memcpy(&f32, raw8, 4); /* low 4 bytes of the slot */ + double f64; memcpy(&f64, raw8, 8); + const double af = f32 < 0 ? -(double)f32 : (double)f32; + const double ad = f64 < 0 ? -f64 : f64; + if (f32 == f32 && af >= 1e-6 && af <= 1e9) { d->kind = FK_F32; continue; } /* x == x rejects NaN; the upper bound rejects infinities */ + if (f64 == f64 && ad >= 1e-6 && ad <= 1e12) { d->kind = FK_F64; continue; } + /* integer fallback: nonzero high half => I64, else I32 */ + d->kind = (raw >> 32) ? 
FK_I64 : FK_I32; + } + return w; +} diff --git a/src/handlers/sigset.c b/src/handlers/sigset.c new file mode 100644 index 0000000..e98f634 --- /dev/null +++ b/src/handlers/sigset.c @@ -0,0 +1,253 @@ +/* sigset.c - compiled multi-pattern matcher (Aho-Corasick over literal anchors). + * + * Each pattern's longest contiguous non-wildcard run is its literal ANCHOR. An + * Aho-Corasick automaton (goto + failure links) is built over all anchors and + * driven once over the view; on reaching an accepting state the matcher walks + * the output list, aligns each owning pattern to its start (anchor_va - + * anchor_off), and verifies the FULL masked pattern before reporting. Patterns + * may share anchors and overlap; verification disambiguates. + * + * PURE: only mem_view_t (memmodel.h via sigscan.h), no vmie_mem, no I/O. + */ +#include "sigscan.h" + +#include <stddef.h> /* size_t. NOTE(review): the four header names were missing in this copy and are restored from what the file uses - confirm against upstream */ +#include <stdint.h> /* uint8_t, uint64_t */ +#include <stdlib.h> /* malloc, calloc, realloc, free */ +#include <string.h> /* memcpy */ + +/* One compiled pattern: its bytes/mask (owned copies) and the anchor span. */ +typedef struct { + uint8_t* bytes; /* owned copy of pattern bytes (len) */ + uint8_t* mask; /* owned copy of pattern mask (len) */ + size_t len; /* pattern length */ + size_t anchor_off; /* anchor start offset within the pattern */ + size_t anchor_len; /* anchor length (the literal run fed to AC) */ +} spat; + +/* AC trie node: 256-way goto via a flat row, plus failure link and an output + * list head. Output entries chain pattern indices that END at this node. 
*/ +#define AC_NIL (-1) +typedef struct { + int next[256]; /* goto transitions (AC_NIL = none in goto graph) */ + int fail; /* failure link */ + int out; /* head of output list (index into out_pat/out_nxt) */ +} acnode; + +struct sigset { + spat* pats; /* n compiled patterns */ + int n; /* number of fully compiled entries in pats */ + size_t maxlen; /* longest full pattern length */ + + acnode* node; /* AC nodes (node[0] == root) */ + int nnode, capnode; /* used / allocated node count */ + + int* out_pat; /* output list: owning pattern index */ + int* out_nxt; /* output list: next link (AC_NIL = end) */ + int nout, capout; /* used / allocated output-entry count */ +}; + +/* ---- pattern anchor selection (cold setup) ------------------------------- */ + +/* Longest contiguous run of nonzero mask bytes: writes the run's start offset to *out_off and returns its length (0, with *out_off == 0, when no mask byte is set). The first longest run wins ties. */ +__attribute__((cold)) +static size_t longest_anchor(const uint8_t* mask, size_t len, + size_t* out_off) { + size_t best_off = 0, best_len = 0, i = 0; + while (i < len) { + if (!mask[i]) { i++; continue; } + const size_t run_off = i; + while (i < len && mask[i]) { i++; } + const size_t run_len = i - run_off; + if (run_len > best_len) { best_len = run_len; best_off = run_off; } /* strict '>' keeps the earliest run on ties */ + } + *out_off = best_off; + return best_len; +} + +/* ---- AC trie construction (cold setup) ----------------------------------- */ +/* Append a fresh node (no transitions, fail = root, empty output list). Returns its index, or AC_NIL if the node array cannot grow. May move s->node. */ +__attribute__((cold)) +static int node_new(sigset* s) { + if (s->nnode == s->capnode) { + const int nc = s->capnode ? s->capnode * 2 : 64; + acnode* nn = realloc(s->node, (size_t)nc * sizeof *nn); + if (!nn) { return AC_NIL; } + s->node = nn; s->capnode = nc; + } + const int id = s->nnode++; + acnode* nd = &s->node[id]; + for (int c = 0; c < 256; c++) { nd->next[c] = AC_NIL; } + nd->fail = 0; + nd->out = AC_NIL; + return id; +} + +/* Push pattern `pat` onto the front of node `nd`'s output list. Returns 0, or -1 if the output arrays cannot grow. */ +__attribute__((cold)) +static int out_push(sigset* s, int nd, int pat) { + if (s->nout == s->capout) { + const int nc = s->capout ? 
s->capout * 2 : 64; + int* np = realloc(s->out_pat, (size_t)nc * sizeof *np); + if (!np) { return -1; } + s->out_pat = np; + int* nx = realloc(s->out_nxt, (size_t)nc * sizeof *nx); + if (!nx) { return -1; } /* out_pat stays enlarged but capout is unchanged, so a later call simply retries */ + s->out_nxt = nx; s->capout = nc; + } + const int e = s->nout++; + s->out_pat[e] = pat; + s->out_nxt[e] = s->node[nd].out; + s->node[nd].out = e; + return 0; +} + +/* Insert one anchor (pattern `pat`'s literal run) into the goto trie. Returns 0, or -1 on allocation failure. */ +__attribute__((cold)) +static int trie_insert(sigset* s, int pat) { + const spat* p = &s->pats[pat]; + int cur = 0; + for (size_t i = 0; i < p->anchor_len; i++) { + const uint8_t c = p->bytes[p->anchor_off + i]; + if (s->node[cur].next[c] == AC_NIL) { + const int id = node_new(s); + if (id == AC_NIL) { return -1; } + s->node[cur].next[c] = id; /* node_new may realloc; re-fetch ok */ + } + cur = s->node[cur].next[c]; + } + return out_push(s, cur, pat); +} + +/* Build failure links + merge output lists (BFS over the goto graph). Missing transitions are overwritten with the fail target's, so the matcher never follows fail links at scan time. Returns 0, or -1 if the BFS queue cannot be allocated. */ +__attribute__((cold)) +static int build_failure(sigset* s) { + int* queue = malloc((size_t)s->nnode * sizeof *queue); + if (!queue) { return -1; } + int head = 0, tail = 0; + + for (int c = 0; c < 256; c++) { /* depth-1 nodes fail to root */ + const int v = s->node[0].next[c]; + if (v != AC_NIL) { s->node[v].fail = 0; queue[tail++] = v; } + else { s->node[0].next[c] = 0; } /* root self-loop goto */ + } + while (head < tail) { + const int u = queue[head++]; + for (int c = 0; c < 256; c++) { + const int v = s->node[u].next[c]; + if (v == AC_NIL) { + s->node[u].next[c] = s->node[s->node[u].fail].next[c]; + continue; + } + s->node[v].fail = s->node[s->node[u].fail].next[c]; + queue[tail++] = v; + /* merge fail node's outputs into v's list (chain the lists) */ + const int fout = s->node[s->node[v].fail].out; + if (fout != AC_NIL && s->node[v].out == AC_NIL) { + s->node[v].out = fout; /* share tail: read-only traversal */ + } else if (fout != AC_NIL) { + int e = s->node[v].out; + while (s->out_nxt[e] != 
AC_NIL) { e = s->out_nxt[e]; } + s->out_nxt[e] = fout; /* append fail-chain to own list */ + } + } + } + free(queue); + return 0; +} + +/* ---- public surface ------------------------------------------------------ */ +/* Compile `n` patterns into one automaton. Returns NULL when pats is NULL, n <= 0, a pattern has no bytes / no mask / zero length or is all-wildcard, or an allocation fails. Release the result with sigset_free. */ +__attribute__((cold)) +sigset* sigset_compile(const sig_pattern_t* pats, int n) { + if (!pats || n <= 0) { return NULL; } + + sigset* s = calloc(1, sizeof *s); + if (!s) { return NULL; } + s->pats = calloc((size_t)n, sizeof *s->pats); + if (!s->pats) { free(s); return NULL; } + + for (int i = 0; i < n; i++) { + const sig_pattern_t* src = &pats[i]; + if (!src->bytes || !src->mask || src->len == 0) { goto fail; } + size_t aoff = 0; + const size_t alen = longest_anchor(src->mask, src->len, &aoff); + if (alen == 0) { goto fail; } /* all-wildcard: no literal anchor */ + + spat* p = &s->pats[i]; + p->bytes = malloc(src->len); + p->mask = malloc(src->len); + if (!p->bytes || !p->mask) { free(p->bytes); free(p->mask); p->bytes = NULL; p->mask = NULL; goto fail; } /* s->n is not bumped yet, so sigset_free would skip this slot: release the half that did allocate here */ + memcpy(p->bytes, src->bytes, src->len); + memcpy(p->mask, src->mask, src->len); + p->len = src->len; p->anchor_off = aoff; p->anchor_len = alen; + if (src->len > s->maxlen) { s->maxlen = src->len; } + s->n++; /* slot i is now fully owned by the set */ + } + + if (node_new(s) == AC_NIL) { goto fail; } /* root */ + for (int i = 0; i < s->n; i++) { + if (trie_insert(s, i)) { goto fail; } + } + if (build_failure(s)) { goto fail; } + return s; + +fail: + sigset_free(s); + return NULL; +} +/* Release a sigset and everything it owns (the first s->n pattern copies, nodes, output arrays); NULL is a no-op. */ +__attribute__((cold)) +void sigset_free(sigset* s) { + if (!s) { return; } + if (s->pats) { + for (int i = 0; i < s->n; i++) { free(s->pats[i].bytes); free(s->pats[i].mask); } + free(s->pats); + } + free(s->node); + free(s->out_pat); + free(s->out_nxt); + free(s); +} +/* Longest full pattern length in the set; 0 for a NULL set. */ +size_t sigset_maxlen(const sigset* s) { return s ? s->maxlen : 0; } + +/* Verify the full masked pattern `pat` at start VA `start` against `v`. 
*/ +__attribute__((hot)) +static int verify(const sigset* s, mem_view_t v, int pat, uint64_t start) { + const spat* p = &s->pats[pat]; + if (start < v.base_va) { return 0; } /* pattern would begin before the view */ + const size_t off = (size_t)(start - v.base_va); + if (off + p->len > v.size) { return 0; } /* pattern would run past the end of the view */ + const uint8_t* d = v.data + off; + for (size_t i = 0; i < p->len; i++) { + if (p->mask[i] && d[i] != p->bytes[i]) { return 0; } /* zero mask byte = wildcard; any nonzero mask compares the whole byte */ + } + return 1; +} +/* Drive the automaton once over view `v` and call cb(user, pat, va) for every pattern whose full masked form verifies at start VA `va`. A nonzero return from cb stops the scan. Does nothing for a NULL/empty set or an empty view. */ +__attribute__((hot)) +void sig_set_each(const sigset* s, mem_view_t v, + int (*cb)(void* user, int pat, uint64_t va), void* user) { + if (!s || !s->node || !v.data || v.size == 0 || s->n == 0) { return; } + + int cur = 0; + for (size_t i = 0; i < v.size; i++) { + cur = s->node[cur].next[v.data[i]]; /* goto fully closed in build */ + if (s->node[cur].out == AC_NIL) { continue; } + /* i is the END index of one or more anchors. Walk output list. */ + for (int e = s->node[cur].out; e != AC_NIL; e = s->out_nxt[e]) { + const int pat = s->out_pat[e]; + const spat* p = &s->pats[pat]; + /* anchor ends at i => anchor start = i - (anchor_len-1); pattern + * start = anchor_start - anchor_off. */ + if (i + 1 < p->anchor_len) { continue; } /* anchor cannot fit in the bytes seen so far */ + const uint64_t astart = v.base_va + (i + 1 - p->anchor_len); + if (astart < v.base_va + p->anchor_off) { continue; } /* pattern start would precede the view */ + const uint64_t pstart = astart - p->anchor_off; + if (verify(s, v, pat, pstart)) { + if (cb(user, pat, pstart)) { return; } /* nonzero from cb ends the whole scan */ + } + } + } +} diff --git a/src/handlers/snapdiff.c b/src/handlers/snapdiff.c new file mode 100644 index 0000000..62615fe --- /dev/null +++ b/src/handlers/snapdiff.c @@ -0,0 +1,187 @@ +/* snapdiff.c - per-process temporal snapshot + diff (OS-agnostic handler). + * + * snap_take copies the bytes of every mapped run in a window at T0. snap_diff + * re-reads each run now and emits VA-contiguous changed ranges: byte-level + * differences, plus runs that appeared (mapped now, not at T0) or disappeared + * (mapped at T0, not now). 
Changed sub-ranges that are VA-contiguous are + * coalesced into a single emitted run. + * + * Handler boundary: only memmodel.h / snapdiff.h + stdlib/string. + */ +#include +#include +#include +#include +#include "memmodel.h" +#include "snapdiff.h" + +#define SNAP_REG_CAP (1 << 16) + +/* One captured run: its VA, length, and an owned byte copy at T0. */ +typedef struct { + uint64_t va, len; + uint8_t* bytes; /* owned copy of `len` bytes (NULL if capture failed) */ +} snaprun; + +struct snapshot { + vmie_mem* m; + uintptr_t cr3; + uint64_t lo, hi; + uint32_t prot; + snaprun* runs; + int nruns; +}; + +/* ---- capture (cold) ------------------------------------------------------ */ + +__attribute__((cold)) +snapshot* snap_take(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any) { + snapshot* s = calloc(1, sizeof *s); + if (!s) { return NULL; } + s->m = m; s->cr3 = cr3; s->lo = lo; s->hi = hi; s->prot = prot_any; + + vregion* rg = malloc((size_t)SNAP_REG_CAP * sizeof *rg); + if (!rg) { free(s); return NULL; } + int nr = gva_regions(m, cr3, lo, hi, prot_any, rg, SNAP_REG_CAP); + if (nr < 0) { nr = 0; } + if (nr > SNAP_REG_CAP) { nr = SNAP_REG_CAP; } + + s->runs = calloc((size_t)nr, sizeof *s->runs); + if (!s->runs && nr) { free(rg); free(s); return NULL; } + s->nruns = nr; + for (int i = 0; i < nr; i++) { + s->runs[i].va = rg[i].va; + s->runs[i].len = rg[i].len; + uint8_t* b = malloc((size_t)rg[i].len); + if (!b) { continue; } /* leave bytes NULL: capture gap */ + if (gva_read(m, cr3, rg[i].va, b, (size_t)rg[i].len)) { free(b); b = NULL; } + s->runs[i].bytes = b; + } + free(rg); + return s; +} + +__attribute__((cold)) +void snap_free(snapshot* s) { + if (!s) { return; } + for (int i = 0; i < s->nruns; i++) { free(s->runs[i].bytes); } + free(s->runs); + free(s); +} + +/* ---- diff (coalescing emitter) ------------------------------------------- */ + +struct demit { + vregion* out; int max, n; + uint32_t cur_prot; /* prot stamped on emitted runs */ + 
int have; /* a pending run is open */ + uint64_t pva, pend; /* pending [pva, pend) */ +}; + +/* Flush the pending run to the output (counting even past `max`). */ +__attribute__((hot)) +static void emit_flush(struct demit* e) { + if (!e->have) { return; } + if (e->out && e->n < e->max) { /* store only while there is room; a NULL out just counts */ + e->out[e->n].va = e->pva; + e->out[e->n].len = e->pend - e->pva; + e->out[e->n].prot = e->cur_prot; + } + e->n++; + e->have = 0; +} + +/* Mark [va, va+len) changed, coalescing with the pending run if VA-contiguous. */ +__attribute__((hot)) +static void emit_changed(struct demit* e, uint64_t va, uint64_t len, uint32_t prot) { + if (len == 0) { return; } + if (e->have && va == e->pend) { /* contiguous: extend */ + e->pend = va + len; /* the merged run keeps the prot of its first piece */ + return; + } + emit_flush(e); + e->have = 1; e->pva = va; e->pend = va + len; e->cur_prot = prot; +} + +/* Current prot at `va` (for stamping emitted runs); 0 if unmapped now. */ +__attribute__((hot)) +static uint32_t cur_prot_at(vmie_mem* m, uintptr_t cr3, uint64_t va) { + vregion rg[4]; + const int n = gva_regions(m, cr3, va, va, 0, rg, 4); + return n > 0 ? rg[0].prot : 0u; +} +/* Diff snapshot `s` against memory read now through m/cr3. Changed runs are coalesced and written to changed[0..max); runs past `max` are counted but not stored, and `changed` may be NULL to count only. Returns the total number of changed runs, or -1 if `s` is NULL or an allocation fails. */ +int snap_diff(const snapshot* s, vmie_mem* m, uintptr_t cr3, + vregion* changed, int max) { + if (!s) { return -1; } + + struct demit e; memset(&e, 0, sizeof e); + e.out = changed; e.max = max; + + /* enumerate current runs once for the "appeared" pass and prot lookup */ + vregion* now = malloc((size_t)SNAP_REG_CAP * sizeof *now); + if (!now) { return -1; } + int nnow = gva_regions(m, cr3, s->lo, s->hi, s->prot, now, SNAP_REG_CAP); + if (nnow < 0) { nnow = 0; } + if (nnow > SNAP_REG_CAP) { nnow = SNAP_REG_CAP; } + + /* Pass 1: walk T0 runs; compare bytes that are still mapped, mark the rest + * (disappeared / read-failure) as changed. Byte-level diffs are coalesced + * across page boundaries within a run via the contiguous emitter. 
*/ + uint8_t* live = NULL; size_t livecap = 0; + for (int i = 0; i < s->nruns; i++) { + const snaprun* r = &s->runs[i]; + const size_t len = (size_t)r->len; + if (r->bytes && len > livecap) { /* grow only when a compare will happen: a T0 capture gap is never read, so it must not be able to fail the whole diff */ + uint8_t* nl = realloc(live, len); + if (!nl) { free(live); free(now); return -1; } + live = nl; livecap = len; + } + if (!r->bytes || gva_read(m, cr3, r->va, live, len)) { + /* disappeared or unreadable now (or T0 capture gap): whole run changed */ + emit_changed(&e, r->va, r->len, cur_prot_at(m, cr3, r->va)); + continue; + } + /* byte compare; coalesce contiguous differing bytes */ + const uint32_t prot = cur_prot_at(m, cr3, r->va); + size_t j = 0; + while (j < len) { + if (live[j] != r->bytes[j]) { + const size_t start = j; + while (j < len && live[j] != r->bytes[j]) { j++; } + emit_changed(&e, r->va + start, j - start, prot); + } else { + j++; + } + } + } + free(live); + + /* Pass 2: current runs that did NOT exist at T0 (appeared) are changed. A + * current byte is "new" if its VA is not covered by any T0 run. */ + for (int i = 0; i < nnow; i++) { + uint64_t va = now[i].va; + const uint64_t end = now[i].va + now[i].len; + while (va < end) { + /* find a T0 run covering `va`; if none, this byte is new */ + uint64_t cover_end = end; + int covered = 0; + for (int k = 0; k < s->nruns; k++) { + const snaprun* r = &s->runs[k]; + if (va >= r->va && va < r->va + r->len) { + covered = 1; cover_end = r->va + r->len; break; + } + if (r->va > va && r->va < cover_end) { cover_end = r->va; } /* nearest T0 run starting after va bounds the uncovered gap */ + } + const uint64_t seg_end = cover_end < end ? cover_end : end; + if (!covered) { + emit_changed(&e, va, seg_end - va, now[i].prot); + } + va = seg_end; /* both cases advance past va, so the loop terminates */ + } + } + free(now); + + emit_flush(&e); /* close the last pending run */ + return e.n; +}