mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
Add process-scoped scanning algorithms: multi-pattern, code-xref, pointer-map, dissection, snapshot diff
All are OS-agnostic handlers keyed by vmie_mem* + cr3, built on the windowed sweep / region walk / matcher; none names a Windows concept and each compiles against include/ alone. Scanning: a compiled multi-pattern automaton (Aho-Corasick over each pattern's longest literal anchor, then a masked verify) finds N signatures in one sweep pass (sigscan.h sigset; scan.h gva_sig_scan_multi). gva_code_xref decodes rel32 call/jmp and RIP-relative lea/mov to find every instruction targeting a given VA. Pointer graph (pmap.h): one sweep indexes every qword whose value lands in a mapped region into reverse + forward edges. pmap_referrers is the keystone - it answers who-points-here, class-instance enumeration (referrers of a vtable VA), and string xref (referrers of a string VA) from the same index; pmap_paths is the indexed counterpart to scan_pointer's one-shot DFS; struct_dissect classifies the qwords of an instance (pointer/vtable/float/int/string) into a field map. Temporal (snapdiff.h): snap_take captures a window's bytes, snap_diff reports the changed runs against a later read.
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
/* pmap.h - pointer-graph index and structure analysis (OS-agnostic handler).
|
||||
*
|
||||
* Layered above the memory-model contract (memmodel.h) and the scanning surface
|
||||
* (scan.h, for `range` and `scan_ptr_path`). A `pmap` is a one-pass reverse +
|
||||
* forward index of every intra-address-space pointer under a `cr3`: for each
|
||||
* 8-byte-aligned qword whose VALUE lands inside a mapped region, it records the
|
||||
* edge `referrer_va -> target_va`. Two sorted views answer the keystone queries
|
||||
* in O(log n): who-points-here (referrers) and what-does-this-point-to
|
||||
* (targets). Everything is keyed by `vmie_mem* + cr3`; it names no Windows
|
||||
* object.
|
||||
*
|
||||
* Ownership: pmap_build / pmap_free (create/destroy). All queries are read-only
|
||||
* and re-entrant against a built pmap; pmap_free is safe on NULL.
|
||||
*/
|
||||
#ifndef VMIE_PMAP_H
|
||||
#define VMIE_PMAP_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "memmodel.h" /* vmie_mem, vregion, range */
|
||||
#include "scan.h" /* scan_ptr_path, SCAN_PTR_MAXDEPTH */
|
||||
|
||||
typedef struct pmap pmap; /* reverse + forward index (opaque) */
|
||||
|
||||
/* One gva_sweep over [lo,hi] (prot filter): for every 8-byte-aligned qword whose
|
||||
* VALUE lands inside a mapped region (membership tested against a gva_regions
|
||||
* set), record the edge referrer_va -> target_va. Stores two sorted views (by
|
||||
* target, by referrer) for O(log n) queries. Returns NULL on OOM or bad input. */
|
||||
pmap* pmap_build(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any);
|
||||
|
||||
/* Release a pmap from pmap_build. Safe on NULL. */
|
||||
void pmap_free(pmap* pm);
|
||||
|
||||
/* All VAs holding a pointer whose value == target_va. THE keystone query:
|
||||
* who-points-here; vtable-instance enumeration (target = a vtable VA, since an
|
||||
* object's first qword is its vtable); string-xref target. Writes up to `max`
|
||||
* referrer VAs to `out` (NULL to count only) and returns the TOTAL. */
|
||||
int pmap_referrers(const pmap* pm, uint64_t target_va, uint64_t* out, int max);
|
||||
|
||||
/* Forward edges: pointer VALUES stored at/around referrer_va (what this region
|
||||
* points to). For path walking / dissection. Writes up to `max` target VAs to
|
||||
* `out` (NULL to count only) and returns the TOTAL.
*
* NOTE(review): "at/around" is not pinned down - the signature carries no
* window size, so it is unclear whether only the qword AT referrer_va is
* reported or neighbouring slots as well. Confirm against the implementation
* and state the exact range here. Unlike pmap_paths / xref_string, no -1
* "bad input" return is documented for this query - confirm what a NULL `pm`
* yields. */
|
||||
int pmap_targets(const pmap* pm, uint64_t referrer_va, uint64_t* out, int max);
|
||||
|
||||
/* Map-accelerated pointer paths to `target`, anchored on module ranges `mods`.
|
||||
* Same result type as scan_pointer. Cost profile differs deliberately:
|
||||
* scan_pointer is a ONE-SHOT live DFS that builds no index (low memory for a
|
||||
* single query); pmap_paths runs over the ALREADY-BUILT index (one sweep amort-
|
||||
* ized across many cheap queries). Not a duplicate path - a different trade-off.
|
||||
* Writes up to `max` paths to `out` and returns the TOTAL, or -1 on bad input. */
|
||||
int pmap_paths(const pmap* pm, uint64_t target, const range* mods, int nmods,
|
||||
int max_depth, uint32_t max_off, scan_ptr_path* out, int max);
|
||||
|
||||
/* string-xref: find the needle bytes anywhere in the AS (matcher over the
|
||||
* sweep), then pmap_referrers for each occurrence's VA. The caller pre-encodes
|
||||
* the byte image (e.g. a UTF-16 string), so `needle`/`nlen` are matched as raw
|
||||
* bytes. Writes up to `max` referrer VAs to `out` (NULL to count only) and
|
||||
* returns the TOTAL number of referrers, or -1 on bad input. */
|
||||
int xref_string(vmie_mem* m, uintptr_t cr3, const pmap* pm,
|
||||
const void* needle, size_t nlen, uint64_t* out, int max);
|
||||
|
||||
/* ---- structure dissection ------------------------------------------------ *
|
||||
* Classify each 8-byte slot in [va, va+nbytes). */
|
||||
/* Classification assigned to one 8-byte slot by struct_dissect. No enumerator
* has an explicit value, so they number consecutively from FK_UNKNOWN = 0 in
* declaration order. Meanings, per the struct_dissect contract:
*   FK_PTR            value lands in a mapped region
*   FK_VTABLE         value points into a non-writable region whose first
*                     qwords are themselves pointers into X-regions
*   FK_F32 / FK_F64   finite float of sane magnitude
*   FK_ASCII / FK_UTF16  printable run >= 4
*   FK_I32 / FK_I64 / FK_UNKNOWN  the fallbacks when none of the above fit */
typedef enum {
|
||||
FK_UNKNOWN, FK_PTR, FK_VTABLE, FK_F32, FK_F64, FK_I32, FK_I64, FK_ASCII, FK_UTF16
|
||||
} field_kind;
|
||||
|
||||
/* One classified 8-byte slot: the element type of the `out` array that
* struct_dissect fills. */
typedef struct {
|
||||
uint32_t off; /* byte offset from va */
|
||||
field_kind kind; /* classification chosen for this slot (see field_kind) */
|
||||
uint64_t raw; /* the raw 8 bytes at off */
|
||||
uint64_t target; /* pointee VA for FK_PTR/FK_VTABLE, else 0 */
|
||||
} field_desc;
|
||||
|
||||
/* Classify each slot: PTR = value lands in a mapped region. VTABLE = value
|
||||
* points into a non-writable region whose first qwords are themselves pointers
|
||||
* into X-regions. F32/F64 = finite, sane magnitude. ASCII/UTF16 = printable run
|
||||
* >= 4. Else I32/I64/UNKNOWN. Reuses gva_read + gva_regions (+ pm if given, may
|
||||
* be NULL). Writes slots to `out` and returns the number written (<= max).
*
* Return convention differs from pmap_referrers / pmap_targets / pmap_paths:
* those report the TOTAL match count even when it exceeds `max`; this one
* reports only how many slots were actually written.
*
* NOTE(review): no error return is documented here (sibling queries use -1 on
* bad input), and the treatment of an `nbytes` that is not a multiple of 8 is
* unspecified - confirm both against the implementation. */
|
||||
int struct_dissect(vmie_mem* m, uintptr_t cr3, uint64_t va, size_t nbytes,
|
||||
const pmap* pm, field_desc* out, int max);
|
||||
|
||||
#endif /* VMIE_PMAP_H */
|
||||
@@ -53,6 +53,34 @@ int scan_pointer(vmie_mem* m, uintptr_t cr3, const range* mods, int nmods,
|
||||
uint64_t target, int max_depth, uint32_t max_off,
|
||||
scan_ptr_path* out, int max);
|
||||
|
||||
/* ---- multi-pattern + code-xref bridges (over sigscan.h / gva_sweep) ------ *
|
||||
* Same windowed-seam discipline as gva_sig_scan, but for a compiled sigset and
|
||||
* a heuristic rel32 decoder. Both stream guest memory through gva_sweep and
|
||||
* report VAs in the guest's own coordinate space. */
|
||||
|
||||
/* One attributed multi-pattern hit: which compiled pattern, and where. */
|
||||
typedef struct { int pattern; uint64_t va; } sig_multi_hit;
|
||||
|
||||
/* Windowed multi-pattern scan over [lo,hi]: drives sig_set_each on each window,
|
||||
* seam-deduped like gva_sig_scan. The sweep overlap is (longest pattern len - 1)
|
||||
* = sigset_maxlen(s) - 1, so no full pattern is split at a window boundary.
|
||||
* Writes up to `max` hits to `out` (NULL to count only) and returns the TOTAL
|
||||
* number of hits, or -1 on a NULL/empty sigset. */
|
||||
int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, const sigset* s,
|
||||
sig_multi_hit* out, int max);
|
||||
|
||||
/* code-xref: every instruction in the X-regions of [lo,hi] whose rel32 operand
|
||||
* targets `target_va`. Heuristic decoder (NOT a full disassembler): recognizes
|
||||
* E8 call / E9 jmp (next_rip + disp32) and the RIP-relative ModRM forms
|
||||
* (mod=00, rm=101) of lea/mov (REX.W 8D / 8B) where target = next_rip +
|
||||
* (int32)disp. Records each matching instruction-start VA. The sweep forces
|
||||
* VR_X and carries a >=15-byte overlap (max x86 instruction length) so no
|
||||
* instruction is cut at a window seam. Writes up to `max` VAs to `out` (NULL to
|
||||
* count only) and returns the TOTAL number of matches, or -1 on bad input. */
|
||||
int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint64_t target_va, uint64_t* out, int max);
|
||||
|
||||
/* gva bridges to the signature matcher: build mem_view from guest memory and feed sigscan.h */
|
||||
int gva_sig_scan (vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, const sig_pattern_t* p, uint64_t* out, int max);
|
||||
|
||||
@@ -79,4 +79,34 @@ uint64_t sig_rip(mem_view_t v, uint64_t hit_va, size_t disp_off, size_t instr_le
|
||||
* is actually available. Useful for narrowing a scan to a [start,end] window. */
|
||||
mem_view_t mem_sub(mem_view_t v, uint64_t start_va, size_t size);
|
||||
|
||||
/* ---- compiled multi-pattern matcher (Aho-Corasick anchors) --------------- *
|
||||
* A sigset compiles N patterns into one automaton scanned in a single pass. It
|
||||
* is still PURE (only mem_view_t, no vmie_mem). Each pattern contributes its
|
||||
* longest contiguous non-wildcard run as a literal anchor; an Aho-Corasick goto
|
||||
* over those anchors finds candidate sites, and on an anchor hit the FULL masked
|
||||
* pattern is verified (mem_sub + mask compare) before the match is reported.
|
||||
* This is the building block under gva_sig_scan_multi (see scan.h). */
|
||||
typedef struct sigset sigset; /* compiled automaton (opaque) */
|
||||
|
||||
/* Compile `n` patterns into a sigset. The patterns are borrowed for the call
|
||||
* only (their bytes are copied into the automaton). Returns NULL on OOM, on
|
||||
* n <= 0, or if any pattern is empty / all-wildcard (no literal anchor). Release
|
||||
* with sigset_free(). */
|
||||
sigset* sigset_compile(const sig_pattern_t* pats, int n);
|
||||
|
||||
/* Release a sigset produced by sigset_compile. Safe on NULL. */
|
||||
void sigset_free(sigset* s);
|
||||
|
||||
/* Invoke cb(user, pat_index, match_va) for every full-pattern match of any
|
||||
* compiled pattern in `v`, anchor-driven (not necessarily in ascending order
|
||||
* across patterns). `cb` returns nonzero to stop early. A windowed caller uses
|
||||
* sigset_maxlen(s) - 1 (longest FULL pattern, not the longest anchor) as its
|
||||
* overlap and de-dups hits across seams. */
|
||||
void sig_set_each(const sigset* s, mem_view_t v,
|
||||
int (*cb)(void* user, int pat, uint64_t va), void* user);
|
||||
|
||||
/* Longest compiled pattern length, in bytes. A windowed sweep carries
|
||||
* (this - 1) leading-overlap bytes so no full pattern is split at a seam (the
|
||||
* gva_sig_scan_multi overlap contract). 0 on NULL. */
|
||||
size_t sigset_maxlen(const sigset* s);
|
||||
|
||||
#endif /* VMIE_SIGSCAN_H */
|
||||
@@ -0,0 +1,34 @@
|
||||
/* snapdiff.h - per-process temporal snapshot + diff (OS-agnostic handler).
|
||||
*
|
||||
* A `snapshot` captures the bytes of every mapped run in a VA window under a
|
||||
* `cr3` at time T0. snap_diff re-reads the same window now and emits the runs
|
||||
* whose bytes changed (coalesced VA-contiguous diffs), including runs that
|
||||
* appeared or disappeared since T0. Keyed by `vmie_mem* + cr3`; it names no
|
||||
* Windows object.
|
||||
*
|
||||
* Ownership: snap_take / snap_free (create/destroy). snap_free is safe on NULL.
|
||||
*/
|
||||
#ifndef VMIE_SNAPDIFF_H
|
||||
#define VMIE_SNAPDIFF_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "memmodel.h" /* vmie_mem, vregion */
|
||||
|
||||
typedef struct snapshot snapshot;
|
||||
|
||||
/* Capture the bytes of every mapped run in [lo,hi] (prot filter) under `cr3` at
|
||||
* T0. Returns a heap-owned snapshot, or NULL on OOM / bad input. */
|
||||
snapshot* snap_take(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any);
|
||||
|
||||
/* Release a snapshot from snap_take. Safe on NULL. */
|
||||
void snap_free(snapshot* s);
|
||||
|
||||
/* Re-read the window now, compare to the snapshot, and emit changed runs as
|
||||
* vregion {va, len, prot = current} - coalescing VA-contiguous changed bytes
|
||||
* into one run. Runs that appeared or disappeared since T0 count as changed.
|
||||
* Writes up to `max` runs to `changed` (NULL to count only) and returns the
|
||||
* TOTAL number of changed runs, or -1 on bad input. */
|
||||
int snap_diff(const snapshot* s, vmie_mem* m, uintptr_t cr3,
|
||||
vregion* changed, int max);
|
||||
|
||||
#endif /* VMIE_SNAPDIFF_H */
|
||||
Reference in New Issue
Block a user