Files
vatrog-vm-introspection-engine/include/scan.h
T
lirent c36ffe295d Add process-scoped scanning algorithms: multi-pattern, code-xref, pointer-map, dissection, snapshot diff
All are OS-agnostic handlers keyed by vmie_mem* + cr3, built on the windowed
sweep / region walk / matcher; none names a Windows concept and each compiles
against include/ alone.

Scanning: a compiled multi-pattern automaton (Aho-Corasick over each pattern's
longest literal anchor, then a masked verify) finds N signatures in one sweep
pass (sigscan.h sigset; scan.h gva_sig_scan_multi). gva_code_xref decodes
rel32 call/jmp and RIP-relative lea/mov to find every instruction targeting a
given VA.

Pointer graph (pmap.h): one sweep indexes every qword whose value lands in a
mapped region into reverse + forward edges. pmap_referrers is the keystone -
it answers who-points-here, class-instance enumeration (referrers of a vtable
VA), and string xref (referrers of a string VA) from the same index;
pmap_paths is the indexed counterpart to scan_pointer's one-shot DFS;
struct_dissect classifies the qwords of an instance (pointer/vtable/float/
int/string) into a field map.

Temporal (snapdiff.h): snap_take captures a window's bytes, snap_diff reports
the changed runs against a later read.
2026-06-16 17:38:10 +03:00

112 lines
5.9 KiB
C

/* scan.h - typed value scanner, pointer scanner, gva<->signature bridges
* (single-pattern, multi-pattern, code-xref), and the physical-image scan.
*
* Layered above the pure matcher (sigscan.h) and the generic memory-model
* contract (memmodel.h): this is the OS-agnostic scanning surface. Everything
* here is keyed by a `vmie_mem*` + `cr3` (and, for the pointer scan, a decoded
* `range[]`); it names no Windows object. The value scanner narrows a candidate
* set across successive snapshots; the pointer scanner discovers range-anchored
* pointer chains; the gva_sig_* bridges build mem_view_t windows out of guest
* memory and feed them to the signature matcher.
*
* The Windows-typed convenience entry points (scan_new(process*),
* vmie_scan_pointer(process*)) live in the win32 surface (win32.h).
*/
#ifndef VMIE_SCAN_H
#define VMIE_SCAN_H
#include <stdint.h>
#include <stddef.h>
#include "memmodel.h" /* vmie_mem, range, vregion */
#include "sigscan.h" /* mem_view_t, sig_pattern_t */
/* Element type handled by the typed value scanner.
 *
 * The numeric value of every enumerator is LOAD-BEARING: scan.c uses it as an
 * index into the table g_tsz[] = {1,2,4,8, 1,2,4,8, 4,8, 2}. The values are
 * written out below so the pairing with that table is visible; do not reorder
 * or renumber without updating scan.c. */
typedef enum {
    /* signed */
    SCAN_I8  = 0,
    SCAN_I16 = 1,
    SCAN_I32 = 2,
    SCAN_I64 = 3,
    /* unsigned */
    SCAN_U8  = 4,
    SCAN_U16 = 5,
    SCAN_U32 = 6,
    SCAN_U64 = 7,
    /* float */
    SCAN_F32 = 8,
    SCAN_F64 = 9,
    SCAN_F16 = 10
} scan_type;
/* Comparison applied by the value scanner when filtering candidates. */
typedef enum {
    /* these four require a value argument */
    SCAN_EQ        = 0,
    SCAN_NEQ       = 1,
    SCAN_GT        = 2,
    SCAN_LT        = 3,
    /* these four are relative to the previous snapshot */
    SCAN_INC       = 4,
    SCAN_DEC       = 5,
    SCAN_CHANGED   = 6,
    SCAN_UNCHANGED = 7
} scan_op;
/* Opaque value-scan session: only the tag is declared here, so callers can
 * hold a `scan*` but never see the layout. */
struct scan;
typedef struct scan scan;
/* One value-scanner result record, as filled in by scan_results(). */
typedef struct {
    uint64_t addr;   /* address of the hit */
    uint64_t value;  /* 64-bit value reported for that address */
} scan_hit;
/* Cap on pointer-chain length: bounds the DFS depth and sizes off[]. */
#define SCAN_PTR_MAXDEPTH 8

/* One pointer chain reported by the pointer scanner. */
typedef struct {
    /* range-anchored base address the chain starts from */
    uint64_t base;
    /* how many entries of off[] are in use */
    int depth;
    /* the dereference chain, one offset per hop */
    int32_t off[SCAN_PTR_MAXDEPTH];
} scan_ptr_path;
/* Create a typed value-scan session keyed by `m` + `cr3`.
 *
 *   m, cr3   memory handle and address-space root the session scans through
 *   t        element type of the values being scanned
 *   value    NOTE(review): presumably the initial value to match, read as
 *            type `t` - confirm in scan.c
 *   be       NOTE(review): looks like a big-endian flag - confirm
 *   aligned  NOTE(review): looks like a natural-alignment-only flag - confirm
 *   lo, hi   address bounds of the scan
 *
 * Returns the opaque session, to be released with scan_free().
 * NOTE(review): the failure return (presumably NULL) is not visible from this
 * header - confirm in scan.c. */
scan* scan_new_cr3(vmie_mem* m,
                   uintptr_t cr3,
                   scan_type t,
                   const void* value,
                   int be,
                   int aligned,
                   uint64_t lo,
                   uint64_t hi);
/* Run one narrowing pass over session `s` using comparison `op`.
 *
 * SCAN_EQ/NEQ/GT/LT require `value`; SCAN_INC/DEC/CHANGED/UNCHANGED compare
 * against the previous snapshot instead.
 * NOTE(review): presumably returns the number of surviving candidates and a
 * negative value on error - confirm in scan.c. */
int64_t scan_next(scan* s,
                  scan_op op,
                  const void* value);
/* Query session `s` for its count.
 * NOTE(review): presumably the current number of candidates held by the
 * session; the meaning of a negative result is not visible here - confirm. */
int64_t scan_count(
    scan* s);
/* Copy results of session `s` into the caller's `out` array.
 * NOTE(review): presumably skips the first `offset` results, stores at most
 * `max` scan_hit records and returns how many were stored - confirm in
 * scan.c. */
int scan_results(scan* s,
                 uint64_t offset,
                 int max,
                 scan_hit* out);
/* Dispose of a value-scan session.
 * NOTE(review): whether a NULL `s` is tolerated is not visible from this
 * header - confirm in scan.c before relying on it. */
void scan_free(
    scan* s);
/* Pointer scanner: discover range-anchored pointer chains that lead to
 * `target`.
 *
 *   m, cr3       memory handle and address-space root to read through
 *   mods, nmods  the decoded range[] that chain bases are anchored in
 *   target       address the chains must reach
 *   max_depth    depth limit for the search; scan_ptr_path can hold at most
 *                SCAN_PTR_MAXDEPTH offsets
 *   max_off      NOTE(review): presumably the largest offset allowed at each
 *                hop - confirm in scan.c
 *   out, max     result array and its capacity
 *
 * NOTE(review): the return value is presumably the number of paths found,
 * negative on error - confirm in scan.c. */
int scan_pointer(vmie_mem* m,
                 uintptr_t cr3,
                 const range* mods,
                 int nmods,
                 uint64_t target,
                 int max_depth,
                 uint32_t max_off,
                 scan_ptr_path* out,
                 int max);
/* ---- multi-pattern + code-xref bridges (over sigscan.h / gva_sweep) ------ *
* Same windowed-seam discipline as gva_sig_scan, but for a compiled sigset and
* a heuristic rel32 decoder. Both stream guest memory through gva_sweep and
* report VAs in the guest's own coordinate space. */
/* A single multi-pattern hit, attributed to the pattern that produced it. */
typedef struct {
    int pattern;   /* which compiled pattern matched */
    uint64_t va;   /* where it matched */
} sig_multi_hit;
/* gva_sig_scan_multi - match a compiled sigset against [lo,hi] in one
 * windowed pass.
 *
 * Every window is driven through sig_set_each, and seam hits are de-duplicated
 * the same way gva_sig_scan does it. Adjacent windows overlap by
 * sigset_maxlen(s) - 1 bytes (the longest pattern length minus one), so no
 * full pattern is split at a window boundary.
 *
 *   prot_any  NOTE(review): presumably a mask of region protection bits, any
 *             of which qualifies a region - confirm in memmodel.h
 *   out, max  at most `max` hits are written to `out`; pass out == NULL to
 *             count only
 *
 * Returns the TOTAL number of hits, or -1 when the sigset is NULL or empty. */
int gva_sig_scan_multi(vmie_mem* m,
                       uintptr_t cr3,
                       uint64_t lo,
                       uint64_t hi,
                       uint32_t prot_any,
                       const sigset* s,
                       sig_multi_hit* out,
                       int max);
/* gva_code_xref - find every instruction in the X-regions of [lo,hi] whose
 * rel32 operand targets `target_va`.
 *
 * The decoder is a heuristic, NOT a full disassembler. It recognizes:
 *   - E8 call / E9 jmp, where target = next_rip + disp32;
 *   - the RIP-relative ModRM forms (mod=00, rm=101) of lea/mov
 *     (REX.W 8D / 8B), where target = next_rip + (int32)disp.
 * The VA recorded for each match is the start of the instruction.
 *
 * The sweep forces VR_X and keeps an overlap of >=15 bytes (the maximum x86
 * instruction length) so no instruction is cut at a window seam.
 *
 *   out, max  at most `max` VAs are written to `out`; pass out == NULL to
 *             count only
 *
 * Returns the TOTAL number of matches, or -1 on bad input. */
int gva_code_xref(vmie_mem* m,
                  uintptr_t cr3,
                  uint64_t lo,
                  uint64_t hi,
                  uint64_t target_va,
                  uint64_t* out,
                  int max);
/* gva bridges to the signature matcher: build mem_view from guest memory and feed sigscan.h */
/* Windowed single-pattern scan of guest memory in [lo,hi] for `p`, with
 * seam-deduplicated hits.
 * NOTE(review): presumably follows the same out/max convention as the other
 * scans in this header (up to `max` VAs stored, out == NULL counts only,
 * TOTAL returned, -1 on a bad pattern) - confirm in scan.c. */
int gva_sig_scan(vmie_mem* m,
                 uintptr_t cr3,
                 uint64_t lo,
                 uint64_t hi,
                 uint32_t prot_any,
                 const sig_pattern_t* p,
                 uint64_t* out,
                 int max);
/* Signature scan of guest memory in [lo,hi] for `p`, reporting through `va`.
 * NOTE(review): presumably stops at the first match and stores its VA in
 * `*va`; the return convention (found / not found / error) is not visible
 * from this header - confirm in scan.c. */
int gva_sig_first(vmie_mem* m,
                  uintptr_t cr3,
                  uint64_t lo,
                  uint64_t hi,
                  uint32_t prot_any,
                  const sig_pattern_t* p,
                  uint64_t* va);
/* Resolve the address referenced by an instruction found at `hit_va`.
 * NOTE(review): presumably reads a 32-bit displacement at hit_va + disp_off
 * and stores hit_va + instr_len + disp in `*target` (RIP-relative form);
 * the return convention is not visible from this header - confirm in
 * scan.c. */
int gva_sig_rip(vmie_mem* m,
                uintptr_t cr3,
                uint64_t hit_va,
                size_t disp_off,
                size_t instr_len,
                uint64_t* target);
/* ---- physical-image signature scan (OS-agnostic engine bridge) ----------- *
* Scan the raw physical image (the core segment map) for a signature, without a
* cr3 or page tables: each seg is one mem_view_t over its file span, fed to the
* pure matcher. This is the dump path - a dump (vmie_mem_open*) supports the
* physical scan only. Keyed by vmie_mem*, like the rest of this header. */
/* A hit from a multi-source scan, attributed to the source that matched. */
typedef struct {
    int source;     /* which source matched */
    uint64_t gpa;   /* where it matched */
} sig_hit_src;
/* sig_scan_mem - look for `p` in a single physical image.
 *
 *   out, max  at most `max` GPA hits are written to `out`; pass out == NULL
 *             to count only
 *
 * Returns the TOTAL number of hits, or -1 on a bad pattern. */
int sig_scan_mem(vmie_mem* m,
                 const sig_pattern_t* p,
                 uint64_t* out,
                 int max);
/* sig_scan_sources - look for `p` in `nsrc` physical images, tagging every
 * hit with the index of the source it came from.
 *
 *   out, max  at most `max` attributed hits are written to `out`; pass
 *             out == NULL to count only
 *
 * Returns the TOTAL across all sources, or -1 on a bad pattern. */
int sig_scan_sources(vmie_mem* const* srcs,
                     int nsrc,
                     const sig_pattern_t* p,
                     sig_hit_src* out,
                     int max);
#endif /* VMIE_SCAN_H */