mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
79e82ffc6a
Wave 1 of the code-analysis layer, built on the x86-64 decoder: - vmie_win32_callgraph walks each .pdata function with the decoder and emits an edge for every direct call/jmp whose target lands in the module - the intra-module call graph. Indirect edges are left to the IAT and jump tables. - gva_jumptable recovers a switch's case targets from an indirect jump's table: consecutive pointer entries that land in an executable region. - cfg_blocks splits one function view into basic blocks (a generic handler: leaders from intra-function branch targets, cut after jmp/jcc/ret). - gva_imm_xref finds the instructions whose immediate operand equals a constant - the dual of code-xref for magic values, error codes, syscall numbers. The decoder now also reports imm_off/imm_len so a caller can read or match the immediate operand. The generic primitives live in the new codeanalysis.h (jump tables, basic blocks) and scan.h (constant xref); the .pdata-bound call graph stays on the win32 surface and reuses the existing function/section/decode primitives - no second PE or instruction parser.
142 lines
7.7 KiB
C
142 lines
7.7 KiB
C
/* scan.h - typed value scanner, pointer scanner, and gva<->signature bridges.
 *
 * Layered above the pure matcher (sigscan.h) and the generic memory-model
 * contract (memmodel.h): this is the OS-agnostic scanning surface. Everything
 * here is keyed by a `vmie_mem*` + `cr3` (and, for the pointer scan, a decoded
 * `range[]`); it names no Windows object. The value scanner narrows a candidate
 * set across successive snapshots; the pointer scanner discovers range-anchored
 * pointer chains; the gva_sig_* bridges build mem_view_t windows out of guest
 * memory and feed them to the signature matcher.
 *
 * The Windows-typed convenience entry points (scan_new(process*),
 * vmie_scan_pointer(process*)) live in the win32 surface (win32.h).
 */
|
|
#ifndef VMIE_SCAN_H
|
|
#define VMIE_SCAN_H
|
|
#include <stdint.h>
|
|
#include <stddef.h>
|
|
#include "memmodel.h" /* vmie_mem, range, vregion */
|
|
#include "sigscan.h" /* mem_view_t, sig_pattern_t */
|
|
|
|
/* Element type for the typed value scanner.
 *
 * ENUMERATOR ORDER IS LOAD-BEARING: scan.c indexes the size table
 *     g_tsz[] = {1,2,4,8, 1,2,4,8, 4,8, 2}
 * by these values. Each enumerator's number is therefore spelled out below,
 * next to the byte width that table entry gives it, so an accidental reorder
 * cannot silently shift the indices. Do not renumber without updating scan.c. */
typedef enum {
    SCAN_I8  = 0,   /* signed,   1 byte  */
    SCAN_I16 = 1,   /* signed,   2 bytes */
    SCAN_I32 = 2,   /* signed,   4 bytes */
    SCAN_I64 = 3,   /* signed,   8 bytes */
    SCAN_U8  = 4,   /* unsigned, 1 byte  */
    SCAN_U16 = 5,   /* unsigned, 2 bytes */
    SCAN_U32 = 6,   /* unsigned, 4 bytes */
    SCAN_U64 = 7,   /* unsigned, 8 bytes */
    SCAN_F32 = 8,   /* float,    4 bytes */
    SCAN_F64 = 9,   /* float,    8 bytes */
    SCAN_F16 = 10   /* float,    2 bytes */
} scan_type;
|
|
|
|
/* Filter applied to the candidate set by the value scanner.
 *
 * The first four operators require a value argument; the last four are
 * evaluated relative to the previous snapshot. */
typedef enum {
    /* require a value argument */
    SCAN_EQ,
    SCAN_NEQ,
    SCAN_GT,
    SCAN_LT,
    /* relative to the previous snapshot */
    SCAN_INC,
    SCAN_DEC,
    SCAN_CHANGED,
    SCAN_UNCHANGED
} scan_op;
|
|
|
|
/* Value-scan session handle. Only the struct tag is declared in this header,
 * so callers hold a `scan*` and never see the layout.
 * NOTE(review): presumably created by scan_new_cr3() and released with
 * scan_free() - confirm against scan.c. */
typedef struct scan scan; /* opaque session */
|
|
/* One value-scan result, the element type of the scan_results() output array. */
typedef struct {
    uint64_t addr;    /* location of the hit - presumably a guest VA; confirm in scan.c */
    uint64_t value;   /* value found there - presumably the raw bits widened to 64; TODO confirm */
} scan_hit;
|
|
|
|
/* Upper bound on a pointer chain: the DFS depth limit and the size of off[]. */
#define SCAN_PTR_MAXDEPTH 8

/* One pointer chain reported by the pointer scanner: a range-anchored base
 * plus the offsets of the dereference chain. Only the first `depth` entries of
 * off[] are meaningful. */
typedef struct {
    uint64_t base;                      /* range-anchored base address */
    int      depth;                     /* number of offsets in off[] */
    int32_t  off[SCAN_PTR_MAXDEPTH];    /* dereference chain */
} scan_ptr_path;
|
|
|
|
scan* scan_new_cr3(vmie_mem* m, uintptr_t cr3, scan_type t, const void* value,
|
|
int be, int aligned, uint64_t lo, uint64_t hi);
|
|
int64_t scan_next(scan* s, scan_op op, const void* value);
|
|
int64_t scan_count(scan* s);
|
|
int scan_results(scan* s, uint64_t offset, int max, scan_hit* out);
|
|
void scan_free(scan* s);
|
|
|
|
int scan_pointer(vmie_mem* m, uintptr_t cr3, const range* mods, int nmods,
|
|
uint64_t target, int max_depth, uint32_t max_off,
|
|
scan_ptr_path* out, int max);
|
|
|
|
/* ---- multi-pattern + code-xref bridges (over sigscan.h / gva_sweep) ------ *
 * Same windowed-seam discipline as gva_sig_scan, but for a compiled sigset and
 * a heuristic rel32 decoder. Both stream guest memory through gva_sweep and
 * report VAs in the guest's own coordinate space. */
|
|
|
|
/* One attributed multi-pattern hit: which compiled pattern, and where. */
typedef struct {
    int      pattern;   /* which compiled pattern of the sigset matched */
    uint64_t va;        /* where it matched, in the guest's own coordinate space */
} sig_multi_hit;
|
|
|
|
/* Windowed multi-pattern scan over [lo,hi]: drives sig_set_each on each window,
|
|
* seam-deduped like gva_sig_scan. The sweep overlap is (longest pattern len - 1)
|
|
* = sigset_maxlen(s) - 1, so no full pattern is split at a window boundary.
|
|
* Writes up to `max` hits to `out` (NULL to count only) and returns the TOTAL
|
|
* number of hits, or -1 on a NULL/empty sigset. */
|
|
int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
|
uint32_t prot_any, const sigset* s,
|
|
sig_multi_hit* out, int max);
|
|
|
|
/* code-xref: every instruction in the X-regions of [lo,hi] whose near rel
|
|
* branch or RIP-relative memory operand resolves to `target_va`. Brute-scans
|
|
* each byte offset with the light x86-64 decoder (x86dec.h, NOT a full
|
|
* disassembler): an E8/E9/EB/Jcc rel branch matches when next_rip + rel ==
|
|
* target_va, and any RIP-relative operand (ModRM mod=00, rm=101) matches when
|
|
* next_rip + disp32 == target_va (this covers lea/mov and any other rip-rel
|
|
* form). Records each matching instruction-start VA. The sweep forces VR_X and
|
|
* carries a >=15-byte overlap (max x86 instruction length) so no instruction is
|
|
* cut at a window seam. Writes up to `max` VAs to `out` (NULL to count only) and
|
|
* returns the TOTAL number of matches, or -1 on bad input. */
|
|
int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
|
uint64_t target_va, uint64_t* out, int max);
|
|
|
|
/* immediate / constant xref: every instruction in [lo,hi] (kept by the
|
|
* protection filter `prot_any`; pass VR_X to restrict to code) whose IMMEDIATE
|
|
* operand equals `value`, compared over the low `width` bytes (width is 1, 2, 4,
|
|
* or 8). Like gva_code_xref it brute-scans each byte offset with the light
|
|
* x86-64 decoder (x86dec.h, NOT a full disassembler) and carries a >=15-byte
|
|
* (max x86 instruction length) sweep overlap so no instruction is cut at a
|
|
* window seam; the same SEAM and INTERIOR de-duplications apply (a match
|
|
* starting in a non-last window's trailing overlap is left to the next window,
|
|
* and an interior alias falling inside an already-accepted match is dropped).
|
|
*
|
|
* An instruction matches when it carries an immediate (imm_len > 0) at least
|
|
* `width` bytes wide and its low `width` bytes equal `value & mask(width)`. The
|
|
* rel/RIP-relative DISPLACEMENT of a branch is NOT an immediate and never
|
|
* matches here - use gva_code_xref for displacement targets.
|
|
*
|
|
* Records each matching instruction-start VA in the view's coordinate space.
|
|
* Writes up to `max` VAs to `out` (NULL to count only) and returns the TOTAL
|
|
* number of matches, or -1 on bad input (a NULL m, an unswept range, or a width
|
|
* that is not 1/2/4/8). Use it to answer "what code uses the constant N" - error
|
|
* codes, magic values, syscall numbers, table sizes, struct sizes.
|
|
*
|
|
* Example - sites that load the NTSTATUS 0xC0000022 (ACCESS_DENIED) as a dword:
|
|
* uint64_t sites[64];
|
|
* int n = gva_imm_xref(m, cr3, lo, hi, VR_X, 0xC0000022ull, 4, sites, 64); */
|
|
int gva_imm_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
|
uint32_t prot_any, uint64_t value, int width,
|
|
uint64_t* out, int max);
|
|
|
|
/* gva bridges to the signature matcher: build mem_view from guest memory and feed sigscan.h */
|
|
int gva_sig_scan (vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
|
uint32_t prot_any, const sig_pattern_t* p, uint64_t* out, int max);
|
|
int gva_sig_first(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
|
uint32_t prot_any, const sig_pattern_t* p, uint64_t* va);
|
|
int gva_sig_rip (vmie_mem* m, uintptr_t cr3, uint64_t hit_va,
|
|
size_t disp_off, size_t instr_len, uint64_t* target);
|
|
|
|
/* ---- physical-image signature scan (OS-agnostic engine bridge) ----------- *
 * Scan the raw physical image (the core segment map) for a signature, without a
 * cr3 or page tables: each seg is one mem_view_t over its file span, fed to the
 * pure matcher. This is the dump path - a dump (vmie_mem_open*) supports the
 * physical scan only. Keyed by vmie_mem*, like the rest of this header. */
|
|
|
|
/* Attributed hit from a multi-source scan: which source matched, and where. */
typedef struct {
    int      source;    /* index of the source (physical image) that matched */
    uint64_t gpa;       /* guest-physical address of the hit */
} sig_hit_src;
|
|
|
|
/* Scan one physical image for `p`. Writes up to `max` GPA hits to `out` (NULL to
|
|
* count only) and returns the TOTAL number of hits, or -1 on a bad pattern. */
|
|
int sig_scan_mem (vmie_mem* m, const sig_pattern_t* p, uint64_t* out, int max);
|
|
|
|
/* Scan `nsrc` physical images for `p`, tagging each hit with its source index.
|
|
* Writes up to `max` attributed hits to `out` (NULL to count only) and returns
|
|
* the TOTAL across all sources, or -1 on a bad pattern. */
|
|
int sig_scan_sources(vmie_mem* const* srcs, int nsrc, const sig_pattern_t* p,
|
|
sig_hit_src* out, int max);
|
|
|
|
#endif /* VMIE_SCAN_H */
|