Files

112 lines
6.0 KiB
C
Raw Permalink Normal View History

/* sigscan.h - source-agnostic x86-64 signature scanner.
*
* Everything operates on a mem_view_t: a flat byte span plus the virtual address
* that data[0] maps to. Live guest memory, a retained snapshot, and an on-disk
* dump are identical to the matcher - only how you build the view differs. All
* results are reported as addresses in the view's own coordinate space
* (base_va + offset): a guest VA for a virtual view, a GPA for a physical view.
*
* This module is pure: it never touches a vmie_mem and performs no I/O. To scan
* guest memory, build views from the gva layer (see scan.h: gva_sig_scan) and
* feed them here.
*/
2026-06-15 01:49:16 +03:00
#ifndef VMIE_SIGSCAN_H
#define VMIE_SIGSCAN_H
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include "memmodel.h" /* mem_view_t (the single owner of the view type) */
/* sig_pattern_t - a parsed byte pattern with a per-position wildcard mask.
 *
 * Position i of the pattern is described by bytes[i] and mask[i]:
 * mask[i] == 1 means bytes[i] must match, mask[i] == 0 means position i is a
 * wildcard.
 *
 * Ownership: the struct owns two heap allocations of `len` bytes each (one for
 * bytes, one for mask). Release them with sig_free(). */
typedef struct {
    uint8_t* bytes; /* pattern bytes, `len` entries */
    uint8_t* mask;  /* 1 = must match, 0 = wildcard; `len` entries */
    size_t len;     /* number of positions in the pattern */
} sig_pattern_t;
/* Parse an IDA-style signature string into a pattern.
 *
 * ida - text such as "48 8B 05 ? ? ? ? 48 85 C0"; a '?' or '??' token is a
 *       wildcard.
 * out - receives the parsed pattern on success; the caller releases it with
 *       sig_free().
 *
 * Returns true on success. Returns false on NULL args, an empty string, or a
 * malformed hex byte. */
bool sig_parse_ida(const char* ida, sig_pattern_t* out);
/* Parse the code+mask form of a signature into a pattern.
 *
 * bytes - raw pattern bytes, e.g. "\x48\x8B\x05\x00\x00\x00\x00"; at least
 *         strlen(mask) bytes must be readable.
 * mask  - mask string, e.g. "xxx????": 'x' or 'X' means the byte must match,
 *         any other character is a wildcard.
 * out   - receives the parsed pattern on success; release it with sig_free().
 *
 * Returns true on success, false on NULL args or an empty mask. */
bool sig_parse_mask(const uint8_t* bytes, const char* mask, sig_pattern_t* out);
/* Build an exact (no-wildcard) pattern from raw bytes: every byte must match.
 *
 * bytes - the `len` raw bytes to match.
 * len   - number of bytes; a zero length is rejected.
 * out   - receives the pattern on success; release it with sig_free(), the
 *         same as any other pattern.
 *
 * This is a thin wrapper over sig_parse_mask with an all-'x' mask. It is pure:
 * it touches no vmie_mem.
 *
 * Returns true on success, false on NULL args, a zero length, or OOM. */
bool sig_from_bytes(const uint8_t* bytes, size_t len, sig_pattern_t* out);
/* Release a pattern produced by sig_parse_ida, sig_parse_mask or
 * sig_from_bytes.
 *
 * Safe on NULL and on an already-freed pattern: a freed pattern is left
 * zeroed. */
void sig_free(sig_pattern_t* p);
/* Invoke cb(user, match_va) for every match of `p` in `v`, in ascending address
 * order.
 *
 * v    - the view to scan.
 * p    - the pattern to look for.
 * cb   - called once per match with the match address; return nonzero to stop
 *        the scan early.
 * user - opaque pointer handed to cb as its first argument.
 *
 * The driver anchors on the pattern's first non-wildcard byte and uses memchr
 * to skip ahead, so it stays fast even when matches are sparse. This is the
 * building block under sig_first/sig_all, and it is what a windowed caller uses
 * to de-duplicate matches across window seams (see scan.h). */
void sig_each(mem_view_t v, const sig_pattern_t* p,
int (*cb)(void* user, uint64_t va), void* user);
/* Return the address of the first match of `p` in `v`, or 0 if there is none.
 *
 * 0 is ambiguous in theory: it is also a valid match address when the view's
 * base_va is 0. In practice view base addresses are nonzero, so 0 reliably
 * means "no match". */
uint64_t sig_first(mem_view_t v, const sig_pattern_t* p);
/* Collect all matches of `p` in `v`.
 *
 * out - NULL to count only; otherwise a buffer that receives match addresses.
 * max - maximum number of addresses to write to `out`.
 *
 * If `out` is NULL, returns the total match count (use it to size a buffer).
 * Otherwise writes up to `max` addresses to `out` and returns how many were
 * written (capped at `max`). */
size_t sig_all(mem_view_t v, const sig_pattern_t* p, uint64_t* out, size_t max);
/* Resolve an x86-64 RIP-relative operand at a match site.
 *
 * v         - the view containing the match site.
 * hit_va    - VA of the matched pattern start (== instruction start).
 * disp_off  - byte offset of the int32 displacement within the pattern.
 * instr_len - full instruction length (next RIP = hit_va + instr_len); for the
 *             common "<prefix> disp32" tail this is disp_off + 4.
 *
 * Returns the absolute target VA, or 0 if the displacement bytes lie outside
 * `v`.
 *
 * The result is an address in the same space as `v` (a guest VA for a guest
 * view): dereference it with gva_read under the matching cr3. This is how an
 * unexported global is located from a code signature. */
uint64_t sig_rip(mem_view_t v, uint64_t hit_va, size_t disp_off, size_t instr_len);
/* Clamp the sub-view [start_va, start_va + size) against `v`.
 *
 * v        - the enclosing view.
 * start_va - first address of the requested sub-view.
 * size     - requested length in bytes.
 *
 * Returns a zeroed view (data == NULL) if start_va is outside `v`; otherwise
 * `size` is trimmed to what is actually available. Useful for narrowing a scan
 * to a [start, end] window. */
mem_view_t mem_sub(mem_view_t v, uint64_t start_va, size_t size);
/* ---- compiled multi-pattern matcher (Aho-Corasick anchors) --------------- *
 * A sigset compiles N patterns into one automaton that is scanned in a single
 * pass. It is still PURE: it works only on a mem_view_t and never touches a
 * vmie_mem.
 *
 * How matching works:
 *   1. Each pattern contributes its longest contiguous non-wildcard run as a
 *      literal anchor.
 *   2. An Aho-Corasick goto over those anchors finds candidate sites.
 *   3. On an anchor hit the FULL masked pattern is verified (mem_sub + mask
 *      compare) before the match is reported.
 *
 * This is the building block under gva_sig_scan_multi (see scan.h). */
typedef struct sigset sigset; /* compiled automaton (opaque) */
/* Compile `n` patterns into a sigset.
 *
 * pats - the patterns to compile. They are borrowed for the duration of the
 *        call only: their bytes are copied into the automaton.
 * n    - number of patterns; must be > 0.
 *
 * Returns the compiled sigset (release it with sigset_free()), or NULL on OOM,
 * on n <= 0, or if any pattern is empty or all-wildcard (such a pattern has no
 * literal anchor). */
sigset* sigset_compile(const sig_pattern_t* pats, int n);
/* Release a sigset produced by sigset_compile. Safe to call with s == NULL. */
void sigset_free(sigset* s);
/* Invoke cb(user, pat_index, match_va) for every full-pattern match of any
 * compiled pattern in `v`.
 *
 * s    - the compiled sigset.
 * v    - the view to scan.
 * cb   - called once per full-pattern match with the pattern index and the
 *        match address; return nonzero to stop early.
 * user - opaque pointer handed to cb as its first argument.
 *
 * The scan is anchor-driven, so matches are not necessarily reported in
 * ascending address order across patterns.
 *
 * The longest-anchor length is what a windowed caller uses as overlap to de-dup
 * across seams.
 * NOTE(review): sigset_maxlen describes the seam overlap as (longest full
 * pattern length - 1) rather than the longest anchor length - confirm which of
 * the two the windowed caller actually relies on. */
void sig_set_each(const sigset* s, mem_view_t v,
int (*cb)(void* user, int pat, uint64_t va), void* user);
/* Return the longest compiled pattern length, in bytes, or 0 if `s` is NULL.
 *
 * A windowed sweep carries (this - 1) leading-overlap bytes so that no full
 * pattern is split at a window seam (the gva_sig_scan_multi overlap
 * contract). */
size_t sigset_maxlen(const sigset* s);
2026-06-15 01:49:16 +03:00
#endif /* VMIE_SIGSCAN_H */