/* sigscan.h - source-agnostic x86-64 signature scanner. * * Everything operates on a mem_view_t: a flat byte span plus the virtual address * that data[0] maps to. Live guest memory, a retained snapshot, and an on-disk * dump are identical to the matcher - only how you build the view differs. All * results are reported as addresses in the view's own coordinate space * (base_va + offset): a guest VA for a virtual view, a GPA for a physical view. * * This module is pure: it never touches a vmie_mem and performs no I/O. To scan * guest memory, build views from the gva layer (see scan.h: gva_sig_scan) and * feed them here. */ #ifndef VMIE_SIGSCAN_H #define VMIE_SIGSCAN_H #include #include #include #include "memmodel.h" /* mem_view_t (the single owner of the view type) */ /* A parsed byte pattern. mask[i] == 1 means bytes[i] must match; 0 = wildcard. * Owns two heap allocations of `len` bytes each; release with sig_free(). */ typedef struct { uint8_t* bytes; uint8_t* mask; size_t len; } sig_pattern_t; /* Parse an IDA-style string, e.g. "48 8B 05 ? ? ? ? 48 85 C0" ('?' or '??' = * wildcard). On success fills *out and returns true; free it with sig_free(). * Returns false on NULL args, an empty string, or a malformed hex byte. */ bool sig_parse_ida(const char* ida, sig_pattern_t* out); /* Parse code+mask form, e.g. bytes="\x48\x8B\x05\x00\x00\x00\x00", mask="xxx????" * ('x'/'X' = must match, anything else = wildcard). `bytes` must have at least * strlen(mask) readable bytes. Returns true on success (free with sig_free()), * false on NULL args or an empty mask. */ bool sig_parse_mask(const uint8_t* bytes, const char* mask, sig_pattern_t* out); /* Build an exact (no-wildcard) pattern from `len` raw bytes: every byte must * match. A thin wrapper over sig_parse_mask with an all-'x' mask, so the result * is released with sig_free() like any other pattern. Returns true on success, * false on NULL args, a zero length, or OOM. Touches no vmie_mem (pure). */ bool sig_from_bytes(const uint8_t* bytes, size_t len, sig_pattern_t* out); /* Release a pattern produced by sig_parse_*. Safe on NULL and on an * already-freed pattern (it is zeroed). */ void sig_free(sig_pattern_t* p); /* Invoke cb(user, match_va) for every match of `p` in `v`, in ascending address * order. The driver anchors on the pattern's first non-wildcard byte and uses * memchr to skip, so it is fast even on sparse matches. `cb` returns nonzero to * stop early. This is the building block under sig_first/sig_all and is what a * windowed caller uses to de-duplicate across window seams (see scan.h). */ void sig_each(mem_view_t v, const sig_pattern_t* p, int (*cb)(void* user, uint64_t va), void* user); /* First match, or 0 if none. (0 is also a theoretically valid base_va of 0; in * practice view base addresses are nonzero, so 0 reliably means "no match".) */ uint64_t sig_first(mem_view_t v, const sig_pattern_t* p); /* All matches. If `out` is NULL, returns the total match count (use it to size a * buffer). Otherwise writes up to `max` addresses to `out` and returns how many * were written (capped at `max`). */ size_t sig_all(mem_view_t v, const sig_pattern_t* p, uint64_t* out, size_t max); /* Resolve an x86-64 RIP-relative operand at a match site. * hit_va - VA of the matched pattern start (== instruction start) * disp_off - byte offset of the int32 displacement within the pattern * instr_len - full instruction length (next RIP = hit_va + instr_len); for the * common " disp32" tail this is disp_off + 4 * Returns the absolute target VA, or 0 if the displacement bytes lie outside `v`. * The result is an address in the same space as `v` (a guest VA for a guest * view): dereference it with gva_read under the matching cr3. This is how an * unexported global is located from a code signature. */ uint64_t sig_rip(mem_view_t v, uint64_t hit_va, size_t disp_off, size_t instr_len); /* Clamp a sub-view [start_va, start_va+size) against `v`. Returns a zeroed view * (data == NULL) if start_va is outside `v`; otherwise `size` is trimmed to what * is actually available. Useful for narrowing a scan to a [start,end] window. */ mem_view_t mem_sub(mem_view_t v, uint64_t start_va, size_t size); /* ---- compiled multi-pattern matcher (Aho-Corasick anchors) --------------- * * A sigset compiles N patterns into one automaton scanned in a single pass. It * is still PURE (only mem_view_t, no vmie_mem). Each pattern contributes its * longest contiguous non-wildcard run as a literal anchor; an Aho-Corasick goto * over those anchors finds candidate sites, and on an anchor hit the FULL masked * pattern is verified (mem_sub + mask compare) before the match is reported. * This is the building block under gva_sig_scan_multi (see scan.h). */ typedef struct sigset sigset; /* compiled automaton (opaque) */ /* Compile `n` patterns into a sigset. The patterns are borrowed for the call * only (their bytes are copied into the automaton). Returns NULL on OOM, on * n <= 0, or if any pattern is empty / all-wildcard (no literal anchor). Release * with sigset_free(). */ sigset* sigset_compile(const sig_pattern_t* pats, int n); /* Release a sigset produced by sigset_compile. Safe on NULL. */ void sigset_free(sigset* s); /* Invoke cb(user, pat_index, match_va) for every full-pattern match of any * compiled pattern in `v`, anchor-driven (not necessarily in ascending order * across patterns). `cb` returns nonzero to stop early. The longest-anchor * length is what a windowed caller uses as overlap to de-dup across seams. */ void sig_set_each(const sigset* s, mem_view_t v, int (*cb)(void* user, int pat, uint64_t va), void* user); /* Longest compiled pattern length, in bytes. A windowed sweep carries * (this - 1) leading-overlap bytes so no full pattern is split at a seam (the * gva_sig_scan_multi overlap contract). 0 on NULL. */ size_t sigset_maxlen(const sigset* s); #endif /* VMIE_SIGSCAN_H */