Add function inventory (.pdata), signature generation, and export/PDB symbols

Three reversing capabilities on the win32 surface plus a pure sig-gen handler:

- vmie_win32_functions enumerates a module's functions from the exception
  directory (.pdata RUNTIME_FUNCTION), folding unwind chain continuations into
  their primary - authoritative non-leaf boundaries, not prologue heuristics.
- vmie_win32_exports resolves the export table to {name, rva, ordinal,
  forwarded}: named functions with no PDB or network. vmie_win32_pdb_ref pulls
  the CodeView/RSDS {guid, age, pdb} from the debug directory - the symbol-server
  key for any module (full PDB parsing stays out of scope).
- sig_generate (siggen.h) builds a unique masked signature for a code span,
  wildcarding the rel/RIP-relative displacement bytes the x86 decoder locates and
  growing until it matches the scope exactly once - the dual of sigscan.

The decoder now also reports disp_off/disp_len so a caller can mask the floating
bytes. The MZ/PE walk gains one shared data-directory accessor and one shared
CodeView/RSDS parser; the kernel bootstrap is moved onto both, removing its
private copies - one PE parser in the tree.
This commit is contained in:
2026-06-16 19:27:42 +03:00
parent 06230ac680
commit c4419964aa
9 changed files with 542 additions and 67 deletions
+55
View File
@@ -0,0 +1,55 @@
/* siggen.h - x86-64 code signature generator (pure handler).
*
* Turns a span of code into a UNIQUE masked byte signature suitable for the
* sigscan matcher: opcode/ModRM/fixed bytes are must-match; the rel/RIP-relative
* displacement bytes - the ones that "float" with the load address and with
* relocation - are wildcarded. The result is the dual of sigscan: feed its
* output back into sig_all/sig_first to relocate the same code in another image.
*
* Pure: it depends only on sigscan.h (the pattern + view types and the matcher)
* and x86dec.h (the length decoder that locates the displacement field). It
* touches no vmie_mem and does no I/O; build a view (e.g. a section view) and
* pass it in.
*/
#ifndef VMIE_SIGGEN_H
#define VMIE_SIGGEN_H
#include <stddef.h>
#include "sigscan.h" /* sig_pattern_t, mem_view_t, sig_all, sig_free */
#include "x86dec.h" /* x86_decode + x86_insn.disp_off/disp_len */
/* Build a unique masked signature for the code starting at scope.data[start_off].
 *
 * Steps instructions with x86_decode. Each instruction contributes its
 * opcode/ModRM/fixed bytes as must-match (mask x) and its rel/RIP-relative
 * displacement bytes [disp_off, disp_off+disp_len) as wildcards (mask ?), since
 * those move with load address / relocation. The pattern grows instruction by
 * instruction until it occurs EXACTLY ONCE in `scope` (verified with sig_all)
 * or `max_len` bytes are consumed.
 *
 *   scope     - search space the signature must be unique within (e.g. a .text
 *               section view). Uniqueness is judged in scope's own coordinate
 *               system; use a MODULE_RVA / SECTION_LOCAL view so the origin of
 *               the result is ASLR-stable.
 *   start_off - byte offset in `scope` where the target code begins (must be <
 *               scope.size).
 *   max_len   - cap on signature length in bytes (e.g. 64); guards against
 *               non-unique code.
 *   out       - on success, the generated pattern (free with sig_free()).
 *               NOTE(review): the state of `out` on the 0 / -1 returns is not
 *               specified here - confirm against the implementation before
 *               reading or freeing it on failure.
 *
 * Returns the pattern length in bytes on success, 0 if it cannot be made unique
 * within max_len, -1 on bad input. On success the result matches `scope`
 * exactly once, at start_off.
 *
 * v1 wildcards ONLY rel/RIP-relative displacements (the dominant floating bytes);
 * absolute immediate relocations are NOT auto-wildcarded (a .reloc cross-check is
 * a future extension).
 *
 * Example - generate a portable signature for a function in .text (MODULE_RVA
 * view => an ASLR-stable origin), then relocate it elsewhere:
 *   sig_pattern_t p;
 *   int len = sig_generate(text_view, fn_rva - text_view.base_va, 64, &p);
 *   if (len > 0) {
 *     uint64_t rva = sig_first(other_text_view, &p); // re-find the function
 *     sig_free(&p);
 *   } */
int sig_generate(mem_view_t scope, size_t start_off, size_t max_len,
sig_pattern_t* out);
#endif /* VMIE_SIGGEN_H */
+77
View File
@@ -267,4 +267,81 @@ int vmie_win32_section_view(vmie_win32* v, uint64_t cr3, uint64_t module_base,
const section_desc* sec, view_base mode,
uint8_t* buf, size_t bufcap, mem_view_t* out);
/* ---- function inventory / exports / PDB reference ------------------------ *
* Authoritative module metadata recovered from the PE directories, keyed by
* (vmie_win32*, cr3, module_base) like the section surface. All RVAs are
* image-relative and therefore ASLR-independent (absolute VA = module_base +
* rva); only the headers and the relevant directory need be resident. */
/* A single function extent taken from the module's exception directory
 * (.pdata RUNTIME_FUNCTION entries).
 *
 * .pdata lists only NON-LEAF functions; leaf functions that carry no unwind
 * data are absent. Entries are authoritative where present, but the set is not
 * a complete function list. Both fields are ASLR-independent. */
typedef struct {
    uint32_t rva;  /* function start RVA (BeginAddress); absolute VA = module_base + rva */
    uint32_t size; /* extent in bytes: EndAddress - BeginAddress */
} func_range;
/* Enumerate functions of the module at `module_base` (cr3 address space) from
 * .pdata. Chain continuations (UNWIND_INFO with UNW_FLAG_CHAININFO) are folded
 * into their primary - one entry per function start.
 *
 *   out - receives the entries; NULL => count only.
 *   max - number of entries `out` can hold, as the example uses it.
 *
 * Returns the TOTAL count, or -1 if no exception directory / unreadable. The
 * total is not limited to `max`, so clamp to the array size before indexing
 * `out`; the `i < n` test in the example also skips the loop on a -1 return.
 *
 * Example - list the first 64 functions of a module as ASLR-stable RVAs:
 *   func_range fr[64];
 *   int n = vmie_win32_functions(v, pr->cr3, m.base, fr, 64);
 *   for (int i = 0; i < n && i < 64; i++)
 *     printf("sub_%x (%u bytes)\n", fr[i].rva, fr[i].size); */
int vmie_win32_functions(vmie_win32* v, uint64_t cr3, uint64_t module_base,
func_range* out, int max);
/* A single exported symbol read from the module export directory (EAT). */
typedef struct {
    uint32_t rva;       /* export target RVA (absolute VA = module_base + rva);
                         * for a forwarder this is the forwarder-string RVA,
                         * see `forwarded` */
    uint16_t ordinal;   /* export ordinal (biased value as exported) */
    uint8_t  forwarded; /* nonzero if this is a forwarder: rva points into the
                         * export section, not code - e.g. "NTDLL.RtlAllocateHeap" */
    char     name[64];  /* export name, NUL-terminated, TRUNCATED to 63 chars
                         * (long C++ mangled names are cut; "" for
                         * by-ordinal-only exports) */
} export_sym;
/* Enumerate the module's exports (named functions, no PDB/network needed).
 *
 *   out - receives the entries; NULL => count only.
 *   max - number of entries `out` can hold, as the example uses it.
 *
 * Returns the TOTAL count, or -1 if no export directory / unreadable. The total
 * is not limited to `max`, so clamp to the array size before indexing `out`;
 * the `i < n` test in the example also skips the loop on a -1 return.
 * Entries are reported in export-table order; by-ordinal-only exports (no name)
 * carry name[0]=='\0'.
 *
 * Example - print a module's named exports:
 *   export_sym es[256];
 *   int n = vmie_win32_exports(v, pr->cr3, m.base, es, 256);
 *   for (int i = 0; i < n && i < 256; i++)
 *     printf("%-40s rva=%#x ord=%u%s\n", es[i].name, es[i].rva,
 *            es[i].ordinal, es[i].forwarded ? " (forwarder)" : ""); */
int vmie_win32_exports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
export_sym* out, int max);
/* CodeView PDB reference from the module debug directory (RSDS). The
 * symbol-server lookup key.
 *
 * Byte-order note: `guid` is held in in-memory byte order, which is NOT the
 * order the symbol-server path uses. The path uses the GUID's textual field
 * order: Data1 (32-bit), Data2 and Data3 (16-bit each) are little-endian in
 * memory, so their bytes are emitted reversed (guid[3],[2],[1],[0], then
 * [5],[4], then [7],[6]); only the trailing 8 bytes (Data4) are emitted in
 * storage order. Hex-dumping guid[0..15] front to back does not produce a
 * valid symbol-server key.
 *
 * Use {guid, age, pdb} to fetch the PDB out-of-band; PARSING the PDB for
 * internal symbol names is OUT OF SCOPE here (it needs the external file). */
typedef struct {
    uint8_t  guid[16]; /* PDB GUID, 16 bytes, in-memory byte order */
    uint32_t age;      /* PDB age */
    char     pdb[64];  /* PDB file name, NUL-terminated, truncated to 63 chars
                        * (e.g. "ntdll.pdb") */
} pdb_ref;
/* Extract the module's PDB reference. Returns 0 on success, -1 if no debug
 * directory / not RSDS / unreadable. Generalizes the kernel bootstrap's GUID
 * resolve to any module.
 *
 * Example - format the symbol-server path component for a module. The path
 * uses the GUID's textual field order, so the first three fields (4+2+2 bytes,
 * little-endian in memory) are emitted byte-reversed and the last 8 bytes in
 * storage order; this assumes `guid` holds the bytes in in-memory order, as the
 * pdb_ref field is described:
 *   pdb_ref pr_;
 *   if (vmie_win32_pdb_ref(v, pr->cr3, m.base, &pr_) == 0) {
 *     static const int ord[16] = { 3, 2, 1, 0,  5, 4,  7, 6,
 *                                  8, 9, 10, 11, 12, 13, 14, 15 };
 *     char g[33];
 *     for (int i = 0; i < 16; i++)
 *       snprintf(g + i*2, 3, "%02X", pr_.guid[ord[i]]);
 *     printf("%s/%s%X/%s\n", pr_.pdb, g, pr_.age, pr_.pdb);
 *   } */
int vmie_win32_pdb_ref(vmie_win32* v, uint64_t cr3, uint64_t module_base,
pdb_ref* out);
#endif /* VMIE_WIN32_H */
+15 -1
View File
@@ -38,6 +38,14 @@ typedef struct {
int32_t rel; /* sign-extended branch displacement (if has_rel) */
int has_riprel; /* 1: RIP-relative memory operand (ModRM mod=00,rm=101) */
int32_t riprel; /* sign-extended RIP-relative disp32 (if has_riprel) */
uint8_t disp_off; /* byte offset within the instruction of the rel/RIP-rel
* displacement field, or 0 if the instruction has
* neither (has_rel == 0 && has_riprel == 0). This is the
* field that "floats" with the load address / relocation,
* so a signature generator wildcards exactly these bytes. */
uint8_t disp_len; /* displacement length: 1 (rel8), 4 (rel32 or RIP-rel
* disp32), else 0 (no displacement). The wildcard span is
* [disp_off, disp_off + disp_len). */
} x86_insn;
/* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills
@@ -45,7 +53,13 @@ typedef struct {
* exceed min(avail,15). Length-correct over: legacy prefixes (66/67/F0/F2/F3/
* seg), REX, 1-byte / 0F two-byte / 0F38 / 0F3A maps, ModRM+SIB, disp8/disp32,
* imm8/16/32/64 (66 and REX.W operand-size effects), and VEX (C4/C5). EVEX
* (0x62) is a documented gap: len=0. */
* (0x62) is a documented gap: len=0.
*
* On a decoded instruction it also reports out->disp_off / out->disp_len: the
* byte position and length of the rel/RIP-relative displacement field within the
* instruction (0/0 when there is none). These are exactly the bytes that float
* with the load address / relocation, so a signature generator wildcards
* [disp_off, disp_off+disp_len) and keeps the rest as must-match. */
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out);
/* Absolute target of a rel branch: ip + insn->len + insn->rel (0 unless has_rel). */