Add function inventory (.pdata), signature generation, and export/PDB symbols

Three reversing capabilities on the win32 surface plus a pure sig-gen handler:

- vmie_win32_functions enumerates a module's functions from the exception
  directory (.pdata RUNTIME_FUNCTION), folding unwind chain continuations into
  their primary - authoritative non-leaf boundaries, not prologue heuristics.
- vmie_win32_exports resolves the export table to {name, rva, ordinal,
  forwarded}: named functions with no PDB or network. vmie_win32_pdb_ref pulls
  the CodeView/RSDS {guid, age, pdb} from the debug directory - the symbol-server
  key for any module (full PDB parsing stays out of scope).
- sig_generate (siggen.h) builds a unique masked signature for a code span,
  wildcarding the rel/RIP-relative displacement bytes the x86 decoder locates and
  growing until it matches the scope exactly once - the dual of sigscan.

The decoder now also reports disp_off/disp_len so a caller can mask the floating
bytes. The MZ/PE walk gains one shared data-directory accessor and one shared
CodeView/RSDS parser; the kernel bootstrap is moved onto both, removing its
private copies - one PE parser in the tree.
This commit is contained in:
2026-06-16 19:27:42 +03:00
parent 06230ac680
commit c4419964aa
9 changed files with 542 additions and 67 deletions
+77
View File
@@ -267,4 +267,81 @@ int vmie_win32_section_view(vmie_win32* v, uint64_t cr3, uint64_t module_base,
const section_desc* sec, view_base mode,
uint8_t* buf, size_t bufcap, mem_view_t* out);
/* ---- function inventory / exports / PDB reference ------------------------ *
* Authoritative module metadata recovered from the PE directories, keyed by
* (vmie_win32*, cr3, module_base) like the section surface. All RVAs are
* image-relative and therefore ASLR-independent (absolute VA = module_base +
* rva); only the headers and the relevant directory need be resident. */
/* A single function extent, taken from one RUNTIME_FUNCTION entry of the
 * module's exception directory (.pdata). Both fields are image-relative and
 * therefore do not move with ASLR.
 *
 * Coverage caveat: .pdata lists NON-LEAF functions only; a leaf function with
 * no unwind data has no entry. What is listed is authoritative, but the list
 * is not a complete function inventory. */
typedef struct {
    uint32_t rva;   /* BeginAddress: function start RVA (VA = module_base + rva) */
    uint32_t size;  /* EndAddress - BeginAddress, in bytes */
} func_range;
/* Enumerate the functions of the module at `module_base` from its exception
 * directory (.pdata). Chain continuations (UNWIND_INFO with
 * UNW_FLAG_CHAININFO) are folded into their primary, so there is one entry
 * per function start.
 *
 *   v           - win32 introspection handle.
 *   cr3         - address space in which `module_base` is mapped.
 *   module_base - base address of the module image.
 *   out         - receives the function extents; NULL => count only.
 *   max         - capacity of `out`, in entries (the example passes the array
 *                 length). NOTE(review): presumably at most `max` entries are
 *                 written - confirm against the implementation.
 *
 * Returns the TOTAL number of functions, or -1 if there is no exception
 * directory or it is unreadable. The total is not limited to `max`, which is
 * why the example clamps its loop to the array length.
 *
 * Example - list the first 64 functions of a module as ASLR-stable RVAs:
 *   func_range fr[64];
 *   int n = vmie_win32_functions(v, pr->cr3, m.base, fr, 64);
 *   for (int i = 0; i < n && i < 64; i++)
 *       printf("sub_%x (%u bytes)\n", fr[i].rva, fr[i].size); */
int vmie_win32_functions(vmie_win32* v, uint64_t cr3, uint64_t module_base,
func_range* out, int max);
/* A single entry of the module's export directory (EAT). */
typedef struct {
    /* Export target RVA (absolute VA = module_base + rva). For a forwarder
     * export this is the RVA of the forwarder string; see `forwarded`. */
    uint32_t rva;
    /* Export ordinal (biased value as exported). */
    uint16_t ordinal;
    /* Nonzero if this is a forwarder: rva points into the export section, not
     * at code - e.g. "NTDLL.RtlAllocateHeap". */
    uint8_t forwarded;
    /* Export name, NUL-terminated and TRUNCATED to 63 chars (long C++ mangled
     * names are cut); "" for by-ordinal-only exports. */
    char name[64];
} export_sym;
/* Enumerate the exports of the module at `module_base` (named functions, no
 * PDB or network needed).
 *
 *   v           - win32 introspection handle.
 *   cr3         - address space in which `module_base` is mapped.
 *   module_base - base address of the module image.
 *   out         - receives the export entries; NULL => count only.
 *   max         - capacity of `out`, in entries (the example passes the array
 *                 length). NOTE(review): presumably at most `max` entries are
 *                 written - confirm against the implementation.
 *
 * Returns the TOTAL number of exports, or -1 if there is no export directory
 * or it is unreadable. Entries are reported in export-table order;
 * by-ordinal-only exports (no name) carry name[0]=='\0'.
 *
 * Example - print a module's named exports:
 *   export_sym es[256];
 *   int n = vmie_win32_exports(v, pr->cr3, m.base, es, 256);
 *   for (int i = 0; i < n && i < 256; i++)
 *       printf("%-40s rva=%#x ord=%u%s\n", es[i].name, es[i].rva,
 *              es[i].ordinal, es[i].forwarded ? " (forwarder)" : ""); */
int vmie_win32_exports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
export_sym* out, int max);
/* CodeView (RSDS) PDB reference taken from the module's debug directory - the
 * key used to look the PDB up on a symbol server.
 *
 * Use {guid, age, pdb} to fetch the PDB out-of-band; PARSING the PDB for
 * internal symbol names is OUT OF SCOPE here (it needs the external file). */
typedef struct {
    /* PDB GUID, 16 bytes in in-memory byte order.
     * NOTE(review): a symbol-server path uses the canonical GUID rendering,
     * where the first three fields (bytes 0-3, 4-5, 6-7) are little-endian
     * integers printed most-significant byte first - reorder those bytes when
     * formatting rather than printing guid[] front to back. */
    uint8_t guid[16];
    /* PDB age. */
    uint32_t age;
    /* PDB file name, NUL-terminated, truncated to 63 chars (e.g. "ntdll.pdb"). */
    char pdb[64];
} pdb_ref;
/* Extract the PDB reference of the module at `module_base`. Generalizes the
 * kernel bootstrap's GUID resolve to any module.
 *
 *   v           - win32 introspection handle.
 *   cr3         - address space in which `module_base` is mapped.
 *   module_base - base address of the module image.
 *   out         - receives {guid, age, pdb} on success.
 *
 * Returns 0 on success, -1 if there is no debug directory, the record is not
 * RSDS, or it is unreadable.
 *
 * Example - format the symbol-server path component for a module. The path
 * uses the canonical GUID rendering, so the three leading little-endian
 * fields are byte-swapped while the trailing 8 bytes are printed in order.
 * NOTE(review): assumes `guid` holds the raw RSDS bytes - confirm against the
 * implementation.
 *   pdb_ref pr_;
 *   if (vmie_win32_pdb_ref(v, pr->cr3, m.base, &pr_) == 0) {
 *       static const int ord[16] = { 3, 2, 1, 0,  5, 4,  7, 6,
 *                                    8, 9, 10, 11, 12, 13, 14, 15 };
 *       char g[33];
 *       for (int i = 0; i < 16; i++)
 *           snprintf(g + i*2, sizeof g - (size_t)i*2, "%02X", pr_.guid[ord[i]]);
 *       printf("%s/%s%X/%s\n", pr_.pdb, g, pr_.age, pr_.pdb);
 *   } */
int vmie_win32_pdb_ref(vmie_win32* v, uint64_t cr3, uint64_t module_base,
pdb_ref* out);
#endif /* VMIE_WIN32_H */