Add function inventory (.pdata), signature generation, and export/PDB symbols

Three reversing capabilities on the win32 surface plus a pure sig-gen handler:

- vmie_win32_functions enumerates a module's functions from the exception
  directory (.pdata RUNTIME_FUNCTION), folding unwind chain continuations into
  their primary - authoritative non-leaf boundaries, not prologue heuristics.
- vmie_win32_exports resolves the export table to {name, rva, ordinal,
  forwarded}: named functions with no PDB or network. vmie_win32_pdb_ref pulls
  the CodeView/RSDS {guid, age, pdb} from the debug directory - the symbol-server
  key for any module (full PDB parsing stays out of scope).
- sig_generate (siggen.h) builds a unique masked signature for a code span,
  wildcarding the rel/RIP-relative displacement bytes the x86 decoder locates and
  growing until it matches the scope exactly once - the dual of sigscan.

The decoder now also reports disp_off/disp_len so a caller can mask the floating
bytes. The MZ/PE walk gains one shared data-directory accessor and one shared
CodeView/RSDS parser; the kernel bootstrap is moved onto both, removing its
private copies - one PE parser in the tree.
This commit is contained in:
2026-06-16 19:27:42 +03:00
parent 06230ac680
commit c4419964aa
9 changed files with 542 additions and 67 deletions
+1
View File
@@ -21,6 +21,7 @@ add_library(vmie STATIC
src/handlers/sigscan.c
src/handlers/sigset.c
src/handlers/codescan.c
src/handlers/siggen.c
src/handlers/x86dec.c
src/handlers/pmap.c
src/handlers/snapdiff.c)
+55
View File
@@ -0,0 +1,55 @@
/* siggen.h - x86-64 code signature generator (pure handler).
*
* Turns a span of code into a UNIQUE masked byte signature suitable for the
* sigscan matcher: opcode/ModRM/fixed bytes are must-match; the rel/RIP-relative
* displacement bytes - the ones that "float" with the load address and with
* relocation - are wildcarded. The result is the dual of sigscan: feed its
* output back into sig_all/sig_first to relocate the same code in another image.
*
* Pure: it depends only on sigscan.h (the pattern + view types and the matcher)
* and x86dec.h (the length decoder that locates the displacement field). It
* touches no vmie_mem and does no I/O; build a view (e.g. a section view) and
* pass it in.
*/
#ifndef VMIE_SIGGEN_H
#define VMIE_SIGGEN_H
#include <stddef.h>
#include "sigscan.h" /* sig_pattern_t, mem_view_t, sig_all, sig_free */
#include "x86dec.h" /* x86_decode + x86_insn.disp_off/disp_len */
/* Build a unique masked signature for the code starting at scope.data[start_off].
* Steps instructions with x86_decode; each instruction contributes its
* opcode/ModRM/fixed bytes as must-match (mask x) and its rel/RIP-relative
* displacement bytes (disp_off..disp_off+disp_len) as wildcards (mask ?), since
* those move with load address / relocation. Grows instruction by instruction
* until the pattern occurs EXACTLY ONCE in `scope` (verified with sig_all) or
* `max_len` bytes are consumed.
* scope - search space the signature must be unique within (e.g. a .text
* section view). Uniqueness is judged inside this view only, and
* positions are expressed in the view's own coordinates - pass a
* MODULE_RVA / SECTION_LOCAL view to get an ASLR-stable result.
* start_off - byte offset in `scope` where the target code begins (must be <
* scope.size).
* max_len - cap on signature length in bytes (e.g. 64); guards against
* non-unique code.
* out - on success, the generated pattern (free with sig_free()).
* Returns the pattern length in bytes on success, 0 if it cannot be made unique
* within max_len, -1 on bad input. The result matches `scope` exactly once, at
* start_off.
*
* v1 wildcards ONLY rel/RIP-relative displacements (the dominant floating bytes);
* absolute immediate relocations are NOT auto-wildcarded (a .reloc cross-check is
* a future extension).
*
* Example - generate a portable signature for a function in .text (MODULE_RVA
* view => an ASLR-stable origin), then relocate it elsewhere:
* sig_pattern_t p;
* int len = sig_generate(text_view, fn_rva - text_view.base_va, 64, &p);
* if (len > 0) {
* uint64_t rva = sig_first(other_text_view, &p); // re-find the function
* sig_free(&p);
* } */
int sig_generate(mem_view_t scope, size_t start_off, size_t max_len,
sig_pattern_t* out);
#endif /* VMIE_SIGGEN_H */
+77
View File
@@ -267,4 +267,81 @@ int vmie_win32_section_view(vmie_win32* v, uint64_t cr3, uint64_t module_base,
const section_desc* sec, view_base mode,
uint8_t* buf, size_t bufcap, mem_view_t* out);
/* ---- function inventory / exports / PDB reference ------------------------ *
* Authoritative module metadata recovered from the PE directories, keyed by
* (vmie_win32*, cr3, module_base) like the section surface. All RVAs are
* image-relative and therefore ASLR-independent (absolute VA = module_base +
* rva); only the headers and the relevant directory need be resident. */
/* One function extent from the module's exception directory (.pdata
* RUNTIME_FUNCTION).
* rva - function start RVA (BeginAddress). Absolute VA = module_base + rva.
* size - EndAddress - BeginAddress, in bytes.
* Only NON-LEAF functions appear in .pdata (leaf functions with no unwind data
* are absent) - authoritative where present, but not a complete function list.
* rva/size are ASLR-independent. */
typedef struct { uint32_t rva; uint32_t size; } func_range;
/* Enumerate functions of the module at `module_base` (cr3 address space) from
* .pdata. Chain continuations (UNWIND_INFO with UNW_FLAG_CHAININFO) are folded
* into their primary - one entry per function start. Returns TOTAL count
* (out=NULL => count only), or -1 if no exception directory / unreadable.
*
* Example - list the first 64 functions of a module as ASLR-stable RVAs:
* func_range fr[64];
* int n = vmie_win32_functions(v, pr->cr3, m.base, fr, 64);
* for (int i = 0; i < n && i < 64; i++)
* printf("sub_%x (%u bytes)\n", fr[i].rva, fr[i].size); */
int vmie_win32_functions(vmie_win32* v, uint64_t cr3, uint64_t module_base,
func_range* out, int max);
/* One exported symbol from the module export directory (EAT).
* rva - export target RVA (absolute VA = module_base + rva). Forwarder
* exports report the forwarder-string RVA; see `forwarded`.
* ordinal - export ordinal (biased value as exported).
* name - export name, NUL-terminated, TRUNCATED to 63 chars (long C++
* mangled names are cut; "" for by-ordinal-only exports).
* forwarded - nonzero if this is a forwarder (rva points into the export
* section, not code - e.g. "NTDLL.RtlAllocateHeap"). */
typedef struct { uint32_t rva; uint16_t ordinal; uint8_t forwarded; char name[64]; } export_sym;
/* Enumerate the module's exports (named functions, no PDB/network needed).
* Returns TOTAL count (out=NULL => count), or -1 if no export directory /
* unreadable. Entries are reported in export-table order; by-ordinal-only
* exports (no name) carry name[0]=='\0'.
*
* Example - print a module's named exports:
* export_sym es[256];
* int n = vmie_win32_exports(v, pr->cr3, m.base, es, 256);
* for (int i = 0; i < n && i < 256; i++)
* printf("%-40s rva=%#x ord=%u%s\n", es[i].name, es[i].rva,
* es[i].ordinal, es[i].forwarded ? " (forwarder)" : ""); */
int vmie_win32_exports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
export_sym* out, int max);
/* CodeView PDB reference from the module debug directory (RSDS). The
* symbol-server lookup key.
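* guid - PDB GUID, 16 raw bytes exactly as stored in the RSDS record (GUID
* struct layout: Data1/Data2/Data3 little-endian, Data4 as plain bytes).
* The symbol-server path prints Data1/Data2/Data3 as integers, so those
* three fields must be byte-reversed when formatting the lookup key.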
* age - PDB age.
* pdb - PDB file name, NUL-terminated, truncated to 63 chars (e.g.
* "ntdll.pdb").
* Use {guid, age, pdb} to fetch the PDB out-of-band; PARSING the PDB for
* internal symbol names is OUT OF SCOPE here (it needs the external file). */
typedef struct { uint8_t guid[16]; uint32_t age; char pdb[64]; } pdb_ref;
/* Extract the module's PDB reference. Returns 0 on success, -1 if no debug
* directory / not RSDS / unreadable. Generalizes the kernel bootstrap's GUID
* resolve to any module.
*
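* Example - format the symbol-server path component for a module. The key
* prints GUID Data1/Data2/Data3 as integers, so their stored little-endian
* bytes are emitted in reverse order; Data4 is emitted as stored:
* static const int ord[16] = {3,2,1,0, 5,4, 7,6, 8,9,10,11,12,13,14,15};
* pdb_ref pr_;
* if (vmie_win32_pdb_ref(v, pr->cr3, m.base, &pr_) == 0) {
* char g[33];
* for (int i = 0; i < 16; i++) sprintf(g + i*2, "%02X", pr_.guid[ord[i]]);
* printf("%s/%s%X/%s\n", pr_.pdb, g, pr_.age, pr_.pdb);
* } */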
int vmie_win32_pdb_ref(vmie_win32* v, uint64_t cr3, uint64_t module_base,
pdb_ref* out);
#endif /* VMIE_WIN32_H */
+15 -1
View File
@@ -38,6 +38,14 @@ typedef struct {
int32_t rel; /* sign-extended branch displacement (if has_rel) */
int has_riprel; /* 1: RIP-relative memory operand (ModRM mod=00,rm=101) */
int32_t riprel; /* sign-extended RIP-relative disp32 (if has_riprel) */
uint8_t disp_off; /* byte offset within the instruction of the rel/RIP-rel
* displacement field, or 0 if the instruction has
* neither (has_rel == 0 && has_riprel == 0). This is the
* field that "floats" with the load address / relocation,
* so a signature generator wildcards exactly these bytes. */
uint8_t disp_len; /* displacement length: 1 (rel8), 4 (rel32 or RIP-rel
* disp32), else 0 (no displacement). The wildcard span is
* [disp_off, disp_off + disp_len). */
} x86_insn;
/* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills
@@ -45,7 +53,13 @@ typedef struct {
* exceed min(avail,15). Length-correct over: legacy prefixes (66/67/F0/F2/F3/
* seg), REX, 1-byte / 0F two-byte / 0F38 / 0F3A maps, ModRM+SIB, disp8/disp32,
* imm8/16/32/64 (66 and REX.W operand-size effects), and VEX (C4/C5). EVEX
* (0x62) is a documented gap: len=0. */
* (0x62) is a documented gap: len=0.
*
* On a decoded instruction it also reports out->disp_off / out->disp_len: the
* byte position and length of the rel/RIP-relative displacement field within the
* instruction (0/0 when there is none). These are exactly the bytes that float
* with the load address / relocation, so a signature generator wildcards
* [disp_off, disp_off+disp_len) and keeps the rest as must-match. */
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out);
/* Absolute target of a rel branch: ip + insn->len + insn->rel (0 unless has_rel). */
+32
View File
@@ -60,4 +60,36 @@ bool pe_section(mem_view_t v, uint64_t module_base, const char* name,
int vmie_pe_section(vmie_mem* m, uintptr_t cr3, uint64_t module_base,
const char* name, uint8_t* buf, size_t bufcap, mem_view_t* out);
/* OptionalHeader DataDirectory indices used across the engine. */
#define PE_DIR_EXPORT 0u /* IMAGE_DIRECTORY_ENTRY_EXPORT */
#define PE_DIR_DEBUG 6u /* IMAGE_DIRECTORY_ENTRY_DEBUG */
#define PE_DIR_EXCEPTION 3u /* IMAGE_DIRECTORY_ENTRY_EXCEPTION (.pdata) */
/* Read one OptionalHeader DataDirectory entry of the PE32+ image based at `base`
* in the address space `cr3`. This is the SINGLE data-directory accessor used by
* every directory walk in the engine (.pdata / export / debug) - it walks the
* DOS+NT headers from `base` once and reads DataDirectory[idx].
* idx - directory index (PE_DIR_*).
* rva - receives DataDirectory[idx].VirtualAddress (0 if the directory is
* absent); never NULL.
* size - receives DataDirectory[idx].Size; may be NULL.
* Returns 0 on success (rva/size filled), -1 if the headers are unreadable. An
* absent directory (VirtualAddress == 0) still returns 0, with rva == 0. */
int pe_data_dir(vmie_mem* m, uintptr_t cr3, uint64_t base, unsigned idx,
uint32_t* rva, uint32_t* size);
/* Extract a module's CodeView RSDS reference from its debug directory. This is
* the SINGLE debug-dir/RSDS parser in the engine, shared by the kernel bootstrap
* (host.c) and the public vmie_win32_pdb_ref - there is no second copy.
* base - image base VA in `cr3`.
* guid[16] - receives the PDB GUID (in-memory byte order); never NULL.
* age - receives the PDB age; never NULL.
* name - receives the NUL-terminated PDB file name; never NULL.
* namecap - capacity of `name` (>= 1). The name is truncated to namecap-1.
* Walks PE_DIR_DEBUG for an IMAGE_DEBUG_TYPE_CODEVIEW entry whose payload starts
* with 'RSDS', then reads {guid, age, name}. Returns 0 on success, -1 if there
* is no debug directory, no CodeView/RSDS entry, or the bytes are unreadable. */
int pe_pdb_ref(vmie_mem* m, uintptr_t cr3, uint64_t base,
uint8_t guid[16], uint32_t* age, char* name, size_t namecap);
#endif /* VMIE_PE_H */
+2 -52
View File
@@ -51,11 +51,6 @@ int cr3_recover(vmie_win32* v, uint64_t va_self, uint64_t target_pa, uintptr_t*
*cr3_out = best;
return 0;
}
#define DIR_EXPORT 0u
#define DIR_DEBUG 6u
#define DBG_CODEVIEW 2u
#define CV_RSDS 0x53445352u
static int beacon_find(vmie_mem* m, uint64_t* pa, uint64_t* va) {
void *ptr = m->pa;
const void *end = m->pa + m->fsize;
@@ -74,51 +69,6 @@ static int beacon_find(vmie_mem* m, uint64_t* pa, uint64_t* va) {
return -1;
}
/* Fetch OptionalHeader.DataDirectory[idx] of the PE32+ image at `base` (address
 * space `cr3`). `rva` receives the entry's VirtualAddress; `size`, when
 * non-NULL, receives its Size. Returns 0 on success, -1 if any guest read
 * fails. */
static int pe_datadir(vmie_mem* m, uintptr_t cr3, uint64_t base, unsigned idx, uint32_t* rva, uint32_t* size) {
    uint32_t nt_off; /* e_lfanew: offset of the NT headers */
    if (gva_read(m, cr3, base + 0x3C, &nt_off, 4)) {
        return -1;
    }

    /* Signature+FileHeader = 0x18, DataDirectory[] at OptionalHeader+0x70,
     * 8 bytes per entry. */
    const uint64_t entry = base + nt_off + 0x18 + 0x70 + (uint64_t)idx*8;
    if (gva_read(m, cr3, entry, rva, 4)) {
        return -1;
    }
    if (!size) {
        return 0; /* caller does not want the Size field */
    }
    return gva_read(m, cr3, entry + 4, size, 4) ? -1 : 0;
}
/* Extract the CodeView RSDS record {guid, age, pdb name} of the image at `base`.
 *
 * Walks the debug directory (28-byte IMAGE_DEBUG_DIRECTORY entries) for the
 * first CodeView entry and requires its payload to start with 'RSDS'.
 *   guid    - receives the 16 raw GUID bytes.
 *   age     - receives the PDB age.
 *   name    - receives the NUL-terminated PDB name, truncated to namecap-1.
 *   namecap - capacity of `name`; 0 is rejected.
 * Returns 0 on success, -1 when there is no debug directory, no RSDS record, or
 * a required read fails. The name is best-effort: if it cannot be read at all,
 * `name` is returned as "" rather than left indeterminate. */
static int pe_pdb(vmie_mem* m, uintptr_t cr3, uint64_t base, uint8_t guid[16], uint32_t* age, char* name, size_t namecap) {
    if (namecap == 0) {
        return -1; /* name[namecap - 1] would write before the buffer */
    }
    uint32_t dbg_rva, dbg_sz;
    if (pe_datadir(m, cr3, base, DIR_DEBUG, &dbg_rva, &dbg_sz) || !dbg_rva) {
        return -1;
    }
    /* Count entries up front: "o + 0x1C <= dbg_sz" can wrap on a corrupt size. */
    const uint32_t n_entries = dbg_sz / 0x1Cu; /* IMAGE_DEBUG_DIRECTORY[] (28B) */
    for (uint32_t i = 0; i < n_entries; i++) {
        const uint64_t ent = base + dbg_rva + (uint64_t)i * 0x1Cu;
        uint32_t type, cv_rva, sig;
        if (gva_read(m, cr3, ent + 0x0C, &type, 4)) {
            return -1;
        }
        if (type != DBG_CODEVIEW) {
            continue;
        }
        if (gva_read(m, cr3, ent + 0x14, &cv_rva, 4)) { /* AddressOfRawData RVA */
            return -1;
        }
        const uint64_t cv = base + cv_rva;
        if (gva_read(m, cr3, cv, &sig, 4) || sig != CV_RSDS) {
            return -1;
        }
        if (gva_read(m, cr3, cv + 0x04, guid, 16)) {
            return -1;
        }
        if (gva_read(m, cr3, cv + 0x14, age, 4)) {
            return -1;
        }
        /* Best-effort name: a fixed-size read can run past the last resident
         * page even though the (shorter) name is readable, so shrink the read
         * until it succeeds. */
        size_t got = 0;
        for (size_t z = namecap - 1; z > 0; z--) {
            if (gva_read(m, cr3, cv + 0x18, name, z) == 0) {
                got = z;
                break;
            }
        }
        name[got] = 0; /* got == 0: nothing readable -> empty string */
        return 0;
    }
    return -1;
}
static int find_ntoskrnl(vmie_mem* m, uintptr_t cr3, uint64_t* base, uint8_t guid[16], uint32_t* age) {
const uint64_t t = cr3 & PFN_MASK;
@@ -152,7 +102,7 @@ static int find_ntoskrnl(vmie_mem* m, uintptr_t cr3, uint64_t* base, uint8_t gui
if (gva_read(m, cr3, va, &mz, 2) || mz != MZ) {
continue;
}
if (pe_pdb(m, cr3, va, guid, age, pdb, sizeof pdb)) {
if (pe_pdb_ref(m, cr3, va, guid, age, pdb, sizeof pdb)) {
continue;
}
if (strncmp(pdb, "ntkrnlmp.pdb", 12) != 0) {
@@ -168,7 +118,7 @@ static int find_ntoskrnl(vmie_mem* m, uintptr_t cr3, uint64_t* base, uint8_t gui
static uint32_t ko_export_rva(vmie_mem* m, uintptr_t cr3, uint64_t kbase, const char* want) {
uint32_t exp_rva;
if (pe_datadir(m, cr3, kbase, DIR_EXPORT, &exp_rva, NULL) || !exp_rva) {
if (pe_data_dir(m, cr3, kbase, PE_DIR_EXPORT, &exp_rva, NULL) || !exp_rva) {
return 0;
}
+221
View File
@@ -122,6 +122,227 @@ int vmie_pe_section(vmie_mem* m, uintptr_t cr3, uint64_t module_base,
return 0;
}
/* ---- shared data-directory + debug-dir parse ----------------------------- *
* The single DataDirectory accessor and the single CodeView/RSDS parser of the
* engine. Both read the guest image directly under cr3 (gva_read), so they work
* for any module without first gathering the section bodies, and are reused by
* the kernel bootstrap (host.c) and the public win32 surface alike. Cold:
* one-shot header reads, not a hot loop. */
/* CodeView debug-record constants (IMAGE_DEBUG_DIRECTORY + RSDS payload). */
#define DBG_TYPE_CODEVIEW 2u
#define CV_SIG_RSDS 0x53445352u /* 'RSDS' little-endian */
#define DBG_DIR_ENTRY 0x1Cu /* sizeof(IMAGE_DEBUG_DIRECTORY) = 28 */
int pe_data_dir(vmie_mem* m, uintptr_t cr3, uint64_t base, unsigned idx,
                uint32_t* rva, uint32_t* size) __attribute__((cold));

/* Read DataDirectory[idx] of the PE32+ image based at `base` in address space
 * `cr3`. `rva` receives VirtualAddress; `size` (optional, may be NULL) receives
 * Size. Returns 0 on success, -1 if a guest read fails. */
int pe_data_dir(vmie_mem* m, uintptr_t cr3, uint64_t base, unsigned idx,
                uint32_t* rva, uint32_t* size) {
    /* e_lfanew lives at DOS header +0x3C and locates the NT headers. */
    uint32_t nt_off;
    if (gva_read(m, cr3, base + 0x3C, &nt_off, 4)) {
        return -1;
    }

    /* Skip Signature(4)+FileHeader(20)=0x18, then 0x70 into the PE32+
     * OptionalHeader to reach DataDirectory[]; each entry is 8 bytes. */
    const uint64_t entry = base + nt_off + 0x18 + 0x70 + (uint64_t)idx * 8;
    if (gva_read(m, cr3, entry, rva, 4)) {
        return -1;
    }

    if (!size) {
        return 0; /* Size not requested */
    }
    if (gva_read(m, cr3, entry + 4, size, 4)) {
        return -1;
    }
    return 0;
}
int pe_pdb_ref(vmie_mem* m, uintptr_t cr3, uint64_t base,
               uint8_t guid[16], uint32_t* age, char* name, size_t namecap)
    __attribute__((cold));

/* Extract the CodeView RSDS reference {guid, age, pdb name} of the image at
 * `base` (address space `cr3`).
 *
 * Walks the PE_DIR_DEBUG directory for the first IMAGE_DEBUG_TYPE_CODEVIEW
 * entry and requires its payload to begin with 'RSDS'.
 *   guid    - receives the 16 raw GUID bytes as stored in the record.
 *   age     - receives the PDB age.
 *   name    - receives the NUL-terminated PDB file name, truncated to
 *             namecap-1 characters.
 *   namecap - capacity of `name`; 0 is rejected with -1.
 * Returns 0 on success, -1 if there is no debug directory, no CodeView/RSDS
 * entry, or a required read fails. Reading the name is best-effort: when no
 * prefix of it is readable the function still returns 0 with name == "". */
int pe_pdb_ref(vmie_mem* m, uintptr_t cr3, uint64_t base,
               uint8_t guid[16], uint32_t* age, char* name, size_t namecap) {
    if (namecap == 0) {
        return -1;
    }
    uint32_t dbg_rva, dbg_sz;
    if (pe_data_dir(m, cr3, base, PE_DIR_DEBUG, &dbg_rva, &dbg_sz) || !dbg_rva) {
        return -1;
    }

    /* Derive the entry count once; the former "o + DBG_DIR_ENTRY <= dbg_sz"
     * bound could wrap around for a corrupt directory size near UINT32_MAX. */
    const uint32_t n_entries = dbg_sz / DBG_DIR_ENTRY;
    for (uint32_t i = 0; i < n_entries; i++) {
        const uint64_t ent = base + dbg_rva + (uint64_t)i * DBG_DIR_ENTRY;
        uint32_t type, cv_rva, sig;
        if (gva_read(m, cr3, ent + 0x0C, &type, 4)) {       /* Type */
            return -1;
        }
        if (type != DBG_TYPE_CODEVIEW) {
            continue;
        }
        if (gva_read(m, cr3, ent + 0x14, &cv_rva, 4)) {     /* AddressOfRawData */
            return -1;
        }
        const uint64_t cv = base + cv_rva;
        if (gva_read(m, cr3, cv, &sig, 4) || sig != CV_SIG_RSDS) {
            return -1;
        }
        if (gva_read(m, cr3, cv + 0x04, guid, 16)) {        /* GUID */
            return -1;
        }
        if (gva_read(m, cr3, cv + 0x14, age, 4)) {          /* Age */
            return -1;
        }

        /* PdbName (best-effort). A fixed namecap-byte read can run past the
         * last resident page even when the shorter NUL-terminated name is
         * fully readable, so shrink the read until one succeeds - the same
         * strategy the export-name reader uses. If nothing is readable the
         * result is an empty string instead of indeterminate bytes. */
        size_t got = 0;
        for (size_t z = namecap - 1; z > 0; z--) {
            if (gva_read(m, cr3, cv + 0x18, name, z) == 0) {
                got = z;
                break;
            }
        }
        name[got] = 0;
        return 0;
    }
    return -1;
}
/* ---- public win32 surface: function inventory (.pdata) ------------------- *
* RUNTIME_FUNCTION (12 bytes): { uint32 Begin; uint32 End; uint32 UnwindInfo }.
* The exception directory is the authoritative non-leaf function table. A chain
* continuation (its UNWIND_INFO header has UNW_FLAG_CHAININFO) is NOT a function
* start - it is folded into its primary by skipping entries whose Begin falls
* inside the previous accepted [Begin, End). Cold: one-shot directory read. */
#define RTF_SIZE 12u /* sizeof(RUNTIME_FUNCTION) */
int vmie_win32_functions(vmie_win32* v, uint64_t cr3, uint64_t module_base,
func_range* out, int max) __attribute__((cold));
int vmie_win32_functions(vmie_win32* v, uint64_t cr3, uint64_t module_base,
func_range* out, int max) {
vmie_mem* m = vmie_win32_mem(v);
if (!m) { return -1; }
uint32_t pd_rva, pd_sz;
if (pe_data_dir(m, cr3, module_base, PE_DIR_EXCEPTION, &pd_rva, &pd_sz) ||
!pd_rva || pd_sz < RTF_SIZE) {
return -1;
}
int total = 0;
uint32_t prev_end = 0; /* End of the last accepted primary */
int have_prev = 0;
const uint32_t n = pd_sz / RTF_SIZE;
for (uint32_t i = 0; i < n; i++) {
uint32_t begin, end;
if (gva_read(m, cr3, module_base + pd_rva + (uint64_t)i * RTF_SIZE,
&begin, 4) ||
gva_read(m, cr3, module_base + pd_rva + (uint64_t)i * RTF_SIZE + 4,
&end, 4)) {
return -1;
}
if (end <= begin) {
continue; /* malformed / empty: skip */
}
/* Fold chain continuations: an entry whose Begin lies within the last
* accepted [Begin, End) is a continuation of that function, not a new
* start (UNW_FLAG_CHAININFO). RUNTIME_FUNCTIONs are address-sorted, so
* comparing against the previous primary's extent suffices. */
if (have_prev && begin < prev_end) {
continue;
}
if (out && total < max) {
out[total].rva = begin;
out[total].size = end - begin;
}
prev_end = end;
have_prev = 1;
total++;
}
return total;
}
/* ---- public win32 surface: exports (EAT) --------------------------------- *
* IMAGE_EXPORT_DIRECTORY (40 bytes): Base(+0x10), NumberOfFunctions(+0x14),
* NumberOfNames(+0x18), AddressOfFunctions(+0x1C), AddressOfNames(+0x20),
* AddressOfNameOrdinals(+0x24). We iterate AddressOfFunctions[] (one entry per
* exported ordinal); the name for an index is found by reverse-mapping
* AddressOfNameOrdinals[]. A function RVA that falls within the export
* directory's own [rva, rva+size) is a forwarder string, not code. Cold:
* one-shot directory read. */
#define EXP_DIR_SIZE 40u

int vmie_win32_exports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
                       export_sym* out, int max) __attribute__((cold));

/* Enumerate the exports of the module at `module_base` (address space `cr3`).
 *   out - receives up to `max` entries in export-address-table order; NULL =>
 *         count only (no names are resolved in that case).
 *   max - capacity of `out`.
 * Returns the TOTAL number of non-empty export slots (which may exceed `max`),
 * or -1 if there is no export directory, its NumberOfFunctions is implausible,
 * or a table read fails. Names are best-effort: an unreadable name leaves
 * name == "". Name resolution scans AddressOfNameOrdinals once per stored
 * entry, so the cost is O(stored entries x NumberOfNames) guest reads. */
int vmie_win32_exports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
                       export_sym* out, int max) {
    vmie_mem* m = vmie_win32_mem(v);
    if (!m) { return -1; }

    uint32_t exp_rva, exp_sz;
    if (pe_data_dir(m, cr3, module_base, PE_DIR_EXPORT, &exp_rva, &exp_sz) ||
        !exp_rva) {
        return -1;
    }
    uint8_t ed[EXP_DIR_SIZE];
    if (gva_read(m, cr3, module_base + exp_rva, ed, sizeof ed)) {
        return -1;
    }
    uint32_t ord_base, n_funcs, n_names, a_funcs, a_names, a_ords;
    memcpy(&ord_base, ed + 0x10, 4);
    memcpy(&n_funcs,  ed + 0x14, 4);
    memcpy(&n_names,  ed + 0x18, 4);
    memcpy(&a_funcs,  ed + 0x1C, 4);
    memcpy(&a_names,  ed + 0x20, 4);
    memcpy(&a_ords,   ed + 0x24, 4);

    /* Export ordinals and name-ordinal indices are 16-bit, so a table cannot
     * legitimately hold more than 65536 slots. A larger count means the
     * directory is corrupt or not resident; refuse it rather than issue up to
     * 2^32 guest reads. */
    if (n_funcs > 0x10000u) {
        return -1;
    }

    int total = 0;
    for (uint32_t i = 0; i < n_funcs; i++) {
        uint32_t frva;
        if (gva_read(m, cr3, module_base + a_funcs + (uint64_t)i * 4, &frva, 4)) {
            return -1;
        }
        if (frva == 0) {
            continue; /* empty export slot */
        }
        if (out && total < max) {
            export_sym* e = &out[total];
            memset(e, 0, sizeof *e);
            e->rva     = frva;
            e->ordinal = (uint16_t)(ord_base + i);
            /* Forwarder iff the RVA lies inside the export directory's own
             * [rva, rva+size). Compare by offset: exp_rva + exp_sz can wrap
             * in 32 bits and would then misclassify every entry. */
            e->forwarded = (frva >= exp_rva && frva - exp_rva < exp_sz) ? 1u : 0u;

            /* reverse map: is there a name whose ordinal index == i? */
            for (uint32_t k = 0; k < n_names; k++) {
                uint16_t ord;
                if (gva_read(m, cr3, module_base + a_ords + (uint64_t)k * 2,
                             &ord, 2)) {
                    return -1;
                }
                if (ord != i) {
                    continue;
                }
                uint32_t nrva;
                if (gva_read(m, cr3, module_base + a_names + (uint64_t)k * 4,
                             &nrva, 4)) {
                    return -1;
                }
                /* Read the name best-effort up to the end of its resident page:
                 * a 64-byte fixed read can run past the section/page even when
                 * the (shorter, NUL-terminated) name itself is fully resident. */
                for (size_t z = sizeof e->name - 1; z > 0; z--) {
                    if (gva_read(m, cr3, module_base + nrva, e->name, z) == 0) {
                        e->name[z] = 0;
                        break;
                    }
                }
                e->name[sizeof e->name - 1] = 0; /* truncate long names */
                break;
            }
        }
        total++;
    }
    return total;
}
/* ---- public win32 surface: PDB reference (CodeView RSDS) ------------------ *
* Thin wrapper over the shared pe_pdb_ref: the same debug-dir/RSDS parser the
* kernel bootstrap uses, generalized to any module. Cold. */
int vmie_win32_pdb_ref(vmie_win32* v, uint64_t cr3, uint64_t module_base,
                       pdb_ref* out) __attribute__((cold));

/* Fill `out` with the CodeView {guid, age, pdb} of the module at `module_base`
 * by delegating to pe_pdb_ref. Returns pe_pdb_ref's result, or -1 when the
 * session has no memory backend or `out` is NULL. */
int vmie_win32_pdb_ref(vmie_win32* v, uint64_t cr3, uint64_t module_base,
                       pdb_ref* out) {
    vmie_mem* mem = vmie_win32_mem(v);
    if (!mem) {
        return -1;
    }
    if (!out) {
        return -1;
    }
    return pe_pdb_ref(mem, cr3, module_base,
                      out->guid, &out->age, out->pdb, sizeof out->pdb);
}
/* ---- public win32 surface: section enumeration + section views ----------- *
* Cold paths (one-shot header parse / section gather, not a hot loop). They
* reuse pe_sections / the shared section-table walk above - no second parser -
+106
View File
@@ -0,0 +1,106 @@
/* siggen.c - x86-64 code signature generator (see siggen.h).
*
* Cold path: a one-shot build over a code span, not a hot loop. It reuses the
* existing primitives and adds no parallel logic:
* - x86_decode - to step instructions and learn each one's length and the
* position/length of its rel/RIP-relative displacement
* (x86_insn.disp_off/disp_len). The decoder is the single
* source of instruction length here - siggen has no decoder.
* - sig_from_bytes / - to build a sig_pattern_t from the accumulated bytes/mask,
* sig_parse_mask and sig_all to test uniqueness. siggen has no matcher.
*
* Boundary: includes only siggen.h (-> sigscan.h + x86dec.h) and the C stdlib.
*/
#include "siggen.h"
#include <stdlib.h>
#include <string.h>
/* Test whether the masked pattern (bytes + NUL-terminated mask) occurs EXACTLY
* once in `scope`. Builds a transient sig_pattern_t via sig_parse_mask (the
* shared parser, which sizes from strlen(mask)) and runs sig_all (the shared
* matcher) - no bespoke matching. Returns 1 if unique, 0 otherwise. */
/* Return 1 when the masked pattern (`bytes` plus NUL-terminated `mask`) is
 * found exactly once in `scope`, else 0 (including when the pattern cannot be
 * built). Uses sig_parse_mask to build a temporary pattern and sig_all to count
 * hits, asking for at most two. */
__attribute__((cold))
static int unique_in_scope(mem_view_t scope, const uint8_t* bytes,
                           const char* mask) {
    sig_pattern_t pat;
    int is_unique = 0;

    if (sig_parse_mask(bytes, mask, &pat)) {
        uint64_t found[2];
        const size_t count = sig_all(scope, &pat, found, 2);
        sig_free(&pat);
        is_unique = (count == 1);
    }
    return is_unique;
}
/* Build a unique masked signature for the code at scope.data[start_off].
 *
 * Decodes one instruction at a time with x86_decode, appends its bytes as
 * must-match ('x') and turns its rel/RIP-relative displacement bytes
 * [disp_off, disp_off+disp_len) into wildcards ('?'), then checks uniqueness
 * with unique_in_scope. Stops at the first unique prefix.
 *   scope     - view the signature must be unique within.
 *   start_off - offset of the target code in `scope` (< scope.size).
 *   max_len   - cap on signature length in bytes; values larger than the bytes
 *               remaining in `scope` are treated as "up to the end of scope".
 *   out       - on success the generated pattern (release with sig_free); on
 *               a 0 return it is zero-filled.
 * Returns the signature length on success, 0 if no unique signature fits in
 * the budget (or an instruction cannot be decoded), -1 on bad input or
 * allocation/pattern-build failure. */
__attribute__((cold))
int sig_generate(mem_view_t scope, size_t start_off, size_t max_len,
                 sig_pattern_t* out) {
    if (!scope.data || !out || max_len == 0 || start_off >= scope.size) {
        return -1;
    }

    const uint8_t* base = scope.data + start_off;
    const size_t   span = scope.size - start_off; /* readable bytes ahead */

    /* The decoder never consumes more than `span` bytes, so a signature can
     * never be longer than that. Clamp the budget before sizing the buffers:
     * a generous "no limit" cap then allocates only what can be used instead
     * of failing the allocation (and max_len + 1 cannot wrap). */
    if (max_len > span) {
        max_len = span;
    }

    /* Must-match bytes plus an 'x'/'?' mask string, NUL-terminated for
     * sig_parse_mask. */
    uint8_t* bytes = malloc(max_len);
    char*    mask  = malloc(max_len + 1);
    if (!bytes || !mask) {
        free(bytes);
        free(mask);
        return -1;
    }

    size_t cur = 0;    /* bytes accumulated so far */
    int    unique = 0; /* set once the prefix matches exactly once */
    while (cur < max_len) {
        x86_insn in;
        const int ilen = x86_decode(base + cur, span - cur, &in);
        if (ilen <= 0) {
            break; /* undecodable: cannot grow */
        }
        const size_t end = cur + (size_t)ilen;
        if (end > max_len) {
            break; /* next instruction would exceed the budget */
        }

        /* Whole instruction as must-match first... */
        memcpy(bytes + cur, base + cur, (size_t)ilen);
        memset(mask + cur, 'x', (size_t)ilen);

        /* ...then wildcard the displacement field, which floats with load
         * address / relocation. disp_off is instruction-relative; disp_len is
         * 0 when there is no such field. Never step past the instruction. */
        if (in.disp_len > 0) {
            const size_t d0 = cur + in.disp_off;
            size_t d1 = d0 + in.disp_len;
            if (d1 > end) {
                d1 = end;
            }
            for (size_t i = d0; i < d1; i++) {
                bytes[i] = 0;
                mask[i]  = '?';
            }
        }

        cur = end;
        mask[cur] = '\0';
        if (unique_in_scope(scope, bytes, mask)) {
            unique = 1;
            break;
        }
    }

    int rc;
    if (unique) {
        rc = sig_parse_mask(bytes, mask, out) ? (int)cur : -1;
    } else {
        memset(out, 0, sizeof *out);
        rc = 0; /* not unique in budget */
    }
    free(bytes);
    free(mask);
    return rc;
}
+33 -14
View File
@@ -151,7 +151,7 @@ static size_t imm_bytes(unsigned imclass, const dstate* st) {
* mode, so it is not consulted here. */
__attribute__((hot))
static size_t decode_modrm(const uint8_t* p, size_t avail,
int* has_rip, int32_t* rip) {
int* has_rip, int32_t* rip, size_t* rip_off) {
if (avail < 1) { return 0; }
const uint8_t modrm = p[0];
const unsigned mod = (modrm >> 6) & 3u;
@@ -179,6 +179,7 @@ static size_t decode_modrm(const uint8_t* p, size_t avail,
((uint32_t)p[n + 2] << 16) | ((uint32_t)p[n + 3] << 24));
if (has_rip) { *has_rip = 1; }
if (rip) { *rip = d; }
if (rip_off) { *rip_off = n; } /* disp32 begins at ModRM+n */
n += 4;
return n;
}
@@ -224,7 +225,9 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
int rip_present = 0;
int32_t rip = 0;
const size_t m = decode_modrm(code + n, avail - n, &rip_present, &rip);
size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, avail - n, &rip_present, &rip, &rip_off);
if (m == 0) { return 0; }
n += m;
@@ -241,20 +244,24 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
out->rel = 0;
out->has_riprel = rip_present;
out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
return (int)n;
}
/* ---- branch displacement read -------------------------------------------- *
* Read a rel8 (bytes==1) or rel32 (bytes==4) branch displacement at `p`,
* sign-extend into out->rel, and mark has_rel. */
static void read_rel(const uint8_t* p, size_t bytes, x86_insn* out) {
static void read_rel(const uint8_t* p, size_t off, size_t bytes, x86_insn* out) {
if (bytes == 1) {
out->rel = (int32_t)(int8_t)p[0];
} else { /* 4 bytes */
out->rel = (int32_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8) |
((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24));
}
out->has_rel = 1;
out->has_rel = 1;
out->disp_off = (uint8_t)off; /* rel field begins here */
out->disp_len = (uint8_t)bytes; /* rel8 -> 1, rel32 -> 4 */
}
/* ---- main decode --------------------------------------------------------- */
@@ -266,6 +273,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
out->len = 0; out->flow = X86_OTHER;
out->has_rel = 0; out->rel = 0;
out->has_riprel = 0; out->riprel = 0;
out->disp_off = 0; out->disp_len = 0;
}
return 0;
}
@@ -274,6 +282,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
out->len = 0; out->flow = X86_OTHER;
out->has_rel = 0; out->rel = 0;
out->has_riprel = 0; out->riprel = 0;
out->disp_off = 0; out->disp_len = 0;
const size_t cap = avail < 15u ? avail : 15u; /* never decode past 15 */
size_t n = 0;
@@ -327,12 +336,15 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
op = code[n];
n += 1;
/* every 0F38 opcode has a ModRM and no immediate. */
int rip_present = 0; int32_t rip = 0;
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, cap - n,
&rip_present, &rip);
&rip_present, &rip, &rip_off);
if (m == 0) { return 0; }
n += m;
out->has_riprel = rip_present; out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
if (n < 1 || n > 15 || n > avail) { return 0; }
out->len = (uint8_t)n;
return (int)n;
@@ -341,14 +353,17 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
if (n >= cap) { return 0; }
op = code[n];
n += 1;
int rip_present = 0; int32_t rip = 0;
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, cap - n,
&rip_present, &rip);
&rip_present, &rip, &rip_off);
if (m == 0) { return 0; }
n += m;
if (n >= cap) { return 0; } /* trailing imm8 */
n += 1;
out->has_riprel = rip_present; out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
if (n < 1 || n > 15 || n > avail) { return 0; }
out->len = (uint8_t)n;
return (int)n;
@@ -366,11 +381,15 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
if (n >= cap) { return 0; }
modrm = code[n];
have_modrm = 1;
int rip_present = 0; int32_t rip = 0;
const size_t m = decode_modrm(code + n, cap - n, &rip_present, &rip);
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, cap - n, &rip_present, &rip,
&rip_off);
if (m == 0) { return 0; }
n += m;
out->has_riprel = rip_present; out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
}
/* ---- immediate ---- */
@@ -402,12 +421,12 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
switch (op) {
case 0xE8u: { /* CALL rel32 */
out->flow = X86_CALL;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
break;
}
case 0xE9u: case 0xEBu: { /* JMP rel32 / rel8 */
out->flow = X86_JMP;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
break;
}
case 0xC2u: case 0xC3u: case 0xCAu: case 0xCBu: { /* RET forms */
@@ -429,14 +448,14 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
default: { /* 70..7F Jcc rel8 */
if (op >= 0x70u && op <= 0x7Fu) {
out->flow = X86_JCC;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
}
break;
}
}
} else if (op >= 0x80u && op <= 0x8Fu) { /* 0F 80..8F Jcc rel32 */
out->flow = X86_JCC;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
}
out->len = (uint8_t)n;