Add function inventory (.pdata), signature generation, and export/PDB symbols

Three reversing capabilities on the win32 surface plus a pure sig-gen handler:

- vmie_win32_functions enumerates a module's functions from the exception
  directory (.pdata RUNTIME_FUNCTION), folding unwind-chain continuations into
  their primary entry. This yields authoritative boundaries for non-leaf
  functions rather than prologue heuristics.
- vmie_win32_exports resolves the export table to {name, rva, ordinal,
  forwarded}, giving named functions without needing a PDB or network access.
  vmie_win32_pdb_ref pulls the CodeView/RSDS {guid, age, pdb} from the debug
  directory - the symbol-server key for any module (full PDB parsing stays out
  of scope).
- sig_generate (siggen.h) builds a unique masked signature for a code span,
  wildcarding the rel/RIP-relative displacement bytes the x86 decoder locates and
  growing until it matches the scope exactly once - the dual of sigscan.

The decoder now also reports disp_off/disp_len so a caller can mask the floating
bytes. The MZ/PE walk gains one shared data-directory accessor and one shared
CodeView/RSDS parser; the kernel bootstrap is moved onto both, removing its
private copies and leaving a single PE parser in the tree.
This commit is contained in:
2026-06-16 19:27:42 +03:00
parent 06230ac680
commit c4419964aa
9 changed files with 542 additions and 67 deletions
+33 -14
View File
@@ -151,7 +151,7 @@ static size_t imm_bytes(unsigned imclass, const dstate* st) {
* mode, so it is not consulted here. */
__attribute__((hot))
static size_t decode_modrm(const uint8_t* p, size_t avail,
int* has_rip, int32_t* rip) {
int* has_rip, int32_t* rip, size_t* rip_off) {
if (avail < 1) { return 0; }
const uint8_t modrm = p[0];
const unsigned mod = (modrm >> 6) & 3u;
@@ -179,6 +179,7 @@ static size_t decode_modrm(const uint8_t* p, size_t avail,
((uint32_t)p[n + 2] << 16) | ((uint32_t)p[n + 3] << 24));
if (has_rip) { *has_rip = 1; }
if (rip) { *rip = d; }
if (rip_off) { *rip_off = n; } /* disp32 begins at ModRM+n */
n += 4;
return n;
}
@@ -224,7 +225,9 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
int rip_present = 0;
int32_t rip = 0;
const size_t m = decode_modrm(code + n, avail - n, &rip_present, &rip);
size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, avail - n, &rip_present, &rip, &rip_off);
if (m == 0) { return 0; }
n += m;
@@ -241,20 +244,24 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
out->rel = 0;
out->has_riprel = rip_present;
out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
return (int)n;
}
/* ---- branch displacement read -------------------------------------------- *
* Read a rel8 (bytes==1) or rel32 (bytes==4) branch displacement at `p`,
* sign-extend into out->rel, and mark has_rel. */
static void read_rel(const uint8_t* p, size_t bytes, x86_insn* out) {
static void read_rel(const uint8_t* p, size_t off, size_t bytes, x86_insn* out) {
if (bytes == 1) {
out->rel = (int32_t)(int8_t)p[0];
} else { /* 4 bytes */
out->rel = (int32_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8) |
((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24));
}
out->has_rel = 1;
out->has_rel = 1;
out->disp_off = (uint8_t)off; /* rel field begins here */
out->disp_len = (uint8_t)bytes; /* rel8 -> 1, rel32 -> 4 */
}
/* ---- main decode --------------------------------------------------------- */
@@ -266,6 +273,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
out->len = 0; out->flow = X86_OTHER;
out->has_rel = 0; out->rel = 0;
out->has_riprel = 0; out->riprel = 0;
out->disp_off = 0; out->disp_len = 0;
}
return 0;
}
@@ -274,6 +282,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
out->len = 0; out->flow = X86_OTHER;
out->has_rel = 0; out->rel = 0;
out->has_riprel = 0; out->riprel = 0;
out->disp_off = 0; out->disp_len = 0;
const size_t cap = avail < 15u ? avail : 15u; /* never decode past 15 */
size_t n = 0;
@@ -327,12 +336,15 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
op = code[n];
n += 1;
/* every 0F38 opcode has a ModRM and no immediate. */
int rip_present = 0; int32_t rip = 0;
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, cap - n,
&rip_present, &rip);
&rip_present, &rip, &rip_off);
if (m == 0) { return 0; }
n += m;
out->has_riprel = rip_present; out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
if (n < 1 || n > 15 || n > avail) { return 0; }
out->len = (uint8_t)n;
return (int)n;
@@ -341,14 +353,17 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
if (n >= cap) { return 0; }
op = code[n];
n += 1;
int rip_present = 0; int32_t rip = 0;
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, cap - n,
&rip_present, &rip);
&rip_present, &rip, &rip_off);
if (m == 0) { return 0; }
n += m;
if (n >= cap) { return 0; } /* trailing imm8 */
n += 1;
out->has_riprel = rip_present; out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
if (n < 1 || n > 15 || n > avail) { return 0; }
out->len = (uint8_t)n;
return (int)n;
@@ -366,11 +381,15 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
if (n >= cap) { return 0; }
modrm = code[n];
have_modrm = 1;
int rip_present = 0; int32_t rip = 0;
const size_t m = decode_modrm(code + n, cap - n, &rip_present, &rip);
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
const size_t modrm_at = n;
const size_t m = decode_modrm(code + n, cap - n, &rip_present, &rip,
&rip_off);
if (m == 0) { return 0; }
n += m;
out->has_riprel = rip_present; out->riprel = rip;
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
out->disp_len = rip_present ? 4u : 0u;
}
/* ---- immediate ---- */
@@ -402,12 +421,12 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
switch (op) {
case 0xE8u: { /* CALL rel32 */
out->flow = X86_CALL;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
break;
}
case 0xE9u: case 0xEBu: { /* JMP rel32 / rel8 */
out->flow = X86_JMP;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
break;
}
case 0xC2u: case 0xC3u: case 0xCAu: case 0xCBu: { /* RET forms */
@@ -429,14 +448,14 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
default: { /* 70..7F Jcc rel8 */
if (op >= 0x70u && op <= 0x7Fu) {
out->flow = X86_JCC;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
}
break;
}
}
} else if (op >= 0x80u && op <= 0x8Fu) { /* 0F 80..8F Jcc rel32 */
out->flow = X86_JCC;
read_rel(code + (n - im), im, out);
read_rel(code + (n - im), n - im, im, out);
}
out->len = (uint8_t)n;