From 79e82ffc6afc50e1a120a78adc0601d391400192 Mon Sep 17 00:00:00 2001 From: Gregory Lirent Date: Tue, 16 Jun 2026 19:52:25 +0300 Subject: [PATCH] Add code-structure analysis: call graph, jump tables, basic blocks, constant xref Wave 1 of the code-analysis layer, built on the x86-64 decoder: - vmie_win32_callgraph walks each .pdata function with the decoder and emits an edge for every direct call/jmp whose target lands in the module - the intra-module call graph. Indirect edges are left to the IAT and jump tables. - gva_jumptable recovers a switch's case targets from an indirect jump's table: consecutive pointer entries that land in an executable region. - cfg_blocks splits one function view into basic blocks (a generic handler: leaders from intra-function branch targets, cut after jmp/jcc/ret). - gva_imm_xref finds the instructions whose immediate operand equals a constant - the dual of code-xref for magic values, error codes, syscall numbers. The decoder now also reports imm_off/imm_len so a caller can read or match the immediate operand. The generic primitives live in the new codeanalysis.h (jump tables, basic blocks) and scan.h (constant xref); the .pdata-bound call graph stays on the win32 surface and reuses the existing function/section/decode primitives - no second PE or instruction parser. 
--- CMakeLists.txt | 1 + include/codeanalysis.h | 80 +++++++++++++++++++++ include/scan.h | 28 ++++++++ include/win32.h | 36 ++++++++++ include/x86dec.h | 30 +++++++- src/engine/win32/pe.c | 110 ++++++++++++++++++++++++++++ src/handlers/codeanalysis.c | 140 ++++++++++++++++++++++++++++++++++++ src/handlers/codescan.c | 56 +++++++++++++++ src/handlers/x86dec.c | 25 +++++++ 9 files changed, 505 insertions(+), 1 deletion(-) create mode 100644 include/codeanalysis.h create mode 100644 src/handlers/codeanalysis.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 5653507..f3ba1e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ add_library(vmie STATIC src/handlers/sigscan.c src/handlers/sigset.c src/handlers/codescan.c + src/handlers/codeanalysis.c src/handlers/siggen.c src/handlers/x86dec.c src/handlers/pmap.c diff --git a/include/codeanalysis.h b/include/codeanalysis.h new file mode 100644 index 0000000..bada3cf --- /dev/null +++ b/include/codeanalysis.h @@ -0,0 +1,80 @@ +/* codeanalysis.h - generic (OS-agnostic) x86-64 code-structure analysis. + * + * Handler layer: built on the generic memory model (memmodel.h: cr3 + VA, the + * region map, gva_read) and the light x86-64 decoder (x86dec.h). It names no + * Windows object - jump-table recovery and basic-block splitting are properties + * of code and the address space, not of any particular OS. The win32-specific + * call graph (which needs .pdata) lives in win32.h instead. + * + * These are the structure-recovery primitives that sit above the decoder and + * gva_code_xref / gva_imm_xref (scan.h): given a function body or an indirect + * jump's table, reconstruct the control flow the linear scanners cannot see. 
+ */ +#ifndef VMIE_CODEANALYSIS_H +#define VMIE_CODEANALYSIS_H +#include +#include +#include "memmodel.h" /* vmie_mem, cr3+VA, vregion/VR_*, gva_read/gva_regions */ +#include "sigscan.h" /* mem_view_t (the single owner of the view type) */ +#include "x86dec.h" /* x86_decode, x86_insn, x86_branch_target */ + +/* Jump-table recovery. From `table_va`, read consecutive 8-byte entries and + * keep those that point into an EXECUTABLE region under `cr3` (membership tested + * against the live region map, i.e. a VR_X run from gva_regions); stop at the + * first entry that is not a code pointer, at a read failure, or at `max`. The + * entries are absolute 64-bit code VAs (the common /CASE jump-table form a + * compiler emits for a switch). Writes up to `max` recovered targets to + * `targets` (NULL to count only) and returns the number recovered. + * + * Feed it the table address taken from an indirect jump's memory operand - e.g. + * `jmp qword [rip+disp]` => rip+disp (x86_riprel_target), or the base of a + * `jmp qword [base + idx*8]` SIB table - to recover a switch's case targets and + * complete the control-flow graph that the linear decoders (cfg_blocks, + * vmie_win32_callgraph) leave dangling at the indirect jump. + * + * Returns 0 when the first entry is already not a code pointer (an empty/absent + * table), so a 0 return is "no table here", not an error. + * + * Example - resolve a switch reached by `jmp qword [rip+disp]`: + * x86_insn in; x86_decode(code, avail, &in); // the indirect jmp + * uint64_t tbl = x86_riprel_target(jmp_va, &in); // table base VA + * uint64_t cases[64]; + * int n = gva_jumptable(m, cr3, tbl, cases, 64); // case target VAs */ +int gva_jumptable(vmie_mem* m, uintptr_t cr3, uint64_t table_va, + uint64_t* targets, int max); + +/* One basic block inside a function view. 
The offsets are in the VIEW's own + * coordinate space (mem_view_t.base_va + offset): for a SECTION_LOCAL view they + * are section-local byte offsets, for a MODULE_RVA view they are RVAs. + * start - byte offset of the block's first instruction (inclusive) + * end - byte offset just past the block's last instruction (exclusive), so + * the block spans [start, end) and its length is end - start. */ +typedef struct { uint32_t start; uint32_t end; } code_block; + +/* Split one function's bytes into basic blocks. `fn` is a view spanning exactly + * one function (e.g. a section-view sub-range covering a func_range from + * vmie_win32_functions): fn.data[0] is the function's first byte and fn.size its + * length. Two linear passes over the bytes with the decoder: + * 1. collect intra-function branch targets (the destinations of jmp/jcc whose + * target lands inside [0, fn.size)) - these are leaders; + * 2. cut a block after every jmp/jcc/ret and before every leader. A CALL is + * treated as fall-through (it returns), so it does NOT end a block. A + * branch whose target is OUTSIDE `fn` (a tail call or inter-procedural jmp) + * ends the block but starts no new one inside `fn`. + * + * Blocks are emitted in ascending start order, partition [0, fn.size) with no + * gaps or overlaps, and are reported in the view's coordinate space (start/end + * are offsets from fn.base_va). Writes up to `max` blocks to `out` (NULL to + * count only) and returns the TOTAL block count, or -1 if the bytes do not + * decode cleanly (a desync: the linear walk hit an undecodable byte). Pure: it + * touches only the view and the decoder, no vmie_mem / no I/O. 
+ * + * Example - block count and extents of one function: + * mem_view_t fn; // a SECTION_LOCAL/RVA sub-view of one function + * code_block bb[256]; + * int n = cfg_blocks(fn, bb, 256); + * for (int i = 0; i < n && i < 256; i++) + * printf("block %d: [%#x, %#x)\n", i, bb[i].start, bb[i].end); */ +int cfg_blocks(mem_view_t fn, code_block* out, int max); + +#endif /* VMIE_CODEANALYSIS_H */ diff --git a/include/scan.h b/include/scan.h index 0b10763..bdbb458 100644 --- a/include/scan.h +++ b/include/scan.h @@ -83,6 +83,34 @@ int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint64_t target_va, uint64_t* out, int max); +/* immediate / constant xref: every instruction in [lo,hi] (kept by the + * protection filter `prot_any`; pass VR_X to restrict to code) whose IMMEDIATE + * operand equals `value`, compared over the low `width` bytes (width is 1, 2, 4, + * or 8). Like gva_code_xref it brute-scans each byte offset with the light + * x86-64 decoder (x86dec.h, NOT a full disassembler) and carries a >=15-byte + * (max x86 instruction length) sweep overlap so no instruction is cut at a + * window seam; the same SEAM and INTERIOR de-duplications apply (a match + * starting in a non-last window's trailing overlap is left to the next window, + * and an interior alias falling inside an already-accepted match is dropped). + * + * An instruction matches when it carries an immediate (imm_len > 0) at least + * `width` bytes wide and its low `width` bytes equal `value & mask(width)`. The + * rel/RIP-relative DISPLACEMENT of a branch is NOT an immediate and never + * matches here - use gva_code_xref for displacement targets. + * + * Records each matching instruction-start VA in the view's coordinate space. 
+ * Writes up to `max` VAs to `out` (NULL to count only) and returns the TOTAL + * number of matches, or -1 on bad input (a NULL m, an unswept range, or a width + * that is not 1/2/4/8). Use it to answer "what code uses the constant N" - error + * codes, magic values, syscall numbers, table sizes, struct sizes. + * + * Example - sites that load the NTSTATUS 0xC0000022 (ACCESS_DENIED) as a dword: + * uint64_t sites[64]; + * int n = gva_imm_xref(m, cr3, lo, hi, VR_X, 0xC0000022ull, 4, sites, 64); */ +int gva_imm_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, + uint32_t prot_any, uint64_t value, int width, + uint64_t* out, int max); + /* gva bridges to the signature matcher: build mem_view from guest memory and feed sigscan.h */ int gva_sig_scan (vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, uint32_t prot_any, const sig_pattern_t* p, uint64_t* out, int max); diff --git a/include/win32.h b/include/win32.h index 6a259c9..e33cf75 100644 --- a/include/win32.h +++ b/include/win32.h @@ -295,6 +295,42 @@ typedef struct { uint32_t rva; uint32_t size; } func_range; int vmie_win32_functions(vmie_win32* v, uint64_t cr3, uint64_t module_base, func_range* out, int max); +/* One call-graph edge, with both endpoints as RVAs relative to the module base + * (absolute VA = module_base + rva). + * from - RVA of the function that contains the call/jmp site (a .pdata + * function start) + * to - RVA of the branch target (inside the same module image) + * kind - 0 = call (E8 / direct CALL), 1 = direct jmp (E9/EB, including a tail + * call to another function). */ +typedef struct { uint32_t from; uint32_t to; uint8_t kind; } call_edge; + +/* Build the intra-module call graph of the image at `module_base` (in the `cr3` + * address space). 
Reuses the existing primitives - vmie_win32_functions to + * enumerate the .pdata function starts, vmie_win32_section_view to gather the + * .text bytes, and x86_decode to step each function - and emits one edge for + * every DIRECT call/jmp (has_rel) whose resolved target lands inside the module + * image [module_base, module_base + SizeOfImage). `from` is the containing + * function's RVA, `to` is the target's RVA. + * + * INDIRECT calls/jmps (through a register or memory, e.g. `call [rip+disp]` or + * `jmp rax`) are SKIPPED here - they carry no static rel target. Resolve those + * separately: switch tables via gva_jumptable, import thunks via the IAT (a + * wave-2 concern). A direct branch whose target falls OUTSIDE the image (an + * inter-module jmp/call) is also skipped - the graph is intra-module by + * construction. + * + * Writes up to `max` edges to `out` (NULL to count only) and returns the TOTAL + * edge count, or -1 if the .pdata/.text directory is missing or unreadable. + * Edges are grouped by source function (all of one function's edges are + * contiguous), in ascending function order. + * + * Example - out-degree of each function: + * call_edge e[4096]; + * int n = vmie_win32_callgraph(v, pr->cr3, m.base, e, 4096); + * // group by e[i].from to get each function's callees */ +int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base, + call_edge* out, int max); + /* One exported symbol from the module export directory (EAT). * rva - export target RVA (absolute VA = module_base + rva). Forwarder * exports report the forwarder-string RVA; see `forwarded`. diff --git a/include/x86dec.h b/include/x86dec.h index e59391c..291c901 100644 --- a/include/x86dec.h +++ b/include/x86dec.h @@ -46,6 +46,26 @@ typedef struct { uint8_t disp_len; /* displacement length: 1 (rel8), 4 (rel32 or RIP-rel * disp32), else 0 (no displacement). The wildcard span is * [disp_off, disp_off + disp_len). 
*/ + uint8_t imm_off; /* byte offset, within the instruction, of the IMMEDIATE + * operand (the trailing constant: imm8/16/32/64 of mov + * reg,imm / cmp r/m,imm / push imm / test / add ...), or + * 0 if the instruction carries no immediate + * (imm_len == 0). This is distinct from disp_off: disp_* + * is the rel/RIP-relative DISPLACEMENT (an address that + * floats with the load address), imm_* is the encoded + * CONSTANT operand. An instruction can have neither, one, + * or - for a few forms (e.g. a RIP-relative store of an + * immediate) - both. The immediate value lives at + * code[imm_off .. imm_off + imm_len), little-endian. */ + uint8_t imm_len; /* immediate length in bytes: 1, 2, 4, or 8 (resolved + * against the effective operand size: the 66 prefix and + * REX.W are honoured, so e.g. mov r,imm is 2/4/8 and + * push imm / cmp r/m,imm32 is 2/4). 0 when the + * instruction has no single immediate operand; the rare + * combined-immediate forms (ENTER imm16,imm8; far ptr) + * also report 0 here - they are not a clean constant. + * The constant-xref scanner (gva_imm_xref) reads the low + * `width` bytes at imm_off when imm_len >= width. */ } x86_insn; /* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills @@ -59,7 +79,15 @@ typedef struct { * byte position and length of the rel/RIP-relative displacement field within the * instruction (0/0 when there is none). These are exactly the bytes that float * with the load address / relocation, so a signature generator wildcards - * [disp_off, disp_off+disp_len) and keeps the rest as must-match. */ + * [disp_off, disp_off+disp_len) and keeps the rest as must-match. + * + * It also reports out->imm_off / out->imm_len: the position and length of the + * trailing IMMEDIATE constant operand (imm8/16/32/64), or 0/0 when there is + * none. The immediate is the encoded literal (a magic value, error code, table + * size, syscall number, ...) - distinct from the rel/RIP displacement. 
The + * length honours the 66 prefix and REX.W (so mov r,imm is 2/4/8); combined- + * immediate forms (ENTER, far ptr) report imm_len 0. This is what the + * constant-xref scanner (gva_imm_xref) compares against a wanted value. */ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out); /* Absolute target of a rel branch: ip + insn->len + insn->rel (0 unless has_rel). */ diff --git a/src/engine/win32/pe.c b/src/engine/win32/pe.c index eecdfe6..00c6066 100644 --- a/src/engine/win32/pe.c +++ b/src/engine/win32/pe.c @@ -1,9 +1,11 @@ #include "pe.h" #include +#include /* malloc/free (cold call-graph gather only) */ #include "memmodel.h" /* gva_read, VR_* */ #include "sigscan.h" /* mem_sub (pure matcher; engine may use it) */ #include "win32.h" /* public surface: vmie_win32, section_desc, view_base */ +#include "x86dec.h" /* x86_decode / x86_branch_target (call-graph step) */ /* IMAGE_SECTION_HEADER: 8-byte Name, then Misc.VirtualSize(+8), VirtualAddress * (+12), and Characteristics(+36); the header is 40 bytes wide. */ @@ -406,3 +408,111 @@ int vmie_win32_section_view(vmie_win32* v, uint64_t cr3, uint64_t module_base, out->data = buf; out->size = n; out->base_va = base_va; return 0; } + +/* ---- public win32 surface: intra-module call graph ----------------------- * + * Reuses the existing primitives only: vmie_win32_functions (.pdata starts), + * vmie_win32_section_view (.text bytes), and x86_decode (the light decoder) - + * there is no second PE parser and no second decoder here. For each function it + * steps the bytes linearly and, on a DIRECT call/jmp (has_rel), resolves the + * target and, if it lands inside the image, emits one {from, to, kind} edge. + * Cold: one-shot directory + section gather, not a hot loop. */ + +/* SizeOfImage lives in the PE32+ OptionalHeader at +0x38; the OptionalHeader + * begins at NT(base+lfanew)+0x18 (Signature(4)+FileHeader(20)). 
*/ +#define OPT_SIZEOFIMAGE_OFF 0x38u + +int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base, + call_edge* out, int max) __attribute__((cold)); +int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base, + call_edge* out, int max) { + vmie_mem* m = vmie_win32_mem(v); + if (!m) { return -1; } + + /* image bounds: [module_base, module_base + SizeOfImage). */ + uint32_t lfanew; + if (gva_read(m, cr3, module_base + 0x3C, &lfanew, 4)) { return -1; } + uint32_t size_of_image; + if (gva_read(m, cr3, module_base + lfanew + 0x18 + OPT_SIZEOFIMAGE_OFF, + &size_of_image, 4)) { + return -1; + } + + /* locate .text (the executable section the .pdata functions live in). */ + section_desc sd[96]; + const int ns = vmie_win32_sections(v, cr3, module_base, sd, 96); + if (ns < 0) { return -1; } + const int nsuse = ns < 96 ? ns : 96; + const section_desc* text = NULL; + for (int i = 0; i < nsuse; i++) { + if (strcmp(sd[i].name, ".text") == 0) { text = &sd[i]; break; } + } + if (!text) { + /* fall back to the first executable section */ + for (int i = 0; i < nsuse; i++) { + if (sd[i].prot & VR_X) { text = &sd[i]; break; } + } + } + if (!text) { return -1; } + + /* gather the executable section once, addressed at its absolute VA so a + * decoded branch target is directly an absolute VA. */ + uint8_t* tbuf = malloc(text->vsize); + if (!tbuf) { return -1; } + mem_view_t tv; + if (vmie_win32_section_view(v, cr3, module_base, text, ABSOLUTE_VA, + tbuf, text->vsize, &tv) != 0) { + free(tbuf); + return -1; + } + const uint64_t text_lo = module_base + text->rva; /* tv.base_va */ + const uint64_t text_hi = text_lo + tv.size; /* exclusive */ + + /* function inventory: count, then gather (stack for the common case, heap on + * overflow) so every function is stepped, none silently dropped. The gather is a second, independent call, so its total may differ from nfn (presumably when the guest's .pdata changed in between); the walk below is clamped to the nfn slots actually provided.
*/ + const int nfn = vmie_win32_functions(v, cr3, module_base, NULL, 0); + if (nfn < 0) { free(tbuf); return -1; } + func_range stack_fr[256]; + func_range* fr = stack_fr; + func_range* heap_fr = NULL; + if (nfn > (int)(sizeof stack_fr / sizeof stack_fr[0])) { + heap_fr = malloc((size_t)nfn * sizeof *heap_fr); + if (!heap_fr) { free(tbuf); return -1; } + fr = heap_fr; + } + const int got = vmie_win32_functions(v, cr3, module_base, fr, nfn); + if (got < 0) { free(heap_fr); free(tbuf); return -1; } + + int total = 0; + for (int f = 0; f < got && f < nfn; f++) { /* got is a fresh TOTAL: never index past the nfn slots gathered into fr */ + const uint64_t fn_lo = module_base + fr[f].rva; + const uint64_t fn_hi = fn_lo + fr[f].size; + /* the function must lie inside the gathered section. */ + if (fn_lo < text_lo || fn_hi > text_hi) { continue; } + + size_t off = (size_t)(fn_lo - text_lo); + const size_t end = (size_t)(fn_hi - text_lo); + while (off < end) { + x86_insn in; + const int ilen = x86_decode(tv.data + off, end - off, &in); + if (ilen <= 0) { break; } /* desync: stop this fn */ + const uint64_t ip = text_lo + off; + if (in.has_rel && (in.flow == X86_CALL || in.flow == X86_JMP)) { + const uint64_t tgt = x86_branch_target(ip, &in); + if (tgt >= module_base && + tgt < module_base + (uint64_t)size_of_image) { + if (out && total < max) { + out[total].from = fr[f].rva; + out[total].to = (uint32_t)(tgt - module_base); + out[total].kind = (in.flow == X86_CALL) ? 0u : 1u; + } + total++; + } + } + off += (size_t)ilen; + } + } + + free(heap_fr); + free(tbuf); + return total; +} diff --git a/src/handlers/codeanalysis.c b/src/handlers/codeanalysis.c new file mode 100644 index 0000000..09e78b2 --- /dev/null +++ b/src/handlers/codeanalysis.c @@ -0,0 +1,140 @@ +/* codeanalysis.c - generic x86-64 code-structure analysis (see codeanalysis.h). + * + * gva_jumptable - recover an indirect-jump table (array of absolute code + * pointers) by reading consecutive 8-byte entries and keeping + * those that land in an executable region (region-map tested).
+ * cfg_blocks - split one function view into basic blocks with the decoder. + * + * Handler boundary: includes only memmodel.h / sigscan.h / x86dec.h (via + * codeanalysis.h) + the standard headers. It names no OS object and reuses the + * generic region map (gva_regions) and the light decoder (x86_decode) - it has + * no PE/Windows knowledge and no second decoder. + */ +#include +#include +#include +#include /* malloc/free (cfg leader bitmap, pure w.r.t. I/O) */ +#include "memmodel.h" +#include "sigscan.h" +#include "x86dec.h" +#include "codeanalysis.h" + +/* ---- jump-table recovery ------------------------------------------------- * + * Snapshot the executable runs once (cold setup), then read 8-byte entries from + * table_va and keep each that lands inside one of those VR_X runs, stopping at + * the first non-code-pointer / read failure / max. Returns the entry count, or -1 when m is NULL or the region snapshot fails. */ + +#define JT_MAXX 256 /* executable runs sampled for membership */ + +/* Is `va` inside one of the `n` executable runs `xr`? Linear scan: a module has + * a handful of X runs, and a switch table is short. */ +static int in_x_region(const vregion* xr, int n, uint64_t va) { + for (int i = 0; i < n; i++) { + if (va >= xr[i].va && va < xr[i].va + xr[i].len) { return 1; } + } + return 0; +} + +int gva_jumptable(vmie_mem* m, uintptr_t cr3, uint64_t table_va, + uint64_t* targets, int max) __attribute__((cold)); +int gva_jumptable(vmie_mem* m, uintptr_t cr3, uint64_t table_va, + uint64_t* targets, int max) { + if (!m) { return -1; } + + /* Executable runs under cr3, clamped to whichever canonical half the table + * sits in (gva_regions requires lo/hi in a single half). Code pointers in a + * jump table point into the same image, hence the same half as the table. */ + const int kern = (table_va >= KERN_MIN); + const uint64_t lo = kern ? KERN_MIN : USER_MIN; + const uint64_t hi = kern ?
~0ull : USER_MAX; + vregion xr[JT_MAXX]; + const int nx = gva_regions(m, cr3, lo, hi, VR_X, xr, JT_MAXX); + if (nx < 0) { return -1; } + const int nuse = nx < JT_MAXX ? nx : JT_MAXX; /* only the first JT_MAXX runs are tested for membership */ + + int n = 0; /* when storing, stop at max so the return never exceeds the caller's buffer; count-only (targets == NULL) is not bounded by max */ + for (uint64_t va = table_va; !targets || n < max; va += 8) { + uint64_t entry; + if (gva_read(m, cr3, va, &entry, 8)) { break; } /* read failure */ + if (!in_x_region(xr, nuse, entry)) { break; } /* not a code ptr */ + if (targets && n < max) { targets[n] = entry; } + n++; + } + return n; +} + +/* ---- basic-block split --------------------------------------------------- * + * Two linear passes with the decoder. A "leader" is the first instruction of a + * block: offset 0, the fall-through after any jmp/jcc/ret, and the in-function + * target of any jmp/jcc. A block runs from one leader up to (exclusive) the next + * leader. Pure: only the view and x86_decode, no vmie_mem. */ + +/* A terminator ends a block: an unconditional/conditional jump or a return. A + * CALL is fall-through (it returns), so it is NOT a terminator. */ +static int is_terminator(x86_flow f) { + return f == X86_JMP || f == X86_JCC || f == X86_RET; +} + +int cfg_blocks(mem_view_t fn, code_block* out, int max) { + if (!fn.data || fn.size == 0) { return -1; } + if (fn.size > 0xFFFFFFFFu) { return -1; } /* offsets are u32 */ + const size_t size = fn.size; + + /* leader[off] != 0 marks the start of a basic block. */ + uint8_t* leader = calloc(1, size); + if (!leader) { return -1; } + leader[0] = 1; /* entry is a leader */ + + /* Pass 1: mark fall-through-after-terminator and intra-function targets. A + * desync (undecodable byte in the linear walk) aborts with -1.
*/ + for (size_t off = 0; off < size; ) { + x86_insn in; + const int ilen = x86_decode(fn.data + off, size - off, &in); + if (ilen <= 0) { free(leader); return -1; } + const size_t next = off + (size_t)ilen; + if (is_terminator(in.flow)) { + if (next < size) { leader[next] = 1; } /* fall-through start */ + if (in.has_rel && (in.flow == X86_JMP || in.flow == X86_JCC)) { + /* branch target, in the view's coordinate space -> view offset. */ + const uint64_t tgt = x86_branch_target(fn.base_va + off, &in); + if (tgt >= fn.base_va && tgt < fn.base_va + size) { + leader[(size_t)(tgt - fn.base_va)] = 1; /* a target outside the view marks nothing */ + } + } + } + off = next; + } + + /* Pass 2: walk again, emitting one block per leader run. A block ends right + * after a terminator instruction, or just before the next leader. leader[] is only consulted at this walk's instruction boundaries, so a target that lands inside an instruction starts no block. */ + int total = 0; + size_t blk_start = 0; + for (size_t off = 0; off < size; ) { + x86_insn in; + const int ilen = x86_decode(fn.data + off, size - off, &in); + if (ilen <= 0) { free(leader); return -1; } /* same bytes as pass 1; kept as a guard */ + const size_t next = off + (size_t)ilen; + const int ends = is_terminator(in.flow) || + (next < size && leader[next]); /* leader starts next */ + if (ends) { + if (out && total < max) { /* store only while there is room; total keeps counting */ + out[total].start = (uint32_t)blk_start; + out[total].end = (uint32_t)next; + } + total++; + blk_start = next; + } + off = next; + } + /* a trailing run with no terminator (off ran off the end) is its own block.
*/ + if (blk_start < size) { + if (out && total < max) { + out[total].start = (uint32_t)blk_start; + out[total].end = (uint32_t)size; + } + total++; + } + + free(leader); + return total; +} diff --git a/src/handlers/codescan.c b/src/handlers/codescan.c index 81a8b8b..a0943ab 100644 --- a/src/handlers/codescan.c +++ b/src/handlers/codescan.c @@ -123,3 +123,59 @@ int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, } return c.n; } + +/* ---- decoder-driven constant (immediate) xref ---------------------------- * + * Same +1 brute-scan skeleton as gva_code_xref, but the predicate compares the + * decoded IMMEDIATE operand (x86_insn.imm_off/imm_len) to a wanted value over + * its low `width` bytes - not a branch/RIP target. The SEAM and INTERIOR + * de-duplications are identical to code-xref (a +1 brute-scan invariant): the + * predicate is the only thing that differs, so the two stay as two narrow + * passes rather than a forced common skeleton. */ +struct imm_cb { + uint64_t want; /* value masked to `width` bytes */ + uint64_t mask; /* low-`width`-byte mask */ + int width; /* 1/2/4/8 */ + uint64_t cover; /* VA just past the last accepted match */ + uint64_t* out; int max, n; /* out: result VAs (NULL = count only); max: slots in out; n: total matches so far */ +}; + +__attribute__((hot)) +static int imm_sweep_cb(void* u, const uint8_t* data, size_t len, + uint64_t base, size_t ov, int last) { + struct imm_cb* c = u; + const size_t limit = last ? len : (len > ov ? len - ov : 0); /* a non-last window stops ov bytes early: starts in its trailing overlap are not scanned here */ + for (size_t off = 0; off < len; off++) { + if (!last && off >= limit) { break; } + x86_insn in; + const int ilen = x86_decode(data + off, len - off, &in); + if (ilen <= 0) { continue; } /* undecodable at this offset: try the next byte */ + if (in.imm_len < (uint8_t)c->width) { continue; } /* no imm wide enough */ + /* read the low `width` bytes of the immediate, little-endian.
*/ + uint64_t v = 0; + for (int b = 0; b < c->width; b++) { + v |= (uint64_t)data[off + in.imm_off + b] << (8 * b); + } + if ((v & c->mask) != c->want) { continue; } + const uint64_t va = base + off; + if (va < c->cover) { continue; } /* interior alias of a prior hit */ + c->cover = va + (uint64_t)ilen; + if (c->out && c->n < c->max) { c->out[c->n] = va; } + c->n++; + } + return 0; +} + +int gva_imm_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi, + uint32_t prot_any, uint64_t value, int width, + uint64_t* out, int max) { + if (!m || (width != 1 && width != 2 && width != 4 && width != 8)) { return -1; } /* bad input: reject a NULL m here instead of relying on gva_sweep */ + struct imm_cb c; memset(&c, 0, sizeof c); + c.width = width; + c.mask = (width == 8) ? ~0ull : ((1ull << (8 * width)) - 1); + c.want = value & c.mask; + c.out = out; c.max = max; + if (gva_sweep(m, cr3, lo, hi, prot_any, X86_MAX_INSN, imm_sweep_cb, &c) < 0) { + return -1; + } + return c.n; /* TOTAL matches; only the first max were stored in out */ +} diff --git a/src/handlers/x86dec.c b/src/handlers/x86dec.c index d5aeefe..c466d77 100644 --- a/src/handlers/x86dec.c +++ b/src/handlers/x86dec.c @@ -232,8 +232,12 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) { n += m; /* 0F3A map is the imm8 map: every opcode carries a trailing imm8. */ + int has_imm8 = 0; + size_t imm8_at = 0; if (mmmmm == 3u) { if (avail < n + 1) { return 0; } + imm8_at = n; /* the trailing imm8 starts here */ + has_imm8 = 1; n += 1; } @@ -246,6 +250,8 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) { out->riprel = rip; out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0; out->disp_len = rip_present ? 4u : 0u; + out->imm_off = has_imm8 ? (uint8_t)imm8_at : 0; + out->imm_len = has_imm8 ?
1u : 0u; return (int)n; } @@ -262,6 +268,11 @@ static void read_rel(const uint8_t* p, size_t off, size_t bytes, x86_insn* out) out->has_rel = 1; out->disp_off = (uint8_t)off; /* rel field begins here */ out->disp_len = (uint8_t)bytes; /* rel8 -> 1, rel32 -> 4 */ + /* The branch's trailing `bytes` are its rel DISPLACEMENT, not a constant + * immediate: the main path provisionally tagged them as imm (E8/E9/EB/Jcc + * read their rel via the immediate-class table), so undo that here. */ + out->imm_off = 0; + out->imm_len = 0; } /* ---- main decode --------------------------------------------------------- */ @@ -274,6 +285,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) { out->has_rel = 0; out->rel = 0; out->has_riprel = 0; out->riprel = 0; out->disp_off = 0; out->disp_len = 0; + out->imm_off = 0; out->imm_len = 0; } return 0; } @@ -283,6 +295,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) { out->has_rel = 0; out->rel = 0; out->has_riprel = 0; out->riprel = 0; out->disp_off = 0; out->disp_len = 0; + out->imm_off = 0; out->imm_len = 0; const size_t cap = avail < 15u ? avail : 15u; /* never decode past 15 */ size_t n = 0; @@ -345,6 +358,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) { out->has_riprel = rip_present; out->riprel = rip; out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0; out->disp_len = rip_present ? 4u : 0u; + /* 0F38 opcodes carry no immediate. imm_off/imm_len stay 0/0. */ if (n < 1 || n > 15 || n > avail) { return 0; } out->len = (uint8_t)n; return (int)n; @@ -360,10 +374,13 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) { if (m == 0) { return 0; } n += m; if (n >= cap) { return 0; } /* trailing imm8 */ + const size_t imm8_at = n; /* the imm8 starts here */ n += 1; out->has_riprel = rip_present; out->riprel = rip; out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0; out->disp_len = rip_present ? 
4u : 0u; + out->imm_off = (uint8_t)imm8_at; /* the 0F3A trailing imm8 */ + out->imm_len = 1u; if (n < 1 || n > 15 || n > avail) { return 0; } out->len = (uint8_t)n; return (int)n; @@ -409,6 +426,14 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) { if (im) { if (cap < n + im) { return 0; } + /* Record the immediate field position/length for a clean single + * immediate (imm8/16/32/64). The combined-immediate forms - ENTER + * (imm16+imm8, im==3) and the legacy far pointer (IM_P) - are not a + * single constant operand, so they leave imm_off/imm_len at 0/0. */ + if (im == 1u || im == 2u || im == 4u || im == 8u) { + out->imm_off = (uint8_t)n; + out->imm_len = (uint8_t)im; + } n += im; }