mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
Add code-structure analysis: call graph, jump tables, basic blocks, constant xref
Wave 1 of the code-analysis layer, built on the x86-64 decoder: - vmie_win32_callgraph walks each .pdata function with the decoder and emits an edge for every direct call/jmp whose target lands in the module - the intra-module call graph. Indirect edges are left to the IAT and jump tables. - gva_jumptable recovers a switch's case targets from an indirect jump's table: consecutive pointer entries that land in an executable region. - cfg_blocks splits one function view into basic blocks (a generic handler: leaders from intra-function branch targets, cut after jmp/jcc/ret). - gva_imm_xref finds the instructions whose immediate operand equals a constant - the dual of code-xref for magic values, error codes, syscall numbers. The decoder now also reports imm_off/imm_len so a caller can read or match the immediate operand. The generic primitives live in the new codeanalysis.h (jump tables, basic blocks) and scan.h (constant xref); the .pdata-bound call graph stays on the win32 surface and reuses the existing function/section/decode primitives - no second PE or instruction parser.
This commit is contained in:
@@ -21,6 +21,7 @@ add_library(vmie STATIC
|
||||
src/handlers/sigscan.c
|
||||
src/handlers/sigset.c
|
||||
src/handlers/codescan.c
|
||||
src/handlers/codeanalysis.c
|
||||
src/handlers/siggen.c
|
||||
src/handlers/x86dec.c
|
||||
src/handlers/pmap.c
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
/* codeanalysis.h - generic (OS-agnostic) x86-64 code-structure analysis.
|
||||
*
|
||||
* Handler layer: built on the generic memory model (memmodel.h: cr3 + VA, the
|
||||
* region map, gva_read) and the light x86-64 decoder (x86dec.h). It names no
|
||||
* Windows object - jump-table recovery and basic-block splitting are properties
|
||||
* of code and the address space, not of any particular OS. The win32-specific
|
||||
* call graph (which needs .pdata) lives in win32.h instead.
|
||||
*
|
||||
* These are the structure-recovery primitives that sit above the decoder and
|
||||
* gva_code_xref / gva_imm_xref (scan.h): given a function body or an indirect
|
||||
* jump's table, reconstruct the control flow the linear scanners cannot see.
|
||||
*/
|
||||
#ifndef VMIE_CODEANALYSIS_H
|
||||
#define VMIE_CODEANALYSIS_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "memmodel.h" /* vmie_mem, cr3+VA, vregion/VR_*, gva_read/gva_regions */
|
||||
#include "sigscan.h" /* mem_view_t (the single owner of the view type) */
|
||||
#include "x86dec.h" /* x86_decode, x86_insn, x86_branch_target */
|
||||
|
||||
/* Jump-table recovery. From `table_va`, read consecutive 8-byte entries and
|
||||
* keep those that point into an EXECUTABLE region under `cr3` (membership tested
|
||||
* against the live region map, i.e. a VR_X run from gva_regions); stop at the
|
||||
* first entry that is not a code pointer, at a read failure, or at `max`. The
|
||||
* entries are absolute 64-bit code VAs (the common /CASE jump-table form a
|
||||
* compiler emits for a switch). Writes up to `max` recovered targets to
|
||||
* `targets` (NULL to count only) and returns the number recovered.
|
||||
*
|
||||
* Feed it the table address taken from an indirect jump's memory operand - e.g.
|
||||
* `jmp qword [rip+disp]` => rip+disp (x86_riprel_target), or the base of a
|
||||
* `jmp qword [base + idx*8]` SIB table - to recover a switch's case targets and
|
||||
* complete the control-flow graph that the linear decoders (cfg_blocks,
|
||||
* vmie_win32_callgraph) leave dangling at the indirect jump.
|
||||
*
|
||||
* Returns 0 when the first entry is already not a code pointer (an empty/absent
|
||||
* table), so a 0 return is "no table here", not an error.
|
||||
*
|
||||
* Example - resolve a switch reached by `jmp qword [rip+disp]`:
|
||||
* x86_insn in; x86_decode(code, avail, &in); // the indirect jmp
|
||||
* uint64_t tbl = x86_riprel_target(jmp_va, &in); // table base VA
|
||||
* uint64_t cases[64];
|
||||
* int n = gva_jumptable(m, cr3, tbl, cases, 64); // case target VAs */
|
||||
int gva_jumptable(vmie_mem* m, uintptr_t cr3, uint64_t table_va,
|
||||
uint64_t* targets, int max);
|
||||
|
||||
/* One basic block inside a function view. The offsets are in the VIEW's own
|
||||
* coordinate space (mem_view_t.base_va + offset): for a SECTION_LOCAL view they
|
||||
* are section-local byte offsets, for a MODULE_RVA view they are RVAs.
|
||||
* start - byte offset of the block's first instruction (inclusive)
|
||||
* end - byte offset just past the block's last instruction (exclusive), so
|
||||
* the block spans [start, end) and its length is end - start. */
|
||||
/* Half-open byte range [start, end) of one basic block, expressed as offsets in
 * the owning view's coordinate space. */
typedef struct {
    uint32_t start;   /* offset of the block's first instruction (inclusive) */
    uint32_t end;     /* offset just past the block's last instruction (exclusive) */
} code_block;
|
||||
|
||||
/* Split one function's bytes into basic blocks. `fn` is a view spanning exactly
|
||||
* one function (e.g. a section-view sub-range covering a func_range from
|
||||
* vmie_win32_functions): fn.data[0] is the function's first byte and fn.size its
|
||||
* length. Two linear passes over the bytes with the decoder:
|
||||
* 1. collect intra-function branch targets (the destinations of jmp/jcc whose
|
||||
* target lands inside [0, fn.size)) - these are leaders;
|
||||
* 2. cut a block after every jmp/jcc/ret and before every leader. A CALL is
|
||||
* treated as fall-through (it returns), so it does NOT end a block. A
|
||||
* branch whose target is OUTSIDE `fn` (a tail call or inter-procedural jmp)
|
||||
* ends the block but starts no new one inside `fn`.
|
||||
*
|
||||
* Blocks are emitted in ascending start order, partition [0, fn.size) with no
|
||||
* gaps or overlaps, and are reported in the view's coordinate space (start/end
|
||||
* are offsets from fn.base_va). Writes up to `max` blocks to `out` (NULL to
|
||||
* count only) and returns the TOTAL block count, or -1 if the bytes do not
|
||||
* decode cleanly (a desync: the linear walk hit an undecodable byte). Pure: it
|
||||
* touches only the view and the decoder, no vmie_mem / no I/O.
|
||||
*
|
||||
* Example - block count and extents of one function:
|
||||
* mem_view_t fn; // a SECTION_LOCAL/RVA sub-view of one function
|
||||
* code_block bb[256];
|
||||
* int n = cfg_blocks(fn, bb, 256);
|
||||
* for (int i = 0; i < n && i < 256; i++)
|
||||
* printf("block %d: [%#x, %#x)\n", i, bb[i].start, bb[i].end); */
|
||||
int cfg_blocks(mem_view_t fn, code_block* out, int max);
|
||||
|
||||
#endif /* VMIE_CODEANALYSIS_H */
|
||||
@@ -83,6 +83,34 @@ int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint64_t target_va, uint64_t* out, int max);
|
||||
|
||||
/* immediate / constant xref: every instruction in [lo,hi] (kept by the
|
||||
* protection filter `prot_any`; pass VR_X to restrict to code) whose IMMEDIATE
|
||||
* operand equals `value`, compared over the low `width` bytes (width is 1, 2, 4,
|
||||
* or 8). Like gva_code_xref it brute-scans each byte offset with the light
|
||||
* x86-64 decoder (x86dec.h, NOT a full disassembler) and carries a >=15-byte
|
||||
* (max x86 instruction length) sweep overlap so no instruction is cut at a
|
||||
* window seam; the same SEAM and INTERIOR de-duplications apply (a match
|
||||
* starting in a non-last window's trailing overlap is left to the next window,
|
||||
* and an interior alias falling inside an already-accepted match is dropped).
|
||||
*
|
||||
* An instruction matches when it carries an immediate (imm_len > 0) at least
|
||||
* `width` bytes wide and its low `width` bytes equal `value & mask(width)`. The
|
||||
* rel/RIP-relative DISPLACEMENT of a branch is NOT an immediate and never
|
||||
* matches here - use gva_code_xref for displacement targets.
|
||||
*
|
||||
* Records each matching instruction-start VA in the view's coordinate space.
|
||||
* Writes up to `max` VAs to `out` (NULL to count only) and returns the TOTAL
|
||||
* number of matches, or -1 on bad input (a NULL m, an unswept range, or a width
|
||||
* that is not 1/2/4/8). Use it to answer "what code uses the constant N" - error
|
||||
* codes, magic values, syscall numbers, table sizes, struct sizes.
|
||||
*
|
||||
* Example - sites that load the NTSTATUS 0xC0000022 (ACCESS_DENIED) as a dword:
|
||||
* uint64_t sites[64];
|
||||
* int n = gva_imm_xref(m, cr3, lo, hi, VR_X, 0xC0000022ull, 4, sites, 64); */
|
||||
int gva_imm_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, uint64_t value, int width,
|
||||
uint64_t* out, int max);
|
||||
|
||||
/* gva bridges to the signature matcher: build mem_view from guest memory and feed sigscan.h */
|
||||
int gva_sig_scan (vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, const sig_pattern_t* p, uint64_t* out, int max);
|
||||
|
||||
@@ -295,6 +295,42 @@ typedef struct { uint32_t rva; uint32_t size; } func_range;
|
||||
int vmie_win32_functions(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
func_range* out, int max);
|
||||
|
||||
/* One call-graph edge, with both endpoints as RVAs relative to the module base
|
||||
* (absolute VA = module_base + rva).
|
||||
* from - RVA of the function that contains the call/jmp site (a .pdata
|
||||
* function start)
|
||||
* to - RVA of the branch target (inside the same module image)
|
||||
* kind - 0 = call (E8 / direct CALL), 1 = direct jmp (E9/EB, including a tail
|
||||
* call to another function). */
|
||||
/* One directed call-graph edge; both endpoints are RVAs relative to the module
 * base. */
typedef struct {
    uint32_t from;   /* RVA of the function containing the call/jmp site */
    uint32_t to;     /* RVA of the branch target */
    uint8_t  kind;   /* 0 = direct call, 1 = direct jmp */
} call_edge;
|
||||
|
||||
/* Build the intra-module call graph of the image at `module_base` (in the `cr3`
|
||||
* address space). Reuses the existing primitives - vmie_win32_functions to
|
||||
* enumerate the .pdata function starts, vmie_win32_section_view to gather the
|
||||
* .text bytes, and x86_decode to step each function - and emits one edge for
|
||||
* every DIRECT call/jmp (has_rel) whose resolved target lands inside the module
|
||||
* image [module_base, module_base + SizeOfImage). `from` is the containing
|
||||
* function's RVA, `to` is the target's RVA.
|
||||
*
|
||||
* INDIRECT calls/jmps (through a register or memory, e.g. `call [rip+disp]` or
|
||||
* `jmp rax`) are SKIPPED here - they carry no static rel target. Resolve those
|
||||
* separately: switch tables via gva_jumptable, import thunks via the IAT (a
|
||||
* wave-2 concern). A direct branch whose target falls OUTSIDE the image (an
|
||||
* inter-module jmp/call) is also skipped - the graph is intra-module by
|
||||
* construction.
|
||||
*
|
||||
* Writes up to `max` edges to `out` (NULL to count only) and returns the TOTAL
|
||||
* edge count, or -1 if the .pdata/.text directory is missing or unreadable.
|
||||
* Edges are grouped by source function (all of one function's edges are
|
||||
* contiguous), in ascending function order.
|
||||
*
|
||||
* Example - out-degree of each function:
|
||||
* call_edge e[4096];
|
||||
* int n = vmie_win32_callgraph(v, pr->cr3, m.base, e, 4096);
|
||||
* // group by e[i].from to get each function's callees */
|
||||
int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
call_edge* out, int max);
|
||||
|
||||
/* One exported symbol from the module export directory (EAT).
|
||||
* rva - export target RVA (absolute VA = module_base + rva). Forwarder
|
||||
* exports report the forwarder-string RVA; see `forwarded`.
|
||||
|
||||
+29
-1
@@ -46,6 +46,26 @@ typedef struct {
|
||||
uint8_t disp_len; /* displacement length: 1 (rel8), 4 (rel32 or RIP-rel
|
||||
* disp32), else 0 (no displacement). The wildcard span is
|
||||
* [disp_off, disp_off + disp_len). */
|
||||
uint8_t imm_off; /* byte offset, within the instruction, of the IMMEDIATE
|
||||
* operand (the trailing constant: imm8/16/32/64 of mov
|
||||
* reg,imm / cmp r/m,imm / push imm / test / add ...), or
|
||||
* 0 if the instruction carries no immediate
|
||||
* (imm_len == 0). This is distinct from disp_off: disp_*
|
||||
* is the rel/RIP-relative DISPLACEMENT (an address that
|
||||
* floats with the load address), imm_* is the encoded
|
||||
* CONSTANT operand. An instruction can have neither, one,
|
||||
* or - for a few forms (e.g. a RIP-relative store of an
|
||||
* immediate) - both. The immediate value lives at
|
||||
* code[imm_off .. imm_off + imm_len), little-endian. */
|
||||
uint8_t imm_len; /* immediate length in bytes: 1, 2, 4, or 8 (resolved
|
||||
* against the effective operand size: the 66 prefix and
|
||||
* REX.W are honoured, so e.g. mov r,imm is 2/4/8 and
|
||||
* push imm / cmp r/m,imm32 is 2/4). 0 when the
|
||||
* instruction has no single immediate operand; the rare
|
||||
* combined-immediate forms (ENTER imm16,imm8; far ptr)
|
||||
* also report 0 here - they are not a clean constant.
|
||||
* The constant-xref scanner (gva_imm_xref) reads the low
|
||||
* `width` bytes at imm_off when imm_len >= width. */
|
||||
} x86_insn;
|
||||
|
||||
/* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills
|
||||
@@ -59,7 +79,15 @@ typedef struct {
|
||||
* byte position and length of the rel/RIP-relative displacement field within the
|
||||
* instruction (0/0 when there is none). These are exactly the bytes that float
|
||||
* with the load address / relocation, so a signature generator wildcards
|
||||
* [disp_off, disp_off+disp_len) and keeps the rest as must-match. */
|
||||
* [disp_off, disp_off+disp_len) and keeps the rest as must-match.
|
||||
*
|
||||
* It also reports out->imm_off / out->imm_len: the position and length of the
|
||||
* trailing IMMEDIATE constant operand (imm8/16/32/64), or 0/0 when there is
|
||||
* none. The immediate is the encoded literal (a magic value, error code, table
|
||||
* size, syscall number, ...) - distinct from the rel/RIP displacement. The
|
||||
* length honours the 66 prefix and REX.W (so mov r,imm is 2/4/8); combined-
|
||||
* immediate forms (ENTER, far ptr) report imm_len 0. This is what the
|
||||
* constant-xref scanner (gva_imm_xref) compares against a wanted value. */
|
||||
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out);
|
||||
|
||||
/* Absolute target of a rel branch: ip + insn->len + insn->rel (0 unless has_rel). */
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
#include "pe.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h> /* malloc/free (cold call-graph gather only) */
|
||||
#include "memmodel.h" /* gva_read, VR_* */
|
||||
#include "sigscan.h" /* mem_sub (pure matcher; engine may use it) */
|
||||
#include "win32.h" /* public surface: vmie_win32, section_desc, view_base */
|
||||
#include "x86dec.h" /* x86_decode / x86_branch_target (call-graph step) */
|
||||
|
||||
/* IMAGE_SECTION_HEADER: 8-byte Name, then Misc.VirtualSize(+8), VirtualAddress
|
||||
* (+12), and Characteristics(+36); the header is 40 bytes wide. */
|
||||
@@ -406,3 +408,111 @@ int vmie_win32_section_view(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
out->data = buf; out->size = n; out->base_va = base_va;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---- public win32 surface: intra-module call graph ----------------------- *
|
||||
* Reuses the existing primitives only: vmie_win32_functions (.pdata starts),
|
||||
* vmie_win32_section_view (.text bytes), and x86_decode (the light decoder) -
|
||||
* there is no second PE parser and no second decoder here. For each function it
|
||||
* steps the bytes linearly and, on a DIRECT call/jmp (has_rel), resolves the
|
||||
* target and, if it lands inside the image, emits one {from, to, kind} edge.
|
||||
* Cold: one-shot directory + section gather, not a hot loop. */
|
||||
|
||||
/* SizeOfImage lives in the PE32+ OptionalHeader at +0x38; the OptionalHeader
|
||||
* begins at NT(base+lfanew)+0x18 (Signature(4)+FileHeader(20)). */
|
||||
#define OPT_SIZEOFIMAGE_OFF 0x38u
|
||||
|
||||
int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
call_edge* out, int max) __attribute__((cold));
|
||||
int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
call_edge* out, int max) {
|
||||
vmie_mem* m = vmie_win32_mem(v);
|
||||
if (!m) { return -1; }
|
||||
|
||||
/* image bounds: [module_base, module_base + SizeOfImage). */
|
||||
uint32_t lfanew;
|
||||
if (gva_read(m, cr3, module_base + 0x3C, &lfanew, 4)) { return -1; }
|
||||
uint32_t size_of_image;
|
||||
if (gva_read(m, cr3, module_base + lfanew + 0x18 + OPT_SIZEOFIMAGE_OFF,
|
||||
&size_of_image, 4)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* locate .text (the executable section the .pdata functions live in). */
|
||||
section_desc sd[96];
|
||||
const int ns = vmie_win32_sections(v, cr3, module_base, sd, 96);
|
||||
if (ns < 0) { return -1; }
|
||||
const int nsuse = ns < 96 ? ns : 96;
|
||||
const section_desc* text = NULL;
|
||||
for (int i = 0; i < nsuse; i++) {
|
||||
if (strcmp(sd[i].name, ".text") == 0) { text = &sd[i]; break; }
|
||||
}
|
||||
if (!text) {
|
||||
/* fall back to the first executable section */
|
||||
for (int i = 0; i < nsuse; i++) {
|
||||
if (sd[i].prot & VR_X) { text = &sd[i]; break; }
|
||||
}
|
||||
}
|
||||
if (!text) { return -1; }
|
||||
|
||||
/* gather the executable section once, addressed at its absolute VA so a
|
||||
* decoded branch target is directly an absolute VA. */
|
||||
uint8_t* tbuf = malloc(text->vsize);
|
||||
if (!tbuf) { return -1; }
|
||||
mem_view_t tv;
|
||||
if (vmie_win32_section_view(v, cr3, module_base, text, ABSOLUTE_VA,
|
||||
tbuf, text->vsize, &tv) != 0) {
|
||||
free(tbuf);
|
||||
return -1;
|
||||
}
|
||||
const uint64_t text_lo = module_base + text->rva; /* tv.base_va */
|
||||
const uint64_t text_hi = text_lo + tv.size; /* exclusive */
|
||||
|
||||
/* function inventory: count, then gather (stack for the common case, heap on
|
||||
* overflow) so every function is stepped, none silently dropped. */
|
||||
const int nfn = vmie_win32_functions(v, cr3, module_base, NULL, 0);
|
||||
if (nfn < 0) { free(tbuf); return -1; }
|
||||
func_range stack_fr[256];
|
||||
func_range* fr = stack_fr;
|
||||
func_range* heap_fr = NULL;
|
||||
if (nfn > (int)(sizeof stack_fr / sizeof stack_fr[0])) {
|
||||
heap_fr = malloc((size_t)nfn * sizeof *heap_fr);
|
||||
if (!heap_fr) { free(tbuf); return -1; }
|
||||
fr = heap_fr;
|
||||
}
|
||||
const int got = vmie_win32_functions(v, cr3, module_base, fr, nfn);
|
||||
if (got < 0) { free(heap_fr); free(tbuf); return -1; }
|
||||
|
||||
int total = 0;
|
||||
for (int f = 0; f < got; f++) {
|
||||
const uint64_t fn_lo = module_base + fr[f].rva;
|
||||
const uint64_t fn_hi = fn_lo + fr[f].size;
|
||||
/* the function must lie inside the gathered section. */
|
||||
if (fn_lo < text_lo || fn_hi > text_hi) { continue; }
|
||||
|
||||
size_t off = (size_t)(fn_lo - text_lo);
|
||||
const size_t end = (size_t)(fn_hi - text_lo);
|
||||
while (off < end) {
|
||||
x86_insn in;
|
||||
const int ilen = x86_decode(tv.data + off, end - off, &in);
|
||||
if (ilen <= 0) { break; } /* desync: stop this fn */
|
||||
const uint64_t ip = text_lo + off;
|
||||
if (in.has_rel && (in.flow == X86_CALL || in.flow == X86_JMP)) {
|
||||
const uint64_t tgt = x86_branch_target(ip, &in);
|
||||
if (tgt >= module_base &&
|
||||
tgt < module_base + (uint64_t)size_of_image) {
|
||||
if (out && total < max) {
|
||||
out[total].from = fr[f].rva;
|
||||
out[total].to = (uint32_t)(tgt - module_base);
|
||||
out[total].kind = (in.flow == X86_CALL) ? 0u : 1u;
|
||||
}
|
||||
total++;
|
||||
}
|
||||
}
|
||||
off += (size_t)ilen;
|
||||
}
|
||||
}
|
||||
|
||||
free(heap_fr);
|
||||
free(tbuf);
|
||||
return total;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
/* codeanalysis.c - generic x86-64 code-structure analysis (see codeanalysis.h).
|
||||
*
|
||||
* gva_jumptable - recover an indirect-jump table (array of absolute code
|
||||
* pointers) by reading consecutive 8-byte entries and keeping
|
||||
* those that land in an executable region (region-map tested).
|
||||
* cfg_blocks - split one function view into basic blocks with the decoder.
|
||||
*
|
||||
* Handler boundary: includes only memmodel.h / sigscan.h / x86dec.h (via
|
||||
* codeanalysis.h) + the standard headers. It names no OS object and reuses the
|
||||
* generic region map (gva_regions) and the light decoder (x86_decode) - it has
|
||||
* no PE/Windows knowledge and no second decoder.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h> /* malloc/free (cfg leader bitmap, pure w.r.t. I/O) */
|
||||
#include "memmodel.h"
|
||||
#include "sigscan.h"
|
||||
#include "x86dec.h"
|
||||
#include "codeanalysis.h"
|
||||
|
||||
/* ---- jump-table recovery ------------------------------------------------- *
|
||||
* Snapshot the executable runs once (cold setup), then read 8-byte entries from
|
||||
* table_va and keep each that lands inside one of those VR_X runs, stopping at
|
||||
* the first non-code-pointer / read failure / max. */
|
||||
|
||||
#define JT_MAXX 256 /* executable runs sampled for membership */
|
||||
|
||||
/* Is `va` inside one of the `n` executable runs `xr`? Linear scan: a module has
|
||||
* a handful of X runs, and a switch table is short. */
|
||||
static int in_x_region(const vregion* xr, int n, uint64_t va) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (va >= xr[i].va && va < xr[i].va + xr[i].len) { return 1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gva_jumptable(vmie_mem* m, uintptr_t cr3, uint64_t table_va,
                  uint64_t* targets, int max) __attribute__((cold));

/* Recover a jump table: read consecutive 8-byte entries starting at `table_va`
 * and keep each one that points into an executable run under `cr3`.
 *
 * The walk stops at the first entry that is not a code pointer, at a read
 * failure, after `max` entries, or when the next entry address would wrap.
 *
 *   targets - receives the recovered target VAs; NULL to count only
 *   max     - capacity of `targets`. In count-only mode a positive `max` still
 *             bounds the walk; with max <= 0 an internal safety cap is used so
 *             the (NULL, 0) counting idiom terminates on a long pointer array.
 *
 * Returns the number of entries recovered (0 = no table here), or -1 when `m`
 * is NULL or the region map cannot be queried. */
int gva_jumptable(vmie_mem* m, uintptr_t cr3, uint64_t table_va,
                  uint64_t* targets, int max) {
    /* upper bound for a count-only walk that gives no usable `max`. */
    enum { JT_COUNT_CAP = 4096 };

    if (!m) { return -1; }

    /* Executable runs under cr3, clamped to whichever canonical half the table
     * sits in (gva_regions requires lo/hi in a single half). Code pointers in a
     * jump table point into the same image, hence the same half as the table. */
    const int kern = (table_va >= KERN_MIN);
    const uint64_t lo = kern ? KERN_MIN : USER_MIN;
    const uint64_t hi = kern ? ~0ull    : USER_MAX;
    vregion xr[JT_MAXX];
    const int nx = gva_regions(m, cr3, lo, hi, VR_X, xr, JT_MAXX);
    if (nx < 0) { return -1; }
    /* only the first JT_MAXX runs are stored and consulted. */
    const int nuse = nx < JT_MAXX ? nx : JT_MAXX;

    int limit;
    if (targets) {
        limit = max;                               /* never write past the buffer */
    } else {
        limit = (max > 0) ? max : JT_COUNT_CAP;    /* count-only */
    }

    int n = 0;
    uint64_t va = table_va;
    while (n < limit) {
        uint64_t entry;
        if (gva_read(m, cr3, va, &entry, 8)) { break; }      /* read failure  */
        if (!in_x_region(xr, nuse, entry))   { break; }      /* not a code ptr */
        if (targets) { targets[n] = entry; }
        n++;
        if (va > ~0ull - 8) { break; }     /* next entry address would wrap */
        va += 8;
    }
    return n;
}
|
||||
|
||||
/* ---- basic-block split --------------------------------------------------- *
|
||||
* Two linear passes with the decoder. A "leader" is the first instruction of a
|
||||
* block: offset 0, the fall-through after any jmp/jcc/ret, and the in-function
|
||||
* target of any jmp/jcc. A block runs from one leader up to (exclusive) the next
|
||||
* leader. Pure: only the view and x86_decode, no vmie_mem. */
|
||||
|
||||
/* A terminator ends a block: an unconditional/conditional jump or a return. A
|
||||
* CALL is fall-through (it returns), so it is NOT a terminator. */
|
||||
/* Does an instruction with flow class `f` close a basic block? Jumps
 * (unconditional and conditional) and returns do; every other class - CALL
 * included - falls through. Returns 1 for a terminator, 0 otherwise. */
static int is_terminator(x86_flow f) {
    switch (f) {
    case X86_JMP:
    case X86_JCC:
    case X86_RET:
        return 1;
    default:
        return 0;
    }
}
|
||||
|
||||
/* Store block `idx` = [start, end) into `out` when there is a buffer and room
 * for it; otherwise do nothing (the caller still counts the block). */
static void cfg_put_block(code_block* out, int max, int idx,
                          size_t start, size_t end) {
    if (!out || idx >= max) { return; }
    out[idx].start = (uint32_t)start;
    out[idx].end   = (uint32_t)end;
}

/* Split the function bytes in `fn` into basic blocks.
 *
 *   fn       - view covering one function (fn.data / fn.size / fn.base_va)
 *   out, max - block buffer and its capacity (out may be NULL to count only)
 *
 * Returns the total number of blocks (only the first `max` are stored), or -1
 * for an empty/NULL view, a view larger than 32 bits of offset, an allocation
 * failure, or a byte sequence the decoder rejects. */
int cfg_blocks(mem_view_t fn, code_block* out, int max) {
    if (!fn.data || fn.size == 0) { return -1; }
    if (fn.size > 0xFFFFFFFFu) { return -1; }      /* offsets are u32 */

    const size_t nbytes = fn.size;
    int result = -1;          /* stays -1 on every failure path below */
    int count = 0;            /* blocks seen so far (pass 2) */
    size_t open = 0;          /* start offset of the block being built */
    size_t pos;

    /* marks[off] != 0 <=> a basic block starts at byte offset `off`. */
    uint8_t* marks = calloc(1, nbytes);
    if (!marks) { return -1; }
    marks[0] = 1;                                   /* function entry */

    /* Pass 1 - find leaders: the byte after each terminator, and every
     * relative jmp/jcc destination that falls inside the view. */
    pos = 0;
    while (pos < nbytes) {
        x86_insn insn;
        const int step = x86_decode(fn.data + pos, nbytes - pos, &insn);
        if (step <= 0) { goto done; }               /* desync */
        const size_t after = pos + (size_t)step;

        if (is_terminator(insn.flow)) {
            if (after < nbytes) { marks[after] = 1; }
            const int rel_jump =
                insn.has_rel && (insn.flow == X86_JMP || insn.flow == X86_JCC);
            if (rel_jump) {
                /* destination in view coordinates, turned into a view offset */
                const uint64_t dest = x86_branch_target(fn.base_va + pos, &insn);
                if (dest >= fn.base_va && dest < fn.base_va + nbytes) {
                    marks[(size_t)(dest - fn.base_va)] = 1;
                }
            }
        }
        pos = after;
    }

    /* Pass 2 - emit blocks: close the open block after a terminator or when the
     * following instruction is a leader. */
    pos = 0;
    while (pos < nbytes) {
        x86_insn insn;
        const int step = x86_decode(fn.data + pos, nbytes - pos, &insn);
        if (step <= 0) { goto done; }
        const size_t after = pos + (size_t)step;

        int closes = is_terminator(insn.flow);
        if (!closes && after < nbytes) { closes = (marks[after] != 0); }

        if (closes) {
            cfg_put_block(out, max, count, open, after);
            count++;
            open = after;
        }
        pos = after;
    }

    /* bytes left over after the last cut form a final, unterminated block. */
    if (open < nbytes) {
        cfg_put_block(out, max, count, open, nbytes);
        count++;
    }
    result = count;

done:
    free(marks);
    return result;
}
|
||||
@@ -123,3 +123,59 @@ int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
}
|
||||
return c.n;
|
||||
}
|
||||
|
||||
/* ---- decoder-driven constant (immediate) xref ---------------------------- *
|
||||
* Same +1 brute-scan skeleton as gva_code_xref, but the predicate compares the
|
||||
* decoded IMMEDIATE operand (x86_insn.imm_off/imm_len) to a wanted value over
|
||||
* its low `width` bytes - not a branch/RIP target. The SEAM and INTERIOR
|
||||
* de-duplications are identical to code-xref (a +1 brute-scan invariant): the
|
||||
* predicate is the only thing that differs, so the two stay as two narrow
|
||||
* passes rather than a forced common skeleton. */
|
||||
/* Sweep state shared between gva_imm_xref and its per-window callback. */
struct imm_cb {
    uint64_t  want;    /* wanted value, already masked to `width` bytes */
    uint64_t  mask;    /* mask selecting the low `width` bytes */
    int       width;   /* compare width in bytes: 1, 2, 4 or 8 */
    uint64_t  cover;   /* VA one past the most recently accepted match */
    uint64_t* out;     /* result buffer, or NULL when only counting */
    int       max;     /* capacity of `out` */
    int       n;       /* matches found so far (may exceed `max`) */
};
|
||||
|
||||
__attribute__((hot))
|
||||
static int imm_sweep_cb(void* u, const uint8_t* data, size_t len,
|
||||
uint64_t base, size_t ov, int last) {
|
||||
struct imm_cb* c = u;
|
||||
const size_t limit = last ? len : (len > ov ? len - ov : 0);
|
||||
for (size_t off = 0; off < len; off++) {
|
||||
if (!last && off >= limit) { break; }
|
||||
x86_insn in;
|
||||
const int ilen = x86_decode(data + off, len - off, &in);
|
||||
if (ilen <= 0) { continue; }
|
||||
if (in.imm_len < (uint8_t)c->width) { continue; } /* no imm wide enough */
|
||||
/* read the low `width` bytes of the immediate, little-endian. */
|
||||
uint64_t v = 0;
|
||||
for (int b = 0; b < c->width; b++) {
|
||||
v |= (uint64_t)data[off + in.imm_off + b] << (8 * b);
|
||||
}
|
||||
if ((v & c->mask) != c->want) { continue; }
|
||||
const uint64_t va = base + off;
|
||||
if (va < c->cover) { continue; } /* interior alias of a prior hit */
|
||||
c->cover = va + (uint64_t)ilen;
|
||||
if (c->out && c->n < c->max) { c->out[c->n] = va; }
|
||||
c->n++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Constant (immediate) xref: find every instruction in [lo, hi] (regions kept
 * by `prot_any`) whose immediate operand equals `value` over its low `width`
 * bytes.
 *
 *   width    - compare width in bytes; must be 1, 2, 4 or 8
 *   out, max - result buffer and its capacity (out may be NULL to count only)
 *
 * Returns the total number of matches (only the first `max` are stored), or -1
 * when `m` is NULL, `width` is invalid, or the sweep fails. */
int gva_imm_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
                 uint32_t prot_any, uint64_t value, int width,
                 uint64_t* out, int max) {
    /* reject a NULL memory handle here rather than relying on the sweep to. */
    if (!m) { return -1; }
    if (width != 1 && width != 2 && width != 4 && width != 8) { return -1; }

    struct imm_cb c;
    memset(&c, 0, sizeof c);
    c.width = width;
    /* width 8 is special-cased: shifting a 64-bit value by 64 is undefined. */
    c.mask  = (width == 8) ? ~0ull : ((1ull << (8 * width)) - 1);
    c.want  = value & c.mask;
    c.out   = out;
    c.max   = max;

    /* overlap of one maximum instruction length so nothing is cut at a seam. */
    if (gva_sweep(m, cr3, lo, hi, prot_any, X86_MAX_INSN, imm_sweep_cb, &c) < 0) {
        return -1;
    }
    return c.n;
}
|
||||
|
||||
@@ -232,8 +232,12 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
n += m;
|
||||
|
||||
/* 0F3A map is the imm8 map: every opcode carries a trailing imm8. */
|
||||
int has_imm8 = 0;
|
||||
size_t imm8_at = 0;
|
||||
if (mmmmm == 3u) {
|
||||
if (avail < n + 1) { return 0; }
|
||||
imm8_at = n; /* the trailing imm8 starts here */
|
||||
has_imm8 = 1;
|
||||
n += 1;
|
||||
}
|
||||
|
||||
@@ -246,6 +250,8 @@ static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
out->riprel = rip;
|
||||
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
|
||||
out->disp_len = rip_present ? 4u : 0u;
|
||||
out->imm_off = has_imm8 ? (uint8_t)imm8_at : 0;
|
||||
out->imm_len = has_imm8 ? 1u : 0u;
|
||||
return (int)n;
|
||||
}
|
||||
|
||||
@@ -262,6 +268,11 @@ static void read_rel(const uint8_t* p, size_t off, size_t bytes, x86_insn* out)
|
||||
out->has_rel = 1;
|
||||
out->disp_off = (uint8_t)off; /* rel field begins here */
|
||||
out->disp_len = (uint8_t)bytes; /* rel8 -> 1, rel32 -> 4 */
|
||||
/* The branch's trailing `bytes` are its rel DISPLACEMENT, not a constant
|
||||
* immediate: the main path provisionally tagged them as imm (E8/E9/EB/Jcc
|
||||
* read their rel via the immediate-class table), so undo that here. */
|
||||
out->imm_off = 0;
|
||||
out->imm_len = 0;
|
||||
}
|
||||
|
||||
/* ---- main decode --------------------------------------------------------- */
|
||||
@@ -274,6 +285,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
out->has_rel = 0; out->rel = 0;
|
||||
out->has_riprel = 0; out->riprel = 0;
|
||||
out->disp_off = 0; out->disp_len = 0;
|
||||
out->imm_off = 0; out->imm_len = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -283,6 +295,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
out->has_rel = 0; out->rel = 0;
|
||||
out->has_riprel = 0; out->riprel = 0;
|
||||
out->disp_off = 0; out->disp_len = 0;
|
||||
out->imm_off = 0; out->imm_len = 0;
|
||||
|
||||
const size_t cap = avail < 15u ? avail : 15u; /* never decode past 15 */
|
||||
size_t n = 0;
|
||||
@@ -345,6 +358,7 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
out->has_riprel = rip_present; out->riprel = rip;
|
||||
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
|
||||
out->disp_len = rip_present ? 4u : 0u;
|
||||
/* 0F38 opcodes carry no immediate. imm_off/imm_len stay 0/0. */
|
||||
if (n < 1 || n > 15 || n > avail) { return 0; }
|
||||
out->len = (uint8_t)n;
|
||||
return (int)n;
|
||||
@@ -360,10 +374,13 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
if (m == 0) { return 0; }
|
||||
n += m;
|
||||
if (n >= cap) { return 0; } /* trailing imm8 */
|
||||
const size_t imm8_at = n; /* the imm8 starts here */
|
||||
n += 1;
|
||||
out->has_riprel = rip_present; out->riprel = rip;
|
||||
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
|
||||
out->disp_len = rip_present ? 4u : 0u;
|
||||
out->imm_off = (uint8_t)imm8_at; /* the 0F3A trailing imm8 */
|
||||
out->imm_len = 1u;
|
||||
if (n < 1 || n > 15 || n > avail) { return 0; }
|
||||
out->len = (uint8_t)n;
|
||||
return (int)n;
|
||||
@@ -409,6 +426,14 @@ int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
|
||||
if (im) {
|
||||
if (cap < n + im) { return 0; }
|
||||
/* Record the immediate field position/length for a clean single
|
||||
* immediate (imm8/16/32/64). The combined-immediate forms - ENTER
|
||||
* (imm16+imm8, im==3) and the legacy far pointer (IM_P) - are not a
|
||||
* single constant operand, so they leave imm_off/imm_len at 0/0. */
|
||||
if (im == 1u || im == 2u || im == 4u || im == 8u) {
|
||||
out->imm_off = (uint8_t)n;
|
||||
out->imm_len = (uint8_t)im;
|
||||
}
|
||||
n += im;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user