mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
79e82ffc6a
Wave 1 of the code-analysis layer, built on the x86-64 decoder: - vmie_win32_callgraph walks each .pdata function with the decoder and emits an edge for every direct call/jmp whose target lands in the module - the intra-module call graph. Indirect edges are left to the IAT and jump tables. - gva_jumptable recovers a switch's case targets from an indirect jump's table: consecutive pointer entries that land in an executable region. - cfg_blocks splits one function view into basic blocks (a generic handler: leaders from intra-function branch targets, cut after jmp/jcc/ret). - gva_imm_xref finds the instructions whose immediate operand equals a constant - the dual of code-xref for magic values, error codes, syscall numbers. The decoder now also reports imm_off/imm_len so a caller can read or match the immediate operand. The generic primitives live in the new codeanalysis.h (jump tables, basic blocks) and scan.h (constant xref); the .pdata-bound call graph stays on the win32 surface and reuses the existing function/section/decode primitives - no second PE or instruction parser.
99 lines
6.2 KiB
C
99 lines
6.2 KiB
C
/* x86dec.h - light x86-64 length decoder + control-flow / RIP-relative targets.
 *
 * A PURE primitive: it decodes ONE 64-bit-mode instruction over a raw byte
 * buffer and reports its total length plus, when present, the near control-flow
 * branch displacement (rel8/rel32) and the RIP-relative memory displacement
 * (ModRM mod=00, rm=101). It does NOT decode registers or full operands - just
 * enough for length-disassembly and control-flow / memory target recovery
 * (function inventory, call graphs, xref databases, IAT and hook detection).
 *
 * It touches no I/O, no allocations, and no other module: <stdint.h>/<stddef.h>
 * only. Length-correct over legacy prefixes (66/67/F0/F2/F3/segment), REX, the
 * 1-byte / 0F two-byte / 0F38 / 0F3A opcode maps, ModRM+SIB, disp8/disp32, the
 * immediate sizes (with 66 and REX.W operand-size effects), and VEX (C4/C5).
 *
 * DOCUMENTED GAP: EVEX (0x62, AVX-512) is NOT decoded - it yields len=0
 * (undecodable). It is rare in ordinary user code; full EVEX support, if ever
 * needed, is a separate task. Any byte stream that does not decode, or that
 * would run past min(avail,15), also yields len=0.
 */
#ifndef VMIE_X86DEC_H
#define VMIE_X86DEC_H
#include <stdint.h>
#include <stddef.h>

/* Control-flow class of a decoded instruction. X86_OTHER is the first
 * enumerator (value 0), so a zero-initialised x86_insn reads as "no tracked
 * control-flow effect". */
typedef enum {
    X86_OTHER,  /* no tracked control-flow effect */
    X86_CALL,   /* E8 rel32, or FF /2 indirect (has_rel=0) */
    X86_JMP,    /* E9 rel32 / EB rel8, or FF /4 indirect (has_rel=0) */
    X86_JCC,    /* 70-7F rel8 / 0F 80-8F rel32 */
    X86_RET,    /* C3 / C2 imm16 / CB / CA imm16 */
    X86_INT3    /* CC */
} x86_flow;

/* Result of decoding one instruction. All offsets are relative to the first
 * byte of the instruction (prefixes included). Test disp_len / imm_len (or the
 * has_* flags) to learn whether a field is present; the *_off members are 0
 * when the corresponding field is absent. */
typedef struct {
    uint8_t  len;        /* total length 1..15; 0 = undecodable / exceeds avail */
    x86_flow flow;       /* control-flow class */
    int      has_rel;    /* 1: a rel8/rel32 branch displacement is present */
    int32_t  rel;        /* sign-extended branch displacement (if has_rel) */
    int      has_riprel; /* 1: RIP-relative memory operand (ModRM mod=00,rm=101) */
    int32_t  riprel;     /* sign-extended RIP-relative disp32 (if has_riprel) */
    uint8_t  disp_off;   /* byte offset within the instruction of the rel/RIP-rel
                          * displacement field, or 0 if the instruction has
                          * neither (has_rel == 0 && has_riprel == 0). This is the
                          * field that "floats" with the load address / relocation,
                          * so a signature generator wildcards exactly these bytes. */
    uint8_t  disp_len;   /* displacement length: 1 (rel8), 4 (rel32 or RIP-rel
                          * disp32), else 0 (no displacement). The wildcard span is
                          * [disp_off, disp_off + disp_len). */
    uint8_t  imm_off;    /* byte offset, within the instruction, of the IMMEDIATE
                          * operand (the trailing constant: imm8/16/32/64 of mov
                          * reg,imm / cmp r/m,imm / push imm / test / add ...), or
                          * 0 if the instruction carries no immediate
                          * (imm_len == 0). This is distinct from disp_off: disp_*
                          * is the rel/RIP-relative DISPLACEMENT (an address that
                          * floats with the load address), imm_* is the encoded
                          * CONSTANT operand. An instruction can have neither, one,
                          * or - for a few forms (e.g. a RIP-relative store of an
                          * immediate) - both. The immediate value lives at
                          * code[imm_off .. imm_off + imm_len), little-endian. */
    uint8_t  imm_len;    /* immediate length in bytes: 1, 2, 4, or 8 (resolved
                          * against the effective operand size: the 66 prefix and
                          * REX.W are honoured, so e.g. mov r,imm is 2/4/8 and
                          * push imm / cmp r/m,imm32 is 2/4). 0 when the
                          * instruction has no single immediate operand; the rare
                          * combined-immediate forms (ENTER imm16,imm8; far ptr)
                          * also report 0 here - they are not a clean constant.
                          * The constant-xref scanner (gva_imm_xref) reads the low
                          * `width` bytes at imm_off when imm_len >= width. */
} x86_insn;

/* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills
 * *out and returns the length (1..15), or 0 if the bytes do not decode or would
 * exceed min(avail,15). Length-correct over: legacy prefixes (66/67/F0/F2/F3/
 * seg), REX, 1-byte / 0F two-byte / 0F38 / 0F3A maps, ModRM+SIB, disp8/disp32,
 * imm8/16/32/64 (66 and REX.W operand-size effects), and VEX (C4/C5). EVEX
 * (0x62) is a documented gap: len=0.
 *
 * On a decoded instruction it also reports out->disp_off / out->disp_len: the
 * byte position and length of the rel/RIP-relative displacement field within the
 * instruction (0/0 when there is none). These are exactly the bytes that float
 * with the load address / relocation, so a signature generator wildcards
 * [disp_off, disp_off+disp_len) and keeps the rest as must-match.
 *
 * It also reports out->imm_off / out->imm_len: the position and length of the
 * trailing IMMEDIATE constant operand (imm8/16/32/64), or 0/0 when there is
 * none. The immediate is the encoded literal (a magic value, error code, table
 * size, syscall number, ...) - distinct from the rel/RIP displacement. The
 * length honours the 66 prefix and REX.W (so mov r,imm is 2/4/8); combined-
 * immediate forms (ENTER, far ptr) report imm_len 0. This is what the
 * constant-xref scanner (gva_imm_xref) compares against a wanted value. */
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out);

/* Absolute target of a rel branch: ip + insn->len + insn->rel (0 unless has_rel). */
uint64_t x86_branch_target(uint64_t ip, const x86_insn* insn);
/* Absolute target of a RIP-relative operand: ip + insn->len + insn->riprel (0 unless has_riprel). */
uint64_t x86_riprel_target(uint64_t ip, const x86_insn* insn);

#endif /* VMIE_X86DEC_H */