mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
Add a light x86-64 decoder; back code-xref with it
The reversing keystone: a length-disassembly decoder with control-flow and RIP-relative target extraction (x86dec.h), pure over a byte buffer - no vmie_mem, no cr3, no Windows. Instruction length is table-driven over the 1-byte / 0F / 0F38 / 0F3A maps, legacy + REX + VEX prefixes, ModRM/SIB, displacements and immediates (66 and REX.W operand-size aware). It reports the instruction length plus the branch target of a near rel call/jmp/jcc and the target of any RIP-relative memory operand. EVEX is a documented gap (decodes as length 0). This is the primitive the rest of the static-reversing layer builds on (function inventory, call graph, xref). gva_code_xref now brute-scans with the decoder instead of its own ad-hoc E8/E9 and REX.W-lea heuristic, which is removed - there is now one decoder in the tree. Because a brute scan can re-enter a prefixed instruction one byte in and decode a shorter aliased form with the same target, the scan drops a match that starts inside the extent of an already-accepted one; real, non-overlapping instructions are unaffected.
This commit is contained in:
+10
-8
@@ -70,14 +70,16 @@ int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, const sigset* s,
|
||||
sig_multi_hit* out, int max);
|
||||
|
||||
/* code-xref: every instruction in the X-regions of [lo,hi] whose rel32 operand
|
||||
* targets `target_va`. Heuristic decoder (NOT a full disassembler): recognizes
|
||||
* E8 call / E9 jmp (next_rip + disp32) and the RIP-relative ModRM forms
|
||||
* (mod=00, rm=101) of lea/mov (REX.W 8D / 8B) where target = next_rip +
|
||||
* (int32)disp. Records each matching instruction-start VA. The sweep forces
|
||||
* VR_X and carries a >=15-byte overlap (max x86 instruction length) so no
|
||||
* instruction is cut at a window seam. Writes up to `max` VAs to `out` (NULL to
|
||||
* count only) and returns the TOTAL number of matches, or -1 on bad input. */
|
||||
/* code-xref: every instruction in the X-regions of [lo,hi] whose near rel
|
||||
* branch or RIP-relative memory operand resolves to `target_va`. Brute-scans
|
||||
* each byte offset with the light x86-64 decoder (x86dec.h, NOT a full
|
||||
* disassembler): an E8/E9/EB/Jcc rel branch matches when next_rip + rel ==
|
||||
* target_va, and any RIP-relative operand (ModRM mod=00, rm=101) matches when
|
||||
* next_rip + disp32 == target_va (this covers lea/mov and any other rip-rel
|
||||
* form). Records each matching instruction-start VA. The sweep forces VR_X and
|
||||
* carries a >=15-byte overlap (max x86 instruction length) so no instruction is
|
||||
* cut at a window seam. Writes up to `max` VAs to `out` (NULL to count only) and
|
||||
* returns the TOTAL number of matches, or -1 on bad input. */
|
||||
int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint64_t target_va, uint64_t* out, int max);
|
||||
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
/* x86dec.h - light x86-64 length decoder + control-flow / RIP-relative targets.
|
||||
*
|
||||
* A PURE primitive: it decodes ONE 64-bit-mode instruction over a raw byte
|
||||
* buffer and reports its total length plus, when present, the near control-flow
|
||||
* branch displacement (rel8/rel32) and the RIP-relative memory displacement
|
||||
* (ModRM mod=00, rm=101). It does NOT decode registers or full operands - just
|
||||
* enough for length-disassembly and control-flow / memory target recovery
|
||||
* (function inventory, call graphs, xref databases, IAT and hook detection).
|
||||
*
|
||||
* It touches no I/O, no allocations, and no other module: <stdint.h>/<stddef.h>
|
||||
* only. Length-correct over legacy prefixes (66/67/F0/F2/F3/segment), REX, the
|
||||
* 1-byte / 0F two-byte / 0F38 / 0F3A opcode maps, ModRM+SIB, disp8/disp32, the
|
||||
* immediate sizes (with 66 and REX.W operand-size effects), and VEX (C4/C5).
|
||||
*
|
||||
* DOCUMENTED GAP: EVEX (0x62, AVX-512) is NOT decoded - it yields len=0
|
||||
* (undecodable). It is rare in ordinary user code; full EVEX support, if ever
|
||||
* needed, is a separate task. Any byte stream that does not decode, or that
|
||||
* would run past min(avail,15), also yields len=0.
|
||||
*/
|
||||
#ifndef VMIE_X86DEC_H
|
||||
#define VMIE_X86DEC_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* Control-flow class of a decoded instruction (stored in x86_insn.flow).
 * The indirect FF /2 and FF /4 forms are classified as call/jmp but carry no
 * branch displacement (has_rel=0). */
typedef enum {
|
||||
X86_OTHER, /* no tracked control-flow effect */
|
||||
X86_CALL, /* call: E8 rel32, or FF /2 indirect (has_rel=0) */
|
||||
X86_JMP, /* unconditional jump: E9 rel32 / EB rel8, or FF /4 indirect (has_rel=0) */
|
||||
X86_JCC, /* conditional jump: 70-7F rel8 / 0F 80-8F rel32 */
|
||||
X86_RET, /* return: C3 / C2 imm16 / CB / CA */
|
||||
X86_INT3 /* breakpoint: CC */
|
||||
} x86_flow;
|
||||
|
||||
/* Result of decoding one instruction, filled in by x86_decode(). The rel and
 * riprel displacements are relative to the end of the instruction: the
 * absolute target is ip + len + displacement (see x86_branch_target() and
 * x86_riprel_target()). */
typedef struct {
|
||||
uint8_t len; /* total length in bytes, 1..15; 0 = undecodable or would exceed min(avail,15) */
|
||||
x86_flow flow; /* control-flow class (see x86_flow) */
|
||||
int has_rel; /* 1: a rel8/rel32 branch displacement is present; 0 otherwise (e.g. indirect FF /2, FF /4) */
|
||||
int32_t rel; /* sign-extended branch displacement; meaningful only if has_rel */
|
||||
int has_riprel; /* 1: RIP-relative memory operand (ModRM mod=00,rm=101) */
|
||||
int32_t riprel; /* sign-extended RIP-relative disp32; meaningful only if has_riprel */
|
||||
} x86_insn;
|
||||
|
||||
/* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills
|
||||
* *out and returns the length (1..15), or 0 if the bytes do not decode or would
|
||||
* exceed min(avail,15). Length-correct over: legacy prefixes (66/67/F0/F2/F3/
|
||||
* seg), REX, 1-byte / 0F two-byte / 0F38 / 0F3A maps, ModRM+SIB, disp8/disp32,
|
||||
* imm8/16/32/64 (66 and REX.W operand-size effects), and VEX (C4/C5). EVEX
|
||||
* (0x62) is a documented gap: len=0. */
|
||||
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out);
|
||||
|
||||
/* Absolute target of a rel branch: ip + insn->len + insn->rel. Returns 0 when insn->has_rel is 0. */
|
||||
uint64_t x86_branch_target(uint64_t ip, const x86_insn* insn);
|
||||
/* Absolute target of a RIP-relative operand: ip + insn->len + insn->riprel. Returns 0 when insn->has_riprel is 0. */
|
||||
uint64_t x86_riprel_target(uint64_t ip, const x86_insn* insn);
|
||||
|
||||
#endif /* VMIE_X86DEC_H */
|
||||
Reference in New Issue
Block a user