Add a light x86-64 decoder; back code-xref with it

The reversing keystone: a length-disassembly decoder with control-flow and
RIP-relative target extraction (x86dec.h), pure over a byte buffer - no vmie_mem,
no cr3, no Windows. Table-driven length over the 1-byte / 0F / 0F38 / 0F3A maps,
legacy + REX + VEX prefixes, ModRM/SIB, displacements and immediates (66 and
REX.W operand-size aware). It reports the instruction length plus the rel and
RIP-relative targets of near call/jmp/jcc and any RIP-relative memory operand.
EVEX is a documented gap (decodes as length 0). This is the primitive the rest
of the static-reversing layer builds on (function inventory, call graph, xref).

gva_code_xref now brute-scans with the decoder instead of its own ad-hoc E8/E9
and REX.W-lea heuristic, which is removed - one decoder in the tree. Because a
brute scan can re-enter a prefixed instruction one byte in and decode a shorter
aliased form with the same target, the scan drops a match that starts inside the
extent of an already-accepted one; real, non-overlapping instructions are
unaffected.
This commit is contained in:
2026-06-16 18:11:29 +03:00
parent c36ffe295d
commit 3199fbf258
5 changed files with 560 additions and 52 deletions
+1
View File
@@ -21,6 +21,7 @@ add_library(vmie STATIC
src/handlers/sigscan.c
src/handlers/sigset.c
src/handlers/codescan.c
src/handlers/x86dec.c
src/handlers/pmap.c
src/handlers/snapdiff.c)
target_include_directories(vmie
+10 -8
View File
@@ -70,14 +70,16 @@ int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
uint32_t prot_any, const sigset* s,
sig_multi_hit* out, int max);
/* code-xref: every instruction in the X-regions of [lo,hi] whose rel32 operand
* targets `target_va`. Heuristic decoder (NOT a full disassembler): recognizes
* E8 call / E9 jmp (next_rip + disp32) and the RIP-relative ModRM forms
* (mod=00, rm=101) of lea/mov (REX.W 8D / 8B) where target = next_rip +
* (int32)disp. Records each matching instruction-start VA. The sweep forces
* VR_X and carries a >=15-byte overlap (max x86 instruction length) so no
* instruction is cut at a window seam. Writes up to `max` VAs to `out` (NULL to
* count only) and returns the TOTAL number of matches, or -1 on bad input. */
/* code-xref: every instruction in the X-regions of [lo,hi] whose near rel
* branch or RIP-relative memory operand resolves to `target_va`. Brute-scans
* each byte offset with the light x86-64 decoder (x86dec.h, NOT a full
* disassembler): an E8/E9/EB/Jcc rel branch matches when next_rip + rel ==
* target_va, and any RIP-relative operand (ModRM mod=00, rm=101) matches when
* next_rip + disp32 == target_va (this covers lea/mov and any other rip-rel
* form). Records each matching instruction-start VA. The sweep forces VR_X and
* carries a >=15-byte overlap (max x86 instruction length) so no instruction is
* cut at a window seam. Writes up to `max` VAs to `out` (NULL to count only) and
* returns the TOTAL number of matches, or -1 on bad input. */
int gva_code_xref(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
uint64_t target_va, uint64_t* out, int max);
+56
View File
@@ -0,0 +1,56 @@
/* x86dec.h - light x86-64 length decoder + control-flow / RIP-relative targets.
*
* A PURE primitive: it decodes ONE 64-bit-mode instruction over a raw byte
* buffer and reports its total length plus, when present, the near control-flow
* branch displacement (rel8/rel32) and the RIP-relative memory displacement
* (ModRM mod=00, rm=101). It does NOT decode registers or full operands - just
* enough for length-disassembly and control-flow / memory target recovery
* (function inventory, call graphs, xref databases, IAT and hook detection).
*
* It touches no I/O, no allocations, and no other module: <stdint.h>/<stddef.h>
* only. Length-correct over legacy prefixes (66/67/F0/F2/F3/segment), REX, the
* 1-byte / 0F two-byte / 0F38 / 0F3A opcode maps, ModRM+SIB, disp8/disp32, the
* immediate sizes (with 66 and REX.W operand-size effects), and VEX (C4/C5).
*
* DOCUMENTED GAP: EVEX (0x62, AVX-512) is NOT decoded - it yields len=0
* (undecodable). It is rare in ordinary user code; full EVEX support, if ever
* needed, is a separate task. Any byte stream that does not decode, or that
* would run past min(avail,15), also yields len=0.
*/
#ifndef VMIE_X86DEC_H
#define VMIE_X86DEC_H
#include <stdint.h>
#include <stddef.h>
/* Control-flow class of a decoded instruction. Only the forms listed below are
 * recognised; everything else (including the rel8 LOOP/JRCXZ opcodes E0-E3)
 * is reported as X86_OTHER. */
typedef enum {
X86_OTHER, /* no tracked control-flow effect */
X86_CALL, /* E8 rel32 (has_rel=1), or FF /2 and FF /3 indirect (has_rel=0) */
X86_JMP, /* E9 rel32 / EB rel8 (has_rel=1), or FF /4 and FF /5 indirect (has_rel=0) */
X86_JCC, /* 70-7F rel8 / 0F 80-8F rel32 */
X86_RET, /* C3 / C2 imm16 / CB / CA */
X86_INT3 /* CC */
} x86_flow;
/* Result of decoding one instruction. When len == 0 the decode failed and the
 * remaining fields must not be relied upon. */
typedef struct {
uint8_t len; /* total length 1..15 (prefixes included); 0 = undecodable / exceeds avail */
x86_flow flow; /* control-flow class */
int has_rel; /* 1: a rel8/rel32 branch displacement is present */
int32_t rel; /* sign-extended branch displacement (if has_rel), relative to the next instruction */
int has_riprel; /* 1: RIP-relative memory operand (ModRM mod=00,rm=101) */
int32_t riprel; /* sign-extended RIP-relative disp32 (if has_riprel), relative to the next instruction */
} x86_insn;
/* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills
 * *out and returns the length (1..15), or 0 if the bytes do not decode or would
 * exceed min(avail,15). Length-correct over: legacy prefixes (66/67/F0/F2/F3/
 * seg), REX, 1-byte / 0F two-byte / 0F38 / 0F3A maps, ModRM+SIB, disp8/disp32,
 * imm8/16/32/64 (66 and REX.W operand-size effects), and VEX (C4/C5). EVEX
 * (0x62) is a documented gap: len=0.
 * A NULL `code`, a NULL `out`, or avail == 0 also returns 0 (and, when `out`
 * is non-NULL, leaves it zeroed). */
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out);
/* Absolute target of a rel branch: ip + insn->len + insn->rel (0 unless has_rel).
 * `ip` is the address of the instruction's first byte; a NULL insn yields 0. */
uint64_t x86_branch_target(uint64_t ip, const x86_insn* insn);
/* Absolute target of a RIP-relative operand: ip + insn->len + insn->riprel (0 unless has_riprel).
 * `ip` is the address of the instruction's first byte; a NULL insn yields 0. */
uint64_t x86_riprel_target(uint64_t ip, const x86_insn* insn);
#endif /* VMIE_X86DEC_H */
+38 -43
View File
@@ -1,14 +1,15 @@
/* codescan.c - windowed multi-pattern scan + heuristic rel32 code-xref.
/* codescan.c - windowed multi-pattern scan + decoder-driven code-xref.
*
* Both bridges stream guest memory through gva_sweep and report guest VAs:
* gva_sig_scan_multi - drives a compiled sigset over each window, seam-deduped
* (overlap = longest pattern len - 1).
* gva_code_xref - heuristic decode of the rel32 instruction forms in
* X-regions; records instruction starts whose computed
* target equals target_va. Overlap >= 15 (max x86 insn
* length) keeps an instruction whole across a seam.
* gva_code_xref - brute-scans X-regions with the light x86-64 decoder
* (x86dec.h); records instruction starts whose rel branch
* or RIP-relative operand resolves to target_va. Overlap
* >= 15 (max x86 insn length) keeps an instruction whole
* across a seam.
*
* Handler boundary: only memmodel.h / scan.h / sigscan.h.
* Handler boundary: only memmodel.h / scan.h / sigscan.h / x86dec.h.
*/
#include <stdint.h>
#include <stddef.h>
@@ -16,6 +17,7 @@
#include "memmodel.h"
#include "sigscan.h"
#include "scan.h"
#include "x86dec.h"
/* x86-64 maximum instruction length; the code-xref sweep overlap. A decoded
* instruction may be up to this long, so a window must carry this many leading
@@ -65,37 +67,28 @@ int gva_sig_scan_multi(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
return c.n;
}
/* ---- heuristic rel32 code-xref ------------------------------------------- *
* Decode just enough to recover a rel32 target. Two recognized shapes:
* E8/E9 disp32 (call/jmp) : start+5 + disp
* REX.W 8D|8B modrm(00,*,101) disp32 (lea/mov rip) : start+7 + disp
* The lea/mov form REQUIRES the REX.W prefix (0x48..0x4F with W set), per the
* 64-bit operand RIP-relative encoding; a bare 8D/8B is not accepted (it would
* also let the decoder re-recognize the same instruction one byte past its REX
* prefix). Returns the encoded length (>=5) and writes the target via *target,
* or 0 if `p[0..avail)` is not one of the forms. */
__attribute__((hot))
static size_t decode_rel32(const uint8_t* p, size_t avail,
uint64_t start_va, uint64_t* target) {
if (avail >= 5 && (p[0] == 0xE8 || p[0] == 0xE9)) {
int32_t disp; memcpy(&disp, p + 1, 4);
*target = start_va + 5 + (int64_t)disp;
return 5;
}
/* REX.W prefix (0x48..0x4F: bit 3 = W), then 8D/8B with RIP-rel ModRM */
if (avail >= 7 && (p[0] & 0xF8) == 0x48 && (p[1] == 0x8D || p[1] == 0x8B)) {
const uint8_t modrm = p[2];
if ((modrm & 0xC0) == 0x00 && (modrm & 0x07) == 0x05) { /* mod=00 rm=101 */
int32_t disp; memcpy(&disp, p + 3, 4);
*target = start_va + 7 + (int64_t)disp; /* rex op modrm disp32 */
return 7;
}
}
return 0;
}
/* ---- decoder-driven code-xref -------------------------------------------- *
* Brute-scan every byte offset of the window with the light x86-64 decoder. On
* a decoded instruction (len>0) whose rel branch (x86_branch_target) or RIP-
* relative operand (x86_riprel_target) resolves to the requested target VA,
* record the instruction's start VA. We step +1 regardless of decode length
* because function starts are not known here; the decoder rejects junk at
* non-instruction offsets far better than the old ad-hoc heuristic.
*
* Two distinct de-duplications, both intrinsic to a +1 brute-scan:
* - SEAM: a match that STARTS in the trailing overlap of a non-last window is
* dropped; the next window re-presents that instruction whole in its leading
* overlap (overlap >= X86_MAX_INSN).
* - INTERIOR: a match whose start lies inside the byte extent of an already-
* accepted same-target match is dropped. Real instructions never overlap, so
* such an interior hit is always a decode artifact of stepping into a prefix
* (e.g. the bare `8D 05 disp32` lea re-found one byte past a `48 8D 05 disp32`
* REX.W lea: same next_rip and disp => same target). `cover` tracks the VA
* just past the last accepted match; offsets ascend, so the outermost (real)
* instruction is always seen first. */
/* Per-sweep accumulator handed to xref_sweep_cb through its `u` pointer. */
struct xref_cb {
uint64_t target; /* VA that each decoded branch / RIP-relative target is compared against */
uint64_t cover; /* VA just past the last accepted match */
uint64_t* out; int max, n; /* out: result array (may be NULL); max: slots in out; n: matches counted so far (may exceed max) */
};
@@ -103,19 +96,21 @@ __attribute__((hot))
static int xref_sweep_cb(void* u, const uint8_t* data, size_t len,
uint64_t base, size_t ov, int last) {
struct xref_cb* c = u;
/* Decode at every byte offset (heuristic, overlapping). A match that STARTS
* in the trailing overlap of a non-last window is dropped: the next window
* re-presents that instruction whole in its leading overlap. */
const size_t limit = last ? len : (len > ov ? len - ov : 0);
for (size_t off = 0; off < len; off++) {
if (!last && off >= limit) { break; }
uint64_t tgt = 0;
const size_t ilen = decode_rel32(data + off, len - off, base + off, &tgt);
if (ilen && tgt == c->target) {
if (c->out && c->n < c->max) { c->out[c->n] = base + off; }
const uint64_t va = base + off;
x86_insn in;
const int ilen = x86_decode(data + off, len - off, &in);
if (ilen <= 0) { continue; }
const int hit = (in.has_rel && x86_branch_target(va, &in) == c->target) ||
(in.has_riprel && x86_riprel_target(va, &in) == c->target);
if (!hit) { continue; }
if (va < c->cover) { continue; } /* interior alias of a prior hit */
c->cover = va + (uint64_t)ilen;
if (c->out && c->n < c->max) { c->out[c->n] = va; }
c->n++;
}
}
return 0;
}
+454
View File
@@ -0,0 +1,454 @@
/* x86dec.c - light x86-64 length decoder (see x86dec.h).
*
* Length-only + control-flow / RIP-relative target extraction over a raw byte
* buffer. 64-bit mode. Mostly declarative: per-opcode properties for the
* one-byte and 0F maps live in static const tables (OP1 / OP2) that the decoder
* reads; the 0F38 / 0F3A maps follow a fixed rule, and the few opcodes a table
* byte cannot express (e.g. ENTER, grp3 F6/F7) are special-cased in x86_decode.
* No mutable globals, no allocations - all state is on the stack.
*
* Boundary: includes ONLY x86dec.h (which pulls <stdint.h>/<stddef.h>). It
* names no other module and no OS object.
*
* Per-opcode property byte (OP_*):
* MODRM - opcode carries a ModRM byte (then maybe SIB / disp / RIP-rel)
* immediate class (low 3 bits, OP_IMASK) - how many immediate bytes follow the operand
* encoding, resolved against the effective operand size:
* IM_0 none
* IM_8 1 byte
* IM_16 2 bytes
* IM_32 4 bytes
* IM_Z 2 if 66-prefix else 4 (word/dword immediate, never 8)
* IM_V 2 if 66, 8 if REX.W, else 4 (word/dword/qword immediate; mov r,imm)
* IM_P far ptr: IM_Z + 2 (seg) - legacy, unused in 64-bit but length-safe
* BAD - not decoded (e.g. EVEX prefix 0x62); forces len=0
*/
#include "x86dec.h"
/* ---- property-byte layout ------------------------------------------------ *
 * Each opcode-table entry is one byte: two flag bits plus an immediate class
 * in the low three bits. */
#define OP_MODRM 0x80u /* opcode has a ModRM byte */
#define OP_BAD 0x40u /* undecodable opcode (forces len=0) */
#define OP_IMASK 0x07u /* immediate-class field (low 3 bits) */
/* Immediate classes. Values run 0..6, so every class fits inside OP_IMASK. */
enum {
IM_0 = 0, /* no immediate */
IM_8, /* imm8 */
IM_16, /* imm16 */
IM_32, /* imm32 */
IM_Z, /* imm16 if 66 else imm32 (never qword) */
IM_V, /* imm16 if 66, imm64 if REX.W, else imm32 */
IM_P /* far pointer: IM_Z + 2 (legacy; length only) */
};
/* Short aliases so the 8-entry table rows below stay readable. */
#define M OP_MODRM
#define B OP_BAD
/* ---- one-byte opcode map (no 0F prefix) ---------------------------------- *
 * Indexed by the opcode byte. Each entry is OP_MODRM / OP_BAD plus an IM_*
 * immediate class. Control-flow opcodes (E8/E9/EB/70-7F/C2/C3/CA/CB/CC/FF) get
 * their immediate size from this table too; their flow class is resolved by
 * the decoder, not here.
 *
 * Bytes that are prefixes in 64-bit mode (legacy prefixes, REX 40-4F) keep a
 * harmless 0 entry. The three bytes that are ALWAYS an encoding prefix in
 * 64-bit mode and never an opcode - 62 (EVEX), C4 and C5 (VEX) - are marked
 * BAD: the decoder intercepts them when they lead the instruction, so the
 * table is only consulted for them after a REX prefix, and REX followed by
 * VEX/EVEX is #UD on real hardware.
 *
 * Known table limitation: A0-A3 (MOV moffs) are sized with IM_V, i.e. by
 * OPERAND size; the architectural moffs width follows the ADDRESS size, so the
 * table value alone is only exact under REX.W. */
static const uint8_t OP1[256] = {
    /* 00 */ M, M, M, M, IM_8, IM_Z, 0, 0,                   /* ADD; 06 PUSH ES,07 POP ES */
    /* 08 */ M, M, M, M, IM_8, IM_Z, 0, B,                   /* OR; 0F is escape (handled) */
    /* 10 */ M, M, M, M, IM_8, IM_Z, 0, 0,                   /* ADC */
    /* 18 */ M, M, M, M, IM_8, IM_Z, 0, 0,                   /* SBB */
    /* 20 */ M, M, M, M, IM_8, IM_Z, 0, 0,                   /* AND (26 seg = prefix) */
    /* 28 */ M, M, M, M, IM_8, IM_Z, 0, 0,                   /* SUB (2E seg = prefix) */
    /* 30 */ M, M, M, M, IM_8, IM_Z, 0, 0,                   /* XOR (36 seg = prefix) */
    /* 38 */ M, M, M, M, IM_8, IM_Z, 0, 0,                   /* CMP (3E seg = prefix) */
    /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0,                         /* REX (prefix, handled before) */
    /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0,                         /* REX */
    /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0,                         /* PUSH r */
    /* 58 */ 0, 0, 0, 0, 0, 0, 0, 0,                         /* POP r */
    /* 60 */ 0, 0, B, M, 0, 0, 0, 0,                         /* 62=EVEX (BAD); 63 MOVSXD; 64-67 prefixes */
    /* 68 */ IM_Z, M|IM_Z, IM_8, M|IM_8, 0, 0, 0, 0,         /* PUSH imm; IMUL; INS/OUTS */
    /* 70 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* Jcc rel8 */
    /* 78 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* Jcc rel8 */
    /* 80 */ M|IM_8, M|IM_Z, M|IM_8, M|IM_8, M, M, M, M,     /* grp1; TEST; XCHG */
    /* 88 */ M, M, M, M, M, M, M, M,                         /* MOV; LEA(8D); MOV sreg; POP */
    /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0,                         /* NOP/XCHG */
    /* 98 */ 0, 0, IM_P, 0, 0, 0, 0, 0,                      /* CWDE..; 9A far CALL (legacy) */
    /* A0 */ IM_V, IM_V, IM_V, IM_V, 0, 0, 0, 0,             /* MOV moffs (see limitation above); MOVS/CMPS */
    /* A8 */ IM_8, IM_Z, 0, 0, 0, 0, 0, 0,                   /* TEST AL/eAX; STOS/LODS/SCAS */
    /* B0 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* MOV r8,imm8 */
    /* B8 */ IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, /* MOV r,imm(v) */
    /* C0 */ M|IM_8, M|IM_8, IM_16, 0, B, B, M|IM_8, M|IM_Z, /* shift; RET imm16/RET; C4/C5=VEX (BAD); MOV imm */
    /* C8 */ IM_16, 0, IM_16, 0, 0, IM_8, 0, 0,              /* C8 ENTER (imm16+imm8, decoder adds the imm8); CA RET far imm16; CB; CD int imm8 */
    /* D0 */ M, M, M, M, IM_8, IM_8, 0, 0,                   /* shift grp2; AAM/AAD imm8; XLAT */
    /* D8 */ M, M, M, M, M, M, M, M,                         /* x87 ESC (ModRM) */
    /* E0 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* LOOP/JCXZ/IN/OUT rel8/imm8 */
    /* E8 */ IM_Z, IM_Z, IM_P, IM_8, 0, 0, 0, 0,             /* E8 CALL rel32; E9 JMP rel32; EA far; EB JMP rel8 */
    /* F0 */ 0, B, 0, 0, 0, 0, M, M,                         /* F0 LOCK(prefix); F1 ICEBP=BAD; F6/F7 grp3 (imm via ext) */
    /* F8 */ 0, 0, 0, 0, 0, 0, M, M                          /* flags; FE/FF grp */
};
/* ENTER (0xC8) takes imm16 + imm8 = 3 immediate bytes; the table cannot encode
* that combination, so it is added explicitly in decode. */
/* ---- 0F two-byte opcode map ---------------------------------------------- *
 * Indexed by the byte that follows 0F. Escapes 0F38 / 0F3A are handled before
 * this table (opcodes 0x38 / 0x3A). The 0F 80..8F range is Jcc rel32
 * (immediate IM_Z); its flow class is set by the decoder.
 *
 * Entries worth calling out:
 *   0F 71 / 72 / 73 - the MMX/SSE shift-by-immediate groups (PSRLW/PSRAW/PSLLW,
 *                     PSRLD/..., PSRLQ/PSRLDQ/...) carry ModRM AND an imm8.
 *   0F AE           - grp15 (FXSAVE/FXRSTOR/LDMXCSR/STMXCSR/XSAVE/LFENCE/
 *                     MFENCE/SFENCE/CLFLUSH): ModRM, no immediate. */
static const uint8_t OP2[256] = {
    /* 00 */ M, M, M, M, B, 0, 0, 0,                         /* grp6/grp7; 04 invalid */
    /* 08 */ 0, 0, B, B, B, M, B, B,                         /* WBINVD; UD2; prefetch(0D) */
    /* 10 */ M, M, M, M, M, M, M, M,                         /* SSE mov* */
    /* 18 */ M, M, M, M, M, M, M, M,                         /* hint-NOP / prefetch (ModRM) */
    /* 20 */ M, M, M, M, B, B, B, B,                         /* MOV cr/dr (ModRM) */
    /* 28 */ M, M, M, M, M, M, M, M,                         /* SSE */
    /* 30 */ 0, 0, 0, 0, 0, 0, B, 0,                         /* WRMSR/RDTSC/RDMSR/RDPMC */
    /* 38 */ B, B, B, B, B, B, B, B,                         /* escapes (38/3A done earlier) */
    /* 40 */ M, M, M, M, M, M, M, M,                         /* CMOVcc */
    /* 48 */ M, M, M, M, M, M, M, M,                         /* CMOVcc */
    /* 50 */ M, M, M, M, M, M, M, M,                         /* SSE */
    /* 58 */ M, M, M, M, M, M, M, M,                         /* SSE */
    /* 60 */ M, M, M, M, M, M, M, M,                         /* MMX/SSE */
    /* 68 */ M, M, M, M, M, M, M, M,                         /* MMX/SSE; 6E/6F mov */
    /* 70 */ M|IM_8, M|IM_8, M|IM_8, M|IM_8, M, M, M, 0,     /* PSHUF imm8; grp12/13/14 shifts imm8; PCMPEQ; EMMS */
    /* 78 */ M, M, B, B, M, M, M, M,                         /* VMREAD/WRITE; SSE */
    /* 80 */ IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, /* Jcc rel32 */
    /* 88 */ IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, /* Jcc rel32 */
    /* 90 */ M, M, M, M, M, M, M, M,                         /* SETcc */
    /* 98 */ M, M, M, M, M, M, M, M,                         /* SETcc */
    /* A0 */ 0, 0, 0, M, M|IM_8, M, B, B,                    /* PUSH/POP FS; CPUID; BT; SHLD */
    /* A8 */ 0, 0, 0, M, M|IM_8, M, M, M,                    /* PUSH/POP GS; RSM; BTS; SHRD; grp15 (AE); IMUL */
    /* B0 */ M, M, M, M, M, M, M, M,                         /* CMPXCHG; LSS/LFS/LGS; MOVZX */
    /* B8 */ M, B, M|IM_8, M, M, M, M, M,                    /* POPCNT; grp8 BT imm8; BSF/BSR */
    /* C0 */ M, M, M|IM_8, M, M|IM_8, M|IM_8, M|IM_8, M,     /* XADD; CMPPS imm8; pinsr/extr; grp9 */
    /* C8 */ 0, 0, 0, 0, 0, 0, 0, 0,                         /* BSWAP */
    /* D0 */ M, M, M, M, M, M, M, M,                         /* SSE/MMX */
    /* D8 */ M, M, M, M, M, M, M, M,                         /* SSE/MMX */
    /* E0 */ M, M, M, M, M, M, M, M,                         /* SSE/MMX */
    /* E8 */ M, M, M, M, M, M, M, M,                         /* SSE/MMX */
    /* F0 */ M, M, M, M, M, M, M, M,                         /* SSE/MMX */
    /* F8 */ M, M, M, M, M, M, M, B                          /* SSE/MMX; FF invalid */
};
/* ---- decode state -------------------------------------------------------- *
 * Prefix facts gathered before the opcode that influence immediate sizing;
 * filled in by x86_decode and read by imm_bytes. */
typedef struct {
int rex_w; /* REX.W set (1) - selects 64-bit operand size */
int pfx66; /* 0x66 operand-size override present */
} dstate;
/* Immediate byte count for immediate class `imclass` (an IM_* value) under the
 * effective operand size described by `st`.
 *
 * Operand-size resolution in 64-bit mode: REX.W selects 64-bit and takes
 * precedence over a 66 prefix, so the 16-bit forms apply only when 66 is
 * present AND REX.W is clear (e.g. `66 48 81 /0` still carries an imm32).
 *
 * Returns 0 for IM_0 and for any value that is not a known class. */
static size_t imm_bytes(unsigned imclass, const dstate* st) {
    /* 1 when the effective operand size is 16 bits */
    const int op16 = st->pfx66 && !st->rex_w;
    switch (imclass) {
        case IM_0:  { return 0; }
        case IM_8:  { return 1; }
        case IM_16: { return 2; }
        case IM_32: { return 4; }
        case IM_Z:  { return op16 ? 2u : 4u; }                 /* never qword */
        case IM_V:  { return st->rex_w ? 8u : (op16 ? 2u : 4u); }
        case IM_P:  { return (op16 ? 2u : 4u) + 2u; }          /* offset + 16-bit selector */
        default:    { return 0; }
    }
}
/* Measure a ModRM byte together with whatever SIB byte and displacement it
 * implies. `p` points at the ModRM byte and `avail` is the number of readable
 * bytes starting there.
 *
 * Returns the byte count consumed (ModRM + optional SIB + optional disp), or 0
 * when that would run past `avail`. For the 64-bit RIP-relative form (mod=00,
 * rm=101) it additionally stores 1 through `has_rip` and the little-endian,
 * sign-extended disp32 through `rip`; either pointer may be NULL. For every
 * other form the two outputs are left untouched, so callers pre-clear them.
 *
 * The address-size prefix (67) is not consulted: it does not change the
 * ModRM/SIB/disp byte layout in long mode. */
__attribute__((hot))
static size_t decode_modrm(const uint8_t* p, size_t avail,
                           int* has_rip, int32_t* rip) {
    if (avail == 0) { return 0; }

    const unsigned mod = (unsigned)(p[0] >> 6);
    const unsigned rm  = (unsigned)(p[0] & 0x07u);

    /* register-direct operand: the ModRM byte is all there is */
    if (mod == 3u) { return 1; }

    /* RIP-relative: ModRM followed directly by disp32, never a SIB */
    if (mod == 0u && rm == 5u) {
        if (avail < 5) { return 0; }
        const uint32_t raw = (uint32_t)p[1]
                           | ((uint32_t)p[2] << 8)
                           | ((uint32_t)p[3] << 16)
                           | ((uint32_t)p[4] << 24);
        if (has_rip) { *has_rip = 1; }
        if (rip)     { *rip = (int32_t)raw; }
        return 5;
    }

    /* displacement width implied by mod alone: 00 none, 01 disp8, 10 disp32 */
    size_t disp_len = (mod == 1u) ? 1u : ((mod == 2u) ? 4u : 0u);
    size_t used = 1;                          /* the ModRM byte */

    if (rm == 4u) {                           /* a SIB byte follows */
        if (avail < 2) { return 0; }
        /* SIB.base=101 under mod=00 means "no base register, disp32" */
        if (mod == 0u && (p[1] & 0x07u) == 5u) { disp_len = 4; }
        used = 2;
    }

    if (avail < used + disp_len) { return 0; }
    return used + disp_len;
}
/* ---- VEX (C4 3-byte / C5 2-byte) ----------------------------------------- *
 * Length-decode one VEX-encoded instruction. `code` points at the C4/C5 byte
 * and `avail` is the number of readable bytes from there. Layout:
 *   [VEX bytes] [opcode] [ModRM(+SIB+disp)]? [imm8]?
 * The opcode map is implied (0F) for C5 and taken from the mmmmm field for C4
 * (1=0F, 2=0F38, 3=0F3A); any other map value is treated as undecodable.
 *
 * Per-map rules applied here:
 *   - map 0F   : every opcode has a ModRM EXCEPT 77 (VZEROUPPER / VZEROALL),
 *                which ends right after the opcode byte. Opcodes 70-73, C2 and
 *                C4-C6 (shuffles, shift-by-immediate groups, compares,
 *                PINSRW/PEXTRW, SHUFPS/PD) carry a trailing imm8.
 *   - map 0F38 : ModRM, no immediate.
 *   - map 0F3A : ModRM plus a trailing imm8.
 * VEX.W / VEX.L / VEX.pp do not change the length.
 *
 * On success fills *out (flow = X86_OTHER, no rel, RIP-relative operand
 * reported if present) and returns the length measured from the VEX byte.
 * Returns 0 without touching *out if the bytes run out, the map is reserved,
 * or the length would exceed 15. */
__attribute__((hot))
static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
    size_t n;
    unsigned map;
    if (avail < 1) { return 0; }
    if (code[0] == 0xC5u) {                       /* 2-byte VEX */
        if (avail < 2) { return 0; }
        n = 2;                                    /* C5 + byte1 */
        map = 1u;                                 /* implied 0F map */
    } else {                                      /* 0xC4: 3-byte VEX */
        if (avail < 3) { return 0; }
        map = code[1] & 0x1fu;                    /* 1=0F, 2=0F38, 3=0F3A */
        n = 3;                                    /* C4 + byte1 + byte2 */
    }
    if (map < 1u || map > 3u) { return 0; }       /* reserved / unsupported map */

    if (avail < n + 1) { return 0; }              /* need an opcode byte */
    const uint8_t op = code[n];
    n += 1;

    int has_modrm = 1;
    int has_imm8 = (map == 3u);                   /* 0F3A is the imm8 map */
    if (map == 1u) {
        if (op == 0x77u) {
            has_modrm = 0;                        /* VZEROUPPER / VZEROALL */
        } else if ((op >= 0x70u && op <= 0x73u) || op == 0xC2u ||
                   (op >= 0xC4u && op <= 0xC6u)) {
            has_imm8 = 1;
        }
    }

    int rip_present = 0;
    int32_t rip = 0;
    if (has_modrm) {
        const size_t m = decode_modrm(code + n, avail - n, &rip_present, &rip);
        if (m == 0) { return 0; }
        n += m;
    }
    if (has_imm8) {
        if (avail < n + 1) { return 0; }
        n += 1;
    }
    if (n > 15) { return 0; }

    out->len = (uint8_t)n;
    out->flow = X86_OTHER;
    out->has_rel = 0;
    out->rel = 0;
    out->has_riprel = rip_present;
    out->riprel = rip;
    return (int)n;
}
/* ---- branch displacement read -------------------------------------------- *
 * Read a little-endian branch displacement of `bytes` bytes at `p`, sign-extend
 * it into out->rel, and set out->has_rel.
 *
 * Exactly `bytes` bytes are read, never more: the caller has only validated
 * that many bytes as part of the instruction. Supported widths are 1 (rel8),
 * 2 (a 16-bit field, which is what a branch immediate sized as IM_Z becomes
 * under a 66 prefix) and 4 (rel32). Any other width is not a displacement;
 * *out is then left unchanged, so has_rel stays as the caller set it. */
static void read_rel(const uint8_t* p, size_t bytes, x86_insn* out) {
    switch (bytes) {
        case 1: {
            out->rel = (int32_t)(int8_t)p[0];
            break;
        }
        case 2: {
            const uint16_t raw = (uint16_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8));
            out->rel = (int32_t)(int16_t)raw;
            break;
        }
        case 4: {
            out->rel = (int32_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8) |
                                 ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24));
            break;
        }
        default: {
            return;                       /* unknown width: report no rel */
        }
    }
    out->has_rel = 1;
}
/* ---- main decode --------------------------------------------------------- *
 * Decode ONE 64-bit-mode instruction at `code`, reading at most
 * min(avail, 15) bytes.
 *
 *   code  - first byte of the instruction (prefixes included)
 *   avail - readable bytes at `code`
 *   out   - result; always reset first, fully populated only on success
 *
 * Returns the instruction length (1..15), or 0 when the input is NULL/empty,
 * the bytes do not decode (OP_BAD opcode, EVEX, prefixes only) or the
 * instruction would run past min(avail, 15). On a 0 return *out is left in the
 * cleared state (len=0, X86_OTHER, has_rel=0, has_riprel=0).
 *
 * Sizing rules the opcode tables cannot express are applied here:
 *   - REX.W overrides a 66 prefix for operand size (imm16 only when 66 is
 *     present and REX.W is clear).
 *   - Near CALL/JMP/Jcc (E8, E9, 0F 80..8F) always carry a rel32 in 64-bit
 *     mode; a 66 prefix (as in the TLS `66 66 48 E8 rel32` call sequence) does
 *     not shrink the displacement.
 *   - MOV moffs (A0..A3) carries an ADDRESS-sized offset: 8 bytes, or 4 with
 *     a 67 prefix - independent of operand size.
 *   - ENTER (C8) carries imm16 + imm8.
 *   - grp3 F6/F7 carry an immediate only for the TEST forms (/0, /1).
 */

/* Reset *out to the "undecodable" result. */
static void x86_insn_clear(x86_insn* out) {
    out->len = 0; out->flow = X86_OTHER;
    out->has_rel = 0; out->rel = 0;
    out->has_riprel = 0; out->riprel = 0;
}

__attribute__((hot))
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
    if (out) { x86_insn_clear(out); }
    if (!code || !out || avail == 0) { return 0; }

    const size_t cap = avail < 15u ? avail : 15u;  /* never decode past 15 */
    size_t n = 0;
    dstate st = { 0, 0 };
    int pfx67 = 0;                                 /* address-size override seen */

    /* ---- legacy prefixes (66/67/F0/F2/F3/segment) ---- */
    for (; n < cap; n++) {
        const uint8_t b = code[n];
        if (b == 0x66u) { st.pfx66 = 1; continue; }
        if (b == 0x67u) { pfx67 = 1; continue; }
        if (b == 0xF0u || b == 0xF2u || b == 0xF3u) { continue; }  /* lock/rep */
        if (b == 0x2Eu || b == 0x36u || b == 0x3Eu || b == 0x26u ||  /* seg */
            b == 0x64u || b == 0x65u) { continue; }
        break;
    }
    if (n >= cap) { return 0; }             /* prefixes only / out of bytes */

    /* ---- VEX (C4/C5): takes the place of REX, so it is tested before the
     * REX check; the VEX decoder fills *out relative to the VEX byte ---- */
    if (code[n] == 0xC5u || code[n] == 0xC4u) {
        const int r = decode_vex(code + n, cap - n, out);
        const size_t total = n + (size_t)(r > 0 ? r : 0);
        if (r <= 0 || total > cap) { x86_insn_clear(out); return 0; }
        out->len = (uint8_t)total;          /* add the legacy-prefix bytes */
        return (int)total;
    }

    /* ---- EVEX (0x62) is a documented gap: undecodable ---- */
    if (code[n] == 0x62u) { return 0; }

    /* ---- REX prefix (0x40..0x4F): must be the last prefix ---- */
    if ((code[n] & 0xF0u) == 0x40u) {
        st.rex_w = (code[n] & 0x08u) ? 1 : 0;
        n += 1;
        if (n >= cap) { return 0; }
    }
    /* REX.W wins over 66: from here on pfx66 means "16-bit operand size". */
    if (st.rex_w) { st.pfx66 = 0; }

    /* ---- opcode: 1-byte, or 0F (two-byte / 0F38 / 0F3A) ---- */
    uint8_t op = code[n];
    const uint8_t* tbl = OP1;
    int two_byte = 0;
    int rip_present = 0;                    /* committed to *out on success */
    int32_t rip = 0;
    n += 1;
    if (op == 0x0Fu) {
        if (n >= cap) { return 0; }
        op = code[n];
        n += 1;
        two_byte = 1;
        if (op == 0x38u || op == 0x3Au) {
            /* Three-byte maps follow a fixed rule: 0F38 = ModRM, no imm;
             * 0F3A = ModRM + imm8. The third opcode byte itself is not
             * needed for the length. */
            const size_t tail = (op == 0x3Au) ? 1u : 0u;
            if (n >= cap) { return 0; }
            n += 1;                         /* third opcode byte */
            const size_t m = decode_modrm(code + n, cap - n,
                                          &rip_present, &rip);
            if (m == 0) { return 0; }
            n += m;
            if (cap < n + tail) { return 0; }
            n += tail;
            out->has_riprel = rip_present; out->riprel = rip;
            out->len = (uint8_t)n;
            return (int)n;
        }
        tbl = OP2;
    }

    const uint8_t prop = tbl[op];
    if (prop & OP_BAD) { return 0; }

    /* ---- ModRM (+SIB+disp), if any ---- */
    int have_modrm = 0;
    uint8_t modrm = 0;                      /* captured for grp3 / grp5 */
    if (prop & OP_MODRM) {
        if (n >= cap) { return 0; }
        modrm = code[n];
        have_modrm = 1;
        const size_t m = decode_modrm(code + n, cap - n, &rip_present, &rip);
        if (m == 0) { return 0; }
        n += m;
    }

    /* ---- immediate ---- */
    const int near_rel32 = two_byte ? (op >= 0x80u && op <= 0x8Fu)
                                    : (op == 0xE8u || op == 0xE9u);
    size_t im = imm_bytes(prop & OP_IMASK, &st);
    if (near_rel32) {
        im = 4;                             /* rel32 regardless of 66 */
    } else if (!two_byte) {
        if (op == 0xC8u) {
            im = 3;                         /* ENTER imm16, imm8 */
        } else if (op >= 0xA0u && op <= 0xA3u) {
            im = pfx67 ? 4u : 8u;           /* moffs follows address size */
        } else if ((op == 0xF6u || op == 0xF7u) && have_modrm) {
            /* grp3: only TEST (/0, /1) has an immediate; NOT/NEG/MUL/IMUL/
             * DIV/IDIV (/2../7) have none. */
            const unsigned reg = (modrm >> 3) & 7u;
            if (reg <= 1u) { im = (op == 0xF6u) ? 1u : (st.pfx66 ? 2u : 4u); }
        }
    }
    if (cap < n + im) { return 0; }
    n += im;
    /* Invariant: 1 <= n <= cap <= min(avail, 15). */

    /* ---- control-flow classification (near forms) ----
     * `im` is the immediate byte count; a rel branch's displacement is the
     * last `im` bytes of the instruction (code + n - im). For every branch
     * handled below `im` is 1 or 4. */
    if (two_byte) {
        if (near_rel32) {                   /* 0F 80..8F Jcc rel32 */
            out->flow = X86_JCC;
            read_rel(code + (n - im), im, out);
        }
    } else {
        switch (op) {
            case 0xE8u: {                                   /* CALL rel32 */
                out->flow = X86_CALL;
                read_rel(code + (n - im), im, out);
                break;
            }
            case 0xE9u: case 0xEBu: {                       /* JMP rel32 / rel8 */
                out->flow = X86_JMP;
                read_rel(code + (n - im), im, out);
                break;
            }
            case 0xC2u: case 0xC3u: case 0xCAu: case 0xCBu: {  /* RET forms */
                out->flow = X86_RET;
                break;
            }
            case 0xCCu: {                                   /* INT3 */
                out->flow = X86_INT3;
                break;
            }
            case 0xFFu: {                    /* grp5: /2,/3 CALL; /4,/5 JMP */
                if (have_modrm) {
                    const unsigned reg = (modrm >> 3) & 7u;
                    if (reg == 2u || reg == 3u) { out->flow = X86_CALL; }
                    else if (reg == 4u || reg == 5u) { out->flow = X86_JMP; }
                }
                break;                       /* indirect: has_rel stays 0 */
            }
            default: {                                      /* 70..7F Jcc rel8 */
                if (op >= 0x70u && op <= 0x7Fu) {
                    out->flow = X86_JCC;
                    read_rel(code + (n - im), im, out);
                }
                break;
            }
        }
    }

    out->has_riprel = rip_present; out->riprel = rip;
    out->len = (uint8_t)n;
    return (int)n;
}
/* Absolute destination of a decoded rel8/rel32 branch.
 * `ip` is the address of the instruction's first byte; the displacement is
 * applied to the address of the NEXT instruction (ip + len). Arithmetic is
 * modulo 2^64. Returns 0 when `insn` is NULL or carries no branch
 * displacement. */
uint64_t x86_branch_target(uint64_t ip, const x86_insn* insn) {
    if (insn == NULL || insn->has_rel == 0) { return 0; }
    const uint64_t next_ip = ip + (uint64_t)insn->len;
    const int64_t  delta   = (int64_t)insn->rel;     /* sign-extend to 64 bits */
    return next_ip + (uint64_t)delta;
}
/* Absolute address named by a decoded RIP-relative memory operand.
 * `ip` is the address of the instruction's first byte; the disp32 is applied
 * to the address of the NEXT instruction (ip + len). Arithmetic is modulo
 * 2^64. Returns 0 when `insn` is NULL or has no RIP-relative operand. */
uint64_t x86_riprel_target(uint64_t ip, const x86_insn* insn) {
    if (insn == NULL || insn->has_riprel == 0) { return 0; }
    const uint64_t next_ip = ip + (uint64_t)insn->len;
    const int64_t  delta   = (int64_t)insn->riprel;  /* sign-extend to 64 bits */
    return next_ip + (uint64_t)delta;
}