mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 04:16:39 +03:00
Add a light x86-64 decoder; back code-xref with it
The reversing keystone: a length-disassembly decoder with control-flow and RIP-relative target extraction (x86dec.h), pure over a byte buffer - no vmie_mem, no cr3, no Windows. Table-driven length over the 1-byte / 0F / 0F38 / 0F3A maps, legacy + REX + VEX prefixes, ModRM/SIB, displacements and immediates (66 and REX.W operand-size aware). It reports the instruction length plus the rel and RIP-relative targets of near call/jmp/jcc and any RIP-relative memory operand. EVEX is a documented gap (decodes as length 0). This is the primitive the rest of the static-reversing layer builds on (function inventory, call graph, xref). gva_code_xref now brute-scans with the decoder instead of its own ad-hoc E8/E9 and REX.W-lea heuristic, which is removed - one decoder in the tree. Because a brute scan can re-enter a prefixed instruction one byte in and decode a shorter aliased form with the same target, the scan drops a match that starts inside the extent of an already-accepted one; real, non-overlapping instructions are unaffected.
This commit is contained in:
@@ -0,0 +1,454 @@
|
||||
/* x86dec.c - light x86-64 length decoder (see x86dec.h).
|
||||
*
|
||||
* Length-only + control-flow / RIP-relative target extraction over a raw byte
|
||||
* buffer. 64-bit mode. Declarative: per-opcode properties live in static const
|
||||
* tables (one per opcode map); the decode loop reads them, it does not branch
|
||||
* per opcode. No globals, no allocations - all state is on the stack.
|
||||
*
|
||||
* Boundary: includes ONLY x86dec.h (which pulls <stdint.h>/<stddef.h>). It
|
||||
* names no other module and no OS object.
|
||||
*
|
||||
* Per-opcode property byte (OP_*):
|
||||
* MODRM - opcode carries a ModRM byte (then maybe SIB / disp / RIP-rel)
|
||||
* immediate class (low 3 bits, OP_IMASK) - how many immediate bytes follow the operand
|
||||
* encoding, resolved against the effective operand size:
|
||||
* IM_0 none
|
||||
* IM_8 1 byte
|
||||
* IM_16 2 bytes
|
||||
* IM_32 4 bytes
|
||||
* IM_Z 2 if 66-prefix else 4 (word/dword immediate, never 8)
|
||||
* IM_V 8 if REX.W, else 2 if 66, else 4 (qword/word/dword immediate; mov r,imm)
|
||||
* IM_P far ptr: IM_Z + 2 (seg) - legacy, unused in 64-bit but length-safe
|
||||
* BAD - not decoded (e.g. EVEX prefix 0x62); forces len=0
|
||||
*/
|
||||
#include "x86dec.h"
|
||||
|
||||
/* ---- property-byte layout ------------------------------------------------ */
|
||||
|
||||
/* One property byte per opcode:
 *   bit 7     OP_MODRM  a ModRM byte follows the opcode
 *   bit 6     OP_BAD    the opcode is not decoded; the decoder reports len=0
 *   bits 2..0 OP_IMASK  immediate class, one of the IM_* values below
 */
#define OP_MODRM  0x80u
#define OP_BAD    0x40u
#define OP_IMASK  0x07u

/* Immediate classes stored in the OP_IMASK field. The numeric values are part
 * of the table encoding, so they are spelled out explicitly. */
enum {
    IM_0  = 0,   /* no immediate bytes                                   */
    IM_8  = 1,   /* one byte                                             */
    IM_16 = 2,   /* two bytes                                            */
    IM_32 = 3,   /* four bytes                                           */
    IM_Z  = 4,   /* word or dword, chosen by the 66 prefix; never qword  */
    IM_V  = 5,   /* qword with REX.W, else word with 66, else dword      */
    IM_P  = 6    /* far pointer: an IM_Z offset plus a 2-byte selector   */
};

/* Single-letter shorthands that keep the opcode tables readable. */
#define M OP_MODRM
#define B OP_BAD
|
||||
|
||||
/* ---- one-byte opcode map (no 0F prefix) ---------------------------------- *
|
||||
* Indexed by the opcode byte. Control-flow opcodes (E8/E9/EB/70-7F/C2/C3/CA/CB/
|
||||
* CC/FF) get their immediate size from this table too; their flow class is
|
||||
* resolved separately in classify_one(). */
|
||||
static const uint8_t OP1[256] = {
|
||||
/* 00 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* ADD; 06 PUSH ES,07 POP ES */
|
||||
/* 08 */ M, M, M, M, IM_8, IM_Z, 0, B, /* OR; 0F is escape (handled) */
|
||||
/* 10 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* ADC */
|
||||
/* 18 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* SBB */
|
||||
/* 20 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* AND (26 seg = prefix) */
|
||||
/* 28 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* SUB (2E seg = prefix) */
|
||||
/* 30 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* XOR (36 seg = prefix) */
|
||||
/* 38 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* CMP (3E seg = prefix) */
|
||||
/* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, /* REX (prefix, handled before) */
|
||||
/* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, /* REX */
|
||||
/* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, /* PUSH r */
|
||||
/* 58 */ 0, 0, 0, 0, 0, 0, 0, 0, /* POP r */
|
||||
/* 60 */ 0, 0, M, M, 0, 0, 0, 0, /* 62=EVEX (BAD, special-cased) */
|
||||
/* 68 */ IM_Z, M|IM_Z, IM_8, M|IM_8, 0, 0, 0, 0, /* PUSH imm; IMUL; INS/OUTS */
|
||||
/* 70 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* Jcc rel8 */
|
||||
/* 78 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* Jcc rel8 */
|
||||
/* 80 */ M|IM_8, M|IM_Z, M|IM_8, M|IM_8, M, M, M, M, /* grp1; TEST; XCHG */
|
||||
/* 88 */ M, M, M, M, M, M, M, M, /* MOV; LEA(8D); MOV sreg; POP */
|
||||
/* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, /* NOP/XCHG */
|
||||
/* 98 */ 0, 0, IM_P, 0, 0, 0, 0, 0, /* CWDE..; 9A far CALL (legacy) */
|
||||
/* A0 */ IM_V, IM_V, IM_V, IM_V, 0, 0, 0, 0, /* MOV moffs (addr-size=64) */
|
||||
/* A8 */ IM_8, IM_Z, 0, 0, 0, 0, 0, 0, /* TEST AL/eAX; STOS/LODS/SCAS */
|
||||
/* B0 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* MOV r8,imm8 */
|
||||
/* B8 */ IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, /* MOV r,imm(v) */
|
||||
/* C0 */ M|IM_8, M|IM_8, IM_16, 0, M, M, M|IM_8, M|IM_Z, /* shift; RET imm16/RET; LES/LDS; MOV imm */
|
||||
/* C8 */ IM_16|0, 0, IM_16, 0, 0, IM_8, 0, 0, /* C8 ENTER(imm16+imm8) special below; CA RET far imm16; CB; CD int imm8 */
|
||||
/* D0 */ M, M, M, M, IM_8, IM_8, 0, 0, /* shift grp2; AAM/AAD imm8; XLAT */
|
||||
/* D8 */ M, M, M, M, M, M, M, M, /* x87 ESC (ModRM) */
|
||||
/* E0 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* LOOP/JCXZ/IN/OUT rel8/imm8 */
|
||||
/* E8 */ IM_Z, IM_Z, IM_P, IM_8, 0, 0, 0, 0, /* E8 CALL rel32; E9 JMP rel32; EA far; EB JMP rel8 */
|
||||
/* F0 */ 0, B, 0, 0, 0, 0, M, M, /* F0 LOCK(prefix); F1 ICEBP=BAD; F6/F7 grp3 (imm via ext) */
|
||||
/* F8 */ 0, 0, 0, 0, 0, 0, M, M /* flags; FE/FF grp */
|
||||
};
|
||||
|
||||
/* ENTER (0xC8) takes imm16 + imm8 = 3 immediate bytes; the table cannot encode
|
||||
* that combination, so it is added explicitly in decode. */
|
||||
|
||||
/* ---- 0F two-byte opcode map ---------------------------------------------- *
|
||||
* Escapes 0F38 / 0F3A are handled before this table (opcodes 0x38 / 0x3A). The
|
||||
* 0F 80..8F range is Jcc rel32 (immediate IM_Z) - flow set in classify. */
|
||||
static const uint8_t OP2[256] = {
|
||||
/* 00 */ M, M, M, M, B, 0, 0, 0, /* grp6/grp7; 04 invalid */
|
||||
/* 08 */ 0, 0, B, B, B, M, B, B, /* WBINVD; UD2; prefetch(0D) */
|
||||
/* 10 */ M, M, M, M, M, M, M, M, /* SSE mov* */
|
||||
/* 18 */ M, M, M, M, M, M, M, M, /* hint-NOP / prefetch (ModRM) */
|
||||
/* 20 */ M, M, M, M, B, B, B, B, /* MOV cr/dr (ModRM) */
|
||||
/* 28 */ M, M, M, M, M, M, M, M, /* SSE */
|
||||
/* 30 */ 0, 0, 0, 0, 0, 0, B, 0, /* WRMSR/RDTSC/RDMSR/RDPMC */
|
||||
/* 38 */ B, B, B, B, B, B, B, B, /* escapes (38/3A done earlier) */
|
||||
/* 40 */ M, M, M, M, M, M, M, M, /* CMOVcc */
|
||||
/* 48 */ M, M, M, M, M, M, M, M, /* CMOVcc */
|
||||
/* 50 */ M, M, M, M, M, M, M, M, /* SSE */
|
||||
/* 58 */ M, M, M, M, M, M, M, M, /* SSE */
|
||||
/* 60 */ M, M, M, M, M, M, M, M, /* MMX/SSE */
|
||||
/* 68 */ M, M, M, M, M, M, M, M, /* MMX/SSE; 6E/6F mov */
|
||||
/* 70 */ M|IM_8, M, M, M, M, M, M, 0, /* PSHUF imm8; grp shifts; EMMS */
|
||||
/* 78 */ M, M, B, B, M, M, M, M, /* VMREAD/WRITE; SSE */
|
||||
/* 80 */ IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, /* Jcc rel32 */
|
||||
/* 88 */ IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, /* Jcc rel32 */
|
||||
/* 90 */ M, M, M, M, M, M, M, M, /* SETcc */
|
||||
/* 98 */ M, M, M, M, M, M, M, M, /* SETcc */
|
||||
/* A0 */ 0, 0, 0, M, M|IM_8, M, B, B, /* PUSH/POP FS; CPUID; BT; SHLD */
|
||||
/* A8 */ 0, 0, 0, M, M|IM_8, M, B, M, /* PUSH/POP GS; RSM; BTS; SHRD; IMUL */
|
||||
/* B0 */ M, M, M, M, M, M, M, M, /* CMPXCHG; LSS/LFS/LGS; MOVZX */
|
||||
/* B8 */ M, B, M|IM_8, M, M, M, M, M, /* POPCNT; grp8 BT imm8; BSF/BSR */
|
||||
/* C0 */ M, M, M|IM_8, M, M|IM_8, M|IM_8, M|IM_8, M, /* XADD; CMPPS imm8; pinsr/extr; grp9 */
|
||||
/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0, /* BSWAP */
|
||||
/* D0 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
||||
/* D8 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
||||
/* E0 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
||||
/* E8 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
||||
/* F0 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
||||
/* F8 */ M, M, M, M, M, M, M, B /* SSE/MMX; FF invalid */
|
||||
};
|
||||
|
||||
/* ---- decode state -------------------------------------------------------- */
|
||||
|
||||
/* Prefix-derived operand-size state gathered while scanning an instruction.
 * Field order matters: the decoder initialises it positionally as { 0, 0 }. */
typedef struct {
    int rex_w;   /* non-zero when a REX prefix with W=1 was seen (64-bit operand size) */
    int pfx66;   /* non-zero when a 0x66 operand-size override was seen                */
} dstate;
|
||||
|
||||
/* immediate byte count for an immediate class under the effective op-size. */
|
||||
static size_t imm_bytes(unsigned imclass, const dstate* st) {
|
||||
switch (imclass) {
|
||||
case IM_0: { return 0; }
|
||||
case IM_8: { return 1; }
|
||||
case IM_16: { return 2; }
|
||||
case IM_32: { return 4; }
|
||||
case IM_Z: { return st->pfx66 ? 2u : 4u; }
|
||||
case IM_V: { return st->rex_w ? 8u : (st->pfx66 ? 2u : 4u); }
|
||||
case IM_P: { return (st->pfx66 ? 2u : 4u) + 2u; }
|
||||
default: { return 0; }
|
||||
}
|
||||
}
|
||||
|
||||
/* Measure a ModRM byte together with the SIB byte and displacement it implies.
 *
 *   p        points at the ModRM byte
 *   avail    number of readable bytes starting at p
 *   has_rip  optional; set to 1 when the operand is RIP-relative
 *            (mod=00, rm=101). Never written otherwise, so the caller must
 *            pre-initialise it.
 *   rip      optional; receives the signed disp32 of a RIP-relative operand
 *
 * Returns the byte count of ModRM + SIB + displacement, or 0 when the encoding
 * would extend beyond `avail`. The 67 address-size prefix is not consulted:
 * it does not alter the ModRM/SIB/disp32 layout in long mode. */
__attribute__((hot))
static size_t decode_modrm(const uint8_t* p, size_t avail,
                           int* has_rip, int32_t* rip) {
    if (avail == 0) { return 0; }

    const unsigned mod = (unsigned)(p[0] >> 6) & 3u;
    const unsigned rm  = (unsigned)p[0] & 7u;
    size_t used = 1;                    /* ModRM itself */

    /* mod=11 addresses a register: nothing follows the ModRM byte. */
    if (mod == 3u) { return used; }

    /* rm=100 announces a SIB byte. With mod=00 a SIB base field of 101 means
     * "no base register, disp32 follows". */
    int sib_wants_disp32 = 0;
    if (rm == 4u) {
        if (avail < 2) { return 0; }
        sib_wants_disp32 = (mod == 0u) && ((p[1] & 7u) == 5u);
        used = 2;
    }

    /* mod=00 rm=101: RIP-relative, a little-endian signed disp32. */
    if (mod == 0u && rm == 5u) {
        if (avail < used + 4) { return 0; }
        const uint32_t raw = (uint32_t)p[used]
                           | ((uint32_t)p[used + 1] << 8)
                           | ((uint32_t)p[used + 2] << 16)
                           | ((uint32_t)p[used + 3] << 24);
        if (has_rip) { *has_rip = 1; }
        if (rip)     { *rip = (int32_t)raw; }
        return used + 4;
    }

    /* Displacement width by mod: 00 -> none (or the SIB disp32 case),
     * 01 -> disp8, 10 -> disp32. */
    size_t disp_len;
    switch (mod) {
        case 0u:  { disp_len = sib_wants_disp32 ? 4u : 0u; break; }
        case 1u:  { disp_len = 1u; break; }
        default:  { disp_len = 4u; break; }
    }

    if (avail < used + disp_len) { return 0; }
    return used + disp_len;
}
|
||||
|
||||
/* ---- VEX (C4 3-byte / C5 2-byte) ----------------------------------------- *
|
||||
* VEX-encoded instructions carry a ModRM and (rarely) an imm8. We treat them
|
||||
* as: [VEX bytes] [opcode] [ModRM(+SIB+disp)] [imm8?]. The map (mmmmm in C4)
|
||||
* selects 0F / 0F38 / 0F3A; we always assume a ModRM follows the opcode (true
|
||||
* for the VEX-encoded SSE/AVX space) and add the imm8 for the 0F3A map (which
|
||||
* is the imm8 map) - this is length-correct for the common AVX encodings the
|
||||
* decoder needs to step over. VEX.W does not change this length. */
|
||||
__attribute__((hot))
|
||||
static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
size_t n;
|
||||
unsigned mmmmm;
|
||||
|
||||
if (code[0] == 0xC5u) { /* 2-byte VEX */
|
||||
if (avail < 2) { return 0; }
|
||||
n = 2; /* C5 + byte1 */
|
||||
mmmmm = 1u; /* implied 0F map */
|
||||
} else { /* 0xC4: 3-byte VEX */
|
||||
if (avail < 3) { return 0; }
|
||||
mmmmm = code[1] & 0x1fu; /* 1=0F, 2=0F38, 3=0F3A */
|
||||
n = 3; /* C4 + byte1 + byte2 */
|
||||
}
|
||||
|
||||
if (avail < n + 1) { return 0; } /* need an opcode byte */
|
||||
n += 1; /* the opcode byte */
|
||||
|
||||
int rip_present = 0;
|
||||
int32_t rip = 0;
|
||||
const size_t m = decode_modrm(code + n, avail - n, &rip_present, &rip);
|
||||
if (m == 0) { return 0; }
|
||||
n += m;
|
||||
|
||||
/* 0F3A map is the imm8 map: every opcode carries a trailing imm8. */
|
||||
if (mmmmm == 3u) {
|
||||
if (avail < n + 1) { return 0; }
|
||||
n += 1;
|
||||
}
|
||||
|
||||
if (n < 1 || n > 15) { return 0; }
|
||||
out->len = (uint8_t)n;
|
||||
out->flow = X86_OTHER;
|
||||
out->has_rel = 0;
|
||||
out->rel = 0;
|
||||
out->has_riprel = rip_present;
|
||||
out->riprel = rip;
|
||||
return (int)n;
|
||||
}
|
||||
|
||||
/* ---- branch displacement read -------------------------------------------- *
|
||||
* Read a rel8 (bytes==1) or rel32 (bytes==4) branch displacement at `p`,
|
||||
* sign-extend into out->rel, and mark has_rel. */
|
||||
static void read_rel(const uint8_t* p, size_t bytes, x86_insn* out) {
|
||||
if (bytes == 1) {
|
||||
out->rel = (int32_t)(int8_t)p[0];
|
||||
} else { /* 4 bytes */
|
||||
out->rel = (int32_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8) |
|
||||
((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24));
|
||||
}
|
||||
out->has_rel = 1;
|
||||
}
|
||||
|
||||
/* ---- main decode --------------------------------------------------------- */
|
||||
|
||||
__attribute__((hot))
|
||||
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
|
||||
if (!code || !out || avail == 0) {
|
||||
if (out) {
|
||||
out->len = 0; out->flow = X86_OTHER;
|
||||
out->has_rel = 0; out->rel = 0;
|
||||
out->has_riprel = 0; out->riprel = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* zero the result; on any failure we leave len=0. */
|
||||
out->len = 0; out->flow = X86_OTHER;
|
||||
out->has_rel = 0; out->rel = 0;
|
||||
out->has_riprel = 0; out->riprel = 0;
|
||||
|
||||
const size_t cap = avail < 15u ? avail : 15u; /* never decode past 15 */
|
||||
size_t n = 0;
|
||||
dstate st = { 0, 0 };
|
||||
|
||||
/* ---- legacy prefixes (66/67/F0/F2/F3/segment) ---- */
|
||||
for (; n < cap; n++) {
|
||||
const uint8_t b = code[n];
|
||||
if (b == 0x66u) { st.pfx66 = 1; continue; }
|
||||
if (b == 0x67u) { continue; } /* address-size: no length effect */
|
||||
if (b == 0xF0u || b == 0xF2u || b == 0xF3u) { continue; } /* lock/rep */
|
||||
if (b == 0x2Eu || b == 0x36u || b == 0x3Eu || b == 0x26u || /* seg */
|
||||
b == 0x64u || b == 0x65u) { continue; }
|
||||
break;
|
||||
}
|
||||
if (n >= cap) { return 0; } /* prefixes only / out of bytes */
|
||||
|
||||
/* ---- VEX (C4/C5) must precede REX and is mutually exclusive with it ---- */
|
||||
if (code[n] == 0xC5u || code[n] == 0xC4u) {
|
||||
const int r = decode_vex(code + n, cap - n, out);
|
||||
if (r == 0) { out->len = 0; return 0; }
|
||||
const size_t total = n + (size_t)r;
|
||||
if (total < 1 || total > 15 || total > avail) { out->len = 0; return 0; }
|
||||
out->len = (uint8_t)total;
|
||||
return (int)total;
|
||||
}
|
||||
|
||||
/* ---- EVEX (0x62) is a documented gap: undecodable ---- */
|
||||
if (code[n] == 0x62u) { return 0; }
|
||||
|
||||
/* ---- REX prefix (0x40..0x4F): must be the last prefix ---- */
|
||||
if ((code[n] & 0xF0u) == 0x40u) {
|
||||
st.rex_w = (code[n] & 0x08u) ? 1 : 0;
|
||||
n += 1;
|
||||
if (n >= cap) { return 0; }
|
||||
}
|
||||
|
||||
/* ---- opcode: 1-byte, or 0F (two-byte / 0F38 / 0F3A) ---- */
|
||||
uint8_t op = code[n];
|
||||
const uint8_t* tbl = OP1;
|
||||
int two_byte = 0;
|
||||
n += 1;
|
||||
|
||||
if (op == 0x0Fu) {
|
||||
if (n >= cap) { return 0; }
|
||||
op = code[n];
|
||||
n += 1;
|
||||
two_byte = 1;
|
||||
if (op == 0x38u) { /* 0F38 map: all ModRM, no imm */
|
||||
if (n >= cap) { return 0; }
|
||||
op = code[n];
|
||||
n += 1;
|
||||
/* every 0F38 opcode has a ModRM and no immediate. */
|
||||
int rip_present = 0; int32_t rip = 0;
|
||||
const size_t m = decode_modrm(code + n, cap - n,
|
||||
&rip_present, &rip);
|
||||
if (m == 0) { return 0; }
|
||||
n += m;
|
||||
out->has_riprel = rip_present; out->riprel = rip;
|
||||
if (n < 1 || n > 15 || n > avail) { return 0; }
|
||||
out->len = (uint8_t)n;
|
||||
return (int)n;
|
||||
}
|
||||
if (op == 0x3Au) { /* 0F3A map: ModRM + imm8 */
|
||||
if (n >= cap) { return 0; }
|
||||
op = code[n];
|
||||
n += 1;
|
||||
int rip_present = 0; int32_t rip = 0;
|
||||
const size_t m = decode_modrm(code + n, cap - n,
|
||||
&rip_present, &rip);
|
||||
if (m == 0) { return 0; }
|
||||
n += m;
|
||||
if (n >= cap) { return 0; } /* trailing imm8 */
|
||||
n += 1;
|
||||
out->has_riprel = rip_present; out->riprel = rip;
|
||||
if (n < 1 || n > 15 || n > avail) { return 0; }
|
||||
out->len = (uint8_t)n;
|
||||
return (int)n;
|
||||
}
|
||||
tbl = OP2;
|
||||
}
|
||||
|
||||
const uint8_t prop = tbl[op];
|
||||
if (prop & OP_BAD) { return 0; }
|
||||
|
||||
/* ---- ModRM (+SIB+disp), if any ---- */
|
||||
int have_modrm = 0;
|
||||
uint8_t modrm = 0; /* captured for grp5 classify */
|
||||
if (prop & OP_MODRM) {
|
||||
if (n >= cap) { return 0; }
|
||||
modrm = code[n];
|
||||
have_modrm = 1;
|
||||
int rip_present = 0; int32_t rip = 0;
|
||||
const size_t m = decode_modrm(code + n, cap - n, &rip_present, &rip);
|
||||
if (m == 0) { return 0; }
|
||||
n += m;
|
||||
out->has_riprel = rip_present; out->riprel = rip;
|
||||
}
|
||||
|
||||
/* ---- immediate ---- */
|
||||
size_t im = imm_bytes(prop & OP_IMASK, &st);
|
||||
|
||||
/* opcodes the table cannot fully encode (combined immediates) ---- */
|
||||
if (!two_byte && op == 0xC8u) { im = 3; } /* ENTER imm16, imm8 */
|
||||
|
||||
/* grp3 F6/F7: only the TEST sub-opcode (reg field /0 or /1) takes an
|
||||
* immediate (imm8 for F6, immZ for F7). NOT/NEG/MUL/IMUL/DIV/IDIV (/2../7)
|
||||
* take none. The table marks F6/F7 as ModRM-only; resolve the immediate
|
||||
* here from the captured ModRM reg field. */
|
||||
if (!two_byte && (op == 0xF6u || op == 0xF7u) && have_modrm) {
|
||||
const unsigned reg = (modrm >> 3) & 7u;
|
||||
if (reg <= 1u) { im = (op == 0xF6u) ? 1u : (st.pfx66 ? 2u : 4u); }
|
||||
}
|
||||
|
||||
if (im) {
|
||||
if (cap < n + im) { return 0; }
|
||||
n += im;
|
||||
}
|
||||
|
||||
if (n < 1 || n > 15 || n > avail) { return 0; }
|
||||
|
||||
/* ---- control-flow classification (near forms) ----
|
||||
* `im` is the immediate byte count; a rel branch's displacement is the
|
||||
* last `im` bytes of the instruction (code + n - im). */
|
||||
if (!two_byte) {
|
||||
switch (op) {
|
||||
case 0xE8u: { /* CALL rel32 */
|
||||
out->flow = X86_CALL;
|
||||
read_rel(code + (n - im), im, out);
|
||||
break;
|
||||
}
|
||||
case 0xE9u: case 0xEBu: { /* JMP rel32 / rel8 */
|
||||
out->flow = X86_JMP;
|
||||
read_rel(code + (n - im), im, out);
|
||||
break;
|
||||
}
|
||||
case 0xC2u: case 0xC3u: case 0xCAu: case 0xCBu: { /* RET forms */
|
||||
out->flow = X86_RET;
|
||||
break;
|
||||
}
|
||||
case 0xCCu: { /* INT3 */
|
||||
out->flow = X86_INT3;
|
||||
break;
|
||||
}
|
||||
case 0xFFu: { /* grp5: /2 CALL, /4 JMP (ind) */
|
||||
if (have_modrm) {
|
||||
const unsigned reg = (modrm >> 3) & 7u;
|
||||
if (reg == 2u || reg == 3u) { out->flow = X86_CALL; }
|
||||
else if (reg == 4u || reg == 5u) { out->flow = X86_JMP; }
|
||||
}
|
||||
break; /* indirect: has_rel stays 0 */
|
||||
}
|
||||
default: { /* 70..7F Jcc rel8 */
|
||||
if (op >= 0x70u && op <= 0x7Fu) {
|
||||
out->flow = X86_JCC;
|
||||
read_rel(code + (n - im), im, out);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (op >= 0x80u && op <= 0x8Fu) { /* 0F 80..8F Jcc rel32 */
|
||||
out->flow = X86_JCC;
|
||||
read_rel(code + (n - im), im, out);
|
||||
}
|
||||
|
||||
out->len = (uint8_t)n;
|
||||
return (int)n;
|
||||
}
|
||||
|
||||
uint64_t x86_branch_target(uint64_t ip, const x86_insn* insn) {
|
||||
if (!insn || !insn->has_rel) { return 0; }
|
||||
return ip + (uint64_t)insn->len + (uint64_t)(int64_t)insn->rel;
|
||||
}
|
||||
|
||||
uint64_t x86_riprel_target(uint64_t ip, const x86_insn* insn) {
|
||||
if (!insn || !insn->has_riprel) { return 0; }
|
||||
return ip + (uint64_t)insn->len + (uint64_t)(int64_t)insn->riprel;
|
||||
}
|
||||
Reference in New Issue
Block a user