mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
c4419964aa
Three reversing capabilities on the win32 surface plus a pure sig-gen handler:
- vmie_win32_functions enumerates a module's functions from the exception
directory (.pdata RUNTIME_FUNCTION), folding unwind chain continuations into
their primary - authoritative non-leaf boundaries, not prologue heuristics.
- vmie_win32_exports resolves the export table to {name, rva, ordinal,
forwarded}: named functions with no PDB or network. vmie_win32_pdb_ref pulls
the CodeView/RSDS {guid, age, pdb} from the debug directory - the symbol-server
key for any module (full PDB parsing stays out of scope).
- sig_generate (siggen.h) builds a unique masked signature for a code span,
wildcarding the rel/RIP-relative displacement bytes the x86 decoder locates and
growing until it matches the scope exactly once - the dual of sigscan.
The decoder now also reports disp_off/disp_len so a caller can mask the floating
bytes. The MZ/PE walk gains one shared data-directory accessor and one shared
CodeView/RSDS parser; the kernel bootstrap is moved onto both, removing its
private copies - one PE parser in the tree.
474 lines
22 KiB
C
474 lines
22 KiB
C
/* x86dec.c - light x86-64 length decoder (see x86dec.h).
 *
 * Length-only + control-flow / RIP-relative target extraction over a raw byte
 * buffer. 64-bit mode. Declarative: per-opcode properties live in static const
 * tables (one per opcode map); the decode loop reads them and special-cases
 * only the few opcodes a table entry cannot describe. No globals, no
 * allocations - all state is on the stack.
 *
 * Boundary: includes ONLY x86dec.h (which pulls <stdint.h>/<stddef.h>). It
 * names no other module and no OS object.
 *
 * Per-opcode property byte (OP_*):
 *   MODRM  - opcode carries a ModRM byte (then maybe SIB / disp / RIP-rel)
 *   immediate class (low 3 bits, OP_IMASK) - how many immediate bytes follow
 *            the operand encoding, resolved against the effective operand size:
 *              IM_0   none
 *              IM_8   1 byte
 *              IM_16  2 bytes
 *              IM_32  4 bytes
 *              IM_Z   2 under a 16-bit operand size (66 prefix), else 4
 *                     (word/dword immediate, never 8)
 *              IM_V   2 if 66, 8 if REX.W, else 4 (word/dword/qword immediate;
 *                     mov r,imm)
 *              IM_P   far ptr: IM_Z + 2 (seg) - legacy, unused in 64-bit but
 *                     length-safe
 *   BAD    - not decoded (e.g. EVEX prefix 0x62); forces len=0
 */
#include "x86dec.h"
|
|
|
|
/* ---- property-byte layout ------------------------------------------------ */

/* Flag bits and field mask of a per-opcode property byte. */
#define OP_MODRM (1u << 7)   /* 0x80: a ModRM byte follows the opcode     */
#define OP_BAD   (1u << 6)   /* 0x40: opcode is not decoded (forces len=0) */
#define OP_IMASK 7u          /* 0x07: immediate-class field (low 3 bits)   */

/* Immediate classes; the value is stored in the OP_IMASK bits. */
enum {
    IM_0  = 0,   /* no immediate                                  */
    IM_8  = 1,   /* imm8                                          */
    IM_16 = 2,   /* imm16                                         */
    IM_32 = 3,   /* imm32                                         */
    IM_Z  = 4,   /* imm16 if 66 else imm32 (never a qword)        */
    IM_V  = 5,   /* imm16 if 66, imm64 if REX.W, else imm32       */
    IM_P  = 6    /* far pointer: IM_Z + 2 (legacy; length only)   */
};

/* One-letter shorthands that keep the opcode tables below readable. */
#define M OP_MODRM
#define B OP_BAD

/* ---- one-byte opcode map (no 0F prefix) ---------------------------------- *
|
|
* Indexed by the opcode byte. Control-flow opcodes (E8/E9/EB/70-7F/C2/C3/CA/CB/
|
|
* CC/FF) get their immediate size from this table too; their flow class is
|
|
* resolved separately in classify_one(). */
|
|
static const uint8_t OP1[256] = {
|
|
/* 00 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* ADD; 06 PUSH ES,07 POP ES */
|
|
/* 08 */ M, M, M, M, IM_8, IM_Z, 0, B, /* OR; 0F is escape (handled) */
|
|
/* 10 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* ADC */
|
|
/* 18 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* SBB */
|
|
/* 20 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* AND (26 seg = prefix) */
|
|
/* 28 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* SUB (2E seg = prefix) */
|
|
/* 30 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* XOR (36 seg = prefix) */
|
|
/* 38 */ M, M, M, M, IM_8, IM_Z, 0, 0, /* CMP (3E seg = prefix) */
|
|
/* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, /* REX (prefix, handled before) */
|
|
/* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, /* REX */
|
|
/* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, /* PUSH r */
|
|
/* 58 */ 0, 0, 0, 0, 0, 0, 0, 0, /* POP r */
|
|
/* 60 */ 0, 0, M, M, 0, 0, 0, 0, /* 62=EVEX (BAD, special-cased) */
|
|
/* 68 */ IM_Z, M|IM_Z, IM_8, M|IM_8, 0, 0, 0, 0, /* PUSH imm; IMUL; INS/OUTS */
|
|
/* 70 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* Jcc rel8 */
|
|
/* 78 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* Jcc rel8 */
|
|
/* 80 */ M|IM_8, M|IM_Z, M|IM_8, M|IM_8, M, M, M, M, /* grp1; TEST; XCHG */
|
|
/* 88 */ M, M, M, M, M, M, M, M, /* MOV; LEA(8D); MOV sreg; POP */
|
|
/* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, /* NOP/XCHG */
|
|
/* 98 */ 0, 0, IM_P, 0, 0, 0, 0, 0, /* CWDE..; 9A far CALL (legacy) */
|
|
/* A0 */ IM_V, IM_V, IM_V, IM_V, 0, 0, 0, 0, /* MOV moffs (addr-size=64) */
|
|
/* A8 */ IM_8, IM_Z, 0, 0, 0, 0, 0, 0, /* TEST AL/eAX; STOS/LODS/SCAS */
|
|
/* B0 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* MOV r8,imm8 */
|
|
/* B8 */ IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, IM_V, /* MOV r,imm(v) */
|
|
/* C0 */ M|IM_8, M|IM_8, IM_16, 0, M, M, M|IM_8, M|IM_Z, /* shift; RET imm16/RET; LES/LDS; MOV imm */
|
|
/* C8 */ IM_16|0, 0, IM_16, 0, 0, IM_8, 0, 0, /* C8 ENTER(imm16+imm8) special below; CA RET far imm16; CB; CD int imm8 */
|
|
/* D0 */ M, M, M, M, IM_8, IM_8, 0, 0, /* shift grp2; AAM/AAD imm8; XLAT */
|
|
/* D8 */ M, M, M, M, M, M, M, M, /* x87 ESC (ModRM) */
|
|
/* E0 */ IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, IM_8, /* LOOP/JCXZ/IN/OUT rel8/imm8 */
|
|
/* E8 */ IM_Z, IM_Z, IM_P, IM_8, 0, 0, 0, 0, /* E8 CALL rel32; E9 JMP rel32; EA far; EB JMP rel8 */
|
|
/* F0 */ 0, B, 0, 0, 0, 0, M, M, /* F0 LOCK(prefix); F1 ICEBP=BAD; F6/F7 grp3 (imm via ext) */
|
|
/* F8 */ 0, 0, 0, 0, 0, 0, M, M /* flags; FE/FF grp */
|
|
};
|
|
|
|
/* ENTER (0xC8) takes imm16 + imm8 = 3 immediate bytes. A single immediate
 * class cannot express that combination, so x86_decode() overrides the
 * immediate size for this opcode explicitly. */

/* ---- 0F two-byte opcode map ---------------------------------------------- *
|
|
* Escapes 0F38 / 0F3A are handled before this table (opcodes 0x38 / 0x3A). The
|
|
* 0F 80..8F range is Jcc rel32 (immediate IM_Z) - flow set in classify. */
|
|
static const uint8_t OP2[256] = {
|
|
/* 00 */ M, M, M, M, B, 0, 0, 0, /* grp6/grp7; 04 invalid */
|
|
/* 08 */ 0, 0, B, B, B, M, B, B, /* WBINVD; UD2; prefetch(0D) */
|
|
/* 10 */ M, M, M, M, M, M, M, M, /* SSE mov* */
|
|
/* 18 */ M, M, M, M, M, M, M, M, /* hint-NOP / prefetch (ModRM) */
|
|
/* 20 */ M, M, M, M, B, B, B, B, /* MOV cr/dr (ModRM) */
|
|
/* 28 */ M, M, M, M, M, M, M, M, /* SSE */
|
|
/* 30 */ 0, 0, 0, 0, 0, 0, B, 0, /* WRMSR/RDTSC/RDMSR/RDPMC */
|
|
/* 38 */ B, B, B, B, B, B, B, B, /* escapes (38/3A done earlier) */
|
|
/* 40 */ M, M, M, M, M, M, M, M, /* CMOVcc */
|
|
/* 48 */ M, M, M, M, M, M, M, M, /* CMOVcc */
|
|
/* 50 */ M, M, M, M, M, M, M, M, /* SSE */
|
|
/* 58 */ M, M, M, M, M, M, M, M, /* SSE */
|
|
/* 60 */ M, M, M, M, M, M, M, M, /* MMX/SSE */
|
|
/* 68 */ M, M, M, M, M, M, M, M, /* MMX/SSE; 6E/6F mov */
|
|
/* 70 */ M|IM_8, M, M, M, M, M, M, 0, /* PSHUF imm8; grp shifts; EMMS */
|
|
/* 78 */ M, M, B, B, M, M, M, M, /* VMREAD/WRITE; SSE */
|
|
/* 80 */ IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, /* Jcc rel32 */
|
|
/* 88 */ IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, IM_Z, /* Jcc rel32 */
|
|
/* 90 */ M, M, M, M, M, M, M, M, /* SETcc */
|
|
/* 98 */ M, M, M, M, M, M, M, M, /* SETcc */
|
|
/* A0 */ 0, 0, 0, M, M|IM_8, M, B, B, /* PUSH/POP FS; CPUID; BT; SHLD */
|
|
/* A8 */ 0, 0, 0, M, M|IM_8, M, B, M, /* PUSH/POP GS; RSM; BTS; SHRD; IMUL */
|
|
/* B0 */ M, M, M, M, M, M, M, M, /* CMPXCHG; LSS/LFS/LGS; MOVZX */
|
|
/* B8 */ M, B, M|IM_8, M, M, M, M, M, /* POPCNT; grp8 BT imm8; BSF/BSR */
|
|
/* C0 */ M, M, M|IM_8, M, M|IM_8, M|IM_8, M|IM_8, M, /* XADD; CMPPS imm8; pinsr/extr; grp9 */
|
|
/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0, /* BSWAP */
|
|
/* D0 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
|
/* D8 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
|
/* E0 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
|
/* E8 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
|
/* F0 */ M, M, M, M, M, M, M, M, /* SSE/MMX */
|
|
/* F8 */ M, M, M, M, M, M, M, B /* SSE/MMX; FF invalid */
|
|
};
|
|
|
|
/* ---- decode state -------------------------------------------------------- */

/* Prefix facts collected while decoding a single instruction. They determine
 * the effective operand size, which in turn sizes the immediate. */
typedef struct {
    int rex_w;   /* 1 when a REX prefix with W set was seen (64-bit operand) */
    int pfx66;   /* 1 when a 0x66 operand-size override was seen             */
} dstate;

/* immediate byte count for an immediate class under the effective op-size. */
|
|
static size_t imm_bytes(unsigned imclass, const dstate* st) {
|
|
switch (imclass) {
|
|
case IM_0: { return 0; }
|
|
case IM_8: { return 1; }
|
|
case IM_16: { return 2; }
|
|
case IM_32: { return 4; }
|
|
case IM_Z: { return st->pfx66 ? 2u : 4u; }
|
|
case IM_V: { return st->rex_w ? 8u : (st->pfx66 ? 2u : 4u); }
|
|
case IM_P: { return (st->pfx66 ? 2u : 4u) + 2u; }
|
|
default: { return 0; }
|
|
}
|
|
}
|
|
|
|
/* Measure one ModRM operand encoding: the ModRM byte plus any SIB byte and
 * displacement that follow it.
 *
 *   p        - points at the ModRM byte
 *   avail    - number of readable bytes starting at p
 *   has_rip  - optional; set to 1 when the operand is RIP-relative
 *              (mod=00, rm=101). Never written otherwise, so the caller
 *              initialises it.
 *   rip      - optional; receives the signed disp32 of a RIP-relative operand
 *   rip_off  - optional; receives the offset of that disp32 from p
 *
 * Returns the number of bytes consumed, or 0 when the encoding would run past
 * `avail`. The address-size prefix (67) is not consulted: it does not change
 * the ModRM/SIB/disp byte layout in long mode. */
__attribute__((hot))
static size_t decode_modrm(const uint8_t* p, size_t avail,
                           int* has_rip, int32_t* rip, size_t* rip_off) {
    if (avail == 0) { return 0; }

    const unsigned mod = (unsigned)(p[0] >> 6) & 3u;
    const unsigned rm  = (unsigned)p[0] & 7u;
    size_t used     = 1;                    /* ModRM itself */
    size_t disp_len = 0;

    /* mod=11 addresses a register: nothing follows the ModRM byte. */
    if (mod == 3u) { return used; }

    /* rm=100 announces a SIB byte. */
    if (rm == 4u) {
        if (avail < 2) { return 0; }
        const unsigned base = (unsigned)p[1] & 7u;
        used = 2;
        /* mod=00 with SIB base=101: no base register, disp32 instead */
        if (mod == 0u && base == 5u) { disp_len = 4; }
    }

    switch (mod) {
        case 0u: {
            if (rm == 5u) {                 /* RIP-relative disp32 */
                if (avail < used + 4) { return 0; }
                const uint32_t raw = (uint32_t)p[used]
                                   | ((uint32_t)p[used + 1] << 8)
                                   | ((uint32_t)p[used + 2] << 16)
                                   | ((uint32_t)p[used + 3] << 24);
                if (has_rip) { *has_rip = 1; }
                if (rip)     { *rip = (int32_t)raw; }
                if (rip_off) { *rip_off = used; }
                return used + 4;
            }
            break;                          /* otherwise only the SIB rule applies */
        }
        case 1u: { disp_len = 1; break; }   /* disp8  */
        default: { disp_len = 4; break; }   /* mod=10: disp32 */
    }

    if (disp_len != 0 && avail < used + disp_len) { return 0; }
    return used + disp_len;
}

/* ---- VEX (C4 3-byte / C5 2-byte) ----------------------------------------- *
|
|
* VEX-encoded instructions carry a ModRM and (rarely) an imm8. We treat them
|
|
* as: [VEX bytes] [opcode] [ModRM(+SIB+disp)] [imm8?]. The map (mmmmm in C4)
|
|
* selects 0F / 0F38 / 0F3A; we always assume a ModRM follows the opcode (true
|
|
* for the VEX-encoded SSE/AVX space) and add the imm8 for the 0F3A map (which
|
|
* is the imm8 map) - this is length-correct for the common AVX encodings the
|
|
* decoder needs to step over. VEX.W does not change this length. */
|
|
__attribute__((hot))
|
|
static int decode_vex(const uint8_t* code, size_t avail, x86_insn* out) {
|
|
size_t n;
|
|
unsigned mmmmm;
|
|
|
|
if (code[0] == 0xC5u) { /* 2-byte VEX */
|
|
if (avail < 2) { return 0; }
|
|
n = 2; /* C5 + byte1 */
|
|
mmmmm = 1u; /* implied 0F map */
|
|
} else { /* 0xC4: 3-byte VEX */
|
|
if (avail < 3) { return 0; }
|
|
mmmmm = code[1] & 0x1fu; /* 1=0F, 2=0F38, 3=0F3A */
|
|
n = 3; /* C4 + byte1 + byte2 */
|
|
}
|
|
|
|
if (avail < n + 1) { return 0; } /* need an opcode byte */
|
|
n += 1; /* the opcode byte */
|
|
|
|
int rip_present = 0;
|
|
int32_t rip = 0;
|
|
size_t rip_off = 0;
|
|
const size_t modrm_at = n;
|
|
const size_t m = decode_modrm(code + n, avail - n, &rip_present, &rip, &rip_off);
|
|
if (m == 0) { return 0; }
|
|
n += m;
|
|
|
|
/* 0F3A map is the imm8 map: every opcode carries a trailing imm8. */
|
|
if (mmmmm == 3u) {
|
|
if (avail < n + 1) { return 0; }
|
|
n += 1;
|
|
}
|
|
|
|
if (n < 1 || n > 15) { return 0; }
|
|
out->len = (uint8_t)n;
|
|
out->flow = X86_OTHER;
|
|
out->has_rel = 0;
|
|
out->rel = 0;
|
|
out->has_riprel = rip_present;
|
|
out->riprel = rip;
|
|
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
|
|
out->disp_len = rip_present ? 4u : 0u;
|
|
return (int)n;
|
|
}
|
|
|
|
/* ---- branch displacement read -------------------------------------------- *
|
|
* Read a rel8 (bytes==1) or rel32 (bytes==4) branch displacement at `p`,
|
|
* sign-extend into out->rel, and mark has_rel. */
|
|
static void read_rel(const uint8_t* p, size_t off, size_t bytes, x86_insn* out) {
|
|
if (bytes == 1) {
|
|
out->rel = (int32_t)(int8_t)p[0];
|
|
} else { /* 4 bytes */
|
|
out->rel = (int32_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8) |
|
|
((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24));
|
|
}
|
|
out->has_rel = 1;
|
|
out->disp_off = (uint8_t)off; /* rel field begins here */
|
|
out->disp_len = (uint8_t)bytes; /* rel8 -> 1, rel32 -> 4 */
|
|
}
|
|
|
|
/* ---- main decode --------------------------------------------------------- */
|
|
|
|
__attribute__((hot))
|
|
int x86_decode(const uint8_t* code, size_t avail, x86_insn* out) {
|
|
if (!code || !out || avail == 0) {
|
|
if (out) {
|
|
out->len = 0; out->flow = X86_OTHER;
|
|
out->has_rel = 0; out->rel = 0;
|
|
out->has_riprel = 0; out->riprel = 0;
|
|
out->disp_off = 0; out->disp_len = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* zero the result; on any failure we leave len=0. */
|
|
out->len = 0; out->flow = X86_OTHER;
|
|
out->has_rel = 0; out->rel = 0;
|
|
out->has_riprel = 0; out->riprel = 0;
|
|
out->disp_off = 0; out->disp_len = 0;
|
|
|
|
const size_t cap = avail < 15u ? avail : 15u; /* never decode past 15 */
|
|
size_t n = 0;
|
|
dstate st = { 0, 0 };
|
|
|
|
/* ---- legacy prefixes (66/67/F0/F2/F3/segment) ---- */
|
|
for (; n < cap; n++) {
|
|
const uint8_t b = code[n];
|
|
if (b == 0x66u) { st.pfx66 = 1; continue; }
|
|
if (b == 0x67u) { continue; } /* address-size: no length effect */
|
|
if (b == 0xF0u || b == 0xF2u || b == 0xF3u) { continue; } /* lock/rep */
|
|
if (b == 0x2Eu || b == 0x36u || b == 0x3Eu || b == 0x26u || /* seg */
|
|
b == 0x64u || b == 0x65u) { continue; }
|
|
break;
|
|
}
|
|
if (n >= cap) { return 0; } /* prefixes only / out of bytes */
|
|
|
|
/* ---- VEX (C4/C5) must precede REX and is mutually exclusive with it ---- */
|
|
if (code[n] == 0xC5u || code[n] == 0xC4u) {
|
|
const int r = decode_vex(code + n, cap - n, out);
|
|
if (r == 0) { out->len = 0; return 0; }
|
|
const size_t total = n + (size_t)r;
|
|
if (total < 1 || total > 15 || total > avail) { out->len = 0; return 0; }
|
|
out->len = (uint8_t)total;
|
|
return (int)total;
|
|
}
|
|
|
|
/* ---- EVEX (0x62) is a documented gap: undecodable ---- */
|
|
if (code[n] == 0x62u) { return 0; }
|
|
|
|
/* ---- REX prefix (0x40..0x4F): must be the last prefix ---- */
|
|
if ((code[n] & 0xF0u) == 0x40u) {
|
|
st.rex_w = (code[n] & 0x08u) ? 1 : 0;
|
|
n += 1;
|
|
if (n >= cap) { return 0; }
|
|
}
|
|
|
|
/* ---- opcode: 1-byte, or 0F (two-byte / 0F38 / 0F3A) ---- */
|
|
uint8_t op = code[n];
|
|
const uint8_t* tbl = OP1;
|
|
int two_byte = 0;
|
|
n += 1;
|
|
|
|
if (op == 0x0Fu) {
|
|
if (n >= cap) { return 0; }
|
|
op = code[n];
|
|
n += 1;
|
|
two_byte = 1;
|
|
if (op == 0x38u) { /* 0F38 map: all ModRM, no imm */
|
|
if (n >= cap) { return 0; }
|
|
op = code[n];
|
|
n += 1;
|
|
/* every 0F38 opcode has a ModRM and no immediate. */
|
|
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
|
|
const size_t modrm_at = n;
|
|
const size_t m = decode_modrm(code + n, cap - n,
|
|
&rip_present, &rip, &rip_off);
|
|
if (m == 0) { return 0; }
|
|
n += m;
|
|
out->has_riprel = rip_present; out->riprel = rip;
|
|
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
|
|
out->disp_len = rip_present ? 4u : 0u;
|
|
if (n < 1 || n > 15 || n > avail) { return 0; }
|
|
out->len = (uint8_t)n;
|
|
return (int)n;
|
|
}
|
|
if (op == 0x3Au) { /* 0F3A map: ModRM + imm8 */
|
|
if (n >= cap) { return 0; }
|
|
op = code[n];
|
|
n += 1;
|
|
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
|
|
const size_t modrm_at = n;
|
|
const size_t m = decode_modrm(code + n, cap - n,
|
|
&rip_present, &rip, &rip_off);
|
|
if (m == 0) { return 0; }
|
|
n += m;
|
|
if (n >= cap) { return 0; } /* trailing imm8 */
|
|
n += 1;
|
|
out->has_riprel = rip_present; out->riprel = rip;
|
|
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
|
|
out->disp_len = rip_present ? 4u : 0u;
|
|
if (n < 1 || n > 15 || n > avail) { return 0; }
|
|
out->len = (uint8_t)n;
|
|
return (int)n;
|
|
}
|
|
tbl = OP2;
|
|
}
|
|
|
|
const uint8_t prop = tbl[op];
|
|
if (prop & OP_BAD) { return 0; }
|
|
|
|
/* ---- ModRM (+SIB+disp), if any ---- */
|
|
int have_modrm = 0;
|
|
uint8_t modrm = 0; /* captured for grp5 classify */
|
|
if (prop & OP_MODRM) {
|
|
if (n >= cap) { return 0; }
|
|
modrm = code[n];
|
|
have_modrm = 1;
|
|
int rip_present = 0; int32_t rip = 0; size_t rip_off = 0;
|
|
const size_t modrm_at = n;
|
|
const size_t m = decode_modrm(code + n, cap - n, &rip_present, &rip,
|
|
&rip_off);
|
|
if (m == 0) { return 0; }
|
|
n += m;
|
|
out->has_riprel = rip_present; out->riprel = rip;
|
|
out->disp_off = rip_present ? (uint8_t)(modrm_at + rip_off) : 0;
|
|
out->disp_len = rip_present ? 4u : 0u;
|
|
}
|
|
|
|
/* ---- immediate ---- */
|
|
size_t im = imm_bytes(prop & OP_IMASK, &st);
|
|
|
|
/* opcodes the table cannot fully encode (combined immediates) ---- */
|
|
if (!two_byte && op == 0xC8u) { im = 3; } /* ENTER imm16, imm8 */
|
|
|
|
/* grp3 F6/F7: only the TEST sub-opcode (reg field /0 or /1) takes an
|
|
* immediate (imm8 for F6, immZ for F7). NOT/NEG/MUL/IMUL/DIV/IDIV (/2../7)
|
|
* take none. The table marks F6/F7 as ModRM-only; resolve the immediate
|
|
* here from the captured ModRM reg field. */
|
|
if (!two_byte && (op == 0xF6u || op == 0xF7u) && have_modrm) {
|
|
const unsigned reg = (modrm >> 3) & 7u;
|
|
if (reg <= 1u) { im = (op == 0xF6u) ? 1u : (st.pfx66 ? 2u : 4u); }
|
|
}
|
|
|
|
if (im) {
|
|
if (cap < n + im) { return 0; }
|
|
n += im;
|
|
}
|
|
|
|
if (n < 1 || n > 15 || n > avail) { return 0; }
|
|
|
|
/* ---- control-flow classification (near forms) ----
|
|
* `im` is the immediate byte count; a rel branch's displacement is the
|
|
* last `im` bytes of the instruction (code + n - im). */
|
|
if (!two_byte) {
|
|
switch (op) {
|
|
case 0xE8u: { /* CALL rel32 */
|
|
out->flow = X86_CALL;
|
|
read_rel(code + (n - im), n - im, im, out);
|
|
break;
|
|
}
|
|
case 0xE9u: case 0xEBu: { /* JMP rel32 / rel8 */
|
|
out->flow = X86_JMP;
|
|
read_rel(code + (n - im), n - im, im, out);
|
|
break;
|
|
}
|
|
case 0xC2u: case 0xC3u: case 0xCAu: case 0xCBu: { /* RET forms */
|
|
out->flow = X86_RET;
|
|
break;
|
|
}
|
|
case 0xCCu: { /* INT3 */
|
|
out->flow = X86_INT3;
|
|
break;
|
|
}
|
|
case 0xFFu: { /* grp5: /2 CALL, /4 JMP (ind) */
|
|
if (have_modrm) {
|
|
const unsigned reg = (modrm >> 3) & 7u;
|
|
if (reg == 2u || reg == 3u) { out->flow = X86_CALL; }
|
|
else if (reg == 4u || reg == 5u) { out->flow = X86_JMP; }
|
|
}
|
|
break; /* indirect: has_rel stays 0 */
|
|
}
|
|
default: { /* 70..7F Jcc rel8 */
|
|
if (op >= 0x70u && op <= 0x7Fu) {
|
|
out->flow = X86_JCC;
|
|
read_rel(code + (n - im), n - im, im, out);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
} else if (op >= 0x80u && op <= 0x8Fu) { /* 0F 80..8F Jcc rel32 */
|
|
out->flow = X86_JCC;
|
|
read_rel(code + (n - im), n - im, im, out);
|
|
}
|
|
|
|
out->len = (uint8_t)n;
|
|
return (int)n;
|
|
}
|
|
|
|
uint64_t x86_branch_target(uint64_t ip, const x86_insn* insn) {
|
|
if (!insn || !insn->has_rel) { return 0; }
|
|
return ip + (uint64_t)insn->len + (uint64_t)(int64_t)insn->rel;
|
|
}
|
|
|
|
uint64_t x86_riprel_target(uint64_t ip, const x86_insn* insn) {
|
|
if (!insn || !insn->has_riprel) { return 0; }
|
|
return ip + (uint64_t)insn->len + (uint64_t)(int64_t)insn->riprel;
|
|
}
|