/* x86dec.h - light x86-64 length decoder + control-flow / RIP-relative targets.
 *
 * A PURE primitive: it decodes ONE 64-bit-mode instruction over a raw byte
 * buffer and reports its total length plus, when present, the near control-flow
 * branch displacement (rel8/rel32) and the RIP-relative memory displacement
 * (ModRM mod=00, rm=101). It does NOT decode registers or full operands - just
 * enough for length-disassembly and control-flow / memory target recovery
 * (function inventory, call graphs, xref databases, IAT and hook detection).
 *
 * It touches no I/O, no allocations, and no other module: it depends on
 * <stdint.h> and <stddef.h> only. Length-correct over legacy prefixes
 * (66/67/F0/F2/F3/segment), REX, the 1-byte / 0F two-byte / 0F38 / 0F3A opcode
 * maps, ModRM+SIB, disp8/disp32, the immediate sizes (with 66 and REX.W
 * operand-size effects), and VEX (C4/C5).
 *
 * DOCUMENTED GAP: EVEX (0x62, AVX-512) is NOT decoded - it yields len=0
 * (undecodable). It is rare in ordinary user code; full EVEX support, if ever
 * needed, is a separate task. Any byte stream that does not decode, or that
 * would run past min(avail,15), also yields len=0.
 */
#ifndef VMIE_X86DEC_H
#define VMIE_X86DEC_H

#include <stddef.h> /* size_t */
#include <stdint.h> /* uint8_t, int32_t, uint64_t */

/* Control-flow class of a decoded instruction. */
typedef enum {
    X86_OTHER, /* no tracked control-flow effect */
    X86_CALL,  /* E8 rel32, or FF /2 indirect (has_rel=0) */
    X86_JMP,   /* E9 rel32 / EB rel8, or FF /4 indirect (has_rel=0) */
    X86_JCC,   /* 70-7F rel8 / 0F 80-8F rel32 */
    X86_RET,   /* C3 / C2 imm16 / CB / CA */
    X86_INT3   /* CC */
} x86_flow;

/* Result of decoding one instruction. */
typedef struct {
    uint8_t  len;        /* total length 1..15; 0 = undecodable / exceeds avail */
    x86_flow flow;       /* control-flow class */
    int      has_rel;    /* 1: a rel8/rel32 branch displacement is present */
    int32_t  rel;        /* sign-extended branch displacement (if has_rel) */
    int      has_riprel; /* 1: RIP-relative memory operand (ModRM mod=00,rm=101) */
    int32_t  riprel;     /* sign-extended RIP-relative disp32 (if has_riprel) */
    uint8_t  disp_off;   /* byte offset within the instruction of the rel/RIP-rel
                          * displacement field, or 0 if the instruction has
                          * neither (has_rel == 0 && has_riprel == 0). This is the
                          * field that "floats" with the load address / relocation,
                          * so a signature generator wildcards exactly these bytes. */
    uint8_t  disp_len;   /* displacement length: 1 (rel8), 4 (rel32 or RIP-rel
                          * disp32), else 0 (no displacement). The wildcard span is
                          * [disp_off, disp_off + disp_len). */
} x86_insn;

/* Decode ONE 64-bit-mode instruction at `code` (`avail` readable bytes). Fills
 * *out and returns the length (1..15), or 0 if the bytes do not decode or would
 * exceed min(avail,15). Length-correct over: legacy prefixes (66/67/F0/F2/F3/
 * seg), REX, 1-byte / 0F two-byte / 0F38 / 0F3A maps, ModRM+SIB, disp8/disp32,
 * imm8/16/32/64 (66 and REX.W operand-size effects), and VEX (C4/C5). EVEX
 * (0x62) is a documented gap: len=0.
 *
 * On a decoded instruction it also reports out->disp_off / out->disp_len: the
 * byte position and length of the rel/RIP-relative displacement field within the
 * instruction (0/0 when there is none). These are exactly the bytes that float
 * with the load address / relocation, so a signature generator wildcards
 * [disp_off, disp_off+disp_len) and keeps the rest as must-match.
 */
int x86_decode(const uint8_t *code, size_t avail, x86_insn *out);

/* Absolute target of a rel branch: ip + insn->len + insn->rel
 * (0 unless has_rel). */
uint64_t x86_branch_target(uint64_t ip, const x86_insn *insn);

/* Absolute target of a RIP-relative operand: ip + insn->len + insn->riprel
 * (0 unless has_riprel). */
uint64_t x86_riprel_target(uint64_t ip, const x86_insn *insn);

#endif /* VMIE_X86DEC_H */