diff --git a/include/codeanalysis.h b/include/codeanalysis.h
index bada3cf..c03544b 100644
--- a/include/codeanalysis.h
+++ b/include/codeanalysis.h
@@ -77,4 +77,40 @@ typedef struct { uint32_t start; uint32_t end; } code_block;
  *   printf("block %d: [%#x, %#x)\n", i, bb[i].start, bb[i].end); */
 int cfg_blocks(mem_view_t fn, code_block* out, int max);
 
+/* Position-independent hash of a function's bytes. `fn` is a view spanning
+ * exactly one function (e.g. a section-view sub-range covering a func_range from
+ * vmie_win32_functions): fn.data[0] is the function's first byte, fn.size its
+ * length. It steps `fn` with the decoder (x86_decode - no second decoder) and
+ * folds the opcode / ModRM / SIB / immediate bytes into a 64-bit hash while
+ * ZEROING the rel/RIP-relative displacement bytes of each instruction
+ * (in.disp_off .. in.disp_off + in.disp_len, exactly the span sig_generate
+ * wildcards). Those are the bytes that float with the load address and
+ * relocation, so zeroing them makes the hash STABLE across images and ASLR -
+ * the same function hashes identically wherever it is mapped.
+ *
+ * Returns a 64-bit hash, or 0 if `fn` is empty (no data / size 0) or does not
+ * decode cleanly (the walk gives up at the first desync). 0 is therefore "no
+ * hash", never a valid fingerprint: test for it BEFORE comparing two hashes,
+ * because two failed walks both return 0 and would otherwise look "unchanged".
+ *
+ * Two uses on one primitive:
+ * - fingerprint / library-ID: compare against a table of known function hashes
+ *   to auto-name recovered code (e.g. recognize a statically-linked CRT/SSL
+ *   routine without symbols);
+ * - code diff: hash the same function in two snapshots - an unchanged hash
+ *   means the body matches outside the zeroed displacement bytes (barring a
+ *   hash collision; it is evidence, not proof, of byte identity), a changed
+ *   hash means it was patched.
+ *
+ * Devirtualization needs NO new call - it is a composition of primitives the
+ * engine already has: a C++ vtable at `vtable_va` is an array of code pointers,
+ * so its METHODS are gva_jumptable(m, cr3, vtable_va, ...) (codeanalysis.h), and
+ * its live INSTANCES are pmap_referrers(pm, vtable_va, ...) (pmap.h) - every
+ * object's first qword is its vtable pointer. With the methods recovered,
+ * func_hash names each method body against a known-hash table. (See win32.h for
+ * the same note next to the indirect-call surface.)
+ *
+ * Example - diff a function across two snapshots:
+ *   mem_view_t a, b; // same function, two captures (SECTION_LOCAL/RVA views)
+ *   const uint64_t ha = func_hash(a), hb = func_hash(b);
+ *   if (!ha || !hb) puts("function did not hash (empty or undecodable)");
+ *   else if (ha != hb) puts("function body changed"); */
+uint64_t func_hash(mem_view_t fn);
+
 #endif /* VMIE_CODEANALYSIS_H */
diff --git a/include/win32.h b/include/win32.h
index e33cf75..4d38266 100644
--- a/include/win32.h
+++ b/include/win32.h
@@ -331,6 +331,125 @@ typedef struct { uint32_t from; uint32_t to; uint8_t kind; } call_edge;
 int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
                          call_edge* out, int max);
 
+/* One import: a function this module pulls from another DLL, recovered from the
+ * import directory (the INT/IAT pair of an IMAGE_IMPORT_DESCRIPTOR).
+ *   iat_rva - RVA of the IAT slot that holds the resolved function pointer at
+ *             run time (absolute VA = module_base + iat_rva). A call through
+ *             this import is `call qword [rip+disp]` whose target lands on this
+ *             slot - so iat_rva is exactly what vmie_win32_func_imports reports;
+ *             correlate the two to name a function's API calls.
+ *   dll     - the exporting DLL name as written in the descriptor, NUL-
+ *             terminated, TRUNCATED to 31 chars (e.g. "KERNEL32.dll"). A name
+ *             longer than 31 bytes is cut; this is the documented limit. "" when
+ *             the descriptor carries no name RVA or the name is unreadable.
+ *   name    - the imported function name, NUL-terminated, TRUNCATED to 63 chars
+ *             (long C++ mangled names are cut); "" for a by-ordinal import, and
+ *             also for a by-name import whose name bytes could not be read (in
+ *             that case ordinal is 0 as well).
+ *   ordinal - the import ordinal for a by-ordinal import (name[0]=='\0'), else
+ *             0. By-ordinal imports set the high bit in the thunk and carry no
+ *             name in the image.
*/
+typedef struct { uint32_t iat_rva; char dll[32]; char name[64]; uint16_t ordinal; } import_sym;
+
+/* Enumerate the module's imports from its import directory (IMAGE_DIRECTORY_
+ * ENTRY_IMPORT). For each IMAGE_IMPORT_DESCRIPTOR it reads the DLL name, then
+ * walks the parallel INT (OriginalFirstThunk: the name/ordinal hints) and IAT
+ * (FirstThunk: the resolved-pointer slots) in lockstep so every entry carries
+ * its own IAT-slot RVA. A by-name thunk points at an IMAGE_IMPORT_BY_NAME
+ * (hint+NUL-terminated name); a by-ordinal thunk has its top bit set and yields
+ * an ordinal instead. The INT is preferred when present (it survives binding);
+ * the IAT is the fallback. A descriptor whose FirstThunk is 0 contributes no
+ * entries (there is no slot to report).
+ * NOTE(review): the IAT fallback decodes FirstThunk values as name/ordinal
+ * thunks; in a loaded image those slots presumably already hold resolved
+ * pointers, so names recovered through the fallback may be meaningless -
+ * confirm against a module that has no INT.
+ *
+ * Returns the TOTAL number of imports (out=NULL => count only, so size then
+ * fill), or -1 if there is no import directory or the headers/directory are
+ * unreadable - including a descriptor or thunk read that fails part-way
+ * through the walk (no partial count is returned). At most `max` entries are
+ * written to `out`; the return value keeps counting past that. Entries are
+ * reported descriptor by descriptor, and within a descriptor in thunk order.
+ * The walk is capped at 4096 descriptors and 65536 thunks per descriptor as a
+ * guard against malformed directories.
+ *
+ * Example - list a module's imports and where each resolves:
+ *   import_sym im[512];
+ *   int n = vmie_win32_imports(v, pr->cr3, m.base, im, 512);
+ *   for (int i = 0; i < n && i < 512; i++)
+ *     if (im[i].name[0])
+ *       printf("%s!%s -> IAT %#x\n", im[i].dll, im[i].name, im[i].iat_rva);
+ *     else
+ *       printf("%s!#%u -> IAT %#x\n", im[i].dll, im[i].ordinal, im[i].iat_rva); */
+int vmie_win32_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
+                       import_sym* out, int max);
+
+/* One inline-hook finding: a function whose FIRST instruction is a direct
+ * jmp/call leaving the module image - the classic detour / trampoline shape.
+ *   func_rva - the hooked function's RVA (a .pdata function start). Absolute VA
+ *              = module_base + func_rva.
+ *   target   - the absolute VA the entry redirects to. It lies OUTSIDE the
+ *              module image [module_base, module_base + SizeOfImage); that is
+ *              exactly what makes it a cross-module hook rather than an ordinary
+ *              intra-module branch.
*/
+typedef struct { uint32_t func_rva; uint64_t target; } inline_hook;
+
+/* Detect inline (entry-redirect) hooks. For each function from .pdata
+ * (vmie_win32_functions) it decodes the FIRST instruction with x86_decode; if
+ * that instruction is a DIRECT jmp/call (has_rel) whose resolved target
+ * (x86_branch_target) lands OUTSIDE the module image
+ * [module_base, module_base + SizeOfImage), it records {func_rva, target}. An
+ * un-hooked function begins with its real prologue (push/sub/mov/endbr64...) or
+ * branches inside its own image, so it is not reported.
+ *
+ * Returns the TOTAL number of hooked functions (out=NULL => count only), or -1
+ * if the headers (e_lfanew / SizeOfImage) or the .pdata function inventory are
+ * missing/unreadable, or on allocation failure. At most `max` findings are
+ * written to `out`. A function whose entry bytes cannot be read (the 16-byte
+ * entry window is not resident) or decoded is SKIPPED, not reported as an
+ * error - so a clean result does not vouch for non-resident entries.
+ *
+ * Scope: this finds INLINE hooks (the function body's entry is patched). IAT
+ * hooks - an import SLOT redirected to point outside its resolving module - are
+ * a different shape that needs cross-module pointer resolution and are NOT
+ * covered here.
+ *
+ * Example - report any patched function entries in a module:
+ *   inline_hook hk[64];
+ *   int n = vmie_win32_inline_hooks(v, pr->cr3, m.base, hk, 64);
+ *   for (int i = 0; i < n && i < 64; i++)
+ *     printf("sub_%x hooked -> %#llx\n", hk[i].func_rva,
+ *            (unsigned long long)hk[i].target); */
+int vmie_win32_inline_hooks(vmie_win32* v, uint64_t cr3, uint64_t module_base,
+                            inline_hook* out, int max);
+
+/* Recover which IAT slots a function calls, in call order - the function's
+ * API-call sequence / behavioral fingerprint. It steps `func_rva`'s body with
+ * x86_decode and, for every `call/jmp qword [rip+disp]` (an indirect branch
+ * through memory: has_riprel) whose resolved memory target (x86_riprel_target)
+ * is an IAT slot of THIS module's import directory, it records that slot's RVA.
+ * Correlate the returned RVAs with vmie_win32_imports (same iat_rva) to turn the
+ * sequence into named API calls (e.g. CreateFileW, WriteFile, CloseHandle).
+ *
+ *   func_rva - the function to analyze, as an RVA (e.g. from
+ *              vmie_win32_functions or an export). Absolute VA = module_base +
+ *              func_rva. It must be the START of a .pdata function: the body
+ *              length is taken from that entry, and an RVA with no matching
+ *              entry (or a zero-sized one) is rejected with -1.
+ *   iat_rvas - caller array receiving up to `max` IAT-slot RVAs in the order
+ *              the calls appear; NULL to count only.
+ *
+ * Returns the TOTAL number of IAT-slot calls in the function (iat_rvas=NULL =>
+ * count), or -1 if the headers / import directory / function bytes are
+ * unreadable, `func_rva` is not a known function start, or an allocation
+ * fails. A decode desync stops the walk at that point; calls after it are not
+ * counted. v1 resolves call/jmp THROUGH the IAT (rip-relative onto an import
+ * slot); other indirect forms are out of scope.
+ *
+ * Example - print the API sequence of a function:
+ *   uint32_t slots[128];
+ *   int n = vmie_win32_func_imports(v, pr->cr3, m.base, fn_rva, slots, 128);
+ *   import_sym im[512];
+ *   int ni = vmie_win32_imports(v, pr->cr3, m.base, im, 512);
+ *   for (int i = 0; i < n && i < 128; i++)
+ *     for (int j = 0; j < ni && j < 512; j++)
+ *       if (im[j].iat_rva == slots[i]) { puts(im[j].name); break; } */
+int vmie_win32_func_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
+                            uint32_t func_rva, uint32_t* iat_rvas, int max);
+
+/* Devirtualization (C++ vtables) needs NO dedicated symbol - it is a
+ * COMPOSITION of primitives the engine already exposes:
+ * - a vtable at `vtable_va` is an array of code pointers, so its METHODS are
+ *   gva_jumptable(mem, cr3, vtable_va, ...) (codeanalysis.h) - the same
+ *   code-pointer-array walk that recovers switch tables;
+ * - its live INSTANCES are pmap_referrers(pm, vtable_va, ...) (pmap.h),
+ *   because an object's first qword is its vtable pointer (who-points-here on
+ *   the vtable VA enumerates the objects).
+ * Recover the method RVAs with gva_jumptable, then func_hash (codeanalysis.h)
+ * can name each method body against a known-hash table. No new call is added
+ * for this on purpose. */
+
 /* One exported symbol from the module export directory (EAT).
  *   rva - export target RVA (absolute VA = module_base + rva). Forwarder
  *         exports report the forwarder-string RVA; see `forwarded`.
diff --git a/src/engine/include/pe.h b/src/engine/include/pe.h
index 298ab5a..d34703a 100644
--- a/src/engine/include/pe.h
+++ b/src/engine/include/pe.h
@@ -62,6 +62,7 @@ int vmie_pe_section(vmie_mem* m, uintptr_t cr3, uint64_t module_base,
 
 /* OptionalHeader DataDirectory indices used across the engine. */
 #define PE_DIR_EXPORT 0u /* IMAGE_DIRECTORY_ENTRY_EXPORT */
+#define PE_DIR_IMPORT 1u /* IMAGE_DIRECTORY_ENTRY_IMPORT */
 #define PE_DIR_DEBUG 6u /* IMAGE_DIRECTORY_ENTRY_DEBUG */
 #define PE_DIR_EXCEPTION 3u /* IMAGE_DIRECTORY_ENTRY_EXCEPTION (.pdata) */
 
diff --git a/src/engine/win32/pe.c b/src/engine/win32/pe.c
index 00c6066..1e7abff 100644
--- a/src/engine/win32/pe.c
+++ b/src/engine/win32/pe.c
@@ -516,3 +516,262 @@ int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
     free(tbuf);
     return total;
 }
+
+/* ---- public win32 surface: imports (import directory INT/IAT) ------------- *
+ * IMAGE_IMPORT_DESCRIPTOR (20 bytes): OriginalFirstThunk(+0, the INT RVA),
+ * TimeDateStamp(+4), ForwarderChain(+8), Name(+12, the DLL-name RVA),
+ * FirstThunk(+16, the IAT RVA). The walk stops at the first descriptor whose
+ * OriginalFirstThunk, Name and FirstThunk are all zero (the other two fields
+ * are not read). Each descriptor's INT and IAT are parallel arrays of 8-byte
+ * thunks - the PE32+ layout; the 4-byte PE32 thunk form is not handled by this
+ * walk: a by-ordinal thunk has bit 63 set (ordinal in the low 16 bits); a
+ * by-name thunk is the RVA of an IMAGE_IMPORT_BY_NAME { uint16 Hint; char
+ * Name[]; }. We prefer the INT for the name/ordinal (it survives binding) and
+ * always take the slot RVA from
+ * the IAT position. Cold: one-shot directory read, reusing pe_data_dir.
*/ + +#define IMPDESC_SIZE 20u /* sizeof(IMAGE_IMPORT_DESCRIPTOR) */ +#define IMPDESC_OFT_OFF 0u /* OriginalFirstThunk (INT) RVA */ +#define IMPDESC_NAME_OFF 12u /* Name (DLL name) RVA */ +#define IMPDESC_FT_OFF 16u /* FirstThunk (IAT) RVA */ +#define IMP_ORDINAL_FLAG 0x8000000000000000ull /* by-ordinal thunk bit 63 */ +#define IMP_MAX_DESC 4096u /* descriptor-walk guard (malformed) */ +#define IMP_MAX_THUNK 65536u /* per-descriptor thunk guard */ + +/* Read a NUL-terminated ASCII string from guest VA into dst[cap], truncating to + * cap-1 and best-effort over a non-fully-resident tail (shrink the read until it + * succeeds), mirroring the export-name read. */ +static void imp_read_str(vmie_mem* m, uintptr_t cr3, uint64_t va, + char* dst, size_t cap) { + if (cap == 0) { return; } + for (size_t z = cap - 1; z > 0; z--) { + if (gva_read(m, cr3, va, dst, z) == 0) { dst[z] = 0; break; } + } + dst[cap - 1] = 0; +} + +int vmie_win32_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base, + import_sym* out, int max) __attribute__((cold)); +int vmie_win32_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base, + import_sym* out, int max) { + vmie_mem* m = vmie_win32_mem(v); + if (!m) { return -1; } + + uint32_t imp_rva, imp_sz; + if (pe_data_dir(m, cr3, module_base, PE_DIR_IMPORT, &imp_rva, &imp_sz) || + !imp_rva) { + return -1; + } + + int total = 0; + for (uint32_t d = 0; d < IMP_MAX_DESC; d++) { + const uint64_t desc = module_base + imp_rva + (uint64_t)d * IMPDESC_SIZE; + uint32_t oft, name_rva, ft; + if (gva_read(m, cr3, desc + IMPDESC_OFT_OFF, &oft, 4) || + gva_read(m, cr3, desc + IMPDESC_NAME_OFF, &name_rva, 4) || + gva_read(m, cr3, desc + IMPDESC_FT_OFF, &ft, 4)) { + return -1; + } + if (oft == 0 && name_rva == 0 && ft == 0) { + break; /* zero terminator descriptor */ + } + if (ft == 0) { + continue; /* no IAT: nothing to report */ + } + + char dll[32]; + dll[0] = 0; + if (name_rva) { + imp_read_str(m, cr3, module_base + name_rva, dll, sizeof dll); + } + + /* 
INT (OriginalFirstThunk) carries names even after binding; fall back to + * the IAT (FirstThunk) when there is no INT. The slot RVA always comes + * from the IAT position so it matches a `call qword [rip]` target. */ + const uint32_t int_rva = oft ? oft : ft; + for (uint32_t t = 0; t < IMP_MAX_THUNK; t++) { + uint64_t thunk; + if (gva_read(m, cr3, module_base + int_rva + (uint64_t)t * 8, + &thunk, 8)) { + return -1; + } + if (thunk == 0) { + break; /* end of this thunk array */ + } + const uint32_t iat_rva = ft + t * 8; + if (out && total < max) { + import_sym* s = &out[total]; + memset(s, 0, sizeof *s); + s->iat_rva = iat_rva; + memcpy(s->dll, dll, sizeof s->dll); + if (thunk & IMP_ORDINAL_FLAG) { + s->ordinal = (uint16_t)(thunk & 0xFFFFu); /* by-ordinal */ + } else { + /* by-name: thunk is the RVA of IMAGE_IMPORT_BY_NAME; the name + * begins at +2 (after the uint16 Hint). */ + imp_read_str(m, cr3, module_base + (uint32_t)thunk + 2, + s->name, sizeof s->name); + } + } + total++; + } + } + return total; +} + +/* ---- public win32 surface: inline-hook detection ------------------------- * + * For each .pdata function, decode the FIRST instruction; if it is a DIRECT + * jmp/call (has_rel) whose target leaves the module image, it is a detour. Reuses + * vmie_win32_functions (.pdata starts) and x86_decode - no second parser. The + * entry bytes are read directly under cr3 (a 16-byte window covers any single + * x86 instruction). Cold: one-shot directory read + per-function entry decode. */ + +#define HOOK_ENTRY_BYTES 16u /* max length of one x86 instruction */ + +int vmie_win32_inline_hooks(vmie_win32* v, uint64_t cr3, uint64_t module_base, + inline_hook* out, int max) __attribute__((cold)); +int vmie_win32_inline_hooks(vmie_win32* v, uint64_t cr3, uint64_t module_base, + inline_hook* out, int max) { + vmie_mem* m = vmie_win32_mem(v); + if (!m) { return -1; } + + /* image bounds [module_base, module_base + SizeOfImage) (as in callgraph). 
*/ + uint32_t lfanew; + if (gva_read(m, cr3, module_base + 0x3C, &lfanew, 4)) { return -1; } + uint32_t size_of_image; + if (gva_read(m, cr3, module_base + lfanew + 0x18 + OPT_SIZEOFIMAGE_OFF, + &size_of_image, 4)) { + return -1; + } + const uint64_t img_lo = module_base; + const uint64_t img_hi = module_base + (uint64_t)size_of_image; /* exclusive */ + + /* function inventory: count, then gather (stack common case, heap overflow) + * so every function entry is examined, none silently dropped. */ + const int nfn = vmie_win32_functions(v, cr3, module_base, NULL, 0); + if (nfn < 0) { return -1; } + func_range stack_fr[256]; + func_range* fr = stack_fr; + func_range* heap_fr = NULL; + if (nfn > (int)(sizeof stack_fr / sizeof stack_fr[0])) { + heap_fr = malloc((size_t)nfn * sizeof *heap_fr); + if (!heap_fr) { return -1; } + fr = heap_fr; + } + const int got = vmie_win32_functions(v, cr3, module_base, fr, nfn); + if (got < 0) { free(heap_fr); return -1; } + + int total = 0; + for (int f = 0; f < got; f++) { + const uint64_t fn_va = module_base + fr[f].rva; + uint8_t entry[HOOK_ENTRY_BYTES]; + if (gva_read(m, cr3, fn_va, entry, sizeof entry)) { + continue; /* entry not resident: skip */ + } + x86_insn in; + const int ilen = x86_decode(entry, sizeof entry, &in); + if (ilen <= 0) { + continue; /* undecodable entry: skip */ + } + if (!in.has_rel || (in.flow != X86_JMP && in.flow != X86_CALL)) { + continue; /* not a direct jmp/call */ + } + const uint64_t tgt = x86_branch_target(fn_va, &in); + if (tgt >= img_lo && tgt < img_hi) { + continue; /* stays inside the image */ + } + if (out && total < max) { + out[total].func_rva = fr[f].rva; + out[total].target = tgt; + } + total++; + } + + free(heap_fr); + return total; +} + +/* ---- public win32 surface: per-function imports (call/jmp through IAT) ---- * + * Step one function and, for each `call/jmp qword [rip+disp]` (has_riprel) whose + * memory target is an IAT slot of this module's import directory, record the slot + * RVA - 
the function's API-call sequence. Reuses vmie_win32_functions (to bound + * the body), vmie_win32_section_view (to gather .text), and the decoder. Cold. */ + +int vmie_win32_func_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base, + uint32_t func_rva, uint32_t* iat_rvas, int max) + __attribute__((cold)); +int vmie_win32_func_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base, + uint32_t func_rva, uint32_t* iat_rvas, int max) { + vmie_mem* m = vmie_win32_mem(v); + if (!m) { return -1; } + + /* the IAT-slot window of this module: [imp_lo, imp_hi). A rip-relative target + * landing in this set and 8-aligned is an import-thunk call. */ + import_sym tmp[1]; + const int nimp = vmie_win32_imports(v, cr3, module_base, tmp, 0); + if (nimp < 0) { return -1; } + uint32_t imp_lo = 0xFFFFFFFFu, imp_hi = 0; /* IAT-slot RVA bounds */ + if (nimp > 0) { + import_sym* im = malloc((size_t)nimp * sizeof *im); + if (!im) { return -1; } + const int gi = vmie_win32_imports(v, cr3, module_base, im, nimp); + if (gi < 0) { free(im); return -1; } + const int use = gi < nimp ? gi : nimp; + for (int i = 0; i < use; i++) { + if (im[i].iat_rva < imp_lo) { imp_lo = im[i].iat_rva; } + if (im[i].iat_rva + 8 > imp_hi) { imp_hi = im[i].iat_rva + 8; } + } + free(im); + } + + /* locate the function's extent from .pdata (count then gather). 
*/ + const int nfn = vmie_win32_functions(v, cr3, module_base, NULL, 0); + if (nfn < 0) { return -1; } + func_range stack_fr[256]; + func_range* fr = stack_fr; + func_range* heap_fr = NULL; + if (nfn > (int)(sizeof stack_fr / sizeof stack_fr[0])) { + heap_fr = malloc((size_t)nfn * sizeof *heap_fr); + if (!heap_fr) { return -1; } + fr = heap_fr; + } + const int got = vmie_win32_functions(v, cr3, module_base, fr, nfn); + if (got < 0) { free(heap_fr); return -1; } + uint32_t fn_size = 0; + for (int f = 0; f < got; f++) { + if (fr[f].rva == func_rva) { fn_size = fr[f].size; break; } + } + free(heap_fr); + if (fn_size == 0) { return -1; } /* not a known function start */ + + /* gather the function body addressed at its absolute VA so a rip-relative + * target is directly an absolute VA. */ + uint8_t* fb = malloc(fn_size); + if (!fb) { return -1; } + if (gva_read(m, cr3, module_base + func_rva, fb, fn_size)) { + free(fb); + return -1; + } + const uint64_t fn_va = module_base + func_rva; + + int total = 0; + for (size_t off = 0; off < fn_size; ) { + x86_insn in; + const int ilen = x86_decode(fb + off, fn_size - off, &in); + if (ilen <= 0) { break; } /* desync: stop this function */ + if (in.has_riprel && (in.flow == X86_CALL || in.flow == X86_JMP)) { + const uint64_t tgt = x86_riprel_target(fn_va + off, &in); + if (tgt >= module_base) { + const uint64_t tgt_rva = tgt - module_base; + if (tgt_rva >= imp_lo && tgt_rva < imp_hi && + (tgt_rva & 7u) == 0) { + if (iat_rvas && total < max) { + iat_rvas[total] = (uint32_t)tgt_rva; + } + total++; + } + } + } + off += (size_t)ilen; + } + + free(fb); + return total; +} diff --git a/src/handlers/codeanalysis.c b/src/handlers/codeanalysis.c index 09e78b2..e9aefa6 100644 --- a/src/handlers/codeanalysis.c +++ b/src/handlers/codeanalysis.c @@ -138,3 +138,38 @@ int cfg_blocks(mem_view_t fn, code_block* out, int max) { free(leader); return total; } + +/* ---- position-independent function hash ----------------------------------- * + * Step 
the function with the decoder (the single decoder, x86_decode) and fold + * its bytes into a 64-bit FNV-1a hash, ZEROING each instruction's rel/RIP-rel + * displacement bytes [disp_off, disp_off+disp_len) before folding - the same + * span sig_generate wildcards. Those bytes float with load address / relocation, + * so zeroing them makes the hash stable across images and ASLR. Cold: a one-shot + * pass over one function body, not a hot loop. */ + +#define FNV64_OFFSET 0xcbf29ce484222325ull +#define FNV64_PRIME 0x00000100000001b3ull + +uint64_t func_hash(mem_view_t fn) __attribute__((cold)); +uint64_t func_hash(mem_view_t fn) { + if (!fn.data || fn.size == 0) { return 0; } + + uint64_t h = FNV64_OFFSET; + for (size_t off = 0; off < fn.size; ) { + x86_insn in; + const int ilen = x86_decode(fn.data + off, fn.size - off, &in); + if (ilen <= 0) { return 0; } /* desync: not a valid hash */ + + /* The displacement span to neutralize, clamped to this instruction so a + * malformed disp_off/len can never read past it. */ + const size_t d0 = (in.disp_len > 0) ? (size_t)in.disp_off : (size_t)ilen; + const size_t d1 = (in.disp_len > 0) ? d0 + (size_t)in.disp_len : (size_t)ilen; + for (int i = 0; i < ilen; i++) { + const uint8_t b = (i >= (int)d0 && i < (int)d1) ? 0u : fn.data[off + (size_t)i]; + h ^= b; + h *= FNV64_PRIME; + } + off += (size_t)ilen; + } + return h; +}