mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 00:56:37 +03:00
Add imports, inline-hook detection, function hashing, per-function imports
Wave 2 of the code-analysis layer:
- vmie_win32_imports resolves the import directory (INT/IAT) to {iat_rva, dll,
name, ordinal} - named APIs, walking the name and slot thunks in lockstep so
every import carries the IAT slot a call lands on.
- vmie_win32_inline_hooks decodes each .pdata function's entry and reports any
whose first instruction is a direct jmp/call leaving the module image - the
detour/trampoline shape.
- vmie_win32_func_imports records, in order, the IAT slots a function calls
through (call qword [rip+disp] onto an import slot): the function's API-call
sequence, named by correlating with vmie_win32_imports.
- func_hash (codeanalysis.h) hashes a function position-independently, zeroing
the displacement bytes the decoder locates - one primitive for fingerprinting
known code and for detecting a changed body across snapshots.
Devirtualization needs no new call and is documented as a composition: a
vtable's methods are gva_jumptable(vtable_va), its instances are
pmap_referrers(vtable_va), and func_hash names each method. Imports reuse the
shared data-directory accessor; the analyses reuse the function/section/decode
primitives - no second PE or instruction parser.
This commit is contained in:
@@ -77,4 +77,40 @@ typedef struct { uint32_t start; uint32_t end; } code_block;
|
||||
* printf("block %d: [%#x, %#x)\n", i, bb[i].start, bb[i].end); */
|
||||
int cfg_blocks(mem_view_t fn, code_block* out, int max);
|
||||
|
||||
/* Position-independent hash of a function's bytes. `fn` is a view spanning
|
||||
* exactly one function (e.g. a section-view sub-range covering a func_range from
|
||||
* vmie_win32_functions): fn.data[0] is the function's first byte, fn.size its
|
||||
* length. It steps `fn` with the decoder (x86_decode - no second decoder) and
|
||||
* folds the opcode / ModRM / SIB / immediate bytes into a 64-bit hash while
|
||||
* ZEROING the rel/RIP-relative displacement bytes of each instruction
|
||||
* (in.disp_off .. in.disp_off + in.disp_len, exactly the span sig_generate
|
||||
* wildcards). Those are the bytes that float with the load address and
|
||||
* relocation, so zeroing them makes the hash STABLE across images and ASLR -
|
||||
* the same function hashes identically wherever it is mapped.
|
||||
*
|
||||
* Returns a 64-bit hash, or 0 if `fn` is empty (no data / size 0) or does not
|
||||
* decode cleanly (a desync stops the walk). 0 is therefore "no hash", never a
|
||||
* valid fingerprint.
|
||||
*
|
||||
* Two uses on one primitive:
|
||||
* - fingerprint / library-ID: compare against a table of known function hashes
|
||||
* to auto-name recovered code (e.g. recognize a statically-linked CRT/SSL
|
||||
* routine without symbols);
|
||||
* - code diff: hash the same function in two snapshots - an unchanged hash
|
||||
* means the body is byte-identical (modulo relocation), a changed hash means
|
||||
* it was patched.
|
||||
*
|
||||
* Devirtualization needs NO new call - it is a composition of primitives the
|
||||
* engine already has: a C++ vtable at `vtable_va` is an array of code pointers,
|
||||
* so its METHODS are gva_jumptable(m, cr3, vtable_va, ...) (codeanalysis.h), and
|
||||
* its live INSTANCES are pmap_referrers(pm, vtable_va, ...) (pmap.h) - every
|
||||
* object's first qword is its vtable pointer. With the methods recovered,
|
||||
* func_hash names each method body against a known-hash table. (See win32.h for
|
||||
* the same note next to the indirect-call surface.)
|
||||
*
|
||||
* Example - diff a function across two snapshots:
|
||||
* mem_view_t a, b; // same function, two captures (SECTION_LOCAL/RVA views)
|
||||
* if (func_hash(a) != func_hash(b)) puts("function body changed"); */
|
||||
uint64_t func_hash(mem_view_t fn);
|
||||
|
||||
#endif /* VMIE_CODEANALYSIS_H */
|
||||
|
||||
+119
@@ -331,6 +331,125 @@ typedef struct { uint32_t from; uint32_t to; uint8_t kind; } call_edge;
|
||||
int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
call_edge* out, int max);
|
||||
|
||||
/* One import: a function this module pulls from another DLL, recovered from the
|
||||
* import directory (the INT/IAT pair of an IMAGE_IMPORT_DESCRIPTOR).
|
||||
* iat_rva - RVA of the IAT slot that holds the resolved function pointer at
|
||||
* run time (absolute VA = module_base + iat_rva). A call through
|
||||
* this import is `call qword [rip+disp]` whose target lands on this
|
||||
* slot - so iat_rva is exactly what vmie_win32_func_imports reports;
|
||||
* correlate the two to name a function's API calls.
|
||||
* dll - the exporting DLL name as written in the descriptor, NUL-
|
||||
* terminated, TRUNCATED to 31 chars (e.g. "KERNEL32.dll"). A name
|
||||
* longer than 31 bytes is cut; this is the documented limit.
|
||||
* name - the imported function name, NUL-terminated, TRUNCATED to 63 chars
|
||||
* (long C++ mangled names are cut); "" for a by-ordinal import.
|
||||
* ordinal - the import ordinal for a by-ordinal import (name[0]=='\0'), else
|
||||
* 0. By-ordinal imports set the high bit in the thunk and carry no
|
||||
* name in the image. */
|
||||
typedef struct {
    uint32_t iat_rva;  /* RVA of the IAT slot for this import */
    char     dll[32];  /* exporting DLL name, NUL-terminated, at most 31 chars */
    char     name[64]; /* imported function name, at most 63 chars; "" if by ordinal */
    uint16_t ordinal;  /* import ordinal when name is empty, else 0 */
} import_sym;
|
||||
|
||||
/* Enumerate the module's imports from its import directory (IMAGE_DIRECTORY_
|
||||
* ENTRY_IMPORT). For each IMAGE_IMPORT_DESCRIPTOR it reads the DLL name, then
|
||||
* walks the parallel INT (OriginalFirstThunk: the name/ordinal hints) and IAT
|
||||
* (FirstThunk: the resolved-pointer slots) in lockstep so every entry carries
|
||||
* its own IAT-slot RVA. A by-name thunk points at an IMAGE_IMPORT_BY_NAME
|
||||
* (hint+NUL-terminated name); a by-ordinal thunk has its top bit set and yields
|
||||
* an ordinal instead. The INT is preferred when present (it survives binding);
|
||||
* the IAT is the fallback.
|
||||
*
|
||||
* Returns the TOTAL number of imports (out=NULL => count only, so size then
|
||||
* fill), or -1 if there is no import directory or the headers/directory are
|
||||
* unreadable. Entries are reported descriptor by descriptor, and within a
|
||||
* descriptor in thunk order.
|
||||
*
|
||||
* Example - list a module's imports and where each resolves:
|
||||
* import_sym im[512];
|
||||
* int n = vmie_win32_imports(v, pr->cr3, m.base, im, 512);
|
||||
* for (int i = 0; i < n && i < 512; i++)
|
||||
* if (im[i].name[0])
|
||||
* printf("%s!%s -> IAT %#x\n", im[i].dll, im[i].name, im[i].iat_rva);
|
||||
* else
|
||||
* printf("%s!#%u -> IAT %#x\n", im[i].dll, im[i].ordinal, im[i].iat_rva); */
|
||||
int vmie_win32_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
import_sym* out, int max);
|
||||
|
||||
/* One inline-hook finding: a function whose FIRST instruction is a direct
|
||||
* jmp/call leaving the module image - the classic detour / trampoline shape.
|
||||
* func_rva - the hooked function's RVA (a .pdata function start). Absolute VA
|
||||
* = module_base + func_rva.
|
||||
* target - the absolute VA the entry redirects to. It lies OUTSIDE the
|
||||
* module image [module_base, module_base + SizeOfImage); that is
|
||||
* exactly what makes it a cross-module hook rather than an ordinary
|
||||
* intra-module branch. */
|
||||
typedef struct {
    uint32_t func_rva; /* RVA of the hooked function (a .pdata function start) */
    uint64_t target;   /* absolute VA the entry redirects to, outside the image */
} inline_hook;
|
||||
|
||||
/* Detect inline (entry-redirect) hooks. For each function from .pdata
|
||||
* (vmie_win32_functions) it decodes the FIRST instruction with x86_decode; if
|
||||
* that instruction is a DIRECT jmp/call (has_rel) whose resolved target
|
||||
* (x86_branch_target) lands OUTSIDE the module image
|
||||
* [module_base, module_base + SizeOfImage), it records {func_rva, target}. An
|
||||
* un-hooked function begins with its real prologue (push/sub/mov/endbr64...) or
|
||||
* branches inside its own image, so it is not reported.
|
||||
*
|
||||
* Returns the TOTAL number of hooked functions (out=NULL => count only), or -1
|
||||
* if the .pdata/.text directory or headers are missing/unreadable.
|
||||
*
|
||||
* Scope: this finds INLINE hooks (the function body's entry is patched). IAT
|
||||
* hooks - an import SLOT redirected to point outside its resolving module - are
|
||||
* a different shape that needs cross-module pointer resolution and are NOT
|
||||
* covered here.
|
||||
*
|
||||
* Example - report any patched function entries in a module:
|
||||
* inline_hook hk[64];
|
||||
* int n = vmie_win32_inline_hooks(v, pr->cr3, m.base, hk, 64);
|
||||
* for (int i = 0; i < n && i < 64; i++)
|
||||
* printf("sub_%x hooked -> %#llx\n", hk[i].func_rva,
|
||||
* (unsigned long long)hk[i].target); */
|
||||
int vmie_win32_inline_hooks(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
inline_hook* out, int max);
|
||||
|
||||
/* Recover which IAT slots a function calls, in call order - the function's
|
||||
* API-call sequence / behavioral fingerprint. It steps `func_rva`'s body with
|
||||
* x86_decode and, for every `call/jmp qword [rip+disp]` (an indirect branch
|
||||
* through memory: has_riprel) whose resolved memory target (x86_riprel_target)
|
||||
* is an IAT slot of THIS module's import directory, it records that slot's RVA.
|
||||
* Correlate the returned RVAs with vmie_win32_imports (same iat_rva) to turn the
|
||||
* sequence into named API calls (e.g. CreateFileW, WriteFile, CloseHandle).
|
||||
*
|
||||
* func_rva - the function to analyze, as an RVA (e.g. from
|
||||
* vmie_win32_functions or an export). Absolute VA = module_base +
|
||||
* func_rva.
|
||||
* iat_rvas - caller array receiving up to `max` IAT-slot RVAs in the order
|
||||
* the calls appear; NULL to count only.
|
||||
*
|
||||
* Returns the TOTAL number of IAT-slot calls in the function (iat_rvas=NULL =>
|
||||
* count), or -1 if the headers / import directory / function bytes are
|
||||
* unreadable, or if func_rva is not a .pdata function start. v1 resolves
* call/jmp THROUGH the IAT (rip-relative onto an import
|
||||
* slot); other indirect forms are out of scope.
|
||||
*
|
||||
* Example - print the API sequence of a function:
|
||||
* uint32_t slots[128];
|
||||
* int n = vmie_win32_func_imports(v, pr->cr3, m.base, fn_rva, slots, 128);
|
||||
* import_sym im[512];
|
||||
* int ni = vmie_win32_imports(v, pr->cr3, m.base, im, 512);
|
||||
* for (int i = 0; i < n && i < 128; i++)
|
||||
* for (int j = 0; j < ni && j < 512; j++)
|
||||
* if (im[j].iat_rva == slots[i]) { puts(im[j].name); break; } */
|
||||
int vmie_win32_func_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
uint32_t func_rva, uint32_t* iat_rvas, int max);
|
||||
|
||||
/* Devirtualization (C++ vtables) needs NO dedicated symbol - it is a
|
||||
* COMPOSITION of primitives the engine already exposes:
|
||||
* - a vtable at `vtable_va` is an array of code pointers, so its METHODS are
|
||||
* gva_jumptable(mem, cr3, vtable_va, ...) (codeanalysis.h) - the same
|
||||
* code-pointer-array walk that recovers switch tables;
|
||||
* - its live INSTANCES are pmap_referrers(pm, vtable_va, ...) (pmap.h),
|
||||
* because an object's first qword is its vtable pointer (who-points-here on
|
||||
* the vtable VA enumerates the objects).
|
||||
* Recover the method RVAs with gva_jumptable, then func_hash (codeanalysis.h)
|
||||
* can name each method body against a known-hash table. No new call is added
|
||||
* for this on purpose. */
|
||||
|
||||
/* One exported symbol from the module export directory (EAT).
|
||||
* rva - export target RVA (absolute VA = module_base + rva). Forwarder
|
||||
* exports report the forwarder-string RVA; see `forwarded`.
|
||||
|
||||
@@ -62,6 +62,7 @@ int vmie_pe_section(vmie_mem* m, uintptr_t cr3, uint64_t module_base,
|
||||
|
||||
/* OptionalHeader DataDirectory indices used across the engine. */
|
||||
#define PE_DIR_EXPORT 0u /* IMAGE_DIRECTORY_ENTRY_EXPORT */
|
||||
#define PE_DIR_IMPORT 1u /* IMAGE_DIRECTORY_ENTRY_IMPORT */
|
||||
#define PE_DIR_DEBUG 6u /* IMAGE_DIRECTORY_ENTRY_DEBUG */
|
||||
#define PE_DIR_EXCEPTION 3u /* IMAGE_DIRECTORY_ENTRY_EXCEPTION (.pdata) */
|
||||
|
||||
|
||||
@@ -516,3 +516,262 @@ int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
free(tbuf);
|
||||
return total;
|
||||
}
|
||||
|
||||
/* ---- public win32 surface: imports (import directory INT/IAT) ------------- *
|
||||
* IMAGE_IMPORT_DESCRIPTOR (20 bytes): OriginalFirstThunk(+0, the INT RVA),
|
||||
* TimeDateStamp(+4), ForwarderChain(+8), Name(+12, the DLL-name RVA),
|
||||
* FirstThunk(+16, the IAT RVA). The array ends at an all-zero descriptor. Each
|
||||
* descriptor's INT and IAT are parallel arrays of 8-byte thunks: a by-ordinal
|
||||
* thunk has bit 63 set (ordinal in the low 16 bits); a by-name thunk is the RVA
|
||||
* of an IMAGE_IMPORT_BY_NAME { uint16 Hint; char Name[]; }. We prefer the INT
|
||||
* for the name/ordinal (it survives binding) and always take the slot RVA from
|
||||
* the IAT position. Cold: one-shot directory read, reusing pe_data_dir. */
|
||||
|
||||
#define IMPDESC_SIZE 20u /* sizeof(IMAGE_IMPORT_DESCRIPTOR) */
|
||||
#define IMPDESC_OFT_OFF 0u /* OriginalFirstThunk (INT) RVA */
|
||||
#define IMPDESC_NAME_OFF 12u /* Name (DLL name) RVA */
|
||||
#define IMPDESC_FT_OFF 16u /* FirstThunk (IAT) RVA */
|
||||
#define IMP_ORDINAL_FLAG 0x8000000000000000ull /* by-ordinal thunk bit 63 */
|
||||
#define IMP_MAX_DESC 4096u /* descriptor-walk guard (malformed) */
|
||||
#define IMP_MAX_THUNK 65536u /* per-descriptor thunk guard */
|
||||
|
||||
/* Read a NUL-terminated ASCII string from guest VA into dst[cap], truncating to
|
||||
* cap-1 and best-effort over a non-fully-resident tail (shrink the read until it
|
||||
* succeeds), mirroring the export-name read. */
|
||||
/* Read a NUL-terminated ASCII string from guest VA `va` into dst[cap].
 *
 * The read is best-effort over a non-fully-resident tail: it first asks for
 * cap-1 bytes and shrinks the request one byte at a time until gva_read
 * succeeds (returns 0). The result is truncated to cap-1 characters.
 *
 * On return dst is ALWAYS a valid C string and every byte from the string's
 * end up to dst[cap-1] is zero:
 *   - if no read succeeds, dst is the empty string (previously only dst[cap-1]
 *     was written, leaving dst[0..cap-2] indeterminate);
 *   - after a successful read, everything past the first NUL (or past the bytes
 *     read, if none) is cleared, so no stale bytes follow the terminator.
 * cap == 0 is a no-op. */
static void imp_read_str(vmie_mem* m, uintptr_t cr3, uint64_t va,
                         char* dst, size_t cap) {
    if (cap == 0) { return; }

    size_t len = 0;                       /* string length actually recovered */
    for (size_t z = cap - 1; z > 0; z--) {
        if (gva_read(m, cr3, va, dst, z) == 0) {
            const char* nul = memchr(dst, 0, z);
            len = nul ? (size_t)(nul - dst) : z;
            break;
        }
    }
    /* len <= cap-1, so this always writes at least the terminator. */
    memset(dst + len, 0, cap - len);
}
|
||||
|
||||
int vmie_win32_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
import_sym* out, int max) __attribute__((cold));
|
||||
int vmie_win32_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
import_sym* out, int max) {
|
||||
vmie_mem* m = vmie_win32_mem(v);
|
||||
if (!m) { return -1; }
|
||||
|
||||
uint32_t imp_rva, imp_sz;
|
||||
if (pe_data_dir(m, cr3, module_base, PE_DIR_IMPORT, &imp_rva, &imp_sz) ||
|
||||
!imp_rva) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int total = 0;
|
||||
for (uint32_t d = 0; d < IMP_MAX_DESC; d++) {
|
||||
const uint64_t desc = module_base + imp_rva + (uint64_t)d * IMPDESC_SIZE;
|
||||
uint32_t oft, name_rva, ft;
|
||||
if (gva_read(m, cr3, desc + IMPDESC_OFT_OFF, &oft, 4) ||
|
||||
gva_read(m, cr3, desc + IMPDESC_NAME_OFF, &name_rva, 4) ||
|
||||
gva_read(m, cr3, desc + IMPDESC_FT_OFF, &ft, 4)) {
|
||||
return -1;
|
||||
}
|
||||
if (oft == 0 && name_rva == 0 && ft == 0) {
|
||||
break; /* zero terminator descriptor */
|
||||
}
|
||||
if (ft == 0) {
|
||||
continue; /* no IAT: nothing to report */
|
||||
}
|
||||
|
||||
char dll[32];
|
||||
dll[0] = 0;
|
||||
if (name_rva) {
|
||||
imp_read_str(m, cr3, module_base + name_rva, dll, sizeof dll);
|
||||
}
|
||||
|
||||
/* INT (OriginalFirstThunk) carries names even after binding; fall back to
|
||||
* the IAT (FirstThunk) when there is no INT. The slot RVA always comes
|
||||
* from the IAT position so it matches a `call qword [rip]` target. */
|
||||
const uint32_t int_rva = oft ? oft : ft;
|
||||
for (uint32_t t = 0; t < IMP_MAX_THUNK; t++) {
|
||||
uint64_t thunk;
|
||||
if (gva_read(m, cr3, module_base + int_rva + (uint64_t)t * 8,
|
||||
&thunk, 8)) {
|
||||
return -1;
|
||||
}
|
||||
if (thunk == 0) {
|
||||
break; /* end of this thunk array */
|
||||
}
|
||||
const uint32_t iat_rva = ft + t * 8;
|
||||
if (out && total < max) {
|
||||
import_sym* s = &out[total];
|
||||
memset(s, 0, sizeof *s);
|
||||
s->iat_rva = iat_rva;
|
||||
memcpy(s->dll, dll, sizeof s->dll);
|
||||
if (thunk & IMP_ORDINAL_FLAG) {
|
||||
s->ordinal = (uint16_t)(thunk & 0xFFFFu); /* by-ordinal */
|
||||
} else {
|
||||
/* by-name: thunk is the RVA of IMAGE_IMPORT_BY_NAME; the name
|
||||
* begins at +2 (after the uint16 Hint). */
|
||||
imp_read_str(m, cr3, module_base + (uint32_t)thunk + 2,
|
||||
s->name, sizeof s->name);
|
||||
}
|
||||
}
|
||||
total++;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
/* ---- public win32 surface: inline-hook detection ------------------------- *
|
||||
* For each .pdata function, decode the FIRST instruction; if it is a DIRECT
|
||||
* jmp/call (has_rel) whose target leaves the module image, it is a detour. Reuses
|
||||
* vmie_win32_functions (.pdata starts) and x86_decode - no second parser. The
|
||||
* entry bytes are read directly under cr3 (a 16-byte window covers any single
|
||||
* x86 instruction). Cold: one-shot directory read + per-function entry decode. */
|
||||
|
||||
#define HOOK_ENTRY_BYTES 16u /* entry read/decode window; covers the 15-byte x86 maximum instruction length */
|
||||
|
||||
int vmie_win32_inline_hooks(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
inline_hook* out, int max) __attribute__((cold));
|
||||
int vmie_win32_inline_hooks(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
inline_hook* out, int max) {
|
||||
vmie_mem* m = vmie_win32_mem(v);
|
||||
if (!m) { return -1; }
|
||||
|
||||
/* image bounds [module_base, module_base + SizeOfImage) (as in callgraph). */
|
||||
uint32_t lfanew;
|
||||
if (gva_read(m, cr3, module_base + 0x3C, &lfanew, 4)) { return -1; }
|
||||
uint32_t size_of_image;
|
||||
if (gva_read(m, cr3, module_base + lfanew + 0x18 + OPT_SIZEOFIMAGE_OFF,
|
||||
&size_of_image, 4)) {
|
||||
return -1;
|
||||
}
|
||||
const uint64_t img_lo = module_base;
|
||||
const uint64_t img_hi = module_base + (uint64_t)size_of_image; /* exclusive */
|
||||
|
||||
/* function inventory: count, then gather (stack common case, heap overflow)
|
||||
* so every function entry is examined, none silently dropped. */
|
||||
const int nfn = vmie_win32_functions(v, cr3, module_base, NULL, 0);
|
||||
if (nfn < 0) { return -1; }
|
||||
func_range stack_fr[256];
|
||||
func_range* fr = stack_fr;
|
||||
func_range* heap_fr = NULL;
|
||||
if (nfn > (int)(sizeof stack_fr / sizeof stack_fr[0])) {
|
||||
heap_fr = malloc((size_t)nfn * sizeof *heap_fr);
|
||||
if (!heap_fr) { return -1; }
|
||||
fr = heap_fr;
|
||||
}
|
||||
const int got = vmie_win32_functions(v, cr3, module_base, fr, nfn);
|
||||
if (got < 0) { free(heap_fr); return -1; }
|
||||
|
||||
int total = 0;
|
||||
for (int f = 0; f < got; f++) {
|
||||
const uint64_t fn_va = module_base + fr[f].rva;
|
||||
uint8_t entry[HOOK_ENTRY_BYTES];
|
||||
if (gva_read(m, cr3, fn_va, entry, sizeof entry)) {
|
||||
continue; /* entry not resident: skip */
|
||||
}
|
||||
x86_insn in;
|
||||
const int ilen = x86_decode(entry, sizeof entry, &in);
|
||||
if (ilen <= 0) {
|
||||
continue; /* undecodable entry: skip */
|
||||
}
|
||||
if (!in.has_rel || (in.flow != X86_JMP && in.flow != X86_CALL)) {
|
||||
continue; /* not a direct jmp/call */
|
||||
}
|
||||
const uint64_t tgt = x86_branch_target(fn_va, &in);
|
||||
if (tgt >= img_lo && tgt < img_hi) {
|
||||
continue; /* stays inside the image */
|
||||
}
|
||||
if (out && total < max) {
|
||||
out[total].func_rva = fr[f].rva;
|
||||
out[total].target = tgt;
|
||||
}
|
||||
total++;
|
||||
}
|
||||
|
||||
free(heap_fr);
|
||||
return total;
|
||||
}
|
||||
|
||||
/* ---- public win32 surface: per-function imports (call/jmp through IAT) ---- *
|
||||
* Step one function and, for each `call/jmp qword [rip+disp]` (has_riprel) whose
|
||||
* memory target is an IAT slot of this module's import directory, record the slot
|
||||
* RVA - the function's API-call sequence. Reuses vmie_win32_functions (to bound
|
||||
* the body), vmie_win32_section_view (to gather .text), and the decoder. Cold. */
|
||||
|
||||
int vmie_win32_func_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
                            uint32_t func_rva, uint32_t* iat_rvas, int max)
    __attribute__((cold));

/* Record, in code order, the IAT slots one function branches through.
 *
 * func_rva must equal a function start reported by vmie_win32_functions; its
 * size bounds the bytes that are decoded. Every RIP-relative call/jmp
 * (has_riprel, flow X86_CALL or X86_JMP) whose memory target is 8-aligned and
 * falls inside the module's IAT-slot window contributes that target's RVA.
 *
 * The window is [lowest import iat_rva, highest import iat_rva + 8) over all
 * imports from vmie_win32_imports - a bounding range, not an exact slot set.
 *
 * iat_rvas / max - caller array receiving up to `max` slot RVAs; NULL to count
 *                  only.
 *
 * Returns the TOTAL number of matching branches (which may exceed `max`), or -1
 * if the memory handle is unavailable, the import or function inventory fails,
 * an allocation fails, func_rva is not a known function start (or has size 0),
 * or the function bytes cannot be read. Decoding stops at the first
 * undecodable instruction and returns what was found so far. */
int vmie_win32_func_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
                            uint32_t func_rva, uint32_t* iat_rvas, int max) {
    vmie_mem* m = vmie_win32_mem(v);
    if (!m) { return -1; }

    /* the IAT-slot window of this module: [imp_lo, imp_hi). A rip-relative target
     * landing in this window and 8-aligned is an import-thunk call. */
    const int nimp = vmie_win32_imports(v, cr3, module_base, NULL, 0);
    if (nimp < 0) { return -1; }
    uint32_t imp_lo = 0xFFFFFFFFu, imp_hi = 0;      /* IAT-slot RVA bounds */
    if (nimp > 0) {
        import_sym* im = malloc((size_t)nimp * sizeof *im);
        if (!im) { return -1; }
        const int gi = vmie_win32_imports(v, cr3, module_base, im, nimp);
        if (gi < 0) { free(im); return -1; }
        const int use = gi < nimp ? gi : nimp;
        for (int i = 0; i < use; i++) {
            if (im[i].iat_rva < imp_lo) { imp_lo = im[i].iat_rva; }
            if (im[i].iat_rva + 8 > imp_hi) { imp_hi = im[i].iat_rva + 8; }
        }
        free(im);
    }

    /* locate the function's extent from .pdata (count then gather). */
    const int nfn = vmie_win32_functions(v, cr3, module_base, NULL, 0);
    if (nfn < 0) { return -1; }
    func_range stack_fr[256];
    func_range* fr = stack_fr;
    func_range* heap_fr = NULL;
    if (nfn > (int)(sizeof stack_fr / sizeof stack_fr[0])) {
        heap_fr = malloc((size_t)nfn * sizeof *heap_fr);
        if (!heap_fr) { return -1; }
        fr = heap_fr;
    }
    int got = vmie_win32_functions(v, cr3, module_base, fr, nfn);
    if (got < 0) { free(heap_fr); return -1; }
    /* Same clamp as for the imports above: the buffer holds nfn entries, and the
     * live guest may report a different count on the second call. */
    if (got > nfn) { got = nfn; }
    uint32_t fn_size = 0;
    for (int f = 0; f < got; f++) {
        if (fr[f].rva == func_rva) { fn_size = fr[f].size; break; }
    }
    free(heap_fr);
    if (fn_size == 0) { return -1; }        /* not a known function start */

    /* gather the function body addressed at its absolute VA so a rip-relative
     * target is directly an absolute VA. */
    uint8_t* fb = malloc(fn_size);
    if (!fb) { return -1; }
    if (gva_read(m, cr3, module_base + func_rva, fb, fn_size)) {
        free(fb);
        return -1;
    }
    const uint64_t fn_va = module_base + func_rva;

    int total = 0;
    for (size_t off = 0; off < fn_size; ) {
        x86_insn in;
        const int ilen = x86_decode(fb + off, fn_size - off, &in);
        if (ilen <= 0) { break; }           /* desync: stop this function */
        if (in.has_riprel && (in.flow == X86_CALL || in.flow == X86_JMP)) {
            const uint64_t tgt = x86_riprel_target(fn_va + off, &in);
            if (tgt >= module_base) {
                const uint64_t tgt_rva = tgt - module_base;
                if (tgt_rva >= imp_lo && tgt_rva < imp_hi &&
                    (tgt_rva & 7u) == 0) {
                    if (iat_rvas && total < max) {
                        iat_rvas[total] = (uint32_t)tgt_rva;
                    }
                    total++;
                }
            }
        }
        off += (size_t)ilen;
    }

    free(fb);
    return total;
}
|
||||
|
||||
@@ -138,3 +138,38 @@ int cfg_blocks(mem_view_t fn, code_block* out, int max) {
|
||||
free(leader);
|
||||
return total;
|
||||
}
|
||||
|
||||
/* ---- position-independent function hash ----------------------------------- *
|
||||
* Step the function with the decoder (the single decoder, x86_decode) and fold
|
||||
* its bytes into a 64-bit FNV-1a hash, ZEROING each instruction's rel/RIP-rel
|
||||
* displacement bytes [disp_off, disp_off+disp_len) before folding - the same
|
||||
* span sig_generate wildcards. Those bytes float with load address / relocation,
|
||||
* so zeroing them makes the hash stable across images and ASLR. Cold: a one-shot
|
||||
* pass over one function body, not a hot loop. */
|
||||
|
||||
#define FNV64_OFFSET 0xcbf29ce484222325ull
|
||||
#define FNV64_PRIME 0x00000100000001b3ull
|
||||
|
||||
uint64_t func_hash(mem_view_t fn) __attribute__((cold));

/* Position-independent 64-bit FNV-1a hash of one function's bytes.
 *
 * fn.data[0 .. fn.size) is stepped instruction by instruction with x86_decode.
 * Every byte of each instruction is folded into the hash, except that the bytes
 * in [disp_off, disp_off + disp_len) are folded as zero when the decoder
 * reports disp_len > 0.
 *
 * Returns 0 when fn is empty (NULL data or size 0) or when any instruction
 * fails to decode. Otherwise returns a NON-ZERO hash: a fold that happens to
 * land on 0 is remapped to FNV64_OFFSET, so 0 always means "no hash" and can
 * never be mistaken for a fingerprint. */
uint64_t func_hash(mem_view_t fn) {
    if (!fn.data || fn.size == 0) { return 0; }

    uint64_t h = FNV64_OFFSET;
    for (size_t off = 0; off < fn.size; ) {
        x86_insn in;
        const int ilen = x86_decode(fn.data + off, fn.size - off, &in);
        if (ilen <= 0) { return 0; }        /* desync: not a valid hash */

        /* The displacement span to neutralize. With no displacement the span is
         * empty ([ilen, ilen)); the fold loop below never indexes past ilen, so
         * an out-of-range disp_off/len cannot cause a read past the
         * instruction. */
        const size_t d0 = (in.disp_len > 0) ? (size_t)in.disp_off : (size_t)ilen;
        const size_t d1 = (in.disp_len > 0) ? d0 + (size_t)in.disp_len : (size_t)ilen;
        for (int i = 0; i < ilen; i++) {
            const uint8_t b = (i >= (int)d0 && i < (int)d1) ? 0u : fn.data[off + (size_t)i];
            h ^= b;
            h *= FNV64_PRIME;
        }
        off += (size_t)ilen;
    }
    /* Keep 0 reserved for "no hash". */
    return h ? h : FNV64_OFFSET;
}
|
||||
|
||||
Reference in New Issue
Block a user