mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
Add imports, inline-hook detection, function hashing, per-function imports
Wave 2 of the code-analysis layer:
- vmie_win32_imports resolves the import directory (INT/IAT) to {iat_rva, dll,
name, ordinal} - named APIs, walking the name and slot thunks in lockstep so
every import carries the IAT slot a call lands on.
- vmie_win32_inline_hooks decodes each .pdata function's entry and reports any
whose first instruction is a direct jmp/call leaving the module image - the
detour/trampoline shape.
- vmie_win32_func_imports records, in order, the IAT slots a function calls
through (call qword [rip+disp] onto an import slot): the function's API-call
sequence, named by correlating with vmie_win32_imports.
- func_hash (codeanalysis.h) hashes a function position-independently, zeroing
the displacement bytes the decoder locates - one primitive for fingerprinting
known code and for detecting a changed body across snapshots.
Devirtualization needs no new call and is documented as a composition: a
vtable's methods are gva_jumptable(vtable_va), its instances are
pmap_referrers(vtable_va), and func_hash names each method. Imports reuse the
shared data-directory accessor; the analyses reuse the function/section/decode
primitives - no second PE or instruction parser.
This commit is contained in:
@@ -77,4 +77,40 @@ typedef struct { uint32_t start; uint32_t end; } code_block;
|
||||
* printf("block %d: [%#x, %#x)\n", i, bb[i].start, bb[i].end); */
|
||||
int cfg_blocks(mem_view_t fn, code_block* out, int max);
|
||||
|
||||
/* Position-independent hash of a function's bytes. `fn` is a view spanning
|
||||
* exactly one function (e.g. a section-view sub-range covering a func_range from
|
||||
* vmie_win32_functions): fn.data[0] is the function's first byte, fn.size its
|
||||
* length. It steps `fn` with the decoder (x86_decode - no second decoder) and
|
||||
* folds the opcode / ModRM / SIB / immediate bytes into a 64-bit hash while
|
||||
* ZEROING the rel/RIP-relative displacement bytes of each instruction
|
||||
* (in.disp_off .. in.disp_off + in.disp_len, exactly the span sig_generate
|
||||
* wildcards). Those are the bytes that float with the load address and
|
||||
* relocation, so zeroing them makes the hash STABLE across images and ASLR -
|
||||
* the same function hashes identically wherever it is mapped.
|
||||
*
|
||||
* Returns a 64-bit hash, or 0 if `fn` is empty (no data / size 0) or does not
|
||||
* decode cleanly (a desync stops the walk). 0 is therefore "no hash", never a
|
||||
* valid fingerprint.
|
||||
*
|
||||
* Two uses on one primitive:
|
||||
* - fingerprint / library-ID: compare against a table of known function hashes
|
||||
* to auto-name recovered code (e.g. recognize a statically-linked CRT/SSL
|
||||
* routine without symbols);
|
||||
* - code diff: hash the same function in two snapshots - an unchanged hash
|
||||
* means the body is byte-identical (modulo relocation), a changed hash means
|
||||
* it was patched.
|
||||
*
|
||||
* Devirtualization needs NO new call - it is a composition of primitives the
|
||||
* engine already has: a C++ vtable at `vtable_va` is an array of code pointers,
|
||||
* so its METHODS are gva_jumptable(m, cr3, vtable_va, ...) (codeanalysis.h), and
|
||||
* its live INSTANCES are pmap_referrers(pm, vtable_va, ...) (pmap.h) - every
|
||||
* object's first qword is its vtable pointer. With the methods recovered,
|
||||
* func_hash names each method body against a known-hash table. (See win32.h for
|
||||
* the same note next to the indirect-call surface.)
|
||||
*
|
||||
* Example - diff a function across two snapshots:
|
||||
* mem_view_t a, b; // same function, two captures (SECTION_LOCAL/RVA views)
|
||||
* if (func_hash(a) != func_hash(b)) puts("function body changed"); */
|
||||
/* Note: a 0 return is "no hash" (empty view or a decode desync), so test each
 * result for 0 before comparing two hashes. */
uint64_t func_hash(mem_view_t fn);
|
||||
|
||||
#endif /* VMIE_CODEANALYSIS_H */
|
||||
|
||||
+119
@@ -331,6 +331,125 @@ typedef struct { uint32_t from; uint32_t to; uint8_t kind; } call_edge;
|
||||
int vmie_win32_callgraph(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
call_edge* out, int max);
|
||||
|
||||
/* One import: a function this module pulls from another DLL, recovered from the
|
||||
* import directory (the INT/IAT pair of an IMAGE_IMPORT_DESCRIPTOR).
|
||||
* iat_rva - RVA of the IAT slot that holds the resolved function pointer at
|
||||
* run time (absolute VA = module_base + iat_rva). A call through
|
||||
* this import is `call qword [rip+disp]` whose target lands on this
|
||||
* slot - so iat_rva is exactly what vmie_win32_func_imports reports;
|
||||
* correlate the two to name a function's API calls.
|
||||
* dll - the exporting DLL name as written in the descriptor, NUL-
|
||||
* terminated, TRUNCATED to 31 chars (e.g. "KERNEL32.dll"). A name
|
||||
* longer than 31 bytes is cut; this is the documented limit.
|
||||
* name - the imported function name, NUL-terminated, TRUNCATED to 63 chars
|
||||
* (long C++ mangled names are cut); "" for a by-ordinal import.
|
||||
* ordinal - the import ordinal for a by-ordinal import (name[0]=='\0'), else
|
||||
* 0. By-ordinal imports set the high bit in the thunk and carry no
|
||||
* name in the image. */
|
||||
/* One import record; fields are filled by vmie_win32_imports. */
typedef struct {
    uint32_t iat_rva;  /* RVA of the IAT slot (absolute VA = module_base + iat_rva) */
    char     dll[32];  /* exporting DLL name, NUL-terminated, cut to 31 chars */
    char     name[64]; /* function name, NUL-terminated, cut to 63 chars; "" if by-ordinal */
    uint16_t ordinal;  /* ordinal for a by-ordinal import, else 0 */
} import_sym;
|
||||
|
||||
/* Enumerate the module's imports from its import directory (IMAGE_DIRECTORY_
|
||||
* ENTRY_IMPORT). For each IMAGE_IMPORT_DESCRIPTOR it reads the DLL name, then
|
||||
* walks the parallel INT (OriginalFirstThunk: the name/ordinal hints) and IAT
|
||||
* (FirstThunk: the resolved-pointer slots) in lockstep so every entry carries
|
||||
* its own IAT-slot RVA. A by-name thunk points at an IMAGE_IMPORT_BY_NAME
|
||||
* (hint+NUL-terminated name); a by-ordinal thunk has its top bit set and yields
|
||||
* an ordinal instead. The INT is preferred when present (it survives binding);
|
||||
* the IAT is the fallback.
|
||||
*
|
||||
* Returns the TOTAL number of imports (out=NULL => count only, so size then
|
||||
* fill), or -1 if there is no import directory or the headers/directory are
|
||||
* unreadable. Entries are reported descriptor by descriptor, and within a
|
||||
* descriptor in thunk order.
|
||||
*
|
||||
* Example - list a module's imports and where each resolves:
|
||||
* import_sym im[512];
|
||||
* int n = vmie_win32_imports(v, pr->cr3, m.base, im, 512);
|
||||
* for (int i = 0; i < n && i < 512; i++)
|
||||
* if (im[i].name[0])
|
||||
* printf("%s!%s -> IAT %#x\n", im[i].dll, im[i].name, im[i].iat_rva);
|
||||
* else
|
||||
* printf("%s!#%u -> IAT %#x\n", im[i].dll, im[i].ordinal, im[i].iat_rva); */
|
||||
int vmie_win32_imports(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
import_sym* out, int max);
|
||||
|
||||
/* One inline-hook finding: a function whose FIRST instruction is a direct
|
||||
* jmp/call leaving the module image - the classic detour / trampoline shape.
|
||||
* func_rva - the hooked function's RVA (a .pdata function start). Absolute VA
|
||||
* = module_base + func_rva.
|
||||
* target - the absolute VA the entry redirects to. It lies OUTSIDE the
|
||||
* module image [module_base, module_base + SizeOfImage); that is
|
||||
* exactly what makes it a cross-module hook rather than an ordinary
|
||||
* intra-module branch. */
|
||||
/* One inline-hook finding; fields are filled by vmie_win32_inline_hooks. */
typedef struct {
    uint32_t func_rva; /* hooked function's RVA (a .pdata function start) */
    uint64_t target;   /* absolute VA the entry redirects to, outside the module image */
} inline_hook;
|
||||
|
||||
/* Detect inline (entry-redirect) hooks. For each function from .pdata
|
||||
* (vmie_win32_functions) it decodes the FIRST instruction with x86_decode; if
|
||||
* that instruction is a DIRECT jmp/call (has_rel) whose resolved target
|
||||
* (x86_branch_target) lands OUTSIDE the module image
|
||||
* [module_base, module_base + SizeOfImage), it records {func_rva, target}. An
|
||||
* un-hooked function begins with its real prologue (push/sub/mov/endbr64...) or
|
||||
* branches inside its own image, so it is not reported.
|
||||
*
|
||||
* Returns the TOTAL number of hooked functions (out=NULL => count only), or -1
|
||||
* if the .pdata/.text directory or headers are missing/unreadable.
|
||||
*
|
||||
* Scope: this finds INLINE hooks (the function body's entry is patched). IAT
|
||||
* hooks - an import SLOT redirected to point outside its resolving module - are
|
||||
* a different shape that needs cross-module pointer resolution and are NOT
|
||||
* covered here.
|
||||
*
|
||||
* Example - report any patched function entries in a module:
|
||||
* inline_hook hk[64];
|
||||
* int n = vmie_win32_inline_hooks(v, pr->cr3, m.base, hk, 64);
|
||||
* for (int i = 0; i < n && i < 64; i++)
|
||||
* printf("sub_%x hooked -> %#llx\n", hk[i].func_rva,
|
||||
* (unsigned long long)hk[i].target); */
|
||||
int vmie_win32_inline_hooks(vmie_win32* v, uint64_t cr3, uint64_t module_base,
|
||||
inline_hook* out, int max);
|
||||
|
||||
/* Recover which IAT slots a function calls, in call order - the function's
|
||||
* API-call sequence / behavioral fingerprint. It steps `func_rva`'s body with
|
||||
* x86_decode and, for every `call/jmp qword [rip+disp]` (an indirect branch
|
||||
* through memory: has_riprel) whose resolved memory target (x86_riprel_target)
|
||||
* is an IAT slot of THIS module's import directory, it records that slot's RVA.
|
||||
* Correlate the returned RVAs with vmie_win32_imports (same iat_rva) to turn the
|
||||
* sequence into named API calls (e.g. CreateFileW, WriteFile, CloseHandle).
|
||||
*
|
||||
* func_rva - the function to analyze, as an RVA (e.g. from
|
||||
* vmie_win32_functions or an export). Absolute VA = module_base +
|
||||
* func_rva.
|
||||
* iat_rvas - caller array receiving up to `max` IAT-slot RVAs in the order
|
||||
* the calls appear; NULL to count only.
|
||||
*
|
||||
* Returns the TOTAL number of IAT-slot calls in the function (iat_rvas=NULL =>
|
||||
* count), or -1 if the headers / import directory / function bytes are
|
||||
* unreadable. v1 resolves call/jmp THROUGH the IAT (rip-relative onto an import
|
||||
* slot); other indirect forms are out of scope.
|
||||
*
|
||||
* Example - print the API sequence of a function:
|
||||
* uint32_t slots[128];
|
||||
* int n = vmie_win32_func_imports(v, pr->cr3, m.base, fn_rva, slots, 128);
|
||||
* import_sym im[512];
|
||||
* int ni = vmie_win32_imports(v, pr->cr3, m.base, im, 512);
|
||||
* for (int i = 0; i < n && i < 128; i++)
|
||||
* for (int j = 0; j < ni && j < 512; j++)
|
||||
* if (im[j].iat_rva == slots[i]) { puts(im[j].name); break; } */
|
||||
int vmie_win32_func_imports(
    vmie_win32* v,
    uint64_t cr3,
    uint64_t module_base, /* function VA = module_base + func_rva */
    uint32_t func_rva,    /* function to analyze, as an RVA */
    uint32_t* iat_rvas,   /* receives up to `max` IAT-slot RVAs in call order; NULL => count only */
    int max);
|
||||
|
||||
/* Devirtualization (C++ vtables) needs NO dedicated symbol - it is a
|
||||
* COMPOSITION of primitives the engine already exposes:
|
||||
* - a vtable at `vtable_va` is an array of code pointers, so its METHODS are
|
||||
* gva_jumptable(mem, cr3, vtable_va, ...) (codeanalysis.h) - the same
|
||||
* code-pointer-array walk that recovers switch tables;
|
||||
* - its live INSTANCES are pmap_referrers(pm, vtable_va, ...) (pmap.h),
|
||||
* because an object's first qword is its vtable pointer (who-points-here on
|
||||
* the vtable VA enumerates the objects).
|
||||
* Recover the method RVAs with gva_jumptable, then func_hash (codeanalysis.h)
|
||||
* can name each method body against a known-hash table. No new call is added
|
||||
* for this on purpose. */
|
||||
|
||||
/* One exported symbol from the module export directory (EAT).
|
||||
* rva - export target RVA (absolute VA = module_base + rva). Forwarder
|
||||
* exports report the forwarder-string RVA; see `forwarded`.
|
||||
|
||||
Reference in New Issue
Block a user