mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-17 23:46:37 +03:00
Add function-level code diff over caller-supplied views
code_diff compares two views of the same code in one coordinate space - an on-disk image section against the live in-memory section, or one .text across two snapshots - and reports the functions whose body changed. For each function extent it func_hash()es the slice of each view and flags a mismatch: a patch, an inline hook, or an unpacked/JIT-rewritten body. A thin handler over func_hash + mem_sub, with no file I/O of its own - the caller owns reading the on-disk image. The relocation limit (absolute-address immediates) is documented; two snapshots at the same base diff exactly. Closes the non-starred reversing series.
This commit is contained in:
@@ -113,4 +113,38 @@ int cfg_blocks(mem_view_t fn, code_block* out, int max);
|
||||
* if (func_hash(a) != func_hash(b)) puts("function body changed"); */
|
||||
uint64_t func_hash(mem_view_t fn);
|
||||
|
||||
/* Function-level code diff between two views of the same code in the SAME coordinate space (both
|
||||
* MODULE_RVA, or both SECTION_LOCAL): e.g. an on-disk image section vs the live in-memory section,
|
||||
* or one .text across two snapshots. For each function extent in `fns` (a code_block [start,end) in
|
||||
* the views' coordinate), it func_hash()es that slice of `a` and of `b`; where the two hashes differ
|
||||
* the function body changed - a patch, an inline hook, an unpacked/JIT-rewritten body.
|
||||
*
|
||||
* a, b - the two code views, SAME coordinate space and SAME layout (a function's bytes sit at
|
||||
* the same offset in both). Build them with vmie_win32_section_view (live) and from the
|
||||
* on-disk PE (caller's own file read), or from two snapshots.
|
||||
* fns - function extents to compare (e.g. from vmie_win32_functions: code_block{start=rva,
|
||||
* end=rva+size} for a MODULE_RVA view). A function whose extent falls outside either
|
||||
* view is skipped.
|
||||
* changed - caller array receiving up to `max` differing function start offsets (NULL to count).
|
||||
* Returns the TOTAL number of functions that differ (changed=NULL => count only), or -1 on bad input (fns == NULL or nfns < 0).
|
||||
*
|
||||
* Relocation note (v1): func_hash already neutralizes rel/RIP-relative displacements (they are
|
||||
* position-independent and identical on disk and in memory), so ordinary x86-64 code diffs cleanly
|
||||
* WITHOUT applying relocations. The exception is an ABSOLUTE-address immediate (e.g. movabs reg,
|
||||
* imm64 carrying a relocated pointer): such a function may read as "changed" on an on-disk-vs-memory
|
||||
* diff even when unpatched. A .reloc cross-check (to also mask relocated immediates) is a future
|
||||
* extension; for two snapshots at the same load address the diff is exact.
|
||||
*
|
||||
* Example - functions patched in the live image vs the on-disk file:
|
||||
* func_range fr[1024];
|
||||
* int nf = vmie_win32_functions(v, cr3, base, fr, 1024);
|
||||
* code_block fns[1024];
|
||||
* for (int i = 0; i < nf && i < 1024; i++) { fns[i].start = fr[i].rva;
|
||||
* fns[i].end = fr[i].rva + fr[i].size; }
|
||||
* // live_view, disk_view: both MODULE_RVA over .text (disk_view from the caller's file read)
|
||||
* uint32_t changed[256];
|
||||
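* int nc = code_diff(disk_view, live_view, fns, nf < 1024 ? nf : 1024, changed, 256);
* // nc is the TOTAL that differ; only the first min(nc, 256) entries of changed[] are filled */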
|
||||
int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
|
||||
uint32_t* changed, int max);
|
||||
|
||||
#endif /* VMIE_CODEANALYSIS_H */
|
||||
|
||||
@@ -173,3 +173,43 @@ uint64_t func_hash(mem_view_t fn) {
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
/* ---- function-level code diff -------------------------------------------- *
|
||||
* For each function extent, mem_sub the SAME [start,end) out of both views and
|
||||
* compare their func_hash (the position-independent fingerprint; it neutralizes
* rel/RIP-relative displacements but not relocated absolute-address immediates).
* A differing hash means a patched / hooked / rewritten body. The
|
||||
* slices are zero-copy (mem_sub borrows the views' bytes; no byte is copied) and
|
||||
* hashing reuses func_hash - no second decoder or hash here. Cold: a one-shot
|
||||
* pass over the function table, not a hot loop. */
|
||||
|
||||
/* Does mem_sub yield exactly the requested extent? mem_sub clamps an out-of-view
|
||||
* window to a zeroed view (data == NULL) or trims its size, so an extent that is
|
||||
* fully present comes back with the same data and the full size - anything else
|
||||
* is partially or wholly outside the view and must be skipped. */
|
||||
static int sub_is_exact(mem_view_t sub, size_t want) {
|
||||
return sub.data != NULL && sub.size == want;
|
||||
}
|
||||
|
||||
/* code_diff - count, and optionally list, the functions whose func_hash differs
 * between two views.
 *
 *   a, b    - the two code views. Each extent is sliced out of a view at
 *             view.base_va + start, for (end - start) bytes.
 *   fns     - nfns function extents, each a [start,end) pair.
 *   changed - optional output array; while fewer than `max` differences have
 *             been recorded, the start offset of each differing function is
 *             stored here. May be NULL to only count.
 *   max     - capacity of `changed`.
 *
 * Skipped without being counted: extents with end <= start, and extents for
 * which mem_sub does not return the exact requested window in BOTH views.
 *
 * Returns the total number of differing functions (this can exceed `max`), or
 * -1 when fns is NULL or nfns is negative.
 *
 * Marked cold: the prototype below carries __attribute__((cold)). */
int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
              uint32_t* changed, int max) __attribute__((cold));

int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
              uint32_t* changed, int max) {
    if (fns == NULL || nfns < 0) {
        return -1;
    }

    int ndiff = 0;
    const code_block* const stop = fns + nfns;

    for (const code_block* fn = fns; fn < stop; fn++) {
        /* Empty or inverted extent: nothing to hash. */
        if (fn->start >= fn->end) {
            continue;
        }
        const size_t span = (size_t)(fn->end - fn->start);

        /* Take the same window out of each view; both slices are requested
         * before either one is validated. */
        const mem_view_t in_a = mem_sub(a, a.base_va + fn->start, span);
        const mem_view_t in_b = mem_sub(b, b.base_va + fn->start, span);
        if (!(sub_is_exact(in_a, span) && sub_is_exact(in_b, span))) {
            continue;
        }

        const uint64_t hash_a = func_hash(in_a);
        const uint64_t hash_b = func_hash(in_b);
        if (hash_a == hash_b) {
            continue;
        }

        /* Record the start offset while there is room; keep counting after. */
        if (changed != NULL && ndiff < max) {
            changed[ndiff] = fn->start;
        }
        ndiff++;
    }

    return ndiff;
}
|
||||
|
||||
Reference in New Issue
Block a user