Add function-level code diff over caller-supplied views

code_diff compares two views of the same code in one coordinate space - an
on-disk image section against the live in-memory section, or one .text across
two snapshots - and reports the functions whose body changed. For each function
extent it func_hash()es the slice of each view and flags a mismatch: a patch, an
inline hook, or an unpacked/JIT-rewritten body. A thin handler over func_hash +
mem_sub, with no file I/O of its own - the caller owns reading the on-disk image.
The relocation limit (absolute-address immediates) is documented; two snapshots
at the same base diff exactly. Closes the non-starred reversing series.
This commit is contained in:
2026-06-16 20:21:36 +03:00
parent 35c5dc06ba
commit 50ed32b7dc
2 changed files with 74 additions and 0 deletions
+34
View File
@@ -113,4 +113,38 @@ int cfg_blocks(mem_view_t fn, code_block* out, int max);
* if (func_hash(a) != func_hash(b)) puts("function body changed"); */ * if (func_hash(a) != func_hash(b)) puts("function body changed"); */
uint64_t func_hash(mem_view_t fn); uint64_t func_hash(mem_view_t fn);
/* Function-level code diff between two views of the same code in the SAME coordinate space (both
* MODULE_RVA, or both SECTION_LOCAL): e.g. an on-disk image section vs the live in-memory section,
* or one .text across two snapshots. For each function extent in `fns` (a code_block [start,end) in
* the views' coordinate), it func_hash()es that slice of `a` and of `b`; where the two hashes differ
* the function body changed - a patch, an inline hook, an unpacked/JIT-rewritten body.
*
* a, b - the two code views, SAME coordinate space and SAME layout (a function's bytes sit at
* the same offset in both). Build them with vmie_win32_section_view (live) and from the
* on-disk PE (caller's own file read), or from two snapshots.
* fns - function extents to compare (e.g. from vmie_win32_functions: code_block{start=rva,
* end=rva+size} for a MODULE_RVA view). A function whose extent falls outside either
* view is skipped.
* changed - caller array receiving up to `max` differing function start offsets (NULL to count).
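* Returns the TOTAL number of functions that differ - this may exceed `max`; only the first `max`
* start offsets are stored (changed=NULL => count only) - or -1 on bad input (NULL fns, nfns < 0).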
*
* Relocation note (v1): func_hash already neutralizes rel/RIP-relative displacements (they are
* position-independent and identical on disk and in memory), so ordinary x86-64 code diffs cleanly
* WITHOUT applying relocations. The exception is an ABSOLUTE-address immediate (e.g. movabs reg,
* imm64 carrying a relocated pointer): such a function may read as "changed" on an on-disk-vs-memory
* diff even when unpatched. A .reloc cross-check (to also mask relocated immediates) is a future
* extension; for two snapshots at the same load address the diff is exact.
*
* Example - functions patched in the live image vs the on-disk file:
* func_range fr[1024];
* int nf = vmie_win32_functions(v, cr3, base, fr, 1024);
* if (nf > 1024) { nf = 1024; } // never hand code_diff more extents than fns[] holds
* code_block fns[1024];
* for (int i = 0; i < nf; i++) { fns[i].start = fr[i].rva;
* fns[i].end = fr[i].rva + fr[i].size; }
* // live_view, disk_view: both MODULE_RVA over .text (disk_view from the caller's file read)
* uint32_t changed[256];
* int nc = code_diff(disk_view, live_view, fns, nf, changed, 256); */
int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
uint32_t* changed, int max);
#endif /* VMIE_CODEANALYSIS_H */ #endif /* VMIE_CODEANALYSIS_H */
+40
View File
@@ -173,3 +173,43 @@ uint64_t func_hash(mem_view_t fn) {
} }
return h; return h;
} }
/* ---- function-level code diff -------------------------------------------- *
* For each function extent, mem_sub the SAME [start,end) out of both views and
* compare their func_hash (the position-independent, relocation-normalized
* fingerprint). A differing hash means a patched / hooked / rewritten body. The
* slices are zero-copy (mem_sub borrows the views' bytes; no byte is copied) and
* hashing reuses func_hash - no second decoder or hash here. Cold: a one-shot
* pass over the function table, not a hot loop. */
/* Did mem_sub hand back the whole extent that was asked for?
 * Per mem_sub's contract, a window that leaves the view comes back either
 * zeroed (data == NULL) or with a trimmed size. Only a slice that has data and
 * is exactly `want` bytes long lies fully inside the view; every other result
 * is a partial or missing extent that the caller has to skip.
 * Returns 1 for an exact slice, 0 otherwise. */
static int sub_is_exact(mem_view_t sub, size_t want) {
    if (sub.data == NULL) { return 0; }   /* wholly outside the view */
    return sub.size == want;              /* trimmed => partially outside */
}
/* Compare each function extent in `fns` across the two views `a` and `b`.
 *
 * For every extent the same [start,end) window is sliced out of both views with
 * mem_sub and the two slices are fingerprinted with func_hash; unequal hashes
 * count the function as changed.
 *
 *   fns, nfns - extents to walk; fns == NULL or nfns < 0 is rejected with -1.
 *   changed   - optional output; receives the `start` of the first `max`
 *               differing functions. NULL (or max <= 0) stores nothing.
 *
 * Skipped without being counted: empty or inverted extents (end <= start) and
 * extents that are not fully present in BOTH views.
 *
 * Returns the total number of differing functions, which can be larger than
 * `max`, or -1 on bad input.
 *
 * NOTE(review): the slice address is view.base_va + start, i.e. `start` is
 * treated as an offset from each view's own base_va. Confirm this matches
 * mem_sub's addressing and the coordinate space the extents are given in. */
int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
              uint32_t* changed, int max) __attribute__((cold));
int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
              uint32_t* changed, int max) {
    if (fns == NULL || nfns < 0) {
        return -1;
    }

    int ndiff = 0;
    const code_block* const stop = fns + nfns;
    for (const code_block* fn = fns; fn != stop; fn++) {
        /* Nothing to hash for an empty or inverted extent. */
        if (fn->start >= fn->end) {
            continue;
        }
        const size_t span = (size_t)(fn->end - fn->start);

        /* Borrow the same window from each view. */
        const mem_view_t in_a = mem_sub(a, a.base_va + fn->start, span);
        const mem_view_t in_b = mem_sub(b, b.base_va + fn->start, span);

        /* Both slices must be complete, otherwise the hashes are not comparable. */
        const int both_exact = sub_is_exact(in_a, span) && sub_is_exact(in_b, span);
        if (!both_exact) {
            continue;
        }

        const uint64_t hash_a = func_hash(in_a);
        const uint64_t hash_b = func_hash(in_b);
        if (hash_a == hash_b) {
            continue;
        }

        /* Keep counting past `max`; only the stores are bounded. */
        if (changed != NULL && ndiff < max) {
            changed[ndiff] = fn->start;
        }
        ndiff++;
    }
    return ndiff;
}