diff --git a/include/codeanalysis.h b/include/codeanalysis.h index c03544b..17d28c9 100644 --- a/include/codeanalysis.h +++ b/include/codeanalysis.h @@ -113,4 +113,38 @@ int cfg_blocks(mem_view_t fn, code_block* out, int max); * if (func_hash(a) != func_hash(b)) puts("function body changed"); */ uint64_t func_hash(mem_view_t fn); +/* Function-level code diff between two views of the same code in the SAME coordinate space (both + * MODULE_RVA, or both SECTION_LOCAL): e.g. an on-disk image section vs the live in-memory section, + * or one .text across two snapshots. For each function extent in `fns` (a code_block [start,end) in + * the views' coordinate), it func_hash()es that slice of `a` and of `b`; where the two hashes differ + * the function body changed - a patch, an inline hook, an unpacked/JIT-rewritten body. + * + * a, b - the two code views, SAME coordinate space and SAME layout (a function's bytes sit at + * the same offset in both). Build them with vmie_win32_section_view (live) and from the + * on-disk PE (caller's own file read), or from two snapshots. + * fns - function extents to compare (e.g. from vmie_win32_functions: code_block{start=rva, + * end=rva+size} for a MODULE_RVA view). A function whose extent falls outside either + * view is skipped. + * changed - caller array receiving up to `max` differing function start offsets (NULL to count). + * Returns the TOTAL number of functions that differ (changed=NULL => count), or -1 on bad input. + * + * Relocation note (v1): func_hash already neutralizes rel/RIP-relative displacements (they are + * position-independent and identical on disk and in memory), so ordinary x86-64 code diffs cleanly + * WITHOUT applying relocations. The exception is an ABSOLUTE-address immediate (e.g. movabs reg, + * imm64 carrying a relocated pointer): such a function may read as "changed" on an on-disk-vs-memory + * diff even when unpatched. 
A .reloc cross-check (to also mask relocated immediates) is a future + * extension; for two snapshots at the same load address the diff is exact. + * + * Example - functions patched in the live image vs the on-disk file: + * func_range fr[1024]; + * int nf = vmie_win32_functions(v, cr3, base, fr, 1024); + * code_block fns[1024]; + * for (int i = 0; i < nf && i < 1024; i++) { fns[i].start = fr[i].rva; + * fns[i].end = fr[i].rva + fr[i].size; } + * // live_view, disk_view: both MODULE_RVA over .text (disk_view from the caller's file read) + * uint32_t changed[256]; + * int nc = code_diff(disk_view, live_view, fns, nf < 1024 ? nf : 1024, changed, 256); */ +int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns, + uint32_t* changed, int max); + #endif /* VMIE_CODEANALYSIS_H */ diff --git a/src/handlers/codeanalysis.c b/src/handlers/codeanalysis.c index e9aefa6..4d514cb 100644 --- a/src/handlers/codeanalysis.c +++ b/src/handlers/codeanalysis.c @@ -173,3 +173,43 @@ uint64_t func_hash(mem_view_t fn) { } return h; } + +/* ---- function-level code diff -------------------------------------------- * + * For each function extent, mem_sub the SAME [start,end) out of both views and + * compare their func_hash (the position-independent, relocation-normalized + * fingerprint). A differing hash means a patched / hooked / rewritten body. The + * slices are zero-copy (mem_sub borrows the views' bytes; no byte is copied) and + * hashing reuses func_hash - no second decoder or hash here. Cold: a one-shot + * pass over the function table, not a hot loop. */ + +/* Does mem_sub yield exactly the requested extent? mem_sub clamps an out-of-view + * window to a zeroed view (data == NULL) or trims its size, so an extent that is + * fully present comes back with the same data and the full size - anything else + * is partially or wholly outside the view and must be skipped. 
*/
+static int sub_is_exact(mem_view_t sub, size_t want) {
+    if (sub.data == NULL) { return 0; }          /* window lay wholly outside the view */
+    return sub.size == want;                     /* a shorter size means mem_sub trimmed it */
+}
+
+int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
+              uint32_t* changed, int max) __attribute__((cold));
+int code_diff(mem_view_t a, mem_view_t b, const code_block* fns, int nfns,
+              uint32_t* changed, int max) {
+    if (fns == NULL || nfns < 0) { return -1; }                    /* bad input */
+
+    int ndiff = 0;                       /* counts every differing function, stored or not */
+    for (const code_block* fn = fns; fn != fns + nfns; fn++) {
+        if (fn->start >= fn->end) { continue; }                    /* empty/inverted extent */
+        const size_t span = (size_t)(fn->end - fn->start);
+
+        /* the identical window borrowed from each view; skip unless both hold all of it */
+        const mem_view_t in_a = mem_sub(a, a.base_va + fn->start, span);
+        const mem_view_t in_b = mem_sub(b, b.base_va + fn->start, span);
+        if (!(sub_is_exact(in_a, span) && sub_is_exact(in_b, span))) { continue; }
+        if (func_hash(in_a) == func_hash(in_b)) { continue; }      /* body unchanged */
+
+        if (changed != NULL && ndiff < max) { changed[ndiff] = fn->start; }
+        ndiff++;
+    }
+    return ndiff;
+}