mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 00:56:37 +03:00
Zero-copy hot path, correctness hardening
gva_ptr: leaf-bounded zero-copy guest reads. gva_sweep redesigned to drive on it — large-page leaves are lent to the callback while 4K runs stay buffered, and the run loop is guarded against wrap at the top of the address space. gva_gpa fetches PTEs zero-copy; optional W32MS_LTO build option folds the per-fetch call boundary (shipped -O2 default unchanged). Correctness: subtract-form bounds check (no add overflow), memcpy decode in place of type-punned wide loads, zero-init PDB name before compare, PCI-hole-crossing range rejection, single-sourced VA_CANON and USER bounds. hot/cold attributes audited across the translation and scan path.
This commit is contained in:
@@ -5,6 +5,8 @@ set(CMAKE_C_STANDARD 17) # generation B uses no C23 feature
|
||||
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_C_EXTENSIONS ON) # deliberate: strnlen (POSIX) + void* arithmetic (GNU)
|
||||
|
||||
option(W32MS_LTO "Enable LTO" OFF) # build-only; shipped default is -O2, no LTO
|
||||
|
||||
# ---- host: VMI core as a static library ---------------------------------
|
||||
add_library(w32ms STATIC
|
||||
src/gpa.c
|
||||
@@ -19,6 +21,10 @@ target_include_directories(w32ms
|
||||
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include # public API: include/*.h
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) # private: src/include/*.h via "include/..."
|
||||
target_compile_options(w32ms PRIVATE -O2 -Wall -Wextra)
|
||||
if(W32MS_LTO)
  # Compile w32ms' own sources with LTO. PRIVATE: consumers' sources are not affected.
  target_compile_options(w32ms PRIVATE -flto)
  # w32ms is a STATIC library and is never linked itself, so a PRIVATE link
  # option on it is not applied anywhere. INTERFACE passes -flto to the targets
  # that link w32ms, which is where link-time optimization actually runs.
  target_link_options(w32ms INTERFACE -flto)
endif()
|
||||
|
||||
# ---- host: CLI demonstrator over the library ----------------------------
|
||||
add_executable(w32ms_cli src/cli.c)
|
||||
|
||||
@@ -35,7 +35,9 @@ static void clean_ctx(gpa_ctx* ctx) {
|
||||
}
|
||||
|
||||
/* Validate a guest-physical range and translate its start via gpa_offset.
 *
 * ctx   - physical-memory context; ctx->fsize and ctx->low are read
 * offs  - in: start of the range; out: the value gpa_offset stores for it
 * nmemb - length of the range in bytes
 *
 * Returns non-zero when the range must be rejected: gpa_offset failed, the
 * range runs past ctx->fsize, or it starts below ctx->low and extends across
 * it. Returns 0 when [*offs, *offs + nmemb) passes every check. */
static int out_of_bounds(gpa_ctx* ctx, uintptr_t* offs, const size_t nmemb) {
    if (gpa_offset(ctx, *offs, offs)) {
        return 1;
    }

    const uintptr_t start = *offs;

    /* The subtract-form checks below avoid add overflow, but they wrap if
     * start is already past the end. gpa_offset is not visible here, so do
     * not rely on it to have ruled that out. */
    if (start > ctx->fsize) {
        return 1;
    }
    if (nmemb > ctx->fsize - start) {
        return 1;
    }
    if (start < ctx->low && nmemb > ctx->low - start) {
        return 1;                                   /* range crosses split */
    }
    return 0;
}
|
||||
|
||||
__attribute__((hot))
|
||||
@@ -57,6 +59,7 @@ int gpa_write(gpa_ctx* ctx, uintptr_t offs, const void* src, const size_t nmemb)
|
||||
|
||||
/* Zero-copy host pointer to [offs, offs+nmemb) GPA, or NULL if that range is not
|
||||
* fully backed by the mapped image. Same split + bounds check as gpa_read. */
|
||||
__attribute__((hot))
|
||||
void* gpa_ptr(gpa_ctx* ctx, uintptr_t offs, const size_t nmemb) {
|
||||
if (out_of_bounds(ctx, &offs, nmemb)) {
|
||||
return NULL;
|
||||
|
||||
@@ -5,9 +5,6 @@
|
||||
#include "include/memory.h"
|
||||
#include "../include/include.h"
|
||||
|
||||
/* sign-extend a 48-bit canonical VA */
|
||||
#define VA_CANON(v) (((v) & (1ull << 47)) ? ((v) | 0xFFFF000000000000ull) : (v))
|
||||
|
||||
/* PTE permission bits we propagate down the walk. */
|
||||
#define PTE_RW (1ull << 1)
|
||||
#define PTE_US (1ull << 2)
|
||||
@@ -16,15 +13,17 @@
|
||||
/* ---- single-address translation (hot) ----------------------------------- *
|
||||
* Translate `va` under `cr3` to a GPA. On success: *gpa = GPA of `va`, and
|
||||
* *leaf (if non-NULL) = bytes from `va` to the end of the containing leaf. */
|
||||
__attribute__((hot))
|
||||
static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
|
||||
uintptr_t* gpa, size_t* leaf) {
|
||||
uint64_t t = cr3 & PFN_MASK, e;
|
||||
const uint64_t* pe;
|
||||
const unsigned i4 = (va >> 39) & 0x1ff, i3 = (va >> 30) & 0x1ff,
|
||||
i2 = (va >> 21) & 0x1ff, i1 = (va >> 12) & 0x1ff;
|
||||
|
||||
if (gpa_read(&p_(ctx), t + i4 * 8, &e, 8) || !(e & PG_P)) return -1;
|
||||
if (!(pe = gpa_ptr(&p_(ctx), t + i4 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
|
||||
t = e & PFN_MASK;
|
||||
if (gpa_read(&p_(ctx), t + i3 * 8, &e, 8) || !(e & PG_P)) return -1;
|
||||
if (!(pe = gpa_ptr(&p_(ctx), t + i3 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
|
||||
if (e & PG_PS) { /* 1 GiB leaf */
|
||||
const uint64_t off = va & 0x3FFFFFFF;
|
||||
*gpa = (e & PFN_MASK & ~0x3FFFFFFFull) + off;
|
||||
@@ -32,7 +31,7 @@ static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
|
||||
return 0;
|
||||
}
|
||||
t = e & PFN_MASK;
|
||||
if (gpa_read(&p_(ctx), t + i2 * 8, &e, 8) || !(e & PG_P)) return -1;
|
||||
if (!(pe = gpa_ptr(&p_(ctx), t + i2 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
|
||||
if (e & PG_PS) { /* 2 MiB leaf */
|
||||
const uint64_t off = va & 0x1FFFFF;
|
||||
*gpa = (e & PFN_MASK & ~0x1FFFFFull) + off;
|
||||
@@ -40,13 +39,22 @@ static int gva_gpa(gva_ctx* ctx, uintptr_t cr3, uintptr_t va,
|
||||
return 0;
|
||||
}
|
||||
t = e & PFN_MASK;
|
||||
if (gpa_read(&p_(ctx), t + i1 * 8, &e, 8) || !(e & PG_P)) return -1;
|
||||
if (!(pe = gpa_ptr(&p_(ctx), t + i1 * 8, 8)) || !((e = *pe) & PG_P)) return -1;
|
||||
const uint64_t off = va & 0xFFF; /* 4 KiB leaf */
|
||||
*gpa = (e & PFN_MASK) + off;
|
||||
if (leaf) *leaf = 0x1000 - off;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* zero-copy borrowed read: leaf-bounded host pointer at `va` (see memory.h). */
|
||||
__attribute__((hot))
|
||||
const void* gva_ptr(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, size_t* avail) {
|
||||
uintptr_t gpa; size_t leaf;
|
||||
if (gva_gpa(ctx, cr3, va, &gpa, &leaf)) return NULL;
|
||||
*avail = leaf;
|
||||
return gpa_ptr(&p_(ctx), gpa, leaf);
|
||||
}
|
||||
|
||||
__attribute__((hot))
|
||||
int gva_read(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb) {
|
||||
uint8_t* d = dst;
|
||||
@@ -169,6 +177,7 @@ static int rgn_hit(uint64_t base, uint64_t span, uint64_t lo, uint64_t hi) {
|
||||
return !(end < lo || base > hi);
|
||||
}
|
||||
|
||||
__attribute__((hot))
|
||||
int gva_regions(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, vregion* out, int nmax) {
|
||||
if (nmax <= 0) return 0;
|
||||
@@ -230,6 +239,7 @@ int gva_regions(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
#define SWEEP_WIN (1u << 20) /* 1 MiB window (multiple of 8) */
|
||||
#define SWEEP_RMAX (1u << 16) /* max runs enumerated per sweep */
|
||||
|
||||
__attribute__((hot))
|
||||
int gva_sweep(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, size_t overlap, gva_sweep_cb cb, void* user) {
|
||||
if (overlap >= SWEEP_WIN) return -1;
|
||||
@@ -245,26 +255,40 @@ int gva_sweep(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
for (int r = 0; r < nr && !rc; r++) {
|
||||
uint64_t base = rg[r].va; /* VA of buf[0] */
|
||||
uint64_t va = rg[r].va;
|
||||
const uint64_t vend = rg[r].va + rg[r].len;
|
||||
const uint64_t vend = rg[r].va + (rg[r].len - 1); /* inclusive last */
|
||||
size_t fill = 0;
|
||||
|
||||
while (va < vend) {
|
||||
size_t pg = 0x1000 - (size_t)(va & 0xFFF); /* to page edge */
|
||||
if (pg > (size_t)(vend - va)) pg = (size_t)(vend - va);
|
||||
if (pg > SWEEP_WIN - fill) pg = SWEEP_WIN - fill;
|
||||
|
||||
if (gva_read(ctx, cr3, va, buf + fill, pg)) { /* gap: flush+skip */
|
||||
while (va <= vend) {
|
||||
size_t avail;
|
||||
const uint8_t* p = gva_ptr(ctx, cr3, va, &avail);
|
||||
if (!p) { /* gap: flush+skip */
|
||||
if (fill && cb(user, buf, fill, base, overlap, 1)) { rc = 1; break; }
|
||||
if (vend - va < 0x1000 - (va & 0xFFF)) break; /* skip past top: done */
|
||||
va += 0x1000 - (va & 0xFFF);
|
||||
base = va; fill = 0;
|
||||
continue;
|
||||
}
|
||||
fill += pg; va += pg;
|
||||
size_t n = avail; /* leaf-contiguous */
|
||||
if (n > (size_t)(vend - va + 1)) n = (size_t)(vend - va + 1);
|
||||
if (n > SWEEP_WIN - fill) n = SWEEP_WIN - fill;
|
||||
const int end = (n == (size_t)(vend - va + 1)); /* chunk hits vend */
|
||||
|
||||
if (fill == SWEEP_WIN) {
|
||||
const int last = (va >= vend);
|
||||
if (cb(user, buf, fill, base, overlap, last)) { rc = 1; break; }
|
||||
if (last || overlap == 0 || overlap >= fill) {
|
||||
if (fill == 0 && avail > 0x1000) { /* large-page lend */
|
||||
if (cb(user, p, n, va, 0, end)) { rc = 1; break; }
|
||||
if (end) break; /* avoid va wrap */
|
||||
va += n;
|
||||
if (overlap == 0) base = va;
|
||||
else { memcpy(buf, p + n - overlap, overlap); base = va - overlap; fill = overlap; }
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(buf + fill, p, n); /* buffered window */
|
||||
fill += n; va += n;
|
||||
|
||||
if (end || fill == SWEEP_WIN) {
|
||||
if (cb(user, buf, fill, base, overlap, end)) { rc = 1; break; }
|
||||
if (end) { fill = 0; break; } /* avoid va wrap */
|
||||
if (overlap == 0 || overlap >= fill) {
|
||||
base = va; fill = 0;
|
||||
} else { /* carry overlap */
|
||||
memmove(buf, buf + fill - overlap, overlap);
|
||||
|
||||
+7
-8
@@ -100,11 +100,9 @@ static int find_ntoskrnl(gva_ctx* ctx, uintptr_t cr3, uint64_t* base, uint8_t gu
|
||||
}
|
||||
|
||||
uint64_t va = (uint64_t)p4<<39 | (uint64_t)p3<<30 | (uint64_t)p2<<21;
|
||||
if (va & (1ull<<47)) {
|
||||
va |= 0xFFFF000000000000ull; /* canonical sign-extend */
|
||||
}
|
||||
va = VA_CANON(va);
|
||||
|
||||
uint16_t mz; char pdb[16];
|
||||
uint16_t mz; char pdb[16] = {0};
|
||||
if (gva_read(ctx, cr3, va, &mz, 2) || mz != MZ) {
|
||||
continue;
|
||||
}
|
||||
@@ -132,10 +130,11 @@ static uint32_t ko_export_rva(gva_ctx* ctx, uintptr_t cr3, uint64_t kbase, const
|
||||
if (gva_read(ctx, cr3, kbase + exp_rva, ed, sizeof ed)) {
|
||||
return 0;
|
||||
}
|
||||
const uint32_t nnames = *(uint32_t*)(ed + 0x18);
|
||||
const uint32_t a_funcs = *(uint32_t*)(ed + 0x1C);
|
||||
const uint32_t a_names = *(uint32_t*)(ed + 0x20);
|
||||
const uint32_t a_ords = *(uint32_t*)(ed + 0x24);
|
||||
uint32_t nnames, a_funcs, a_names, a_ords;
|
||||
memcpy(&nnames, ed + 0x18, 4);
|
||||
memcpy(&a_funcs, ed + 0x1C, 4);
|
||||
memcpy(&a_names, ed + 0x20, 4);
|
||||
memcpy(&a_ords, ed + 0x24, 4);
|
||||
|
||||
for (uint32_t i = 0; i < nnames; i++) {
|
||||
uint32_t nrva; char nm[40];
|
||||
|
||||
@@ -10,6 +10,9 @@ struct gva_ctx; /* forward: completed below; lets profile.h name it *
|
||||
#define PG_P 0x1ull
|
||||
#define PG_PS 0x80ull
|
||||
|
||||
/* Sign-extend bit 47 of a 48-bit VA into bits 63:48 (canonical form). `v` is evaluated twice: pass a side-effect-free expression. */
|
||||
#define VA_CANON(v) (((v) & (1ull << 47)) ? ((v) | 0xFFFF000000000000ull) : (v))
|
||||
|
||||
/* Canonical VA window bounds, single-sourced here for every scanning TU.
|
||||
* USER_MIN is 0x10000: Windows reserves the low 64 KiB, so no live user pointer
|
||||
* targets below it - starting there drops a class of false positives. */
|
||||
@@ -59,6 +62,12 @@ int gpa_write(gpa_ctx* ctx, uintptr_t offs, const void* src, size_t nmemb);
|
||||
* whole leaf (or a 4096-byte page table) can be taken in one call. */
|
||||
void* gpa_ptr(gpa_ctx* ctx, uintptr_t offs, size_t nmemb);
|
||||
|
||||
/* Zero-copy borrowed read: host pointer to the guest byte at `va` (under `cr3`),
|
||||
* valid for *avail contiguous bytes (to the end of the containing leaf). NULL if
|
||||
* `va` is not mapped or the leaf is not fully covered by the image (caller falls
|
||||
* back to gva_read). Borrowed: valid until gva_ctx_free, do NOT retain/free. */
|
||||
const void* gva_ptr(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, size_t* avail) __attribute__((hot));
|
||||
|
||||
/* bootstrap helpers (gva.c) */
|
||||
int khalf_score(const gva_ctx* ctx, uint64_t pml4) __attribute__((cold));
|
||||
int cr3_recover(gva_ctx* ctx, uint64_t va_self, uint64_t target_pa, uintptr_t* cr3_out) __attribute__((cold));
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@
|
||||
#include "include/memory.h"
|
||||
#include "../include/include.h"
|
||||
|
||||
#define pr_(ctx) (ctx->prof)
|
||||
#define pr_(ctx) ((ctx)->prof)
|
||||
|
||||
#define RING_GUARD 100000u
|
||||
#define MOD_GUARD 4096u
|
||||
|
||||
+4
-2
@@ -53,7 +53,8 @@ static int discover_core(gva_ctx* ctx, uintptr_t cr3, uint64_t sys_ep) {
|
||||
|
||||
int pid_off = -1;
|
||||
for (int o = 0x80; o + 8 <= name_off; o += 8) {
|
||||
if (*(uint64_t*)(buf + o) != 4) {
|
||||
uint64_t v; memcpy(&v, buf + o, 8);
|
||||
if (v != 4) {
|
||||
continue;
|
||||
}
|
||||
const uint16_t links = (uint16_t)(o + 8);
|
||||
@@ -70,7 +71,8 @@ static int discover_core(gva_ctx* ctx, uintptr_t cr3, uint64_t sys_ep) {
|
||||
|
||||
int dtb_off = -1;
|
||||
for (int o = 0x18; o <= 0x60; o += 8) {
|
||||
const uint64_t c = *(uint64_t*)(buf + o) & PFN_MASK;
|
||||
uint64_t v; memcpy(&v, buf + o, 8);
|
||||
const uint64_t c = v & PFN_MASK;
|
||||
uint8_t probe;
|
||||
if (c && khalf_score(ctx, c) >= 16 && !gva_read(ctx, c, sys_ep, &probe, 1)) {
|
||||
dtb_off = o;
|
||||
|
||||
@@ -72,6 +72,7 @@ void sig_free(sig_pattern_t* p) {
|
||||
p->bytes = p->mask = NULL; p->len = 0;
|
||||
}
|
||||
|
||||
__attribute__((hot))
|
||||
void sig_each(mem_view_t v, const sig_pattern_t* p,
|
||||
int (*cb)(void*, uint64_t), void* user) {
|
||||
if (!v.data || !p || p->len == 0 || v.size < p->len) return;
|
||||
|
||||
+9
-8
@@ -3,7 +3,7 @@
|
||||
#include "include/memory.h"
|
||||
#include "../include/include.h"
|
||||
|
||||
static void utf8_emit(uint32_t cp, char* dst, size_t size, size_t* need) {
|
||||
static void utf8_emit(uint32_t cp, char* dst, size_t size, size_t* need, size_t* wrote) {
|
||||
uint8_t b[4]; size_t k;
|
||||
if (cp < 0x80) { b[0]=(uint8_t)cp; k=1; }
|
||||
else if (cp < 0x800) { b[0]=0xC0|(uint8_t)(cp>>6); b[1]=0x80|(cp&0x3F); k=2; }
|
||||
@@ -11,12 +11,13 @@ static void utf8_emit(uint32_t cp, char* dst, size_t size, size_t* need) {
|
||||
else { b[0]=0xF0|(uint8_t)(cp>>18); b[1]=0x80|((cp>>12)&0x3F); b[2]=0x80|((cp>>6)&0x3F); b[3]=0x80|(cp&0x3F); k=4; }
|
||||
if (dst && *need + k < size) {
|
||||
for (size_t j = 0; j < k; j++) dst[*need + j] = (char)b[j];
|
||||
*wrote = *need + k; /* end of last full code point */
|
||||
}
|
||||
*need += k;
|
||||
}
|
||||
|
||||
size_t gva_read_text(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, size_t nmemb, char* dst, size_t size) {
|
||||
size_t need = 0;
|
||||
size_t need = 0, wrote = 0;
|
||||
uint16_t stage[256];
|
||||
uint32_t hi = 0;
|
||||
nmemb &= ~(size_t)1;
|
||||
@@ -31,21 +32,21 @@ size_t gva_read_text(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, size_t nmemb, ch
|
||||
uint32_t u = stage[i];
|
||||
if (hi) {
|
||||
if (u >= 0xDC00 && u <= 0xDFFF) {
|
||||
utf8_emit(0x10000u + ((hi - 0xD800u) << 10) + (u - 0xDC00u), dst, size, &need);
|
||||
utf8_emit(0x10000u + ((hi - 0xD800u) << 10) + (u - 0xDC00u), dst, size, &need, &wrote);
|
||||
hi = 0;
|
||||
continue;
|
||||
}
|
||||
utf8_emit(0xFFFD, dst, size, &need);
|
||||
utf8_emit(0xFFFD, dst, size, &need, &wrote);
|
||||
hi = 0;
|
||||
}
|
||||
if (u >= 0xD800 && u <= 0xDBFF) hi = u;
|
||||
else if (u >= 0xDC00 && u <= 0xDFFF) utf8_emit(0xFFFD, dst, size, &need);
|
||||
else utf8_emit(u, dst, size, &need);
|
||||
else if (u >= 0xDC00 && u <= 0xDFFF) utf8_emit(0xFFFD, dst, size, &need, &wrote);
|
||||
else utf8_emit(u, dst, size, &need, &wrote);
|
||||
}
|
||||
va += chunk;
|
||||
nmemb -= chunk;
|
||||
}
|
||||
if (hi) utf8_emit(0xFFFD, dst, size, &need);
|
||||
if (dst && size) dst[need < size ? need : size - 1] = 0;
|
||||
if (hi) utf8_emit(0xFFFD, dst, size, &need, &wrote);
|
||||
if (dst && size) dst[need < size ? need : wrote] = 0;
|
||||
return need;
|
||||
}
|
||||
Reference in New Issue
Block a user