mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
b3441dd6f6
CORE (src/core): vmie_mem — guest-physical substrate with a data-driven segment map (replaces the hardcoded 4 GiB PCI-hole topology). ENGINE (src/engine): x86-64 paging + Windows bring-up; produces the generic memory model. HANDLERS (src/handlers): the signature/value/pointer scanners, which now consume an OS-agnostic contract. Keystone: gva_ctx is split into vmie_mem (core) + vmie (engine); the generic access functions take vmie_mem* + cr3 and no longer compile in the Windows offset table. New public contract include/memmodel.h (vmie_mem, mem_view_t, vregion, task, range, the gva_* access); win32 surface in include/vmie.h. Leak relocations: the PE parser, UTF-16 decode and CR3-recovery heuristics move engine-side; the matcher stays a pure, source-agnostic handler, and the pointer scanner takes a generic range[] instead of reaching into the process enumerator.
157 lines
8.1 KiB
C
157 lines
8.1 KiB
C
/* memmodel.h - the OS-agnostic memory-model contract (the middle layer).
 *
 * This is the shared vocabulary between the ENGINE (which turns guest-physical
 * RAM into a usable virtual memory model via x86-64 paging + Windows bring-up)
 * and the HANDLERS (scanners that consume that model). It names no Windows
 * concept: a handler compiled against this header literally cannot mention an
 * _EPROCESS, a PEB, or an LDR entry.
 *
 * Everything here is keyed by a `vmie_mem*` (the opaque physical/paging
 * substrate) plus a `cr3` (the address space). The engine handle `vmie` is
 * never handed to a handler - only `vmie_mem*` + `cr3`.
 *
 * Conventions:
 *   - `cr3` is a raw CR3 / DirectoryTableBase value; low flag bits are masked
 *     internally, so either the masked PML4 GPA or the raw register works.
 *   - A "VA" is a 64-bit canonical guest virtual address. Reads/writes that
 *     cross a page boundary are handled internally (per-page translation).
 *   - Integer returns: 0 on success, negative on failure, unless stated.
 */
|
|
#ifndef VMIE_MEMMODEL_H
|
|
#define VMIE_MEMMODEL_H
|
|
#include <stdint.h>
|
|
#include <stddef.h>
|
|
|
|
/* Opaque guest-physical memory handle (the mmap'd RAM backing file + segment
 * map). Defined in src/core/include/core.h; handlers hold only a pointer and
 * pass it, with a cr3, to the address-space primitives below.
 *
 * Only the forward declaration is visible here, so a handler can neither
 * dereference the handle nor depend on its layout. */
typedef struct vmie_mem vmie_mem;
|
|
|
|
/* ---- flat memory view (single owner) ------------------------------------- *
 * A contiguous view of memory.
 *   data    - host pointer to the bytes (borrowed; not owned by the view)
 *   size    - number of valid bytes at `data`
 *   base_va - address that data[0] corresponds to (guest VA, or GPA for a
 *             physical view). All matches are reported as base_va + offset. */
typedef struct {
    const uint8_t* data;    /* borrowed, read-only bytes */
    size_t         size;    /* valid byte count at `data` */
    uint64_t       base_va; /* address of data[0] (guest VA, or GPA) */
} mem_view_t;
|
|
|
|
/* ---- region map ---------------------------------------------------------- *
 * A vregion is one run of VA-contiguous, present guest pages sharing the same
 * effective protection. It is the unit of "what is mapped, and how" and the
 * scoping primitive for the scanners (see scan.h).
 *
 * x86-64 has no read bit: a present page is readable, so VR_R is always set on a
 * returned region. Write/execute/user are the EFFECTIVE rights along the whole
 * page-table path (RW & US are AND-ed across levels, NX is OR-ed), not just the
 * leaf entry, so they reflect what the guest CPU actually enforces.
 *
 * The VMIE_VREGION_DEFINED guard skips this definition when the macro is
 * already defined (presumably by another header that provides the same
 * flags/type - TODO confirm which one). */
#ifndef VMIE_VREGION_DEFINED
#define VMIE_VREGION_DEFINED

#define VR_R 0x1u /* readable (present => always set) */
#define VR_W 0x2u /* writable (RW bit set at every level) */
#define VR_X 0x4u /* executable (NX clear at every level) */
#define VR_U 0x8u /* user-accessible (US bit set at every level) */

typedef struct {
    uint64_t va;   /* run start VA (clamped into the requested [lo,hi] window) */
    uint64_t len;  /* run length in bytes */
    uint32_t prot; /* OR of VR_* flags */
} vregion;

#endif
|
|
|
|
/* Canonical VA-window bounds of the memory model, shared by every scanning TU.
 * These describe the address space the contract operates over (the [lo,hi]
 * windows of gva_regions/gva_sweep), so they are handler-visible.
 * USER_MIN is 0x10000: the low 64 KiB is reserved, so no live user pointer
 * targets below it - starting there drops a class of false positives. */
#define USER_MIN 0x0000000000010000ull /* first user VA considered (64 KiB) */
#define USER_MAX 0x00007FFFFFFFFFFFull /* last VA of the lower canonical half */
#define KERN_MIN 0xFFFF800000000000ull /* first VA of the upper canonical half */
|
|
|
|
/* ---- generic boundary types (replace the Windows-typed process/pmodule) --- *
 * A schedulable address space, decoded by the engine from whatever the guest
 * OS calls one. `cr3` is all a handler needs to read/write its memory.
 *   cr3       - DirectoryTableBase (PFN-masked); key to this address space
 *   pid, ppid - process / parent ids (ppid == (uint64_t)-1 if unavailable)
 *   name      - short image name, NUL-terminated UTF-8 (engine-decoded) */
typedef struct {
    uint64_t cr3;      /* PFN-masked DirectoryTableBase */
    uint64_t pid;      /* process id */
    uint64_t ppid;     /* parent id, or (uint64_t)-1 if unavailable */
    char     name[16]; /* NUL-terminated UTF-8 image name (terminator included) */
} task;
|
|
|
|
/* A named, contiguous VA range (e.g. a loaded module image), the anchor a
 * pointer scan walks back to. The engine decodes the name; no LDR entry VA.
 *   base - range base VA (page-aligned)
 *   size - range length in bytes
 *   name - decoded UTF-8 name (e.g. "ntdll.dll"), NUL-terminated */
typedef struct {
    uint64_t base;     /* page-aligned base VA */
    uint64_t size;     /* length in bytes */
    char     name[64]; /* NUL-terminated UTF-8 name (terminator included) */
} range;
|
|
|
|
/* ---- guest memory access (hot path) -------------------------------------- */
|
|
|
|
/* Read `nmemb` bytes from guest VA `va` (translated under `cr3`) into `dst`.
|
|
* Crosses page boundaries internally. Returns 0 on success, -1 if any page in
|
|
* the range is not present/translatable (in which case `dst` is partially
|
|
* written and must be treated as invalid). */
|
|
int gva_read(vmie_mem* m, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb);
|
|
|
|
/* Write `nmemb` bytes from `src` to guest VA `va` (translated under `cr3`).
|
|
* The mapping is RW and coherent, so the guest observes the change. Returns 0
|
|
* on success, -1 if any page in the range is not present/translatable. */
|
|
int gva_write(vmie_mem* m, uintptr_t cr3, uintptr_t va, const void* src, size_t nmemb);
|
|
|
|
/* Zero-copy borrowed read: host pointer to the guest byte at `va` (under `cr3`),
|
|
* valid for *avail contiguous bytes (to the end of the containing leaf). NULL if
|
|
* `va` is not mapped or the leaf is not fully covered by the image (caller falls
|
|
* back to gva_read). Borrowed: valid until the mapping is closed, do NOT retain. */
|
|
const void* gva_ptr(vmie_mem* m, uintptr_t cr3, uintptr_t va, size_t* avail);
|
|
|
|
/* Enumerate mapped memory under `cr3`, clamped to the VA window [lo,hi]
|
|
* (inclusive), as runs of equal effective protection.
|
|
* lo, hi - inclusive VA window; MUST lie within a single canonical half
|
|
* (entirely user or entirely kernel). Use (0, ~0ull) loosely; the
|
|
* walk prunes whole subtrees outside the window.
|
|
* prot_any - protection filter: 0 keeps every run; otherwise a run is kept
|
|
* only if (run.prot & prot_any) != 0 (e.g. VR_W for writable-only)
|
|
* out - caller array receiving up to `nmax` `vregion` records
|
|
* nmax - capacity of `out`
|
|
* Returns the TOTAL number of matching runs found. If the return value exceeds
|
|
* `nmax` the output was truncated; enlarge the buffer and retry. */
|
|
int gva_regions(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
|
uint32_t prot_any, vregion* out, int nmax);
|
|
|
|
/* ---- shared windowed sweep engine ---------------------------------------- *
 * gva_sweep() streams every mapped byte under `cr3` within [lo,hi] that passes
 * the protection filter to `cb`, one contiguous window at a time. Physical
 * fragmentation is hidden: each window is a flat buffer (gva_read-filled), and
 * adjacent windows of one run share `overlap` leading bytes so an object or
 * pattern straddling a window boundary is still seen whole.
 *
 * gva_sweep_cb is the per-window callback type; it returns nonzero to abort
 * the sweep early. */
typedef int (*gva_sweep_cb)(void* user, const uint8_t* data, size_t len,
                            uint64_t base_va, size_t overlap, int last);
|
|
/* user - passed through verbatim
|
|
* data - host buffer with `len` valid bytes (do not retain past the call)
|
|
* len - valid bytes at data
|
|
* base_va - guest VA of data[0]
|
|
* overlap - bytes at the front of `data` shared with the previous window of
|
|
* this run (0 on a run's first window or right after a gap)
|
|
* last - nonzero if this window ends a contiguous segment (run end / gap):
|
|
* accept hits up to `len`; otherwise drop hits starting in the
|
|
* trailing `overlap` zone, the next window re-presents them
|
|
* cb returns nonzero to abort the sweep early (e.g. result buffer full).
|
|
*
|
|
* gva_sweep() returns 0 normally, 1 if a callback aborted it, -1 on allocation
|
|
* failure. `overlap` must be < the internal window (1 MiB); patterns longer
|
|
* than that are not supported by the windowed path. */
|
|
int gva_sweep(vmie_mem* m, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
|
uint32_t prot_any, size_t overlap, gva_sweep_cb cb, void* user);
|
|
|
|
#endif /* VMIE_MEMMODEL_H */
|