mirror of
https://dev.lirent.ru/Vatrog/vm-introspection-engine.git
synced 2026-06-18 02:06:36 +03:00
Windows guest VMI core: host library, CLI, guest agent
Static library over a flat RW mmap of guest RAM: GPA/GVA paging walks, beacon-driven bootstrap, dynamic struct-offset profiling, process and module enumeration, a region map, and value/pointer/signature scanners on a shared windowed sweep. Public API in include/; internals under src/. Thin CLI demonstrator over the public API. Guest agent cross-compiled to Windows x86-64 via mingw-w64. CMake: static library + CLI + guest target, C17.
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
/* include.h - public interface of the Windows VMI core.
 *
 * The host opens a guest's RAM backing file (a flat, writable, coherent mmap),
 * recovers the kernel address space, and reads/writes guest memory by CR3 and
 * virtual address. Everything is CR3-keyed, never PID-keyed: a `process` already
 * carries its own cr3, which is the key to that address space.
 *
 * Conventions used throughout this header:
 *   - `cr3` is a raw CR3 / DirectoryTableBase value; low flag bits are masked
 *     internally, so either the masked PML4 GPA or the raw register works.
 *   - A "VA" is a 64-bit canonical guest virtual address. A "GPA" is a guest
 *     physical address. Reads/writes that cross a page boundary are handled
 *     internally (per-page translation), so callers pass plain ranges.
 *   - Integer returns: 0 on success, negative on failure, unless stated.
 *   - The library never takes ownership of caller buffers and never retains a
 *     pointer past the call that received it, unless explicitly stated.
 */
|
||||
|
||||
#ifndef W32MS_INCLUDE_H
|
||||
#define W32MS_INCLUDE_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/* Opaque introspection context. The struct is completed in
 * src/include/memory.h; callers only ever hold a pointer to it.
 *
 * Lifecycle: created by gva_ctx_alloc(), populated by host_bootstrap(),
 * released by gva_ctx_free(). */
typedef struct gva_ctx gva_ctx;
|
||||
|
||||
/* A guest counted string still resident in guest memory (e.g. a UNICODE_STRING
 * buffer). Not a copy: `va` points into the guest; decode it with
 * gva_read_text().
 *   va  - guest VA of the first UTF-16LE code unit (0 if absent)
 *   len - length in BYTES (not characters); always even for UTF-16 */
typedef struct {
    uint64_t va;
    uint32_t len;
} gtext;
|
||||
|
||||
/* A live process, as produced by proc_list(). Self-contained: `cr3` is all you
|
||||
* need to read/write its user address space, `eprocess`/`peb` re-anchor it in
|
||||
* kernel/user space without another lookup.
|
||||
* cr3 - DirectoryTableBase (PFN-masked); key to this address space
|
||||
* peb - PEB VA (0 for system/kernel-only processes)
|
||||
* eprocess - _EPROCESS VA (kernel object, read under the kernel cr3)
|
||||
* pid, ppid - process / parent ids (ppid == (uint32_t)-1 if unavailable)
|
||||
* create_time - raw KSYSTEM_TIME / FILETIME (100 ns ticks; 0 if unavailable)
|
||||
* name - ImageFileName, NUL-terminated ASCII (up to 15 chars)
|
||||
* path - full image path as a guest UTF-16 string (gtext; may be empty) */
|
||||
typedef struct {
|
||||
uint64_t cr3;
|
||||
uint64_t peb;
|
||||
uint64_t eprocess;
|
||||
uint32_t pid;
|
||||
uint32_t ppid;
|
||||
uint64_t create_time;
|
||||
char name[16];
|
||||
gtext path;
|
||||
} process;
|
||||
|
||||
/* A loaded module (image) inside a process, as produced by proc_modules().
|
||||
* pr - owning process (its cr3 is the address space these VAs live in)
|
||||
* entry - _LDR_DATA_TABLE_ENTRY VA
|
||||
* base - image base VA (page-aligned); pair with `size` for a MODULE scope
|
||||
* size - image size in bytes (SizeOfImage)
|
||||
* name - module file name (gtext UTF-16, e.g. "ntdll.dll")
|
||||
* path - full module path (gtext UTF-16) */
|
||||
typedef struct {
|
||||
const process* pr;
|
||||
uint64_t entry;
|
||||
uint64_t base;
|
||||
uint32_t size;
|
||||
gtext name;
|
||||
gtext path;
|
||||
} pmodule;
|
||||
|
||||
/* ---- region map ---------------------------------------------------------- *
 * A vregion is one run of VA-contiguous, present guest pages sharing the same
 * effective protection. It is the unit of "what is mapped, and how" and the
 * scoping primitive for the scanners (see scan.h).
 *
 * x86-64 has no read bit: a present page is readable, so VR_R is always set on
 * a returned region. Write/execute/user are the EFFECTIVE rights along the
 * whole page-table path (RW & US are AND-ed across levels, NX is OR-ed), not
 * just the leaf entry, so they reflect what the guest CPU actually enforces. */
#define VR_R 0x1u /* readable        (present => always set)          */
#define VR_W 0x2u /* writable        (RW bit set at every level)      */
#define VR_X 0x4u /* executable      (NX clear at every level)        */
#define VR_U 0x8u /* user-accessible (US bit set at every level)      */
|
||||
|
||||
/* One run of VA-contiguous mapped pages with a single effective protection,
 * as written by gva_regions(). */
typedef struct {
    uint64_t va;   /* run start VA (clamped into the requested [lo,hi] window) */
    uint64_t len;  /* run length in bytes */
    uint32_t prot; /* OR of VR_* flags */
} vregion;
|
||||
|
||||
/* ---- lifecycle ----------------------------------------------------------- */
|
||||
|
||||
/* Open `ram_path` (the guest RAM backing file) and build a context over it.
|
||||
* ram_path - path to a writable, share=on RAM backing file
|
||||
* low - size in bytes of below-4G guest RAM (the PCI-hole split point);
|
||||
* pass the value from the VM's memory layout. If total RAM <= low,
|
||||
* the split is inert.
|
||||
* Returns a new context (call host_bootstrap() next), or NULL on open/mmap
|
||||
* failure. Free with gva_ctx_free(). */
|
||||
gva_ctx* gva_ctx_alloc(const char* ram_path, uint64_t low);
|
||||
|
||||
/* Unmap, close, and free a context. Safe on NULL. After this, every pointer
|
||||
* into guest memory obtained through this context is invalid. */
|
||||
void gva_ctx_free(gva_ctx* ctx);
|
||||
|
||||
/* One-shot bring-up: locate the guest agent beacon in physical RAM, recover a
|
||||
* bootstrap CR3, find ntoskrnl, build the struct-offset profile, derive the
|
||||
* permanent System DirectoryTableBase (kernel cr3) and System _EPROCESS, then
|
||||
* ACK the agent. On success the context is ready for proc_list()/gva_read()/etc.
|
||||
* Returns 0 on success, or a negative stage code (-1..-6) identifying the step
|
||||
* that failed. Cold path: call once after gva_ctx_alloc(). */
|
||||
int host_bootstrap(gva_ctx* ctx);
|
||||
|
||||
/* ---- guest memory access (hot path) -------------------------------------- */
|
||||
|
||||
/* Read `nmemb` bytes from guest VA `va` (translated under `cr3`) into `dst`.
|
||||
* Crosses page boundaries internally. Returns 0 on success, -1 if any page in
|
||||
* the range is not present/translatable (in which case `dst` is partially
|
||||
* written and must be treated as invalid). */
|
||||
int gva_read(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, void* dst, size_t nmemb);
|
||||
|
||||
/* Write `nmemb` bytes from `src` to guest VA `va` (translated under `cr3`).
|
||||
* The mapping is RW and coherent, so the guest observes the change. Returns 0
|
||||
* on success, -1 if any page in the range is not present/translatable. */
|
||||
int gva_write(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, const void* src, size_t nmemb);
|
||||
|
||||
/* Read a UTF-16LE guest string and transcode it to UTF-8.
|
||||
* va - guest VA of the first UTF-16 code unit
|
||||
* nmemb - number of BYTES to read from the guest (rounded down to even)
|
||||
* dst - output buffer for NUL-terminated UTF-8 (may be NULL to size only)
|
||||
* size - capacity of `dst` in bytes
|
||||
* Returns the number of UTF-8 bytes the full conversion needs, EXCLUDING the
|
||||
* terminator (like snprintf): if it is >= `size`, output was truncated. When
|
||||
* `dst` is non-NULL and `size` > 0 the result is always NUL-terminated. */
|
||||
size_t gva_read_text(gva_ctx* ctx, uintptr_t cr3, uintptr_t va, size_t nmemb, char* dst, size_t size);
|
||||
|
||||
/* ---- enumeration --------------------------------------------------------- */
|
||||
|
||||
/* Enumerate processes by walking ActiveProcessLinks from System.
|
||||
* skip_system - if nonzero, omit processes with no PEB (System/kernel-only)
|
||||
* dst - caller array receiving up to `nmax` `process` records
|
||||
* nmax - capacity of `dst`
|
||||
* Returns the number written (<= nmax), or negative on failure (e.g. bootstrap
|
||||
* not completed). Enumeration stops at `nmax`; raise it to see more. */
|
||||
int proc_list(gva_ctx* ctx, int skip_system, process* dst, size_t nmax);
|
||||
|
||||
/* Enumerate a process's loaded modules via the PEB loader InLoadOrder list.
|
||||
* pr - process to inspect (uses pr->cr3 and pr->peb)
|
||||
* dst - caller array receiving up to `nmax` `pmodule` records
|
||||
* nmax - capacity of `dst`
|
||||
* Returns the number written (<= nmax), 0 if the process has no PEB/loader. */
|
||||
int proc_modules(gva_ctx* ctx, const process* pr, pmodule* dst, size_t nmax);
|
||||
|
||||
/* Enumerate mapped memory under `cr3`, clamped to the VA window [lo,hi]
|
||||
* (inclusive), as runs of equal effective protection.
|
||||
* cr3 - address space to walk (a process cr3, or the kernel cr3)
|
||||
* lo, hi - inclusive VA window; MUST lie within a single canonical half
|
||||
* (entirely user or entirely kernel). Use (0, ~0ull) loosely; the
|
||||
* walk prunes whole subtrees outside the window.
|
||||
* prot_any - protection filter: 0 keeps every run; otherwise a run is kept
|
||||
* only if (run.prot & prot_any) != 0 (e.g. VR_W for writable-only,
|
||||
* VR_X for executable-only)
|
||||
* out - caller array receiving up to `nmax` `vregion` records
|
||||
* nmax - capacity of `out`
|
||||
* Returns the TOTAL number of matching runs found. If the return value exceeds
|
||||
* `nmax` the output was truncated (only `nmax` runs were written); enlarge the
|
||||
* buffer and retry for the full map. */
|
||||
int gva_regions(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, vregion* out, int nmax);
|
||||
|
||||
#endif /* W32MS_INCLUDE_H */
|
||||
@@ -0,0 +1,63 @@
|
||||
/* scan.h - typed value scanner, pointer scanner, and gva<->signature bridges.
 *
 * Layered above the pure matcher (sigscan.h) and the gva core (include.h): this
 * is the gva-bound scanning surface. The value scanner narrows a candidate set
 * across successive snapshots; the pointer scanner discovers module-anchored
 * pointer chains; the gva_sig_* bridges build mem_view_t windows out of guest
 * memory and feed them to the signature matcher.
 */
|
||||
#ifndef W32MS_SCAN_H
|
||||
#define W32MS_SCAN_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "include.h" /* gva_ctx, process (vregion - internal) */
|
||||
#include "sigscan.h" /* mem_view_t, sig_pattern_t */
|
||||
|
||||
/* Typed value scanner: element type of a scan.
 *
 * ENUMERATOR ORDER IS LOAD-BEARING: scan.c indexes the table
 *   g_tsz[] = {1,2,4,8, 1,2,4,8, 4,8, 2}
 * by these values - do not reorder without updating scan.c. The values are
 * spelled out explicitly (they equal the implicit numbering) so an accidental
 * insertion or reorder is visible in review. */
typedef enum {
    /* signed */
    SCAN_I8  = 0,
    SCAN_I16 = 1,
    SCAN_I32 = 2,
    SCAN_I64 = 3,
    /* unsigned */
    SCAN_U8  = 4,
    SCAN_U16 = 5,
    SCAN_U32 = 6,
    SCAN_U64 = 7,
    /* float */
    SCAN_F32 = 8,
    SCAN_F64 = 9,
    SCAN_F16 = 10
} scan_type;
|
||||
|
||||
/* Comparison applied by scan_next() to each surviving candidate. */
typedef enum {
    /* require a value argument */
    SCAN_EQ, SCAN_NEQ, SCAN_GT, SCAN_LT,
    /* relative to the previous snapshot */
    SCAN_INC, SCAN_DEC, SCAN_CHANGED, SCAN_UNCHANGED
} scan_op;
|
||||
|
||||
/* Opaque value-scanner session; callers only hold a pointer. */
typedef struct scan scan;

/* One candidate reported by scan_results().
 * NOTE(review): `value` is a fixed 64-bit slot; how narrower or floating-point
 * scan types are stored in it is not shown here - confirm against scan.c. */
typedef struct {
    uint64_t addr;
    uint64_t value;
} scan_hit;
|
||||
|
||||
#define SCAN_PTR_MAXDEPTH 8 /* DFS depth and size of off[] */

/* One pointer chain found by scan_pointer(): a module-anchored base followed
 * by `depth` offsets. Only off[0..depth-1] are meaningful. */
typedef struct {
    uint64_t base;                   /* module-anchored base address */
    int      depth;                  /* number of offsets in off[] */
    int32_t  off[SCAN_PTR_MAXDEPTH]; /* dereference chain */
} scan_ptr_path;
|
||||
|
||||
scan* scan_new(gva_ctx* ctx, const process* pr, scan_type t, const void* value,
|
||||
int be, int aligned, uint64_t lo, uint64_t hi);
|
||||
scan* scan_new_cr3(gva_ctx* ctx, uintptr_t cr3, scan_type t, const void* value,
|
||||
int be, int aligned, uint64_t lo, uint64_t hi);
|
||||
int64_t scan_next(scan* s, scan_op op, const void* value);
|
||||
int64_t scan_count(scan* s);
|
||||
int scan_results(scan* s, uint64_t offset, int max, scan_hit* out);
|
||||
void scan_free(scan* s);
|
||||
|
||||
/* Pointer scanner: per the file header, discovers module-anchored pointer
 * chains. Results are written as scan_ptr_path records.
 *
 * NOTE(review): undocumented in this header; inferred from the signature only,
 * confirm against scan.c:
 *   target    - presumably the address the chains must resolve to
 *   max_depth - presumably the chain-length limit (off[] holds at most
 *               SCAN_PTR_MAXDEPTH entries)
 *   max_off   - presumably the largest offset considered per hop
 *   out, max  - caller array and its capacity */
int scan_pointer(gva_ctx* ctx, const process* pr, uint64_t target,
                 int max_depth, uint32_t max_off, scan_ptr_path* out, int max);
|
||||
|
||||
/* gva bridges to the signature matcher: build mem_view from guest memory and feed sigscan.h */
|
||||
int gva_sig_scan (gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, const sig_pattern_t* p, uint64_t* out, int max);
|
||||
int gva_sig_first(gva_ctx* ctx, uintptr_t cr3, uint64_t lo, uint64_t hi,
|
||||
uint32_t prot_any, const sig_pattern_t* p, uint64_t* va);
|
||||
int gva_sig_rip (gva_ctx* ctx, uintptr_t cr3, uint64_t hit_va,
|
||||
size_t disp_off, size_t instr_len, uint64_t* target);
|
||||
int gva_pe_section(gva_ctx* ctx, uintptr_t cr3, uint64_t module_base,
|
||||
const char* name, uint8_t* buf, size_t bufcap, mem_view_t* out);
|
||||
int gva_sig_phys (gva_ctx* ctx, const sig_pattern_t* p, uint64_t* out, int max);
|
||||
|
||||
#endif /* W32MS_SCAN_H */
|
||||
@@ -0,0 +1,105 @@
|
||||
/* sigscan.h - source-agnostic x86-64 signature scanner.
 *
 * Everything operates on a mem_view_t: a flat byte span plus the virtual address
 * that data[0] maps to. Live guest memory, a retained snapshot, and an on-disk
 * dump are identical to the matcher - only how you build the view differs. All
 * results are reported as addresses in the view's own coordinate space
 * (base_va + offset): a guest VA for a virtual view, a GPA for a physical view.
 *
 * This module is pure: it never touches a gva_ctx and performs no I/O. To scan
 * guest memory, build views from the gva layer (see scan.h: gva_sig_scan,
 * gva_pe_section, gva_sig_phys) and feed them here.
 */
|
||||
#ifndef W32MS_SIGSCAN_H
|
||||
#define W32MS_SIGSCAN_H
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* A contiguous view of memory.
 *   data    - host pointer to the bytes (borrowed; not owned by the view)
 *   size    - number of valid bytes at `data`
 *   base_va - address that data[0] corresponds to (guest VA, or GPA for a
 *             physical view). All matches are reported as base_va + offset. */
typedef struct {
    const uint8_t* data;
    size_t         size;
    uint64_t       base_va;
} mem_view_t;
|
||||
|
||||
/* A parsed byte pattern. mask[i] == 1 means bytes[i] must match; 0 = wildcard.
 * Owns two heap allocations of `len` bytes each; release with sig_free(). */
typedef struct {
    uint8_t* bytes;
    uint8_t* mask;
    size_t   len;
} sig_pattern_t;
|
||||
|
||||
/* Parse an IDA-style string, e.g. "48 8B 05 ? ? ? ? 48 85 C0" ('?' or '??' =
|
||||
* wildcard). On success fills *out and returns true; free it with sig_free().
|
||||
* Returns false on NULL args, an empty string, or a malformed hex byte. */
|
||||
bool sig_parse_ida(const char* ida, sig_pattern_t* out);
|
||||
|
||||
/* Parse code+mask form, e.g. bytes="\x48\x8B\x05\x00\x00\x00\x00", mask="xxx????"
|
||||
* ('x'/'X' = must match, anything else = wildcard). `bytes` must have at least
|
||||
* strlen(mask) readable bytes. Returns true on success (free with sig_free()),
|
||||
* false on NULL args or an empty mask. */
|
||||
bool sig_parse_mask(const uint8_t* bytes, const char* mask, sig_pattern_t* out);
|
||||
|
||||
/* Release a pattern produced by sig_parse_*. Safe on NULL and on an
|
||||
* already-freed pattern (it is zeroed). */
|
||||
void sig_free(sig_pattern_t* p);
|
||||
|
||||
/* Invoke cb(user, match_va) for every match of `p` in `v`, in ascending address
|
||||
* order. The driver anchors on the pattern's first non-wildcard byte and uses
|
||||
* memchr to skip, so it is fast even on sparse matches. `cb` returns nonzero to
|
||||
* stop early. This is the building block under sig_first/sig_all and is what a
|
||||
* windowed caller uses to de-duplicate across window seams (see scan.h). */
|
||||
void sig_each(mem_view_t v, const sig_pattern_t* p,
|
||||
int (*cb)(void* user, uint64_t va), void* user);
|
||||
|
||||
/* First match, or 0 if none. (0 is also a theoretically valid base_va of 0; in
|
||||
* practice view base addresses are nonzero, so 0 reliably means "no match".) */
|
||||
uint64_t sig_first(mem_view_t v, const sig_pattern_t* p);
|
||||
|
||||
/* All matches. If `out` is NULL, returns the total match count (use it to size a
|
||||
* buffer). Otherwise writes up to `max` addresses to `out` and returns how many
|
||||
* were written (capped at `max`). */
|
||||
size_t sig_all(mem_view_t v, const sig_pattern_t* p, uint64_t* out, size_t max);
|
||||
|
||||
/* Resolve an x86-64 RIP-relative operand at a match site.
|
||||
* hit_va - VA of the matched pattern start (== instruction start)
|
||||
* disp_off - byte offset of the int32 displacement within the pattern
|
||||
* instr_len - full instruction length (next RIP = hit_va + instr_len); for the
|
||||
* common "<prefix> disp32" tail this is disp_off + 4
|
||||
* Returns the absolute target VA, or 0 if the displacement bytes lie outside `v`.
|
||||
* The result is an address in the same space as `v` (a guest VA for a guest
|
||||
* view): dereference it with gva_read under the matching cr3. This is how an
|
||||
* unexported global is located from a code signature. */
|
||||
uint64_t sig_rip(mem_view_t v, uint64_t hit_va, size_t disp_off, size_t instr_len);
|
||||
|
||||
/* Clamp a sub-view [start_va, start_va+size) against `v`. Returns a zeroed view
|
||||
* (data == NULL) if start_va is outside `v`; otherwise `size` is trimmed to what
|
||||
* is actually available. Useful for narrowing a scan to a [start,end] window. */
|
||||
mem_view_t mem_sub(mem_view_t v, uint64_t start_va, size_t size);
|
||||
|
||||
/* Locate a PE section by name within a view that contains at least the image
|
||||
* headers at `module_base` (the first page is enough).
|
||||
* module_base - image base VA, must be >= v.base_va and inside `v`
|
||||
* name - section name, e.g. ".text" (compared up to 8 bytes)
|
||||
* rva_out - receives the section RVA (relative to module_base); may be NULL
|
||||
* vsize_out - receives the section virtual size; may be NULL
|
||||
* Returns true if found. Only the headers need to be present in `v`; the section
|
||||
* body does not. */
|
||||
bool pe_find_section(mem_view_t v, uint64_t module_base, const char* name,
|
||||
uint64_t* rva_out, uint32_t* vsize_out);
|
||||
|
||||
/* Locate a PE section AND return a sub-view spanning it. Requires the whole
|
||||
* section body to be present in `v` (true for an in-memory image dump). Prefer
|
||||
* scanning ".text" over a whole image: faster, and avoids false hits in data.
|
||||
* Returns true and fills *out on success. For guest memory, where the body is
|
||||
* usually not co-resident with the headers, use gva_pe_section (scan.h). */
|
||||
bool pe_section(mem_view_t v, uint64_t module_base, const char* name,
|
||||
mem_view_t* out);
|
||||
|
||||
#endif /* W32MS_SIGSCAN_H */
|
||||
Reference in New Issue
Block a user