Files
vatrog-vm-introspection-engine/include/win32.h
T
lirent 06230ac680 Add PE section enumeration and section views (section-local / RVA / absolute)
vmie_win32_sections lists a module's PE sections (name, RVA, virtual size,
VR_* protection) for any image base in a process address space - including a
base found by scanning, not only loader-list modules. vmie_win32_section_view
gathers a section's bytes into a caller buffer and returns a mem_view_t whose
base_va is chosen by view_base: SECTION_LOCAL (0, section-relative offsets),
MODULE_RVA (ASLR-stable module RVAs), or ABSOLUTE_VA (live VA). Because the pure
scanners report base_va + offset, the mode directly selects the coordinate space
of every hit - feeding a view to sig_all or x86_decode yields section-relative,
RVA, or absolute results with no extra work.

The MZ/PE header walk is factored into one helper that both pe_find_section and
the new enumerator share - no second parser. The whole public surface is
documented with the operational nuances (coordinate stability, borrowed-buffer
lifetime, truncation, residency) and worked examples.
2026-06-16 19:06:59 +03:00

271 lines
14 KiB
C

/* win32.h - public Windows-guest surface of the vmi-engine.
*
* The host opens a guest's RAM backing file (a flat, writable, coherent mmap),
* recovers the kernel address space, and reads/writes guest memory by CR3 and
* virtual address. Everything is CR3-keyed, never PID-keyed: a `process` already
* carries its own cr3, which is the key to that address space.
*
* This header is the Windows-typed surface (process/pmodule/gtext, bring-up,
* enumeration, the win32 scan wrappers). The OS-agnostic memory-model contract
* lives in memmodel.h (pulled in below); the scanners in scan.h/sigscan.h.
*
* Conventions:
* - `cr3` is a raw CR3 / DirectoryTableBase value; low flag bits are masked
* internally, so either the masked PML4 GPA or the raw register works.
* - Integer returns: 0 on success, negative on failure, unless stated.
* - The library never takes ownership of caller buffers and never retains a
* pointer past the call that received it, unless explicitly stated.
*/
#ifndef VMIE_WIN32_H
#define VMIE_WIN32_H
#include <stdint.h>
#include <stddef.h>
#include "memmodel.h" /* vmie_mem, vregion/VR_*, task/range, gva_read/write/ptr/regions/sweep */
#include "sigscan.h" /* mem_view_t, sig_pattern_t */
#include "scan.h" /* scan_type, scan_ptr_path, generic scan surface */
/* Opaque introspection context.
 *
 * Only forward-declared here: the struct is completed in
 * src/engine/win32/engine-win32.h, so callers only ever hold a pointer.
 *
 * Lifecycle: created by vmie_win32_open(), populated by host_bootstrap(),
 * released by vmie_win32_close(). */
typedef struct vmie_win32 vmie_win32;
/* A counted guest string that is still resident in guest memory (for example
 * the buffer of a UNICODE_STRING). Nothing is copied: `va` refers to guest
 * memory and is decoded with gva_read_text. */
typedef struct {
    uint64_t va;  /* guest VA of the first UTF-16LE code unit (0 if absent) */
    uint32_t len; /* length in BYTES (not characters); always even for UTF-16 */
} gtext;
/* One live process, as reported by proc_list(). The record is self-contained:
 * `cr3` alone is enough to read/write the process's user address space, and
 * `eprocess`/`peb` re-anchor it in kernel/user space without another lookup. */
typedef struct {
    uint64_t cr3;         /* DirectoryTableBase (PFN-masked); key to this address space */
    uint64_t peb;         /* PEB VA (0 for system/kernel-only processes) */
    uint64_t eprocess;    /* _EPROCESS VA (kernel object, read under the kernel cr3) */
    uint32_t pid;         /* process id */
    uint32_t ppid;        /* parent process id ((uint32_t)-1 if unavailable) */
    uint64_t create_time; /* raw KSYSTEM_TIME / FILETIME, 100 ns ticks (0 if unavailable) */
    char     name[16];    /* ImageFileName, NUL-terminated ASCII (up to 15 chars) */
    gtext    path;        /* full image path as a guest UTF-16 string (may be empty) */
} process;
/* One loaded module (image) of a process, as reported by proc_modules(). */
typedef struct {
    const process* pr;    /* owning process; its cr3 is the address space these VAs live in */
    uint64_t       entry; /* _LDR_DATA_TABLE_ENTRY VA */
    uint64_t       base;  /* image base VA (page-aligned); pair with `size` for a MODULE scope */
    uint32_t       size;  /* image size in bytes (SizeOfImage) */
    gtext          name;  /* module file name (gtext UTF-16, e.g. "ntdll.dll") */
    gtext          path;  /* full module path (gtext UTF-16) */
} pmodule;
/* ---- lifecycle ----------------------------------------------------------- */
/* Open the guest RAM backing file at `ram_path` and build a context over it.
 *   ram_path - path to a writable, share=on RAM backing file
 *   low      - size in bytes of below-4G guest RAM (the PCI-hole split point);
 *              pass the value from the VM's memory layout. If total RAM <= low,
 *              the split is inert.
 * Returns a new context, or NULL on open/mmap failure.
 *
 * Call host_bootstrap() on the returned context next; that is the step that
 * makes it ready for proc_list()/gva_read()/etc. Release the context with
 * vmie_win32_close(). */
vmie_win32* vmie_win32_open(const char* ram_path, uint64_t low);
/* Unmap, close, and free the context `v`. Passing NULL is safe.
 *
 * After this call every pointer into guest memory that was obtained through
 * `v` is invalid and must not be used. */
void vmie_win32_close(vmie_win32* v);
/* Borrow the engine's guest-memory handle, for use with the generic
 * address-space primitives (gva_read/gva_regions/...).
 *   v - context to borrow the handle from
 * Returns a pointer that is owned by `v`: it stays valid until
 * vmie_win32_close(v) and must NOT be freed or retained past that call.
 * Returns NULL when `v` is NULL. */
vmie_mem* vmie_win32_mem(vmie_win32* v);
/* One-shot bring-up of a context returned by vmie_win32_open(). The steps are:
 *   - locate the guest agent beacon in physical RAM
 *   - recover a bootstrap CR3
 *   - find ntoskrnl
 *   - build the struct-offset profile
 *   - derive the permanent System DirectoryTableBase (kernel cr3) and the
 *     System _EPROCESS
 *   - ACK the agent
 * On success the context is ready for proc_list()/gva_read()/etc.
 *
 * Returns 0 on success, or a negative stage code (-1..-6) identifying the step
 * that failed.
 * NOTE(review): the mapping from stage code to step is not spelled out in this
 * header - confirm against the implementation before relying on a specific code.
 *
 * Cold path: call once after vmie_win32_open(). */
int host_bootstrap(vmie_win32* v);
/* ---- guest string decode ------------------------------------------------- */
/* Read a UTF-16LE guest string and transcode it to UTF-8.
 *   v     - engine handle
 *   cr3   - address space the string is read from
 *   va    - guest VA of the first UTF-16 code unit
 *   nmemb - number of BYTES to read from the guest (rounded down to even);
 *           despite the name this is a byte count, not a code-unit count
 *   dst   - output buffer for NUL-terminated UTF-8 (may be NULL to size only)
 *   size  - capacity of `dst` in bytes
 * Returns the number of UTF-8 bytes the full conversion needs, EXCLUDING the
 * terminator (like snprintf): if it is >= `size`, output was truncated. When
 * `dst` is non-NULL and `size` > 0 the result is always NUL-terminated.
 *
 * NOTE(review): `cr3` and `va` are uintptr_t here, while the rest of this
 * header carries guest CR3/VA values as uint64_t. On a host whose uintptr_t is
 * narrower than 64 bits a guest address would not fit - confirm this is
 * intended before building for such a host. */
size_t gva_read_text(vmie_win32* v, uintptr_t cr3, uintptr_t va, size_t nmemb, char* dst, size_t size);
/* ---- enumeration --------------------------------------------------------- */
/* Enumerate processes by walking ActiveProcessLinks, starting from System.
 *   v           - engine handle
 *   skip_system - if nonzero, omit processes with no PEB (System/kernel-only)
 *   dst         - caller array receiving up to `nmax` `process` records
 *   nmax        - capacity of `dst`, in records
 * Returns the number of records written (<= nmax), or negative on failure
 * (e.g. bootstrap not completed).
 *
 * Enumeration stops once `nmax` records have been written, so a result equal
 * to `nmax` may mean the list was cut short; raise `nmax` to see more. */
int proc_list(vmie_win32* v, int skip_system, process* dst, size_t nmax);
/* Enumerate a process's loaded modules via the PEB loader InLoadOrder list.
 *   v    - engine handle
 *   pr   - process to inspect (uses pr->cr3 and pr->peb)
 *   dst  - caller array receiving up to `nmax` `pmodule` records
 *   nmax - capacity of `dst`, in records
 * Returns the number of records written (<= nmax), or 0 if the process has no
 * PEB/loader.
 * NOTE(review): no negative return is documented for this function; confirm
 * whether other failures (e.g. an unreadable loader list) are also reported
 * as 0 or as a negative value. */
int proc_modules(vmie_win32* v, const process* pr, pmodule* dst, size_t nmax);
/* ---- win32 scan wrappers ------------------------------------------------- *
* Convenience entry points over the generic cr3/range scan surface (scan.h).
* They project a Windows `process` to its cr3, and its `pmodule[]` to a decoded
* `range[]` (UTF-8 names), then delegate to scan_new_cr3 / scan_pointer. */
/* Open a value-scan session over the user address space of `pr`.
 * Equivalent to scan_new_cr3(&v->mem, pr->cr3, ...).
 *   v  - engine handle (supplies the guest-memory handle)
 *   pr - process whose cr3 selects the address space to scan
 *   t, value, be, aligned, lo, hi
 *      - presumably forwarded unchanged to scan_new_cr3; see scan.h for their
 *        meaning (TODO confirm)
 * Returns the new session, or NULL on NULL `pr` or OOM. */
scan* scan_new(vmie_win32* v, const process* pr, scan_type t, const void* value,
int be, int aligned, uint64_t lo, uint64_t hi);
/* Pointer scan over `pr`'s user space, anchored on its loaded modules.
 * Resolves `pr`'s module list to range[] (names engine-decoded) and delegates
 * to scan_pointer.
 *   v  - engine handle
 *   pr - process to scan; its loaded modules are the anchors
 *   target, max_depth, max_off, out, max
 *      - presumably forwarded to scan_pointer; see scan.h for their meaning
 *        (TODO confirm)
 * Returns the number of paths found, or negative on failure. */
int vmie_scan_pointer(vmie_win32* v, const process* pr, uint64_t target,
int max_depth, uint32_t max_off, scan_ptr_path* out, int max);
/* ---- PE sections + section views ----------------------------------------- *
* A section is a PE-image concept, so it is keyed by (vmie_win32*, cr3,
* module_base): the address space and where the image is based in it. The
* module need NOT be in the loader list - any valid PE base works, including
* one found by scanning for MZ/PE (a manually-mapped or hidden module). */
/* Selects the coordinate space of mem_view_t.base_va for a section view.
 *
 * The pure scanners (sig_all/sig_each, x86_decode callers, ...) report each
 * result as base_va + offset, so the chosen mode determines the coordinates
 * that hits/targets come back in:
 *
 *   SECTION_LOCAL - base_va = 0. Results are section-relative offsets in
 *                   [0, vsize). The most stable form: independent of the image
 *                   base AND of where the section sits in the image. Use when
 *                   only positions inside one section matter.
 *   MODULE_RVA    - base_va = section RVA. Results are module-relative (RVA):
 *                   ASLR-stable across runs of the same binary, the canonical
 *                   form for portable signatures and for correlating across
 *                   sections of one module. Recommended for sig-gen.
 *   ABSOLUTE_VA   - base_va = module_base + RVA. Results are live guest
 *                   virtual addresses, valid for gva_read/gva_ptr under this
 *                   cr3 NOW. NOT stable across runs (ASLR). Use when a hit
 *                   must be dereferenced immediately in the live process. */
typedef enum {
    SECTION_LOCAL = 0, /* section-relative offsets */
    MODULE_RVA    = 1, /* module-relative RVAs */
    ABSOLUTE_VA   = 2  /* live guest virtual addresses */
} view_base;
/* Descriptor of a single PE section, as filled in by vmie_win32_sections. */
typedef struct {
    /* Section name, NUL-terminated. PE names are <= 8 bytes, so name[8] is the
     * NUL slot. Names are NOT unique in a malformed/packed image - prefer
     * iterating by index over matching by name. */
    char name[9];
    /* Section RVA: byte offset of the section from the module base
     * (so ABSOLUTE_VA = module_base + rva). */
    uint32_t rva;
    /* Virtual size in bytes (the in-memory size; may exceed the on-disk raw
     * size). Size the buffer for vmie_win32_section_view from THIS. */
    uint32_t vsize;
    /* Effective protection as VR_* flags (VR_R/VR_W/VR_X), derived from the
     * section Characteristics (IMAGE_SCN_MEM_READ/WRITE/EXECUTE). VR_U is
     * never set: these are image semantics, not live PTE rights. */
    uint32_t prot;
} section_desc;
/* Enumerate the sections of the PE image based at `module_base` in the address
 * space `cr3`.
 *   v           - engine handle
 *   cr3         - the process address space the module is mapped in (e.g. a
 *                 process->cr3). The module need NOT be in the loader list -
 *                 any valid PE base works (e.g. one found by scanning for
 *                 MZ/PE, i.e. a manually-mapped / hidden module).
 *   module_base - image base VA; only the PE headers (first page) must be
 *                 resident and readable under `cr3`.
 *   out, max    - caller array receiving up to `max` section_desc; `out` may be
 *                 NULL to count only (then `max` is ignored).
 * Returns the TOTAL section count (may exceed `max` => enlarge and retry), or
 * -1 if the headers are absent/unreadable or `module_base` is not a PE.
 *
 * Because the result is the total and not the number written, always bound
 * iteration over `out` by both the return value and `max`.
 *
 * The returned `rva`/`vsize` are ASLR-independent (image-relative): stable
 * across runs of the same binary. The absolute placement is module_base + rva.
 *
 * Example - count the sections without a buffer:
 *   int total = vmie_win32_sections(v, cr3, base, NULL, 0);
 *
 * Example - list the sections of the first module of a process:
 *   pmodule m;
 *   if (proc_modules(v, pr, &m, 1) < 1) return;  // nothing written => m is unset
 *   section_desc s[32];
 *   int n = vmie_win32_sections(v, pr->cr3, m.base, s, 32);
 *   for (int i = 0; i < n && i < 32; i++)        // n may be -1 or exceed 32
 *     printf("%-8s rva=%#x vsize=%#x %c%c%c\n", s[i].name, s[i].rva, s[i].vsize,
 *            (s[i].prot & VR_R) ? 'R' : '-', (s[i].prot & VR_W) ? 'W' : '-',
 *            (s[i].prot & VR_X) ? 'X' : '-'); */
int vmie_win32_sections(vmie_win32* v, uint64_t cr3, uint64_t module_base,
section_desc* out, int max);
/* Gather a section's bytes from the live process into `buf` and return a flat
 * mem_view_t over them in the coordinate space chosen by `mode`. This is the
 * "section memory, addressed from 0" entry point.
 *   v, cr3, module_base - as in vmie_win32_sections.
 *   sec         - the section to open (from vmie_win32_sections; carries
 *                 rva/vsize). Must be non-NULL.
 *   mode        - view_base: sets out->base_va.
 *                 SECTION_LOCAL => 0, MODULE_RVA => sec->rva,
 *                 ABSOLUTE_VA => module_base + sec->rva.
 *   buf, bufcap - caller-owned destination; size it to sec->vsize (from
 *                 enumeration). If bufcap < sec->vsize the section is
 *                 TRUNCATED: out->size = bufcap (this is NOT an error). The
 *                 returned view BORROWS buf - it is valid only while `buf`
 *                 lives and is left unmodified; the library retains no pointer
 *                 past this call.
 *   out         - on success: out->data = buf, out->size = min(sec->vsize,
 *                 bufcap), out->base_va per `mode`. Must be non-NULL.
 * Returns 0 on success, or -1 if `sec`/`buf`/`out` is NULL, the headers are
 * unreadable, or the section bytes are not fully resident (paged out / sparse)
 * so the read fails. Only the success case is documented as filling `out`, so
 * do not read `out` after a -1.
 *
 * Reversing nuance: .text/.rdata are normally fully resident; a section that is
 * partly paged out yields -1 - retry when resident, or sweep the live VA range
 * with gva_sweep instead. The base-mode also picks the coordinate stability:
 * SECTION_LOCAL/MODULE_RVA are ASLR-stable (offset / RVA), ABSOLUTE_VA is the
 * live VA for this run only.
 *
 * Example - find an IDA pattern in .text as RVAs (ASLR-stable across runs):
 *   section_desc s[32];
 *   int n = vmie_win32_sections(v, cr3, base, s, 32);
 *   for (int i = 0; i < n && i < 32; i++) if (!strcmp(s[i].name, ".text")) {
 *     uint8_t* buf = malloc(s[i].vsize);  // a NULL buf makes the call return -1
 *     mem_view_t tv;
 *     if (vmie_win32_section_view(v, cr3, base, &s[i], MODULE_RVA,
 *                                 buf, s[i].vsize, &tv) == 0) {
 *       sig_pattern_t p; sig_parse_ida("48 8B 05 ? ? ? ?", &p);
 *       uint64_t rvas[64];
 *       int h = sig_all(tv, &p, rvas, 64);  // rvas[] are module RVAs, stable
 *       sig_free(&p);
 *     }
 *     free(buf);
 *   }
 *
 * Example - step instructions section-locally (offset 0 == section start).
 * Here `text` is the section_desc of the section to walk and `buf` is a
 * caller buffer of at least text.vsize bytes:
 *   mem_view_t tv;
 *   if (vmie_win32_section_view(v, cr3, base, &text, SECTION_LOCAL,
 *                               buf, text.vsize, &tv) == 0) {
 *     for (size_t off = 0; off < tv.size; ) {
 *       x86_insn in;
 *       int len = x86_decode(tv.data + off, tv.size - off, &in);  // off == section-local addr
 *       if (len <= 0) { off++; continue; }
 *       off += (size_t)len;
 *     }
 *   } */
int vmie_win32_section_view(vmie_win32* v, uint64_t cr3, uint64_t module_base,
const section_desc* sec, view_base mode,
uint8_t* buf, size_t bufcap, mem_view_t* out);
#endif /* VMIE_WIN32_H */