Files
vatrog-vm-signaling/src/si/vgpu-stream/win32/region.c
T

173 lines
6.8 KiB
C
Raw Normal View History

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <stdio.h>
#include <string.h>
#include "region.h"
#include "atomic-shim.h" /* x86-TSO ordering for contract init publish */
#define VGPU_2MB (2u * 1024u * 1024u)
/* Initialise the page-segregated contract (producer + control structures) of a
 * region whose `producer` and `control` pointers have already been set up over
 * a pinned base.
 *
 * Order, per the contract: zero both structures, fill in the producer fields
 * (status=INIT, backend, supported_formats, ...), fill in the control fields,
 * then publish `latest` = NONE as the very last store, behind vgpu_sfence()
 * and through vgpu_store_release32(). The heartbeat is only reset here; it
 * starts advancing later, in the run pump.
 *
 * NOTE(review): the exact ordering guarantees come from atomic-shim.h, which
 * is not visible from this file — confirm there before reordering anything. */
static void region_init_contract(vgpu_region_t* r) {
    vgpu_producer_t* const prod = r->producer;
    vgpu_control_t* const ctl = r->control;

    /* Start from a clean slate; the explicit assignments below document the
     * intended initial value of every field even where it is zero. */
    memset(prod, 0, sizeof *prod);
    memset(ctl, 0, sizeof *ctl);

    /* --- producer side --- */
    prod->status = VGPU_ST_INIT;
    prod->backend = VGPU_BK_NONE;
    prod->error_code = 0;
    prod->applied_fps = 0;
    prod->supported_formats = (1u << VGPU_FMT_BGRA8888); /* only BGRA8888 advertised */
    prod->run_epoch = 0;
    prod->heartbeat = 0;
    prod->frame_id = 0;
    prod->ctrl_ack = 0;
    prod->full_frame_ack = 0;

    uint32_t slot = 0;
    while (slot < VGPU_SLOT_COUNT) {
        prod->seq[slot] = 0;
        slot++;
    }

    /* --- control side ---
     * Control starts in RUN so the producer captures immediately; the host may
     * later request STOP/PAUSE. */
    ctl->ctrl_gen = 0;
    ctl->desired_state = VGPU_CMD_RUN;
    ctl->target_fps = 0;
    ctl->draw_cursor = 1;
    ctl->full_frame_req = 0;
    ctl->consumer_tick = 0;
    ctl->attached = 0;

    /* Publish `latest` last: fence, then release-store, so it gates all of the
     * stores above. */
    vgpu_sfence();
    vgpu_store_release32(&prod->latest, VGPU_LATEST_NONE);
}
/* Try to enable the "lock pages in memory" privilege on the current process
 * token; this privilege is what the large-page allocation path depends on.
 *
 * Returns 1 when the privilege was actually enabled, 0 otherwise (token could
 * not be opened, privilege name could not be resolved, or the account does not
 * hold the privilege). */
static int adjust_lock_memory_privilege(void) {
    HANDLE tok;
    if (!OpenProcessToken(GetCurrentProcess(),
                          TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &tok)) {
        return 0;
    }

    TOKEN_PRIVILEGES tp;
    memset(&tp, 0, sizeof tp);
    tp.PrivilegeCount = 1;

    /* Pass a narrow literal to the explicitly-ANSI lookup. The SDK's
     * SE_LOCK_MEMORY_NAME is defined through TEXT(), so in a UNICODE build it
     * is a wide string and would make LookupPrivilegeValueA fail, silently
     * disabling the large-page path. */
    if (!LookupPrivilegeValueA(NULL, "SeLockMemoryPrivilege",
                               &tp.Privileges[0].Luid)) {
        CloseHandle(tok);
        return 0;
    }
    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

    /* AdjustTokenPrivileges can report success without assigning the
     * privilege; only a last-error of ERROR_SUCCESS means it was enabled.
     * The last-error value is read before CloseHandle can disturb it. */
    int ok = AdjustTokenPrivileges(tok, FALSE, &tp, sizeof tp, NULL, NULL)
             && GetLastError() == ERROR_SUCCESS;

    CloseHandle(tok);
    return ok;
}
int vgpu_region_create(vgpu_region_t* out) {
memset(out, 0, sizeof *out);
const uint64_t bytes = VGPU_REGION_BYTES;
void* os_base = NULL;
uint8_t* base = NULL;
uint64_t os_total = 0;
if (adjust_lock_memory_privilege()) {
SIZE_T large_min = GetLargePageMinimum();
if (large_min && large_min <= VGPU_2MB) {
SIZE_T rounded = (SIZE_T)((bytes + VGPU_2MB - 1) & ~(uint64_t)(VGPU_2MB - 1));
void* p = VirtualAlloc(NULL, rounded,
MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
PAGE_READWRITE);
if (p) {
/* large pages are >= 2 MiB → base is already 2 MiB-aligned */
os_base = p;
base = (uint8_t*)p;
os_total = rounded;
fprintf(stderr, "region: MEM_LARGE_PAGES %llu MiB at %p\n",
(unsigned long long)(rounded / (1024 * 1024)), p);
} else {
fprintf(stderr, "region: MEM_LARGE_PAGES failed (%lu), fallback\n",
GetLastError());
}
}
} else {
fprintf(stderr, "region: SE_LOCK_MEMORY unavailable, fallback\n");
}
if (!base) {
uint64_t total = bytes + VGPU_2MB;
void* p = VirtualAlloc(NULL, (SIZE_T)total, MEM_RESERVE | MEM_COMMIT,
PAGE_READWRITE);
if (!p) {
fprintf(stderr, "region: VirtualAlloc %llu MiB failed (%lu)\n",
(unsigned long long)(total / (1024 * 1024)), GetLastError());
return 1;
}
uintptr_t addr = (uintptr_t)p;
uintptr_t aligned = (addr + VGPU_2MB - 1) & ~(uintptr_t)(VGPU_2MB - 1);
/* The region must be RESIDENT, not merely committed: the host reads it out
* of guest RAM and only PRESENT pages are visible to it — a committed but
* demand-zero page has no PTE, so it is unreadable from the host. VirtualLock
* pins the pages into the working set, but it can lock at most the process
* MINIMUM working set, and the default quota is far below the region size
* (so a bare VirtualLock fails with ERROR_WORKING_SET_QUOTA). Raise the
* minimum first. NB: VirtualLock / SetProcessWorkingSetSize do NOT need
* SE_LOCK_MEMORY — that privilege is only for large pages / AWE. */
SIZE_T ws_min = (SIZE_T)(bytes + 64ull * 1024 * 1024); /* region + headroom */
SIZE_T ws_max = ws_min + 128ull * 1024 * 1024;
SIZE_T cur_min = 0, cur_max = 0;
if (GetProcessWorkingSetSize(GetCurrentProcess(), &cur_min, &cur_max)) {
if (cur_min > ws_min) ws_min = cur_min; /* never shrink an existing quota */
if (cur_max > ws_max) ws_max = cur_max;
}
if (!SetProcessWorkingSetSize(GetCurrentProcess(), ws_min, ws_max))
fprintf(stderr, "region: SetProcessWorkingSetSize(%llu MiB) failed (%lu)\n",
(unsigned long long)(ws_min / (1024 * 1024)), GetLastError());
if (!VirtualLock((void*)aligned, (SIZE_T)bytes)) {
fprintf(stderr, "region: VirtualLock failed (%lu) — pre-faulting region\n",
GetLastError());
/* Last resort: fault every page so it is at least PRESENT now. Without
* the lock the trimmer may evict it under pressure, but the raised
* minimum working set above makes eviction far less likely. */
volatile uint8_t* q = (volatile uint8_t*)aligned;
for (uint64_t off = 0; off < bytes; off += 4096u) q[off] = q[off];
}
os_base = p;
base = (uint8_t*)aligned;
os_total = total;
fprintf(stderr, "region: fallback VirtualAlloc+lock %llu MiB, aligned at %p\n",
(unsigned long long)(bytes / (1024 * 1024)), (void*)aligned);
}
if (((uintptr_t)base & (VGPU_2MB - 1)) != 0) {
fprintf(stderr, "region: base %p not 2 MiB aligned\n", (void*)base);
VirtualFree(os_base, 0, MEM_RELEASE);
return 1;
}
out->os_base = os_base;
out->base = base;
out->os_total = os_total;
out->producer = (vgpu_producer_t*)(base + VGPU_PRODUCER_OFFSET);
out->control = (vgpu_control_t*)(base + VGPU_CONTROL_OFFSET);
out->ring = base + VGPU_RING_OFFSET;
region_init_contract(out);
fprintf(stderr, "region: contract ready (producer=%p control=%p ring=%p)\n",
(void*)out->producer, (void*)out->control, (void*)out->ring);
return 0;
}
/* Tear down a region created by vgpu_region_create.
 *
 * A NULL `r`, or a region with no OS allocation (os_base == NULL), is a no-op.
 * Otherwise the region is unlocked, the whole OS allocation is released, and
 * `*r` is zeroed so a second call becomes a no-op. VirtualUnlock is issued
 * unconditionally, whichever allocation path produced the region; the results
 * of both Win32 calls are ignored. */
void vgpu_region_destroy(vgpu_region_t* r) {
    if (r == NULL || r->os_base == NULL) {
        return;
    }

    VirtualUnlock(r->base, (SIZE_T)VGPU_REGION_BYTES);
    VirtualFree(r->os_base, 0, MEM_RELEASE);
    memset(r, 0, sizeof *r);
}