#define WIN32_LEAN_AND_MEAN #include #include #include #include "region.h" #include "atomic-shim.h" /* x86-TSO ordering for contract init publish */ #define VGPU_2MB (2u * 1024u * 1024u) /* Page-segregated init of the contract over an already-pinned region base. * Init-ordering per contract: status=INIT, latest=NONE, backend, supported_formats, * release-barrier; heartbeat starts later (in the run pump). */ static void region_init_contract(vgpu_region_t* r) { vgpu_producer_t* p = r->producer; vgpu_control_t* c = r->control; memset(p, 0, sizeof *p); memset(c, 0, sizeof *c); p->status = VGPU_ST_INIT; p->backend = VGPU_BK_NONE; p->error_code = 0; p->applied_fps = 0; p->supported_formats = (1u << VGPU_FMT_BGRA8888); p->run_epoch = 0; p->heartbeat = 0; p->frame_id = 0; p->ctrl_ack = 0; p->full_frame_ack = 0; for (uint32_t i = 0; i < VGPU_SLOT_COUNT; i++) p->seq[i] = 0; /* control starts RUN: producer captures immediately; host may STOP/PAUSE */ c->ctrl_gen = 0; c->desired_state = VGPU_CMD_RUN; c->target_fps = 0; c->draw_cursor = 1; c->full_frame_req = 0; c->consumer_tick = 0; c->attached = 0; /* publish latest last with a release store gating all of the above */ vgpu_sfence(); vgpu_store_release32(&p->latest, VGPU_LATEST_NONE); } static int adjust_lock_memory_privilege(void) { HANDLE tok; if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &tok)) return 0; TOKEN_PRIVILEGES tp; memset(&tp, 0, sizeof tp); tp.PrivilegeCount = 1; if (!LookupPrivilegeValueA(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid)) { CloseHandle(tok); return 0; } tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; int ok = AdjustTokenPrivileges(tok, FALSE, &tp, sizeof tp, NULL, NULL) && GetLastError() == ERROR_SUCCESS; CloseHandle(tok); return ok; } int vgpu_region_create(vgpu_region_t* out) { memset(out, 0, sizeof *out); const uint64_t bytes = VGPU_REGION_BYTES; void* os_base = NULL; uint8_t* base = NULL; uint64_t os_total = 0; if (adjust_lock_memory_privilege()) { SIZE_T large_min = GetLargePageMinimum(); if (large_min && large_min <= VGPU_2MB) { SIZE_T rounded = (SIZE_T)((bytes + VGPU_2MB - 1) & ~(uint64_t)(VGPU_2MB - 1)); void* p = VirtualAlloc(NULL, rounded, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); if (p) { /* large pages are >= 2 MiB → base is already 2 MiB-aligned */ os_base = p; base = (uint8_t*)p; os_total = rounded; fprintf(stderr, "region: MEM_LARGE_PAGES %llu MiB at %p\n", (unsigned long long)(rounded / (1024 * 1024)), p); } else { fprintf(stderr, "region: MEM_LARGE_PAGES failed (%lu), fallback\n", GetLastError()); } } } else { fprintf(stderr, "region: SE_LOCK_MEMORY unavailable, fallback\n"); } if (!base) { uint64_t total = bytes + VGPU_2MB; void* p = VirtualAlloc(NULL, (SIZE_T)total, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); if (!p) { fprintf(stderr, "region: VirtualAlloc %llu MiB failed (%lu)\n", (unsigned long long)(total / (1024 * 1024)), GetLastError()); return 1; } uintptr_t addr = (uintptr_t)p; uintptr_t aligned = (addr + VGPU_2MB - 1) & ~(uintptr_t)(VGPU_2MB - 1); /* The region must be RESIDENT, not merely committed: the host reads it out * of guest RAM and only PRESENT pages are visible to it — a committed but * demand-zero page has no PTE, so it is unreadable from the host. VirtualLock * pins the pages into the working set, but it can lock at most the process * MINIMUM working set, and the default quota is far below the region size * (so a bare VirtualLock fails with ERROR_WORKING_SET_QUOTA). Raise the * minimum first. NB: VirtualLock / SetProcessWorkingSetSize do NOT need * SE_LOCK_MEMORY — that privilege is only for large pages / AWE. */ SIZE_T ws_min = (SIZE_T)(bytes + 64ull * 1024 * 1024); /* region + headroom */ SIZE_T ws_max = ws_min + 128ull * 1024 * 1024; SIZE_T cur_min = 0, cur_max = 0; if (GetProcessWorkingSetSize(GetCurrentProcess(), &cur_min, &cur_max)) { if (cur_min > ws_min) ws_min = cur_min; /* never shrink an existing quota */ if (cur_max > ws_max) ws_max = cur_max; } if (!SetProcessWorkingSetSize(GetCurrentProcess(), ws_min, ws_max)) fprintf(stderr, "region: SetProcessWorkingSetSize(%llu MiB) failed (%lu)\n", (unsigned long long)(ws_min / (1024 * 1024)), GetLastError()); if (!VirtualLock((void*)aligned, (SIZE_T)bytes)) { fprintf(stderr, "region: VirtualLock failed (%lu) — pre-faulting region\n", GetLastError()); /* Last resort: fault every page so it is at least PRESENT now. Without * the lock the trimmer may evict it under pressure, but the raised * minimum working set above makes eviction far less likely. */ volatile uint8_t* q = (volatile uint8_t*)aligned; for (uint64_t off = 0; off < bytes; off += 4096u) q[off] = q[off]; } os_base = p; base = (uint8_t*)aligned; os_total = total; fprintf(stderr, "region: fallback VirtualAlloc+lock %llu MiB, aligned at %p\n", (unsigned long long)(bytes / (1024 * 1024)), (void*)aligned); } if (((uintptr_t)base & (VGPU_2MB - 1)) != 0) { fprintf(stderr, "region: base %p not 2 MiB aligned\n", (void*)base); VirtualFree(os_base, 0, MEM_RELEASE); return 1; } out->os_base = os_base; out->base = base; out->os_total = os_total; out->producer = (vgpu_producer_t*)(base + VGPU_PRODUCER_OFFSET); out->control = (vgpu_control_t*)(base + VGPU_CONTROL_OFFSET); out->ring = base + VGPU_RING_OFFSET; region_init_contract(out); fprintf(stderr, "region: contract ready (producer=%p control=%p ring=%p)\n", (void*)out->producer, (void*)out->control, (void*)out->ring); return 0; } void vgpu_region_destroy(vgpu_region_t* r) { if (r && r->os_base) { VirtualUnlock(r->base, (SIZE_T)VGPU_REGION_BYTES); VirtualFree(r->os_base, 0, MEM_RELEASE); memset(r, 0, sizeof *r); } }