/*
 * vatrog-vm-signaling/src/si/vgpu-stream/win32/present.c
 *
 * Win32 presenter: stages submitted content, optionally composites the cursor,
 * and publishes frames into the shared region.
 */

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <string.h>
#include <stdio.h>
#include "present.h"
#include "stream.h" /* OS-agnostic publish / control API + region-view */
#include "cursor.h"
#include "geometry.h" /* one-shot display-geometry sample at session start */
/* cursor arena sizing
 *
 * Byte sizes of the three cursor buffers that vgpu_present_init carves out of
 * the single arena: VGPU_CUR_BGRA backs ctx->cursor.bgra, and VGPU_CUR_MASK is
 * used once each for ctx->cursor.and_mask and ctx->cursor.xor_mask.
 * NOTE(review): VGPU_CUR_MAX is presumably the largest supported cursor edge in
 * pixels (4 bytes per pixel for BGRA, 1 byte per pixel for each mask) — confirm
 * against cursor.h. */
#define VGPU_CUR_MAX 256u
#define VGPU_CUR_BGRA (VGPU_CUR_MAX * VGPU_CUR_MAX * 4u)
#define VGPU_CUR_MASK (VGPU_CUR_MAX * VGPU_CUR_MAX)
/*
 * Current QueryPerformanceCounter reading converted to nanoseconds.
 *
 * The counter frequency is queried on first use and cached in a function-local
 * static. The conversion is done in integer arithmetic, split into whole
 * seconds plus a sub-second remainder: converting through double would lose
 * nanosecond resolution once the result exceeds 2^53 ns, and multiplying the
 * raw tick count by 1e9 directly could overflow 64 bits.
 *
 * Returns 0 if the frequency is reported as zero (avoids a division by zero).
 */
static uint64_t now_ns(void) {
    static LARGE_INTEGER freq = { .QuadPart = 0 };
    if (freq.QuadPart == 0) {
        QueryPerformanceFrequency(&freq);
    }

    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);

    const uint64_t hz = (uint64_t)freq.QuadPart;
    const uint64_t ticks = (uint64_t)counter.QuadPart;
    if (hz == 0) {
        return 0;
    }

    /* (ticks % hz) < hz, so the second product stays within 64 bits for any
     * frequency below roughly 1.8e10 Hz. */
    const uint64_t whole_sec = ticks / hz;
    const uint64_t frac_ticks = ticks % hz;
    return whole_sec * 1000000000ull + (frac_ticks * 1000000000ull) / hz;
}
/*
 * Initialise a presenter context on top of an existing region.
 *
 * ctx          context to initialise; it is zeroed first.
 * region       supplies the producer / control / ring pointers copied into
 *              ctx->view.
 * default_fps  fallback publish rate; 0 selects 30.
 *
 * Allocates one arena holding the content buffer, the frame buffer and the
 * three cursor buffers, creates the auto-reset submit event, and initialises
 * ctx->lock.
 *
 * Returns 0 on success, 1 on failure. On failure nothing is left allocated:
 * ctx->arena and ctx->submit_event stay NULL and ctx->lock is NOT initialised.
 */
int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps) {
    memset(ctx, 0, sizeof *ctx);
    ctx->view.producer = region->producer;
    ctx->view.control = region->control;
    ctx->view.ring = region->ring;
    ctx->default_fps = default_fps ? default_fps : 30u;
    ctx->backend = VGPU_BK_NONE;
    ctx->draw_cursor_cap = 1;

    /* one arena: content + frame + cursor buffers. Accumulate in size_t so the
     * sum is not evaluated in a narrower type than the allocation size. */
    size_t bytes = 0;
    bytes += VGPU_STAGING_BYTES; /* content */
    bytes += VGPU_STAGING_BYTES; /* frame */
    bytes += VGPU_CUR_BGRA;      /* cursor bgra */
    bytes += VGPU_CUR_MASK;      /* and */
    bytes += VGPU_CUR_MASK;      /* xor */

    uint8_t* a = (uint8_t*)VirtualAlloc(NULL, bytes, MEM_RESERVE | MEM_COMMIT,
                                        PAGE_READWRITE);
    if (!a) {
        fprintf(stderr, "present: arena VirtualAlloc %zu MiB failed (%lu)\n",
                bytes / (1024 * 1024), GetLastError());
        return 1;
    }

    /* Create the wake-up event before publishing anything into ctx. Without a
     * valid event the wait in the run loop cannot block and would spin, so a
     * failure here must fail the whole init rather than be ignored. */
    HANDLE ev = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (!ev) {
        DWORD err = GetLastError(); /* capture before VirtualFree can change it */
        VirtualFree(a, 0, MEM_RELEASE);
        fprintf(stderr, "present: submit event CreateEvent failed (%lu)\n",
                (unsigned long)err);
        return 1;
    }

    ctx->arena = a;
    ctx->arena_bytes = bytes;

    /* carve the arena in the same order the sizes were summed */
    size_t off = 0;
    ctx->content_buf = a + off;     off += VGPU_STAGING_BYTES;
    ctx->frame_buf = a + off;       off += VGPU_STAGING_BYTES;
    ctx->cursor.bgra = a + off;     off += VGPU_CUR_BGRA;
    ctx->cursor.and_mask = a + off; off += VGPU_CUR_MASK;
    ctx->cursor.xor_mask = a + off; off += VGPU_CUR_MASK;

    ctx->submit_event = ev;
    InitializeCriticalSection(&ctx->lock);
    ctx->content_seq = 0;
    ctx->content_w = ctx->content_h = 0;
    return 0;
}
/*
 * Release everything vgpu_present_init acquired: the submit event, the lock
 * and the arena.
 *
 * Safe to call after a failed init and safe to call twice. vgpu_present_init
 * sets ctx->arena non-NULL only on the path that also initialises ctx->lock,
 * so ctx->arena doubles as the "lock is initialised" flag; the critical
 * section is deleted only in that case.
 */
void vgpu_present_deinit(vgpu_ctx* ctx) {
    if (ctx->submit_event) {
        CloseHandle(ctx->submit_event);
        ctx->submit_event = NULL;
    }
    if (ctx->arena) {
        DeleteCriticalSection(&ctx->lock);
        VirtualFree(ctx->arena, 0, MEM_RELEASE);
        ctx->arena = NULL;
        ctx->arena_bytes = 0;
        /* these all pointed into the arena that was just released */
        ctx->content_buf = NULL;
        ctx->frame_buf = NULL;
        ctx->cursor.bgra = NULL;
        ctx->cursor.and_mask = NULL;
        ctx->cursor.xor_mask = NULL;
    }
}
/*
 * Hand a new source image to the presenter.
 *
 * src        first byte of the top source row.
 * W, H       source size; each is clamped to VGPU_MAX_WIDTH / VGPU_MAX_HEIGHT,
 *            and a zero dimension makes the call a no-op.
 * src_pitch  byte distance between consecutive source rows.
 *
 * Rows of W * 4 bytes are copied tightly packed into ctx->content_buf under
 * ctx->lock, the content size and sequence number are updated, and after the
 * lock is dropped the change timestamp is published and the submit event set.
 */
void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* src,
                         uint32_t W, uint32_t H, uint32_t src_pitch) {
    W = (W > VGPU_MAX_WIDTH) ? VGPU_MAX_WIDTH : W;
    H = (H > VGPU_MAX_HEIGHT) ? VGPU_MAX_HEIGHT : H;
    if (!W || !H) {
        return;
    }

    const uint32_t row_bytes = W * 4u;

    EnterCriticalSection(&ctx->lock);
    {
        uint8_t* const staging = ctx->content_buf;
        size_t dst_off = 0; /* destination rows are packed back to back */
        size_t src_off = 0; /* source rows advance by the caller's pitch */
        for (uint32_t line = 0; line < H; line++) {
            memcpy(staging + dst_off, src + src_off, row_bytes);
            dst_off += row_bytes;
            src_off += src_pitch;
        }
        ctx->content_w = W;
        ctx->content_h = H;
        ctx->content_seq++;
    }
    LeaveCriticalSection(&ctx->lock);

    /* static-idle: record when the source delivered new content; done outside
     * the lock, then wake the presenter loop. */
    vgpu_publish_content_change(&ctx->view, now_ns());
    SetEvent(ctx->submit_event);
}
/*
 * Presenter loop. Does not return: the for(;;) below has no break or return.
 *
 * Before the loop, display geometry is sampled and published once. Each
 * iteration then:
 *   1. waits on ctx->submit_event for at most poll_ms (8 ms);
 *   2. ticks the heartbeat once VGPU_HEARTBEAT_PERIOD_MS of GetTickCount time
 *      has elapsed since the last tick;
 *   3. reads the control block; on a successful read it publishes the applied
 *      fps and the control ack, and notes a pending full-frame request;
 *   4. applies lifecycle status changes (CAPTURING / PAUSED / STOPPED);
 *   5. if the desired state is RUN and a frame is both new and due, copies
 *      ctx->content_buf into ctx->frame_buf under ctx->lock, optionally draws
 *      the cursor into it, and publishes it with vgpu_publish_frame.
 *
 * NOTE(review): last_seq is advanced when the frame is taken, before
 * vgpu_publish_frame is attempted. If publishing fails, that content is not
 * retried unless the content sequence or cursor changes again or a full-frame
 * request is still pending — confirm this is intended.
 */
void vgpu_present_run(vgpu_ctx* ctx) {
const vgpu_region_view* rv = &ctx->view; /* neutral handle for the engine */
const DWORD poll_ms = 8;
int64_t last_seq = -1;
uint32_t prev_state = VGPU_CMD_STOP;
uint32_t last_ff_ack = rv->producer->full_frame_ack;
DWORD last_beat = GetTickCount();
uint64_t last_publish_ns = 0; /* 0 → first eligible frame publishes immediately */
int last_cur_x = 0, last_cur_y = 0, last_cur_vis = 0;
HCURSOR last_cur_handle = NULL;
/* one-shot display geometry: publish once before the loop (flat pull contract). The
 * captured-output origin is (0,0) for the primary/full-screen capture path; backends
 * resample reactively on recreate / capture-size change. No periodic poll in the loop. */
geometry_sample_and_publish(ctx, 0, 0);
for (;;) {
/* wake on a submit or after poll_ms; the wait result is not inspected */
WaitForSingleObject(ctx->submit_event, poll_ms);
/* --- heartbeat: always ticks, independent of desired_state --- */
DWORD nowt = GetTickCount();
/* unsigned DWORD subtraction, so a GetTickCount wrap still yields the elapsed time */
if (nowt - last_beat >= VGPU_HEARTBEAT_PERIOD_MS) {
vgpu_tick_heartbeat(rv);
last_beat = nowt;
}
/* --- reconcile control (gen-seqlock -> apply -> ack) --- */
/* Fallbacks used when vgpu_control_read returns 0 this iteration: keep the
 * previous state, draw the cursor, use the default fps, no full-frame edge. */
vgpu_control_view cv;
uint32_t desired = prev_state;
uint32_t draw_cursor = 1;
int force_full = 0;
uint32_t fps = ctx->default_fps; /* publish-rate cap (applied) */
uint32_t ff_req = last_ff_ack; /* full_frame_req value to honor */
if (vgpu_control_read(rv, &cv)) {
desired = cv.desired_state;
draw_cursor = cv.draw_cursor;
fps = cv.target_fps ? cv.target_fps : ctx->default_fps;
vgpu_set_applied_fps(rv, fps);
vgpu_publish_ctrl_ack(rv, cv.gen);
ff_req = cv.full_frame_req;
if ((ff_req - last_ff_ack) != 0u)
force_full = 1; /* edge pending, wrap-tolerant */
}
/* --- lifecycle transitions --- */
if (desired != prev_state) {
if (desired == VGPU_CMD_RUN && prev_state != VGPU_CMD_RUN) {
vgpu_bump_run_epoch(rv);
vgpu_set_status(rv, VGPU_ST_CAPTURING);
force_full = 1; /* fresh frame on start */
} else if (desired == VGPU_CMD_PAUSE) {
vgpu_set_status(rv, VGPU_ST_PAUSED);
} else if (desired == VGPU_CMD_STOP) {
vgpu_set_status(rv, VGPU_ST_STOPPED);
}
prev_state = desired;
} else if (last_seq < 0 && desired == VGPU_CMD_RUN) {
/* already in RUN but no frame taken yet: status is re-set on every pass */
vgpu_set_status(rv, VGPU_ST_CAPTURING);
}
if (desired != VGPU_CMD_RUN) {
/* PAUSED/STOPPED: no new frames; heartbeat still ticks. We do NOT
 * ack a pending full_frame here — acking without publishing would
 * be a false "honored". A pending request is honored on the next
 * transition to RUN (force_full=1 there → publish + ack). */
continue;
}
/* --- compose + publish on content change OR forced full frame, but
 * rate-limited to the applied fps cap (the single publish point →
 * contract-level cap, independent of the capture backend). A
 * force_full bypasses the cap (due=1). present does NOT sample the
 * cursor (capture threads source it); it only reads ctx->cursor under
 * ctx->lock for compositing, and detects cursor motion via a delta so
 * a pure cursor move over static desktop still recomposes. --- */
uint64_t interval_ns = fps > 0 ? (1000000000ull / fps) : 0;
uint64_t now = now_ns();
int due = force_full || interval_ns == 0
|| (now - last_publish_ns) >= interval_ns;
int compose_cursor = (ctx->draw_cursor_cap && draw_cursor);
EnterCriticalSection(&ctx->lock);
int64_t seq = ctx->content_seq;
uint32_t W = ctx->content_w, H = ctx->content_h;
int cur_changed = compose_cursor
&& ((ctx->cursor.visible != last_cur_vis)
|| (ctx->cursor.x != last_cur_x)
|| (ctx->cursor.y != last_cur_y)
|| (ctx->cursor.handle != last_cur_handle));
int have = (W && H);
int content_new = have && (seq != last_seq || cur_changed || force_full);
/* take the frame ONLY when due — so we never drop the latest content;
 * if not due, last_seq is left untouched and it publishes next due. */
int dirty = content_new && due;
if (dirty) {
memcpy(ctx->frame_buf, ctx->content_buf, (size_t)W * H * 4u);
last_seq = seq;
if (compose_cursor)
cursor_draw(ctx, ctx->frame_buf, W, H);
/* remember the cursor state as of this frame, whether or not it was drawn */
last_cur_vis = ctx->cursor.visible;
last_cur_x = ctx->cursor.x; last_cur_y = ctx->cursor.y;
last_cur_handle = ctx->cursor.handle;
}
LeaveCriticalSection(&ctx->lock);
if (!dirty) {
/* not due, or nothing to publish. A force_full with content has
 * due=1 → dirty=1, so it never lands here while have is true; thus
 * no spurious ack edge. */
continue;
}
/* publish outside the lock; frame_buf is only written by this loop */
if (vgpu_publish_frame(rv, ctx->frame_buf, W, H, now) == 0) {
last_publish_ns = now;
if (force_full) {
vgpu_publish_full_frame_ack(rv, ff_req);
last_ff_ack = ff_req;
}
} else {
vgpu_set_error(rv, 1u); /* frame too large for slot (mode > max) */
}
}
}