mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-25 20:36:36 +03:00
213 lines
8.9 KiB
C
213 lines
8.9 KiB
C
|
|
#define WIN32_LEAN_AND_MEAN
|
||
|
|
#include <windows.h>
|
||
|
|
#include <string.h>
|
||
|
|
#include <stdio.h>
|
||
|
|
#include "present.h"
|
||
|
|
#include "stream.h" /* OS-agnostic publish / control API + region-view */
|
||
|
|
#include "cursor.h"
|
||
|
|
#include "geometry.h" /* one-shot display-geometry sample at session start */
|
||
|
|
|
||
|
|
/*
 * Cursor arena sizing.
 *
 * VGPU_CUR_MAX  - largest cursor edge length, in pixels, the arena reserves room for.
 * VGPU_CUR_MASK - bytes reserved for one mask plane (one byte per pixel).
 * VGPU_CUR_BGRA - bytes reserved for the colour image (four bytes per pixel).
 */
#define VGPU_CUR_MAX  256u
#define VGPU_CUR_MASK (VGPU_CUR_MAX * VGPU_CUR_MAX)
#define VGPU_CUR_BGRA (VGPU_CUR_MASK * 4u)
|
||
|
|
|
||
|
|
static uint64_t now_ns(void) {
|
||
|
|
static LARGE_INTEGER freq = { .QuadPart = 0 };
|
||
|
|
if (freq.QuadPart == 0) QueryPerformanceFrequency(&freq);
|
||
|
|
LARGE_INTEGER c; QueryPerformanceCounter(&c);
|
||
|
|
return (uint64_t)((double)c.QuadPart * 1e9 / (double)freq.QuadPart);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
 * Initialise a presenter context.
 *
 * Zeroes *ctx, copies the producer/control/ring pointers from `region` into the
 * context's region view, allocates a single arena that is carved into the content
 * buffer, the frame buffer and the three cursor buffers (BGRA, AND mask, XOR mask),
 * and creates the lock and the auto-reset submit event.
 *
 * ctx          context to initialise; any previous contents are overwritten.
 * region       source of the producer/control/ring pointers.
 * default_fps  publish-rate default; 0 selects 30.
 *
 * Returns 0 on success, 1 if the arena or the submit event could not be created.
 * On failure nothing is left allocated: ctx->arena and ctx->submit_event stay NULL
 * and the lock is not initialised.
 */
int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps) {
    memset(ctx, 0, sizeof *ctx);
    ctx->view.producer   = region->producer;
    ctx->view.control    = region->control;
    ctx->view.ring       = region->ring;
    ctx->default_fps     = default_fps ? default_fps : 30u;
    ctx->backend         = VGPU_BK_NONE;
    ctx->draw_cursor_cap = 1;

    /* one arena: content + frame + cursor buffers */
    size_t bytes = VGPU_STAGING_BYTES   /* content */
                 + VGPU_STAGING_BYTES   /* frame */
                 + VGPU_CUR_BGRA        /* cursor bgra */
                 + VGPU_CUR_MASK        /* and */
                 + VGPU_CUR_MASK;       /* xor */
    uint8_t* a = (uint8_t*)VirtualAlloc(NULL, bytes, MEM_RESERVE | MEM_COMMIT,
                                        PAGE_READWRITE);
    if (!a) {
        fprintf(stderr, "present: arena VirtualAlloc %zu MiB failed (%lu)\n",
                bytes / (1024 * 1024), GetLastError());
        return 1;
    }

    /* Create the event before wiring anything into ctx, so a failure here only has
     * the arena to undo. Without this check a NULL event would be stored and every
     * later wait on it would fail immediately. */
    HANDLE ev = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (!ev) {
        /* read the error code before VirtualFree can overwrite it */
        fprintf(stderr, "present: submit event CreateEvent failed (%lu)\n",
                GetLastError());
        VirtualFree(a, 0, MEM_RELEASE);
        return 1;
    }

    ctx->arena       = a;
    ctx->arena_bytes = bytes;

    /* carve the arena in the same order the size was summed */
    size_t off = 0;
    ctx->content_buf     = a + off; off += VGPU_STAGING_BYTES;
    ctx->frame_buf       = a + off; off += VGPU_STAGING_BYTES;
    ctx->cursor.bgra     = a + off; off += VGPU_CUR_BGRA;
    ctx->cursor.and_mask = a + off; off += VGPU_CUR_MASK;
    ctx->cursor.xor_mask = a + off; off += VGPU_CUR_MASK;

    InitializeCriticalSection(&ctx->lock);
    ctx->submit_event = ev;
    ctx->content_seq  = 0;
    ctx->content_w = ctx->content_h = 0;
    return 0;
}
|
||
|
|
|
||
|
|
/*
 * Release everything vgpu_present_init() acquired: the submit event, the lock and
 * the arena.
 *
 * Safe to call on a context whose init failed and safe to call twice. The lock is
 * only deleted while the arena is still held, because init initialises the lock
 * only after the arena allocation has succeeded; a NULL arena therefore means there
 * is no initialised lock to delete. A NULL ctx is ignored.
 */
void vgpu_present_deinit(vgpu_ctx* ctx) {
    if (!ctx) return;

    if (ctx->submit_event) {
        CloseHandle(ctx->submit_event);
        ctx->submit_event = NULL;
    }

    if (ctx->arena) {
        DeleteCriticalSection(&ctx->lock);
        VirtualFree(ctx->arena, 0, MEM_RELEASE);
        ctx->arena = NULL;
    }
}
|
||
|
|
|
||
|
|
/*
 * Hand a new source image to the presenter.
 *
 * Copies W x H pixels (4 bytes per pixel) from `src` into ctx->content_buf under
 * ctx->lock, records the new size, bumps ctx->content_seq, publishes a
 * content-change timestamp and signals ctx->submit_event.
 *
 * ctx        presenter context.
 * src        first byte of the top source row; a NULL pointer is ignored.
 * W, H       source size in pixels; clamped to VGPU_MAX_WIDTH / VGPU_MAX_HEIGHT.
 *            A zero dimension is ignored.
 * src_pitch  distance in bytes between the starts of consecutive source rows.
 *            A pitch smaller than the copied row (W * 4 after clamping) is rejected:
 *            copying with it would read past the end of the source rows.
 *
 * Rejected submissions return without touching the context or signalling the event.
 */
void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* src,
                         uint32_t W, uint32_t H, uint32_t src_pitch) {
    if (!src) return;
    if (W > VGPU_MAX_WIDTH) W = VGPU_MAX_WIDTH;
    if (H > VGPU_MAX_HEIGHT) H = VGPU_MAX_HEIGHT;
    if (W == 0 || H == 0) return;

    const uint32_t row = W * 4u;
    if (src_pitch < row) return;

    EnterCriticalSection(&ctx->lock);
    uint8_t* d = ctx->content_buf;
    if (src_pitch == row) {
        /* rows are contiguous in the source: one copy covers the whole image */
        memcpy(d, src, (size_t)row * H);
    } else {
        /* destination is tightly packed; skip the source padding row by row */
        for (uint32_t y = 0; y < H; y++)
            memcpy(d + (size_t)y * row, src + (size_t)y * src_pitch, row);
    }
    ctx->content_w = W;
    ctx->content_h = H;
    ctx->content_seq++;
    LeaveCriticalSection(&ctx->lock);

    /* static-idle: stamp the moment the source delivered new content (the raw
     * perception; the host derives "ms idle" from its own clock). Done outside the
     * lock. */
    vgpu_publish_content_change(&ctx->view, now_ns());
    SetEvent(ctx->submit_event);
}
|
||
|
|
|
||
|
|
/*
 * Presenter loop: never returns.
 *
 * Each iteration waits up to 8 ms for a submit, then:
 *   1. ticks the heartbeat when VGPU_HEARTBEAT_PERIOD_MS has elapsed;
 *   2. reads the control block and, on a usable snapshot, applies the desired
 *      state, cursor flag and fps cap and acknowledges the control generation;
 *   3. publishes status on lifecycle transitions;
 *   4. while in RUN, copies the latest content into the frame buffer, optionally
 *      composites the cursor, and publishes the frame, rate-limited to the applied
 *      fps unless a full frame is being forced.
 *
 * The cursor flag and the fps cap are kept across iterations: an iteration whose
 * control read yields no snapshot keeps using the last applied values, exactly as
 * the desired state already does, instead of dropping back to the defaults for
 * that one iteration.
 */
void vgpu_present_run(vgpu_ctx* ctx) {
    const vgpu_region_view* rv = &ctx->view;   /* neutral handle for the engine */
    const DWORD poll_ms = 8;
    int64_t  last_seq = -1;                    /* -1: nothing published yet */
    uint32_t prev_state = VGPU_CMD_STOP;
    uint32_t last_ff_ack = rv->producer->full_frame_ack;
    DWORD    last_beat = GetTickCount();
    uint64_t last_publish_ns = 0;   /* 0 -> first eligible frame publishes immediately */
    int      last_cur_x = 0, last_cur_y = 0, last_cur_vis = 0;
    HCURSOR  last_cur_handle = NULL;

    /* last applied control values; start at the defaults, then stick */
    uint32_t draw_cursor = 1;
    uint32_t fps = ctx->default_fps;           /* publish-rate cap (applied) */

    /* one-shot display geometry: publish once before the loop (flat pull contract).
     * The captured-output origin is (0,0) for the primary/full-screen capture path;
     * backends resample reactively on recreate / capture-size change. No periodic
     * poll in the loop. */
    geometry_sample_and_publish(ctx, 0, 0);

    for (;;) {
        /* A failed wait (e.g. an invalid event handle) returns immediately; sleep
         * for the poll period instead so the loop cannot busy-spin. */
        if (WaitForSingleObject(ctx->submit_event, poll_ms) == WAIT_FAILED)
            Sleep(poll_ms);

        /* --- heartbeat: always ticks, independent of desired_state --- */
        DWORD nowt = GetTickCount();
        if (nowt - last_beat >= VGPU_HEARTBEAT_PERIOD_MS) {   /* wrap-safe unsigned delta */
            vgpu_tick_heartbeat(rv);
            last_beat = nowt;
        }

        /* --- reconcile control (gen-seqlock -> apply -> ack) --- */
        vgpu_control_view cv;
        uint32_t desired = prev_state;
        int      force_full = 0;
        uint32_t ff_req = last_ff_ack;         /* full_frame_req value to honor */
        if (vgpu_control_read(rv, &cv)) {
            desired     = cv.desired_state;
            draw_cursor = cv.draw_cursor;
            fps         = cv.target_fps ? cv.target_fps : ctx->default_fps;
            vgpu_set_applied_fps(rv, fps);
            vgpu_publish_ctrl_ack(rv, cv.gen);

            ff_req = cv.full_frame_req;
            if ((ff_req - last_ff_ack) != 0u)
                force_full = 1;                /* edge pending, wrap-tolerant */
        }

        /* --- lifecycle transitions --- */
        if (desired != prev_state) {
            if (desired == VGPU_CMD_RUN && prev_state != VGPU_CMD_RUN) {
                vgpu_bump_run_epoch(rv);
                vgpu_set_status(rv, VGPU_ST_CAPTURING);
                force_full = 1;                /* fresh frame on start */
            } else if (desired == VGPU_CMD_PAUSE) {
                vgpu_set_status(rv, VGPU_ST_PAUSED);
            } else if (desired == VGPU_CMD_STOP) {
                vgpu_set_status(rv, VGPU_ST_STOPPED);
            }
            prev_state = desired;
        } else if (last_seq < 0 && desired == VGPU_CMD_RUN) {
            /* in RUN but nothing published yet: keep reporting CAPTURING */
            vgpu_set_status(rv, VGPU_ST_CAPTURING);
        }

        if (desired != VGPU_CMD_RUN) {
            /* PAUSED/STOPPED: no new frames; heartbeat still ticks. We do NOT
             * ack a pending full_frame here - acking without publishing would
             * be a false "honored". A pending request is honored on the next
             * transition to RUN (force_full=1 there -> publish + ack). */
            continue;
        }

        /* --- compose + publish on content change OR forced full frame, but
         * rate-limited to the applied fps cap (the single publish point ->
         * contract-level cap, independent of the capture backend). A
         * force_full bypasses the cap (due=1). present does NOT sample the
         * cursor (capture threads source it); it only reads ctx->cursor under
         * ctx->lock for compositing, and detects cursor motion via a delta so
         * a pure cursor move over static desktop still recomposes. --- */
        uint64_t interval_ns = fps > 0 ? (1000000000ull / fps) : 0;
        uint64_t now = now_ns();
        int due = force_full || interval_ns == 0
               || (now - last_publish_ns) >= interval_ns;

        int compose_cursor = (ctx->draw_cursor_cap && draw_cursor);

        EnterCriticalSection(&ctx->lock);
        int64_t  seq = ctx->content_seq;
        uint32_t W = ctx->content_w, H = ctx->content_h;
        int cur_changed = compose_cursor
                       && ((ctx->cursor.visible != last_cur_vis)
                        || (ctx->cursor.x != last_cur_x)
                        || (ctx->cursor.y != last_cur_y)
                        || (ctx->cursor.handle != last_cur_handle));
        int have = (W && H);
        int content_new = have && (seq != last_seq || cur_changed || force_full);
        /* take the frame ONLY when due - so we never drop the latest content;
         * if not due, last_seq is left untouched and it publishes next due. */
        int dirty = content_new && due;
        if (dirty) {
            memcpy(ctx->frame_buf, ctx->content_buf, (size_t)W * H * 4u);
            last_seq = seq;
            if (compose_cursor)
                cursor_draw(ctx, ctx->frame_buf, W, H);
            /* remember the cursor state this frame was composed against */
            last_cur_vis = ctx->cursor.visible;
            last_cur_x = ctx->cursor.x; last_cur_y = ctx->cursor.y;
            last_cur_handle = ctx->cursor.handle;
        }
        LeaveCriticalSection(&ctx->lock);

        if (!dirty) {
            /* not due, or nothing to publish. A force_full with content has
             * due=1 -> dirty=1, so it never lands here while have is true; thus
             * no spurious ack edge. */
            continue;
        }

        if (vgpu_publish_frame(rv, ctx->frame_buf, W, H, now) == 0) {
            last_publish_ns = now;
            if (force_full) {
                /* ack only after the frame really went out */
                vgpu_publish_full_frame_ack(rv, ff_req);
                last_ff_ack = ff_req;
            }
        } else {
            vgpu_set_error(rv, 1u);   /* frame too large for slot (mode > max) */
        }
    }
}
|