mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-26 04:36:37 +03:00
vgpu in-guest producer in-tree, release CI, flexible vmie discovery
- src/si/vgpu-stream: in-guest vgpu producer built as a Windows cross-compiled target (if(WIN32)) - .gitea: release workflow — cross-build the agent and build/publish the deb against system vmie - cmake/makefile: resolve vmie from a source tree (LIBVMIE_PATH) or installed libvmie-dev
This commit is contained in:
@@ -0,0 +1,212 @@
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "present.h"
|
||||
#include "stream.h" /* OS-agnostic publish / control API + region-view */
|
||||
#include "cursor.h"
|
||||
#include "geometry.h" /* one-shot display-geometry sample at session start */
|
||||
|
||||
/* cursor arena sizing
 *
 * VGPU_CUR_MAX  - edge length used to size the cursor buffers below.
 * VGPU_CUR_BGRA - byte size of the cursor colour buffer: MAX * MAX entries of
 *                 4 bytes each (presumably one BGRA pixel per entry, going by
 *                 the name - confirm against cursor.h).
 * VGPU_CUR_MASK - byte size of one cursor mask buffer: MAX * MAX bytes.
 *                 vgpu_present_init() reserves two regions of this size, one
 *                 for cursor.and_mask and one for cursor.xor_mask. */
|
||||
#define VGPU_CUR_MAX 256u
|
||||
#define VGPU_CUR_BGRA (VGPU_CUR_MAX * VGPU_CUR_MAX * 4u)
|
||||
#define VGPU_CUR_MASK (VGPU_CUR_MAX * VGPU_CUR_MAX)
|
||||
|
||||
static uint64_t now_ns(void) {
|
||||
static LARGE_INTEGER freq = { .QuadPart = 0 };
|
||||
if (freq.QuadPart == 0) QueryPerformanceFrequency(&freq);
|
||||
LARGE_INTEGER c; QueryPerformanceCounter(&c);
|
||||
return (uint64_t)((double)c.QuadPart * 1e9 / (double)freq.QuadPart);
|
||||
}
|
||||
|
||||
/*
 * Initialise a presenter context.
 *
 * Zeroes *ctx, copies the producer/control/ring handles out of `region` into
 * ctx->view, allocates a single VirtualAlloc arena that backs the content,
 * frame and cursor buffers, and creates the lock and the auto-reset submit
 * event used by vgpu_present_submit() / vgpu_present_run().
 *
 * ctx         - context to initialise (overwritten).
 * region      - source of the producer / control / ring handles.
 * default_fps - publish-rate cap used when the control block gives none;
 *               0 selects 30.
 *
 * Returns 0 on success, 1 on failure (NULL argument, arena allocation failure
 * or event creation failure). On failure no arena, event or critical section
 * is left behind: ctx->arena and ctx->submit_event are NULL.
 */
int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps) {
    if (!ctx || !region) return 1;

    memset(ctx, 0, sizeof *ctx);
    ctx->view.producer   = region->producer;
    ctx->view.control    = region->control;
    ctx->view.ring       = region->ring;
    ctx->default_fps     = default_fps ? default_fps : 30u;
    ctx->backend         = VGPU_BK_NONE;
    ctx->draw_cursor_cap = 1;

    /* one arena: content + frame + cursor buffers */
    size_t bytes = VGPU_STAGING_BYTES      /* content     */
                 + VGPU_STAGING_BYTES      /* frame       */
                 + VGPU_CUR_BGRA           /* cursor bgra */
                 + VGPU_CUR_MASK           /* and         */
                 + VGPU_CUR_MASK;          /* xor         */
    uint8_t* a = (uint8_t*)VirtualAlloc(NULL, bytes, MEM_RESERVE | MEM_COMMIT,
                                        PAGE_READWRITE);
    if (!a) {
        fprintf(stderr, "present: arena VirtualAlloc %zu MiB failed (%lu)\n",
                bytes / (1024 * 1024), GetLastError());
        return 1;
    }

    /* Create the submit event before publishing anything into ctx, so that a
     * failure only has the arena to unwind. Without this check a NULL event
     * would make every wait in vgpu_present_run() fail immediately. */
    HANDLE ev = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (!ev) {
        DWORD err = GetLastError();   /* capture before VirtualFree can change it */
        VirtualFree(a, 0, MEM_RELEASE);
        fprintf(stderr, "present: submit event CreateEvent failed (%lu)\n", err);
        return 1;
    }

    ctx->arena       = a;
    ctx->arena_bytes = bytes;

    /* carve the arena in the same order the sizes were summed above */
    size_t off = 0;
    ctx->content_buf     = a + off; off += VGPU_STAGING_BYTES;
    ctx->frame_buf       = a + off; off += VGPU_STAGING_BYTES;
    ctx->cursor.bgra     = a + off; off += VGPU_CUR_BGRA;
    ctx->cursor.and_mask = a + off; off += VGPU_CUR_MASK;
    ctx->cursor.xor_mask = a + off; off += VGPU_CUR_MASK;

    InitializeCriticalSection(&ctx->lock);
    ctx->submit_event = ev;
    ctx->content_seq  = 0;
    ctx->content_w = ctx->content_h = 0;
    return 0;
}
|
||||
|
||||
/*
 * Release everything vgpu_present_init() acquired: the submit event, the
 * critical section and the buffer arena.
 *
 * Safe to call on a context whose init failed and safe to call twice:
 * vgpu_present_init() only initialises ctx->lock after the arena has been
 * allocated, so a non-NULL ctx->arena is used as the marker that the lock is
 * live. This avoids deleting a critical section that was never initialised
 * or has already been deleted.
 *
 * The buffer pointers carved out of the arena (content_buf, frame_buf,
 * cursor.*) are not reset here and must not be used afterwards.
 */
void vgpu_present_deinit(vgpu_ctx* ctx) {
    if (!ctx) return;

    if (ctx->submit_event) {
        CloseHandle(ctx->submit_event);
        ctx->submit_event = NULL;
    }

    if (ctx->arena) {
        DeleteCriticalSection(&ctx->lock);
        VirtualFree(ctx->arena, 0, MEM_RELEASE);
        ctx->arena = NULL;
    }
}
|
||||
|
||||
/*
 * Hand a newly captured image to the presenter.
 *
 * Copies the top-left W x H area of `src` (4 bytes per pixel, rows
 * `src_pitch` bytes apart) into ctx->content_buf as tightly packed rows,
 * bumps ctx->content_seq under ctx->lock, stamps the content-change time and
 * signals ctx->submit_event so vgpu_present_run() wakes up.
 *
 * ctx       - presenter context set up by vgpu_present_init().
 * src       - first byte of the source image.
 * W, H      - source size in pixels; clamped to VGPU_MAX_WIDTH /
 *             VGPU_MAX_HEIGHT.
 * src_pitch - distance in bytes between consecutive source rows.
 *
 * The call is dropped (nothing copied, nothing signalled) when ctx or src is
 * NULL, when W or H is zero, or when src_pitch is smaller than one copied row:
 * in that last case the final row's memcpy would read past the end of a
 * pitch-sized source image.
 */
void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* src,
                         uint32_t W, uint32_t H, uint32_t src_pitch) {
    if (!ctx || !src) return;
    if (W > VGPU_MAX_WIDTH)  W = VGPU_MAX_WIDTH;
    if (H > VGPU_MAX_HEIGHT) H = VGPU_MAX_HEIGHT;
    if (W == 0 || H == 0) return;

    const uint32_t row = W * 4u;          /* bytes per packed destination row */
    if (src_pitch < row) return;          /* source rows shorter than what we copy */

    EnterCriticalSection(&ctx->lock);
    uint8_t* d = ctx->content_buf;
    for (uint32_t y = 0; y < H; y++)
        memcpy(d + (size_t)y * row, src + (size_t)y * src_pitch, row);
    ctx->content_w = W;
    ctx->content_h = H;
    ctx->content_seq++;
    LeaveCriticalSection(&ctx->lock);

    /* static-idle: stamp the moment the source delivered new content (the raw perception;
     * the host derives "ms idle" from its own clock). Single 8-aligned MOV, off the lock. */
    vgpu_publish_content_change(&ctx->view, now_ns());
    SetEvent(ctx->submit_event);
}
|
||||
|
||||
/*
 * Presenter main loop. Does not return: the loop has no exit path.
 *
 * Publishes the display geometry once, then on every wake-up (a submit, or
 * poll_ms elapsing):
 *   1. ticks the heartbeat once VGPU_HEARTBEAT_PERIOD_MS has elapsed;
 *   2. reads the control block and, if the read succeeds, applies and acks it;
 *   3. performs lifecycle transitions (RUN / PAUSE / STOP) and sets status;
 *   4. while in RUN, snapshots ctx->content_buf into ctx->frame_buf under
 *      ctx->lock, optionally draws the cursor, and publishes the frame,
 *      rate-limited to the applied fps unless a full frame is being forced.
 *
 * When vgpu_control_read() returns 0 the iteration keeps the previous
 * desired state but uses draw_cursor = 1 and ctx->default_fps for that pass.
 */
void vgpu_present_run(vgpu_ctx* ctx) {
    const vgpu_region_view* rv = &ctx->view;   /* neutral handle for the engine */
    const DWORD poll_ms = 8;
    int64_t  last_seq = -1;                    /* -1: nothing published yet */
    uint32_t prev_state = VGPU_CMD_STOP;
    uint32_t last_ff_ack = rv->producer->full_frame_ack;
    DWORD    last_beat = GetTickCount();
    uint64_t last_publish_ns = 0;              /* 0 → first eligible frame publishes immediately */
    int      last_cur_x = 0, last_cur_y = 0, last_cur_vis = 0;
    HCURSOR  last_cur_handle = NULL;

    /* one-shot display geometry: publish once before the loop (flat pull contract). The
     * captured-output origin is (0,0) for the primary/full-screen capture path; backends
     * resample reactively on recreate / capture-size change. No periodic poll in the loop. */
    geometry_sample_and_publish(ctx, 0, 0);

    for (;;) {
        /* Wake on a submit or after poll_ms. A failed wait (e.g. an invalid
         * event handle) returns immediately; sleep for the poll period in that
         * case so the loop keeps its cadence instead of spinning a core. */
        if (WaitForSingleObject(ctx->submit_event, poll_ms) == WAIT_FAILED)
            Sleep(poll_ms);

        /* --- heartbeat: always ticks, independent of desired_state --- */
        DWORD nowt = GetTickCount();
        if (nowt - last_beat >= VGPU_HEARTBEAT_PERIOD_MS) {   /* unsigned diff: wrap-safe */
            vgpu_tick_heartbeat(rv);
            last_beat = nowt;
        }

        /* --- reconcile control (gen-seqlock -> apply -> ack) --- */
        vgpu_control_view cv;
        uint32_t desired = prev_state;
        uint32_t draw_cursor = 1;
        int force_full = 0;
        uint32_t fps = ctx->default_fps;       /* publish-rate cap (applied) */
        uint32_t ff_req = last_ff_ack;         /* full_frame_req value to honor */
        if (vgpu_control_read(rv, &cv)) {
            desired = cv.desired_state;
            draw_cursor = cv.draw_cursor;
            fps = cv.target_fps ? cv.target_fps : ctx->default_fps;
            vgpu_set_applied_fps(rv, fps);
            vgpu_publish_ctrl_ack(rv, cv.gen);

            ff_req = cv.full_frame_req;
            if ((ff_req - last_ff_ack) != 0u)
                force_full = 1;                /* edge pending, wrap-tolerant */
        }

        /* --- lifecycle transitions --- */
        if (desired != prev_state) {
            if (desired == VGPU_CMD_RUN && prev_state != VGPU_CMD_RUN) {
                vgpu_bump_run_epoch(rv);
                vgpu_set_status(rv, VGPU_ST_CAPTURING);
                force_full = 1;                /* fresh frame on start */
            } else if (desired == VGPU_CMD_PAUSE) {
                vgpu_set_status(rv, VGPU_ST_PAUSED);
            } else if (desired == VGPU_CMD_STOP) {
                vgpu_set_status(rv, VGPU_ST_STOPPED);
            }
            prev_state = desired;
        } else if (last_seq < 0 && desired == VGPU_CMD_RUN) {
            /* already in RUN but no frame taken yet: keep reporting CAPTURING */
            vgpu_set_status(rv, VGPU_ST_CAPTURING);
        }

        if (desired != VGPU_CMD_RUN) {
            /* PAUSED/STOPPED: no new frames; heartbeat still ticks. We do NOT
             * ack a pending full_frame here — acking without publishing would
             * be a false "honored". A pending request is honored on the next
             * transition to RUN (force_full=1 there → publish + ack). */
            continue;
        }

        /* --- compose + publish on content change OR forced full frame, but
         *     rate-limited to the applied fps cap (the single publish point →
         *     contract-level cap, independent of the capture backend). A
         *     force_full bypasses the cap (due=1). present does NOT sample the
         *     cursor (capture threads source it); it only reads ctx->cursor under
         *     ctx->lock for compositing, and detects cursor motion via a delta so
         *     a pure cursor move over static desktop still recomposes. --- */
        uint64_t interval_ns = fps > 0 ? (1000000000ull / fps) : 0;
        uint64_t now = now_ns();
        int due = force_full || interval_ns == 0
               || (now - last_publish_ns) >= interval_ns;

        int compose_cursor = (ctx->draw_cursor_cap && draw_cursor);

        EnterCriticalSection(&ctx->lock);
        int64_t seq = ctx->content_seq;
        uint32_t W = ctx->content_w, H = ctx->content_h;
        int cur_changed = compose_cursor
            && ((ctx->cursor.visible != last_cur_vis)
             || (ctx->cursor.x != last_cur_x)
             || (ctx->cursor.y != last_cur_y)
             || (ctx->cursor.handle != last_cur_handle));
        int have = (W && H);
        int content_new = have && (seq != last_seq || cur_changed || force_full);
        /* take the frame ONLY when due — so we never drop the latest content;
         * if not due, last_seq is left untouched and it publishes next due. */
        int dirty = content_new && due;
        if (dirty) {
            memcpy(ctx->frame_buf, ctx->content_buf, (size_t)W * H * 4u);
            last_seq = seq;
            if (compose_cursor)
                cursor_draw(ctx, ctx->frame_buf, W, H);
            /* remember the cursor state this frame was composed against */
            last_cur_vis = ctx->cursor.visible;
            last_cur_x = ctx->cursor.x; last_cur_y = ctx->cursor.y;
            last_cur_handle = ctx->cursor.handle;
        }
        LeaveCriticalSection(&ctx->lock);

        if (!dirty) {
            /* not due, or nothing to publish. A force_full with content has
             * due=1 → dirty=1, so it never lands here while have is true; thus
             * no spurious ack edge. */
            continue;
        }

        if (vgpu_publish_frame(rv, ctx->frame_buf, W, H, now) == 0) {
            last_publish_ns = now;
            if (force_full) {
                /* ack only after the frame actually went out */
                vgpu_publish_full_frame_ack(rv, ff_req);
                last_ff_ack = ff_req;
            }
        } else {
            vgpu_set_error(rv, 1u);  /* frame too large for slot (mode > max) */
        }
    }
}
|
||||
Reference in New Issue
Block a user