#define WIN32_LEAN_AND_MEAN #include #include #include #include "present.h" #include "stream.h" /* OS-agnostic publish / control API + region-view */ #include "cursor.h" #include "geometry.h" /* one-shot display-geometry sample at session start */ /* cursor arena sizing */ #define VGPU_CUR_MAX 256u #define VGPU_CUR_BGRA (VGPU_CUR_MAX * VGPU_CUR_MAX * 4u) #define VGPU_CUR_MASK (VGPU_CUR_MAX * VGPU_CUR_MAX) static uint64_t now_ns(void) { static LARGE_INTEGER freq = { .QuadPart = 0 }; if (freq.QuadPart == 0) QueryPerformanceFrequency(&freq); LARGE_INTEGER c; QueryPerformanceCounter(&c); return (uint64_t)((double)c.QuadPart * 1e9 / (double)freq.QuadPart); } int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps) { memset(ctx, 0, sizeof *ctx); ctx->view.producer = region->producer; ctx->view.control = region->control; ctx->view.ring = region->ring; ctx->default_fps = default_fps ? default_fps : 30u; ctx->backend = VGPU_BK_NONE; ctx->draw_cursor_cap = 1; /* one arena: content + frame + cursor buffers */ size_t bytes = VGPU_STAGING_BYTES /* content */ + VGPU_STAGING_BYTES /* frame */ + VGPU_CUR_BGRA /* cursor bgra */ + VGPU_CUR_MASK /* and */ + VGPU_CUR_MASK; /* xor */ uint8_t* a = (uint8_t*)VirtualAlloc(NULL, bytes, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); if (!a) { fprintf(stderr, "present: arena VirtualAlloc %zu MiB failed (%lu)\n", bytes / (1024 * 1024), GetLastError()); return 1; } ctx->arena = a; ctx->arena_bytes = bytes; size_t off = 0; ctx->content_buf = a + off; off += VGPU_STAGING_BYTES; ctx->frame_buf = a + off; off += VGPU_STAGING_BYTES; ctx->cursor.bgra = a + off; off += VGPU_CUR_BGRA; ctx->cursor.and_mask = a + off; off += VGPU_CUR_MASK; ctx->cursor.xor_mask = a + off; off += VGPU_CUR_MASK; InitializeCriticalSection(&ctx->lock); ctx->submit_event = CreateEvent(NULL, FALSE, FALSE, NULL); ctx->content_seq = 0; ctx->content_w = ctx->content_h = 0; return 0; } void vgpu_present_deinit(vgpu_ctx* ctx) { if (ctx->submit_event) { CloseHandle(ctx->submit_event); ctx->submit_event = NULL; } DeleteCriticalSection(&ctx->lock); if (ctx->arena) { VirtualFree(ctx->arena, 0, MEM_RELEASE); ctx->arena = NULL; } } void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* src, uint32_t W, uint32_t H, uint32_t src_pitch) { if (W > VGPU_MAX_WIDTH) W = VGPU_MAX_WIDTH; if (H > VGPU_MAX_HEIGHT) H = VGPU_MAX_HEIGHT; if (W == 0 || H == 0) return; EnterCriticalSection(&ctx->lock); uint8_t* d = ctx->content_buf; const uint32_t row = W * 4u; for (uint32_t y = 0; y < H; y++) memcpy(d + (size_t)y * row, src + (size_t)y * src_pitch, row); ctx->content_w = W; ctx->content_h = H; ctx->content_seq++; LeaveCriticalSection(&ctx->lock); /* static-idle: stamp the moment the source delivered new content (the raw perception; * the host derives "ms idle" from its own clock). Single 8-aligned MOV, off the lock. */ vgpu_publish_content_change(&ctx->view, now_ns()); SetEvent(ctx->submit_event); } void vgpu_present_run(vgpu_ctx* ctx) { const vgpu_region_view* rv = &ctx->view; /* neutral handle for the engine */ const DWORD poll_ms = 8; int64_t last_seq = -1; uint32_t prev_state = VGPU_CMD_STOP; uint32_t last_ff_ack = rv->producer->full_frame_ack; DWORD last_beat = GetTickCount(); uint64_t last_publish_ns = 0; /* 0 → first eligible frame publishes immediately */ int last_cur_x = 0, last_cur_y = 0, last_cur_vis = 0; HCURSOR last_cur_handle = NULL; /* one-shot display geometry: publish once before the loop (flat pull contract). The * captured-output origin is (0,0) for the primary/full-screen capture path; backends * resample reactively on recreate / capture-size change. No periodic poll in the loop. */ geometry_sample_and_publish(ctx, 0, 0); for (;;) { WaitForSingleObject(ctx->submit_event, poll_ms); /* --- heartbeat: always ticks, independent of desired_state --- */ DWORD nowt = GetTickCount(); if (nowt - last_beat >= VGPU_HEARTBEAT_PERIOD_MS) { vgpu_tick_heartbeat(rv); last_beat = nowt; } /* --- reconcile control (gen-seqlock -> apply -> ack) --- */ vgpu_control_view cv; uint32_t desired = prev_state; uint32_t draw_cursor = 1; int force_full = 0; uint32_t fps = ctx->default_fps; /* publish-rate cap (applied) */ uint32_t ff_req = last_ff_ack; /* full_frame_req value to honor */ if (vgpu_control_read(rv, &cv)) { desired = cv.desired_state; draw_cursor = cv.draw_cursor; fps = cv.target_fps ? cv.target_fps : ctx->default_fps; vgpu_set_applied_fps(rv, fps); vgpu_publish_ctrl_ack(rv, cv.gen); ff_req = cv.full_frame_req; if ((ff_req - last_ff_ack) != 0u) force_full = 1; /* edge pending, wrap-tolerant */ } /* --- lifecycle transitions --- */ if (desired != prev_state) { if (desired == VGPU_CMD_RUN && prev_state != VGPU_CMD_RUN) { vgpu_bump_run_epoch(rv); vgpu_set_status(rv, VGPU_ST_CAPTURING); force_full = 1; /* fresh frame on start */ } else if (desired == VGPU_CMD_PAUSE) { vgpu_set_status(rv, VGPU_ST_PAUSED); } else if (desired == VGPU_CMD_STOP) { vgpu_set_status(rv, VGPU_ST_STOPPED); } prev_state = desired; } else if (last_seq < 0 && desired == VGPU_CMD_RUN) { vgpu_set_status(rv, VGPU_ST_CAPTURING); } if (desired != VGPU_CMD_RUN) { /* PAUSED/STOPPED: no new frames; heartbeat still ticks. We do NOT * ack a pending full_frame here — acking without publishing would * be a false "honored". A pending request is honored on the next * transition to RUN (force_full=1 there → publish + ack). */ continue; } /* --- compose + publish on content change OR forced full frame, but * rate-limited to the applied fps cap (the single publish point → * contract-level cap, independent of the capture backend). A * force_full bypasses the cap (due=1). present does NOT sample the * cursor (capture threads source it); it only reads ctx->cursor under * ctx->lock for compositing, and detects cursor motion via a delta so * a pure cursor move over static desktop still recomposes. --- */ uint64_t interval_ns = fps > 0 ? (1000000000ull / fps) : 0; uint64_t now = now_ns(); int due = force_full || interval_ns == 0 || (now - last_publish_ns) >= interval_ns; int compose_cursor = (ctx->draw_cursor_cap && draw_cursor); EnterCriticalSection(&ctx->lock); int64_t seq = ctx->content_seq; uint32_t W = ctx->content_w, H = ctx->content_h; int cur_changed = compose_cursor && ((ctx->cursor.visible != last_cur_vis) || (ctx->cursor.x != last_cur_x) || (ctx->cursor.y != last_cur_y) || (ctx->cursor.handle != last_cur_handle)); int have = (W && H); int content_new = have && (seq != last_seq || cur_changed || force_full); /* take the frame ONLY when due — so we never drop the latest content; * if not due, last_seq is left untouched and it publishes next due. */ int dirty = content_new && due; if (dirty) { memcpy(ctx->frame_buf, ctx->content_buf, (size_t)W * H * 4u); last_seq = seq; if (compose_cursor) cursor_draw(ctx, ctx->frame_buf, W, H); last_cur_vis = ctx->cursor.visible; last_cur_x = ctx->cursor.x; last_cur_y = ctx->cursor.y; last_cur_handle = ctx->cursor.handle; } LeaveCriticalSection(&ctx->lock); if (!dirty) { /* not due, or nothing to publish. A force_full with content has * due=1 → dirty=1, so it never lands here while have is true; thus * no spurious ack edge. */ continue; } if (vgpu_publish_frame(rv, ctx->frame_buf, W, H, now) == 0) { last_publish_ns = now; if (force_full) { vgpu_publish_full_frame_ack(rv, ff_req); last_ff_ack = ff_req; } } else { vgpu_set_error(rv, 1u); /* frame too large for slot (mode > max) */ } } }