/* src/si/vgpu-stream/win32/present.c */

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <string.h>
#include <stdio.h>
#include "present.h"
#include "stream.h"        /* OS-agnostic publish / control API + region-view */
#include "cursor.h"
#include "geometry.h"      /* one-shot display-geometry sample at session start */

/* cursor arena sizing */
#define VGPU_CUR_MAX     256u
#define VGPU_CUR_BGRA    (VGPU_CUR_MAX * VGPU_CUR_MAX * 4u)
#define VGPU_CUR_MASK    (VGPU_CUR_MAX * VGPU_CUR_MAX)

/*
 * Current value of the Windows performance counter, converted to nanoseconds.
 *
 * The tick -> ns conversion is done in integer arithmetic, split into whole
 * seconds plus a sub-second remainder. This keeps the result exact (truncated
 * toward zero) instead of rounding the tick * 1e9 product to the 53-bit
 * mantissa of a double, and the intermediate product cannot overflow as long
 * as the counter frequency is below ~18 GHz (remainder < frequency).
 *
 * The counter frequency is queried on first use and cached in a function-local
 * static. Returns 0 if the frequency could not be obtained, rather than
 * dividing by zero.
 */
static uint64_t now_ns(void) {
    static LARGE_INTEGER freq = { .QuadPart = 0 };
    if (freq.QuadPart == 0) QueryPerformanceFrequency(&freq);
    if (freq.QuadPart <= 0) return 0;   /* no usable frequency: avoid div-by-zero */

    LARGE_INTEGER c;
    QueryPerformanceCounter(&c);

    const uint64_t hz    = (uint64_t)freq.QuadPart;
    const uint64_t ticks = (uint64_t)c.QuadPart;
    const uint64_t whole = ticks / hz;   /* full seconds */
    const uint64_t frac  = ticks % hz;   /* leftover ticks, always < hz */
    return whole * 1000000000ull + (frac * 1000000000ull) / hz;
}

/*
 * Initialise a present context.
 *
 * Zeroes *ctx, copies the producer/control/ring pointers from `region` into
 * ctx->view, allocates one VirtualAlloc arena that backs the content, frame
 * and cursor buffers, and creates the critical section and the auto-reset
 * event used to wake the present loop.
 *
 *   ctx          context to initialise (overwritten in full)
 *   region       source of the producer/control/ring pointers
 *   default_fps  publish-rate default; 0 selects 30
 *
 * Returns 0 on success, 1 on failure. On failure no arena, event or critical
 * section is left owned by ctx (arena and submit_event stay NULL, the lock is
 * not initialised).
 */
int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps) {
    memset(ctx, 0, sizeof *ctx);
    ctx->view.producer = region->producer;
    ctx->view.control  = region->control;
    ctx->view.ring     = region->ring;
    ctx->default_fps = default_fps ? default_fps : 30u;
    ctx->backend  = VGPU_BK_NONE;
    ctx->draw_cursor_cap = 1;

    /* one arena: content + frame + cursor buffers */
    size_t bytes = VGPU_STAGING_BYTES   /* content */
                 + VGPU_STAGING_BYTES   /* frame   */
                 + VGPU_CUR_BGRA        /* cursor bgra */
                 + VGPU_CUR_MASK        /* and */
                 + VGPU_CUR_MASK;       /* xor */
    uint8_t* a = (uint8_t*)VirtualAlloc(NULL, bytes, MEM_RESERVE | MEM_COMMIT,
                                        PAGE_READWRITE);
    if (!a) {
        fprintf(stderr, "present: arena VirtualAlloc %zu MiB failed (%lu)\n",
                bytes / (1024 * 1024), GetLastError());
        return 1;
    }

    /* Create the wake-up event before anything is stored on ctx, so a failure
     * here unwinds to the same state as the arena-failure path above. Without
     * this check a NULL handle would reach the present loop, whose wait would
     * then fail immediately on every iteration. */
    HANDLE ev = CreateEvent(NULL, FALSE, FALSE, NULL);   /* auto-reset, unsignalled */
    if (!ev) {
        fprintf(stderr, "present: submit event CreateEvent failed (%lu)\n",
                GetLastError());
        VirtualFree(a, 0, MEM_RELEASE);
        return 1;
    }

    ctx->arena       = a;
    ctx->arena_bytes = bytes;

    /* carve the arena into its sub-buffers, in the same order as `bytes` */
    size_t off = 0;
    ctx->content_buf      = a + off; off += VGPU_STAGING_BYTES;
    ctx->frame_buf        = a + off; off += VGPU_STAGING_BYTES;
    ctx->cursor.bgra      = a + off; off += VGPU_CUR_BGRA;
    ctx->cursor.and_mask  = a + off; off += VGPU_CUR_MASK;
    ctx->cursor.xor_mask  = a + off; off += VGPU_CUR_MASK;

    InitializeCriticalSection(&ctx->lock);
    ctx->submit_event = ev;
    ctx->content_seq  = 0;
    ctx->content_w = ctx->content_h = 0;
    return 0;
}

/*
 * Release what vgpu_present_init acquired: close the submit event (if any),
 * delete the critical section, and free the arena (if any). The event handle
 * and arena pointer are reset to NULL once released.
 */
void vgpu_present_deinit(vgpu_ctx* ctx) {
    if (ctx->submit_event != NULL) {
        CloseHandle(ctx->submit_event);
        ctx->submit_event = NULL;
    }

    DeleteCriticalSection(&ctx->lock);

    if (ctx->arena == NULL) {
        return;
    }
    /* MEM_RELEASE requires a size of 0 and frees the whole reservation */
    VirtualFree(ctx->arena, 0, MEM_RELEASE);
    ctx->arena = NULL;
}

/*
 * Hand a new source image to the present engine.
 *
 * The image is clamped to VGPU_MAX_WIDTH x VGPU_MAX_HEIGHT and copied row by
 * row into ctx->content_buf as tightly packed rows of W * 4 bytes, under
 * ctx->lock. The stored dimensions and content_seq are updated in the same
 * critical section. Afterwards the content-change timestamp is published and
 * submit_event is signalled.
 *
 *   src        first byte of the source image
 *   W, H       source size in pixels; a zero dimension makes the call a no-op
 *   src_pitch  byte distance between consecutive source rows
 */
void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* src,
                         uint32_t W, uint32_t H, uint32_t src_pitch) {
    /* clamp first, then reject empty images */
    W = (W > VGPU_MAX_WIDTH)  ? VGPU_MAX_WIDTH  : W;
    H = (H > VGPU_MAX_HEIGHT) ? VGPU_MAX_HEIGHT : H;
    if (!W || !H) {
        return;
    }

    const uint32_t row_bytes = W * 4u;   /* 4 bytes per pixel, no padding in the staging copy */

    EnterCriticalSection(&ctx->lock);
    {
        uint8_t* const staging = ctx->content_buf;
        uint32_t line = 0;
        while (line < H) {
            const size_t dst_off = (size_t)line * row_bytes;
            const size_t src_off = (size_t)line * src_pitch;
            memcpy(staging + dst_off, src + src_off, row_bytes);
            line++;
        }
        ctx->content_h = H;
        ctx->content_w = W;
        ctx->content_seq += 1;
    }
    LeaveCriticalSection(&ctx->lock);

    /* static-idle: record when the source delivered new content; done after
     * the lock is released. The host derives "ms idle" from its own clock. */
    const uint64_t stamp = now_ns();
    vgpu_publish_content_change(&ctx->view, stamp);
    SetEvent(ctx->submit_event);
}

/*
 * Present loop: runs forever on the calling thread (there is no exit path).
 *
 * Each iteration, woken by submit_event or after poll_ms at the latest:
 *   1. ticks the heartbeat once VGPU_HEARTBEAT_PERIOD_MS has elapsed;
 *   2. reads the control block and, on a successful read, applies desired
 *      state, draw_cursor and target fps, acks the control generation and
 *      detects a pending full-frame request;
 *   3. performs lifecycle status transitions (RUN / PAUSE / STOP);
 *   4. while in RUN, copies content_buf into frame_buf (optionally drawing the
 *      cursor) and publishes it, rate-limited by the applied fps unless a full
 *      frame is being forced.
 *
 * The applied fps and draw_cursor values persist across iterations: when a
 * control read does not succeed, the loop keeps using the last values it
 * applied (initially default_fps / cursor on) instead of falling back to the
 * defaults, matching how desired state already persists through prev_state.
 */
void vgpu_present_run(vgpu_ctx* ctx) {
    const vgpu_region_view* rv = &ctx->view;   /* neutral handle for the engine */
    const DWORD poll_ms = 8;
    int64_t  last_seq   = -1;                  /* -1: nothing composed yet */
    uint32_t prev_state = VGPU_CMD_STOP;
    uint32_t last_ff_ack = rv->producer->full_frame_ack;
    DWORD    last_beat  = GetTickCount();
    uint64_t last_publish_ns = 0;   /* 0 → first eligible frame publishes immediately */
    int      last_cur_x = 0, last_cur_y = 0, last_cur_vis = 0;
    HCURSOR  last_cur_handle = NULL;

    /* last control values actually applied; survive a failed control read */
    uint32_t applied_fps         = ctx->default_fps;
    uint32_t applied_draw_cursor = 1;

    /* one-shot display geometry: publish once before the loop (flat pull contract). The
     * captured-output origin is (0,0) for the primary/full-screen capture path; backends
     * resample reactively on recreate / capture-size change. No periodic poll in the loop. */
    geometry_sample_and_publish(ctx, 0, 0);

    for (;;) {
        /* A failed wait (e.g. invalid handle) returns immediately; sleep the
         * poll period instead so the loop cannot degrade into a busy spin. */
        if (WaitForSingleObject(ctx->submit_event, poll_ms) == WAIT_FAILED)
            Sleep(poll_ms);

        /* --- heartbeat: always ticks, independent of desired_state --- */
        DWORD nowt = GetTickCount();
        if (nowt - last_beat >= VGPU_HEARTBEAT_PERIOD_MS) {   /* unsigned diff: wrap-tolerant */
            vgpu_tick_heartbeat(rv);
            last_beat = nowt;
        }

        /* --- reconcile control (gen-seqlock -> apply -> ack) --- */
        vgpu_control_view cv;
        uint32_t desired = prev_state;
        int      force_full = 0;
        uint32_t ff_req = last_ff_ack;     /* full_frame_req value to honor */
        if (vgpu_control_read(rv, &cv)) {
            desired = cv.desired_state;
            applied_draw_cursor = cv.draw_cursor;
            applied_fps = cv.target_fps ? cv.target_fps : ctx->default_fps;
            vgpu_set_applied_fps(rv, applied_fps);
            vgpu_publish_ctrl_ack(rv, cv.gen);

            ff_req = cv.full_frame_req;
            if ((ff_req - last_ff_ack) != 0u)
                force_full = 1;            /* edge pending, wrap-tolerant */
        }
        const uint32_t draw_cursor = applied_draw_cursor;
        const uint32_t fps         = applied_fps;   /* publish-rate cap (applied) */

        /* --- lifecycle transitions --- */
        if (desired != prev_state) {
            if (desired == VGPU_CMD_RUN && prev_state != VGPU_CMD_RUN) {
                vgpu_bump_run_epoch(rv);
                vgpu_set_status(rv, VGPU_ST_CAPTURING);
                force_full = 1;     /* fresh frame on start */
            } else if (desired == VGPU_CMD_PAUSE) {
                vgpu_set_status(rv, VGPU_ST_PAUSED);
            } else if (desired == VGPU_CMD_STOP) {
                vgpu_set_status(rv, VGPU_ST_STOPPED);
            }
            prev_state = desired;
        } else if (last_seq < 0 && desired == VGPU_CMD_RUN) {
            /* still in RUN with no frame composed yet: keep asserting CAPTURING */
            vgpu_set_status(rv, VGPU_ST_CAPTURING);
        }

        if (desired != VGPU_CMD_RUN) {
            /* PAUSED/STOPPED: no new frames; heartbeat still ticks. We do NOT
             * ack a pending full_frame here — acking without publishing would
             * be a false "honored". A pending request is honored on the next
             * transition to RUN (force_full=1 there → publish + ack). */
            continue;
        }

        /* --- compose + publish on content change OR forced full frame, but
         *     rate-limited to the applied fps cap (the single publish point →
         *     contract-level cap, independent of the capture backend). A
         *     force_full bypasses the cap (due=1). present does NOT sample the
         *     cursor (capture threads source it); it only reads ctx->cursor under
         *     ctx->lock for compositing, and detects cursor motion via a delta so
         *     a pure cursor move over static desktop still recomposes. --- */
        uint64_t interval_ns = fps > 0 ? (1000000000ull / fps) : 0;
        uint64_t now = now_ns();
        int due = force_full || interval_ns == 0
                  || (now - last_publish_ns) >= interval_ns;

        int compose_cursor = (ctx->draw_cursor_cap && draw_cursor);

        EnterCriticalSection(&ctx->lock);
        int64_t  seq = ctx->content_seq;
        uint32_t W = ctx->content_w, H = ctx->content_h;
        int cur_changed = compose_cursor
                          && ((ctx->cursor.visible != last_cur_vis)
                              || (ctx->cursor.x != last_cur_x)
                              || (ctx->cursor.y != last_cur_y)
                              || (ctx->cursor.handle != last_cur_handle));
        int have = (W && H);   /* at least one submit has landed */
        int content_new = have && (seq != last_seq || cur_changed || force_full);
        /* take the frame ONLY when due — so we never drop the latest content;
         * if not due, last_seq is left untouched and it publishes next due. */
        int dirty = content_new && due;
        if (dirty) {
            memcpy(ctx->frame_buf, ctx->content_buf, (size_t)W * H * 4u);
            last_seq = seq;
            if (compose_cursor)
                cursor_draw(ctx, ctx->frame_buf, W, H);
            /* remember the cursor state this frame was composed against */
            last_cur_vis = ctx->cursor.visible;
            last_cur_x = ctx->cursor.x; last_cur_y = ctx->cursor.y;
            last_cur_handle = ctx->cursor.handle;
        }
        LeaveCriticalSection(&ctx->lock);

        if (!dirty) {
            /* not due, or nothing to publish. A force_full with content has
             * due=1 → dirty=1, so it never lands here while have is true; thus
             * no spurious ack edge. */
            continue;
        }

        if (vgpu_publish_frame(rv, ctx->frame_buf, W, H, now) == 0) {
            last_publish_ns = now;
            if (force_full) {
                /* ack only after the frame was actually published */
                vgpu_publish_full_frame_ack(rv, ff_req);
                last_ff_ack = ff_req;
            }
        } else {
            vgpu_set_error(rv, 1u);  /* frame too large for slot (mode > max) */
        }
    }
}
vgpu in-guest producer in-tree, release CI, flexible vmie discovery 2026-06-22 18:35:12 +03:00			`#define WIN32_LEAN_AND_MEAN`
			`#include <windows.h>`
			`#include <string.h>`
			`#include <stdio.h>`
			`#include "present.h"`
			`#include "stream.h" /* OS-agnostic publish / control API + region-view */`
			`#include "cursor.h"`
			`#include "geometry.h" /* one-shot display-geometry sample at session start */`

			`/* cursor arena sizing */`
			`#define VGPU_CUR_MAX 256u`
			`#define VGPU_CUR_BGRA (VGPU_CUR_MAX * VGPU_CUR_MAX * 4u)`
			`#define VGPU_CUR_MASK (VGPU_CUR_MAX * VGPU_CUR_MAX)`

			`static uint64_t now_ns(void) {`
			`static LARGE_INTEGER freq = { .QuadPart = 0 };`
			`if (freq.QuadPart == 0) QueryPerformanceFrequency(&freq);`
			`LARGE_INTEGER c; QueryPerformanceCounter(&c);`
			`return (uint64_t)((double)c.QuadPart * 1e9 / (double)freq.QuadPart);`
			`}`

			`int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps) {`
			`memset(ctx, 0, sizeof *ctx);`
			`ctx->view.producer = region->producer;`
			`ctx->view.control = region->control;`
			`ctx->view.ring = region->ring;`
			`ctx->default_fps = default_fps ? default_fps : 30u;`
			`ctx->backend = VGPU_BK_NONE;`
			`ctx->draw_cursor_cap = 1;`

			`/* one arena: content + frame + cursor buffers */`
			`size_t bytes = VGPU_STAGING_BYTES /* content */`
			`+ VGPU_STAGING_BYTES /* frame */`
			`+ VGPU_CUR_BGRA /* cursor bgra */`
			`+ VGPU_CUR_MASK /* and */`
			`+ VGPU_CUR_MASK; /* xor */`
			`uint8_t* a = (uint8_t*)VirtualAlloc(NULL, bytes, MEM_RESERVE \| MEM_COMMIT,`
			`PAGE_READWRITE);`
			`if (!a) {`
			`fprintf(stderr, "present: arena VirtualAlloc %zu MiB failed (%lu)\n",`
			`bytes / (1024 * 1024), GetLastError());`
			`return 1;`
			`}`
			`ctx->arena = a;`
			`ctx->arena_bytes = bytes;`

			`size_t off = 0;`
			`ctx->content_buf = a + off; off += VGPU_STAGING_BYTES;`
			`ctx->frame_buf = a + off; off += VGPU_STAGING_BYTES;`
			`ctx->cursor.bgra = a + off; off += VGPU_CUR_BGRA;`
			`ctx->cursor.and_mask = a + off; off += VGPU_CUR_MASK;`
			`ctx->cursor.xor_mask = a + off; off += VGPU_CUR_MASK;`

			`InitializeCriticalSection(&ctx->lock);`
			`ctx->submit_event = CreateEvent(NULL, FALSE, FALSE, NULL);`
			`ctx->content_seq = 0;`
			`ctx->content_w = ctx->content_h = 0;`
			`return 0;`
			`}`

			`void vgpu_present_deinit(vgpu_ctx* ctx) {`
			`if (ctx->submit_event) { CloseHandle(ctx->submit_event); ctx->submit_event = NULL; }`
			`DeleteCriticalSection(&ctx->lock);`
			`if (ctx->arena) { VirtualFree(ctx->arena, 0, MEM_RELEASE); ctx->arena = NULL; }`
			`}`

			`void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* src,`
			`uint32_t W, uint32_t H, uint32_t src_pitch) {`
			`if (W > VGPU_MAX_WIDTH) W = VGPU_MAX_WIDTH;`
			`if (H > VGPU_MAX_HEIGHT) H = VGPU_MAX_HEIGHT;`
			`if (W == 0 \|\| H == 0) return;`

			`EnterCriticalSection(&ctx->lock);`
			`uint8_t* d = ctx->content_buf;`
			`const uint32_t row = W * 4u;`
			`for (uint32_t y = 0; y < H; y++)`
			`memcpy(d + (size_t)y * row, src + (size_t)y * src_pitch, row);`
			`ctx->content_w = W;`
			`ctx->content_h = H;`
			`ctx->content_seq++;`
			`LeaveCriticalSection(&ctx->lock);`
			`/* static-idle: stamp the moment the source delivered new content (the raw perception;`
			`* the host derives "ms idle" from its own clock). Single 8-aligned MOV, off the lock. */`
			`vgpu_publish_content_change(&ctx->view, now_ns());`
			`SetEvent(ctx->submit_event);`
			`}`

			`void vgpu_present_run(vgpu_ctx* ctx) {`
			`const vgpu_region_view* rv = &ctx->view; /* neutral handle for the engine */`
			`const DWORD poll_ms = 8;`
			`int64_t last_seq = -1;`
			`uint32_t prev_state = VGPU_CMD_STOP;`
			`uint32_t last_ff_ack = rv->producer->full_frame_ack;`
			`DWORD last_beat = GetTickCount();`
			`uint64_t last_publish_ns = 0; /* 0 → first eligible frame publishes immediately */`
			`int last_cur_x = 0, last_cur_y = 0, last_cur_vis = 0;`
			`HCURSOR last_cur_handle = NULL;`

			`/* one-shot display geometry: publish once before the loop (flat pull contract). The`
			`* captured-output origin is (0,0) for the primary/full-screen capture path; backends`
			`* resample reactively on recreate / capture-size change. No periodic poll in the loop. */`
			`geometry_sample_and_publish(ctx, 0, 0);`

			`for (;;) {`
			`WaitForSingleObject(ctx->submit_event, poll_ms);`

			`/* --- heartbeat: always ticks, independent of desired_state --- */`
			`DWORD nowt = GetTickCount();`
			`if (nowt - last_beat >= VGPU_HEARTBEAT_PERIOD_MS) {`
			`vgpu_tick_heartbeat(rv);`
			`last_beat = nowt;`
			`}`

			`/* --- reconcile control (gen-seqlock -> apply -> ack) --- */`
			`vgpu_control_view cv;`
			`uint32_t desired = prev_state;`
			`uint32_t draw_cursor = 1;`
			`int force_full = 0;`
			`uint32_t fps = ctx->default_fps; /* publish-rate cap (applied) */`
			`uint32_t ff_req = last_ff_ack; /* full_frame_req value to honor */`
			`if (vgpu_control_read(rv, &cv)) {`
			`desired = cv.desired_state;`
			`draw_cursor = cv.draw_cursor;`
			`fps = cv.target_fps ? cv.target_fps : ctx->default_fps;`
			`vgpu_set_applied_fps(rv, fps);`
			`vgpu_publish_ctrl_ack(rv, cv.gen);`

			`ff_req = cv.full_frame_req;`
			`if ((ff_req - last_ff_ack) != 0u)`
			`force_full = 1; /* edge pending, wrap-tolerant */`
			`}`

			`/* --- lifecycle transitions --- */`
			`if (desired != prev_state) {`
			`if (desired == VGPU_CMD_RUN && prev_state != VGPU_CMD_RUN) {`
			`vgpu_bump_run_epoch(rv);`
			`vgpu_set_status(rv, VGPU_ST_CAPTURING);`
			`force_full = 1; /* fresh frame on start */`
			`} else if (desired == VGPU_CMD_PAUSE) {`
			`vgpu_set_status(rv, VGPU_ST_PAUSED);`
			`} else if (desired == VGPU_CMD_STOP) {`
			`vgpu_set_status(rv, VGPU_ST_STOPPED);`
			`}`
			`prev_state = desired;`
			`} else if (last_seq < 0 && desired == VGPU_CMD_RUN) {`
			`vgpu_set_status(rv, VGPU_ST_CAPTURING);`
			`}`

			`if (desired != VGPU_CMD_RUN) {`
			`/* PAUSED/STOPPED: no new frames; heartbeat still ticks. We do NOT`
			`* ack a pending full_frame here — acking without publishing would`
			`* be a false "honored". A pending request is honored on the next`
			`* transition to RUN (force_full=1 there → publish + ack). */`
			`continue;`
			`}`

			`/* --- compose + publish on content change OR forced full frame, but`
			`* rate-limited to the applied fps cap (the single publish point →`
			`* contract-level cap, independent of the capture backend). A`
			`* force_full bypasses the cap (due=1). present does NOT sample the`
			`* cursor (capture threads source it); it only reads ctx->cursor under`
			`* ctx->lock for compositing, and detects cursor motion via a delta so`
			`* a pure cursor move over static desktop still recomposes. --- */`
			`uint64_t interval_ns = fps > 0 ? (1000000000ull / fps) : 0;`
			`uint64_t now = now_ns();`
			`int due = force_full \|\| interval_ns == 0`
			`\|\| (now - last_publish_ns) >= interval_ns;`

			`int compose_cursor = (ctx->draw_cursor_cap && draw_cursor);`

			`EnterCriticalSection(&ctx->lock);`
			`int64_t seq = ctx->content_seq;`
			`uint32_t W = ctx->content_w, H = ctx->content_h;`
			`int cur_changed = compose_cursor`
			`&& ((ctx->cursor.visible != last_cur_vis)`
			`\|\| (ctx->cursor.x != last_cur_x)`
			`\|\| (ctx->cursor.y != last_cur_y)`
			`\|\| (ctx->cursor.handle != last_cur_handle));`
			`int have = (W && H);`
			`int content_new = have && (seq != last_seq \|\| cur_changed \|\| force_full);`
			`/* take the frame ONLY when due — so we never drop the latest content;`
			`* if not due, last_seq is left untouched and it publishes next due. */`
			`int dirty = content_new && due;`
			`if (dirty) {`
			`memcpy(ctx->frame_buf, ctx->content_buf, (size_t)W * H * 4u);`
			`last_seq = seq;`
			`if (compose_cursor)`
			`cursor_draw(ctx, ctx->frame_buf, W, H);`
			`last_cur_vis = ctx->cursor.visible;`
			`last_cur_x = ctx->cursor.x; last_cur_y = ctx->cursor.y;`
			`last_cur_handle = ctx->cursor.handle;`
			`}`
			`LeaveCriticalSection(&ctx->lock);`

			`if (!dirty) {`
			`/* not due, or nothing to publish. A force_full with content has`
			`* due=1 → dirty=1, so it never lands here while have is true; thus`
			`* no spurious ack edge. */`
			`continue;`
			`}`

			`if (vgpu_publish_frame(rv, ctx->frame_buf, W, H, now) == 0) {`
			`last_publish_ns = now;`
			`if (force_full) {`
			`vgpu_publish_full_frame_ack(rv, ff_req);`
			`last_ff_ack = ff_req;`
			`}`
			`} else {`
			`vgpu_set_error(rv, 1u); /* frame too large for slot (mode > max) */`
			`}`
			`}`
			`}`