vgpu in-guest producer in-tree, release CI, flexible vmie discovery

- src/si/vgpu-stream: in-guest vgpu producer built as a Windows cross-compiled target (if(WIN32))
- .gitea: release workflow — cross-build the agent and build/publish the deb against system vmie
- cmake/makefile: resolve vmie from a source tree (LIBVMIE_PATH) or installed libvmie-dev
This commit is contained in:
2026-06-22 18:35:12 +03:00
parent 9bde398b6c
commit bd8b966017
31 changed files with 2393 additions and 8 deletions
+163
View File
@@ -0,0 +1,163 @@
/* publish.c — OS-agnostic implementation of the streaming protocol.
* Operates purely on the contract through a borrowed vgpu_region_view; no
* platform headers, no runtime context. The x86-TSO ordering lives in the
* atomic shim. */
#include <string.h>
#include "vgpu_stream.h" /* contract types / slot geometry */
#include "atomic-shim.h" /* x86-TSO memory-order accessors */
#include "stream.h" /* region-view handle + this API */
/* Upper bound on the number of snapshot attempts vgpu_control_read makes
 * before it gives up and reports failure. */
#define VGPU_CTRL_READ_TRIES 16u
/* Publish one tightly packed BGRA frame into the next ring slot.
 *
 * rv            region view; rv->producer and rv->ring are written.
 * tight_bgra    source pixels; height * width * 4 bytes are read from it.
 * width, height frame size in pixels (each row is width * 4 bytes, no padding).
 * timestamp_ns  stored verbatim in the slot descriptor.
 *
 * Returns 0 once the frame is published. Returns 1, with nothing written,
 * when the row stride does not fit in 32 bits or the frame is larger than
 * VGPU_SLOT_STRIDE.
 *
 * The order of the stores and fences below is the protocol itself; do not
 * reorder them. */
int vgpu_publish_frame(const vgpu_region_view* rv, const uint8_t* tight_bgra,
                       uint32_t width, uint32_t height, uint64_t timestamp_ns) {
    vgpu_producer_t* p = rv->producer;

    /* Do the size math in 64 bits: width * 4u wraps in uint32_t once width
     * reaches 2^30, which would let an absurd width slip past the slot-size
     * check with a wrapped stride and publish a descriptor whose width and
     * stride contradict each other. */
    const uint64_t stride64 = (uint64_t)width * 4u;
    if (stride64 > UINT32_MAX)
        return 1;
    const uint32_t stride = (uint32_t)stride64; /* tight invariant */
    const uint64_t need = (uint64_t)height * stride;
    if (need > VGPU_SLOT_STRIDE) /* clamp by slot size */
        return 1;

    /* Target the slot after the most recently published one; slot 0 when
     * nothing has been published yet. */
    uint32_t cur = vgpu_load_acquire32(&p->latest);
    uint32_t S = (cur == VGPU_LATEST_NONE) ? 0u : ((cur + 1u) % VGPU_SLOT_COUNT);
    uint8_t* dst = rv->ring + (size_t)S * VGPU_SLOT_STRIDE;

    /* seqlock: even -> odd (writing) */
    vgpu_store_release32(&p->seq[S], p->seq[S] + 1u);
    vgpu_compiler_barrier();

    /* descriptor (self-describing slot) */
    p->desc[S].width = width;
    p->desc[S].height = height;
    p->desc[S].stride = stride;
    p->desc[S].format = VGPU_FMT_BGRA8888;
    p->desc[S].frame_id = p->frame_id + 1u;
    p->desc[S].timestamp_ns = timestamp_ns;

    /* pixels (source is already tight) */
    memcpy(dst, tight_bgra, (size_t)need);
    vgpu_sfence();

    /* seqlock: odd -> even (stable) */
    vgpu_store_release32(&p->seq[S], p->seq[S] + 1u);
    vgpu_sfence();

    /* Advance the running frame counter, then expose the slot index last so
     * `latest` only ever names a slot whose seq has returned to even. */
    p->frame_id += 1u;
    vgpu_store_release32(&p->latest, S);
    return 0;
}
/* Take a consistent snapshot of the control block into *out.
 *
 * Each attempt reads ctrl_gen, copies six control fields, then reads
 * ctrl_gen again. The copy is accepted only when the first generation is
 * even and both generation reads match.
 *
 * Returns 1 with *out filled in (out->gen holds the observed generation).
 * Returns 0 after VGPU_CTRL_READ_TRIES unsuccessful attempts; *out is not
 * modified in that case.
 *
 * NOTE(review): retries run back-to-back with no pause or backoff, so all
 * attempts can be spent while a single writer update is still in flight.
 * NOTE(review): the writer of this block is not visible here — presumably
 * the host/consumer side; confirm. */
int vgpu_control_read(const vgpu_region_view* rv, vgpu_control_view* out) {
/* volatile: every field access below is a real load from the shared block */
volatile vgpu_control_t* c = rv->control;
for (uint32_t t = 0; t < VGPU_CTRL_READ_TRIES; t++) {
uint32_t g0 = vgpu_load_acquire32(&c->ctrl_gen);
/* an odd generation marks an update in progress; do not read the fields */
if (g0 & 1u)
continue; /* writer in progress */
vgpu_compiler_barrier();
/* copy into locals first so *out is only touched once the snapshot is validated */
uint32_t desired = c->desired_state;
uint32_t fps = c->target_fps;
uint32_t cursor = c->draw_cursor;
uint32_t ffreq = c->full_frame_req;
uint32_t ctick = c->consumer_tick;
uint32_t att = c->attached;
vgpu_compiler_barrier();
/* re-read the generation: a change means the fields may mix two updates */
uint32_t g1 = vgpu_load_acquire32(&c->ctrl_gen);
if (g0 != g1)
continue; /* torn read, retry */
out->gen = g0;
out->desired_state = desired;
out->target_fps = fps;
out->draw_cursor = cursor;
out->full_frame_req = ffreq;
out->consumer_tick = ctick;
out->attached = att;
return 1;
}
/* every attempt saw an odd or changing generation */
return 0;
}
void vgpu_publish_ctrl_ack(const vgpu_region_view* rv, uint32_t gen) {
vgpu_store_release32(&rv->producer->ctrl_ack, gen);
}
void vgpu_set_status(const vgpu_region_view* rv, uint32_t status) {
vgpu_store_release32(&rv->producer->status, status);
}
void vgpu_set_backend(const vgpu_region_view* rv, uint32_t backend) {
vgpu_store_release32(&rv->producer->backend, backend);
}
void vgpu_set_error(const vgpu_region_view* rv, uint32_t error_code) {
vgpu_store_release32(&rv->producer->error_code, error_code);
}
void vgpu_set_applied_fps(const vgpu_region_view* rv, uint32_t fps) {
vgpu_store_release32(&rv->producer->applied_fps, fps);
}
/* Increment the producer block's run_epoch by one and publish the new value
 * with a release store. The current value is read with a plain load, so
 * this is not an atomic increment.
 * NOTE(review): assumes this side is the only writer of run_epoch — confirm. */
void vgpu_bump_run_epoch(const vgpu_region_view* rv) {
    vgpu_producer_t* const prod = rv->producer;

    vgpu_store_release32(&prod->run_epoch, prod->run_epoch + 1u);
}
void vgpu_tick_heartbeat(const vgpu_region_view* rv) {
/* 64-bit aligned single MOV is atomic on x86_64; barrier orders it */
rv->producer->heartbeat += 1u;
vgpu_compiler_barrier();
}
void vgpu_publish_full_frame_ack(const vgpu_region_view* rv, uint32_t req) {
vgpu_store_release32(&rv->producer->full_frame_ack, req);
}
/* Publish the cursor position and visibility.
 *
 * x and y are packed into one 64-bit word (x in bits 0..31, y in bits
 * 32..63, each as its two's-complement bit pattern) and written with a
 * plain store. `visible` is then release-stored, and cursor_seq is
 * incremented last with a release store. The store order is significant. */
void vgpu_publish_cursor(const vgpu_region_view* rv, int32_t x, int32_t y, uint32_t visible) {
    vgpu_producer_t* const prod = rv->producer;

    /* go through uint32_t first so a negative coordinate is not sign-extended
     * across the other half of the word */
    const uint64_t x_bits = (uint64_t)(uint32_t)x;
    const uint64_t y_bits = (uint64_t)(uint32_t)y;

    /* single plain 64-bit store of the packed position */
    prod->cursor_pos = (y_bits << 32) | x_bits;

    vgpu_store_release32(&prod->cursor_visible, visible);

    /* sequence bump goes last: its release store orders the two writes above
     * ahead of it */
    vgpu_store_release32(&prod->cursor_seq, prod->cursor_seq + 1u);
}
/* Publish the cursor shape: hotspot, glyph size, and cursor id.
 *
 * Hotspot and glyph size are each packed into one 32-bit word, with the
 * first value (hot_x / gw) masked into the low 16 bits and the second
 * (hot_y / gh) shifted into the high 16 bits. Three release stores are
 * issued, in the order hotspot, glyph, id.
 *
 * This function does not touch cursor_seq; per the original design note, a
 * following vgpu_publish_cursor call bumps it and gates these writes. */
void vgpu_publish_cursor_shape(const vgpu_region_view* rv, uint32_t hot_x, uint32_t hot_y,
                               uint32_t gw, uint32_t gh, uint32_t cursor_id) {
    vgpu_producer_t* const prod = rv->producer;

    /* mask the low half so stray upper bits cannot leak into the high half */
    const uint32_t hotspot = (hot_y << 16) | (hot_x & 0xFFFFu);
    const uint32_t glyph = (gh << 16) | (gw & 0xFFFFu);

    vgpu_store_release32(&prod->cursor_hotspot, hotspot);
    vgpu_store_release32(&prod->cursor_glyph, glyph);
    vgpu_store_release32(&prod->cursor_id, cursor_id);
}
void vgpu_publish_content_change(const vgpu_region_view* rv, uint64_t change_ns) {
/* 64-bit aligned single MOV is atomic on x86_64; barrier orders it (heartbeat pattern) */
rv->producer->content_change_ns = change_ns;
vgpu_compiler_barrier();
}
/* Publish display geometry under the geom_seq sequence counter.
 *
 * geom_seq is incremented before the eight geometry fields are written and
 * incremented again afterwards, so it is odd for the duration of the update
 * (given an even starting value). The fences and their positions are part
 * of the protocol; do not reorder. */
void vgpu_publish_geometry(const vgpu_region_view* rv, int32_t virt_x, int32_t virt_y,
                           uint32_t virt_w, uint32_t virt_h,
                           int32_t cap_x, int32_t cap_y,
                           uint32_t dpi, uint32_t refresh_mhz) {
    vgpu_producer_t* const prod = rv->producer;

    /* open the write window: sequence goes even -> odd */
    vgpu_store_release32(&prod->geom_seq, prod->geom_seq + 1u);
    vgpu_compiler_barrier();

    prod->virt_x = virt_x;
    prod->virt_y = virt_y;
    prod->virt_w = virt_w;
    prod->virt_h = virt_h;
    prod->cap_x = cap_x;
    prod->cap_y = cap_y;
    prod->dpi = dpi;
    prod->refresh_mhz = refresh_mhz;
    vgpu_sfence();

    /* close the write window: sequence goes odd -> even */
    vgpu_store_release32(&prod->geom_seq, prod->geom_seq + 1u);
    vgpu_sfence();
}