/* publish.c — OS-agnostic implementation of the streaming protocol. * Operates purely on the contract through a borrowed vgpu_region_view; no * platform headers, no runtime context. The x86-TSO ordering lives in the * atomic shim. */ #include #include "vgpu_stream.h" /* contract types / slot geometry */ #include "atomic-shim.h" /* x86-TSO memory-order accessors */ #include "stream.h" /* region-view handle + this API */ #define VGPU_CTRL_READ_TRIES 16u int vgpu_publish_frame(const vgpu_region_view* rv, const uint8_t* tight_bgra, uint32_t width, uint32_t height, uint64_t timestamp_ns) { vgpu_producer_t* p = rv->producer; const uint32_t stride = width * 4u; /* tight invariant */ const uint64_t need = (uint64_t)height * stride; if (need > VGPU_SLOT_STRIDE) /* clamp by slot size */ return 1; uint32_t cur = vgpu_load_acquire32(&p->latest); uint32_t S = (cur == VGPU_LATEST_NONE) ? 0u : ((cur + 1u) % VGPU_SLOT_COUNT); uint8_t* dst = rv->ring + (size_t)S * VGPU_SLOT_STRIDE; /* seqlock: even -> odd (writing) */ vgpu_store_release32(&p->seq[S], p->seq[S] + 1u); vgpu_compiler_barrier(); /* descriptor (self-describing slot) */ p->desc[S].width = width; p->desc[S].height = height; p->desc[S].stride = stride; p->desc[S].format = VGPU_FMT_BGRA8888; p->desc[S].frame_id = p->frame_id + 1u; p->desc[S].timestamp_ns = timestamp_ns; /* pixels (source is already tight) */ memcpy(dst, tight_bgra, (size_t)need); vgpu_sfence(); /* seqlock: odd -> even (stable) */ vgpu_store_release32(&p->seq[S], p->seq[S] + 1u); vgpu_sfence(); p->frame_id += 1u; vgpu_store_release32(&p->latest, S); return 0; } int vgpu_control_read(const vgpu_region_view* rv, vgpu_control_view* out) { volatile vgpu_control_t* c = rv->control; for (uint32_t t = 0; t < VGPU_CTRL_READ_TRIES; t++) { uint32_t g0 = vgpu_load_acquire32(&c->ctrl_gen); if (g0 & 1u) continue; /* writer in progress */ vgpu_compiler_barrier(); uint32_t desired = c->desired_state; uint32_t fps = c->target_fps; uint32_t cursor = c->draw_cursor; uint32_t ffreq = c->full_frame_req; uint32_t ctick = c->consumer_tick; uint32_t att = c->attached; vgpu_compiler_barrier(); uint32_t g1 = vgpu_load_acquire32(&c->ctrl_gen); if (g0 != g1) continue; /* torn read, retry */ out->gen = g0; out->desired_state = desired; out->target_fps = fps; out->draw_cursor = cursor; out->full_frame_req = ffreq; out->consumer_tick = ctick; out->attached = att; return 1; } return 0; } void vgpu_publish_ctrl_ack(const vgpu_region_view* rv, uint32_t gen) { vgpu_store_release32(&rv->producer->ctrl_ack, gen); } void vgpu_set_status(const vgpu_region_view* rv, uint32_t status) { vgpu_store_release32(&rv->producer->status, status); } void vgpu_set_backend(const vgpu_region_view* rv, uint32_t backend) { vgpu_store_release32(&rv->producer->backend, backend); } void vgpu_set_error(const vgpu_region_view* rv, uint32_t error_code) { vgpu_store_release32(&rv->producer->error_code, error_code); } void vgpu_set_applied_fps(const vgpu_region_view* rv, uint32_t fps) { vgpu_store_release32(&rv->producer->applied_fps, fps); } void vgpu_bump_run_epoch(const vgpu_region_view* rv) { vgpu_producer_t* p = rv->producer; vgpu_store_release32(&p->run_epoch, p->run_epoch + 1u); } void vgpu_tick_heartbeat(const vgpu_region_view* rv) { /* 64-bit aligned single MOV is atomic on x86_64; barrier orders it */ rv->producer->heartbeat += 1u; vgpu_compiler_barrier(); } void vgpu_publish_full_frame_ack(const vgpu_region_view* rv, uint32_t req) { vgpu_store_release32(&rv->producer->full_frame_ack, req); } void vgpu_publish_cursor(const vgpu_region_view* rv, int32_t x, int32_t y, uint32_t visible) { vgpu_producer_t* p = rv->producer; /* pack: low 32 = x, high 32 = y (signed → two's-complement bits) */ uint64_t packed = ((uint64_t)(uint32_t)y << 32) | (uint64_t)(uint32_t)x; /* 64-bit aligned single MOV is atomic on x86_64; barrier orders it (heartbeat pattern) */ p->cursor_pos = packed; vgpu_store_release32(&p->cursor_visible, visible); /* publish seq last: its release-store gates the pos/visible writes above for the host */ vgpu_store_release32(&p->cursor_seq, p->cursor_seq + 1u); } void vgpu_publish_cursor_shape(const vgpu_region_view* rv, uint32_t hot_x, uint32_t hot_y, uint32_t gw, uint32_t gh, uint32_t cursor_id) { vgpu_producer_t* p = rv->producer; /* pack 16|16 strictly unsigned (mask low half so no sign bits bleed into the high half). * No own seq: the following vgpu_publish_cursor bumps cursor_seq last and gates this line. */ vgpu_store_release32(&p->cursor_hotspot, (hot_y << 16) | (hot_x & 0xFFFFu)); vgpu_store_release32(&p->cursor_glyph, (gh << 16) | (gw & 0xFFFFu)); vgpu_store_release32(&p->cursor_id, cursor_id); } void vgpu_publish_content_change(const vgpu_region_view* rv, uint64_t change_ns) { /* 64-bit aligned single MOV is atomic on x86_64; barrier orders it (heartbeat pattern) */ rv->producer->content_change_ns = change_ns; vgpu_compiler_barrier(); } void vgpu_publish_geometry(const vgpu_region_view* rv, int32_t virt_x, int32_t virt_y, uint32_t virt_w, uint32_t virt_h, int32_t cap_x, int32_t cap_y, uint32_t dpi, uint32_t refresh_mhz) { vgpu_producer_t* p = rv->producer; /* seqlock: even -> odd (writing) */ vgpu_store_release32(&p->geom_seq, p->geom_seq + 1u); vgpu_compiler_barrier(); p->virt_x = virt_x; p->virt_y = virt_y; p->virt_w = virt_w; p->virt_h = virt_h; p->cap_x = cap_x; p->cap_y = cap_y; p->dpi = dpi; p->refresh_mhz = refresh_mhz; vgpu_sfence(); /* seqlock: odd -> even (stable) */ vgpu_store_release32(&p->geom_seq, p->geom_seq + 1u); vgpu_sfence(); }