diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml new file mode 100644 index 0000000..80590b9 --- /dev/null +++ b/.gitea/workflows/release.yml @@ -0,0 +1,123 @@ +name: release + +on: + push: + tags: + - 'v*' + +# No deployment-specific values are hardcoded: server/owner/repo come from the CI context, +# the publish token from a secret. Mirrors the sibling vmie release pipeline. +jobs: + # In-guest vgpu producer (Windows, cross-compiled) -> attached to the release. + windows-agent: + runs-on: ubuntu-latest + container: + image: node:20-bookworm-slim + defaults: + run: + shell: bash + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install toolchain + run: | + apt-get update + apt-get install -y --no-install-recommends \ + cmake make zip jq curl ca-certificates gcc-mingw-w64-x86-64 + + - name: Cross-build the agent + run: | + cmake -S . -B build-win -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-w64.cmake + cmake --build build-win -j + + - name: Package + env: + TAG: ${{ github.ref_name }} + run: | + set -euo pipefail + mkdir -p dist/vgpu-streamer + cp build-win/vgpu-streamer.exe dist/vgpu-streamer/ + [ -f LICENSE ] && cp LICENSE dist/vgpu-streamer/ || true + (cd dist && zip -r "vgpu-streamer-${TAG}-win64.zip" vgpu-streamer) + + - name: Attach to release + env: + GITEA_TOKEN: ${{ secrets.PUBLISH_TOKEN }} + SERVER: ${{ github.server_url }} + REPO: ${{ github.repository }} + TAG: ${{ github.ref_name }} + run: | + set -euo pipefail + asset="vgpu-streamer-${TAG}-win64.zip" + api="${SERVER}/api/v1/repos/${REPO}" + auth="Authorization: token ${GITEA_TOKEN}" + + rid=$(curl -sSL -H "$auth" "${api}/releases/tags/${TAG}" | jq -r '.id // empty' || true) + if [ -z "$rid" ]; then + rid=$(curl -fsSL -X POST -H "$auth" -H "Content-Type: application/json" \ + -d "{\"tag_name\":\"${TAG}\",\"name\":\"${TAG}\"}" \ + "${api}/releases" | jq -r '.id') + fi + curl -fsSL -H "$auth" "${api}/releases/${rid}/assets" \ + | jq 
-r ".[] | select(.name==\"${asset}\") | .id" \ + | while read -r aid; do + [ -n "$aid" ] && curl -fsSL -X DELETE -H "$auth" "${api}/releases/${rid}/assets/${aid}" + done + curl -fsSL -X POST -H "$auth" \ + -F "attachment=@dist/${asset};type=application/zip" \ + "${api}/releases/${rid}/assets?name=${asset}" + + # Host package (daemon + libs) -> the Gitea Debian registry. Built against the published + # vmie dev package (external dependency), installed from the same registry. + deb: + runs-on: ubuntu-latest + container: + image: node:20-bookworm-slim + defaults: + run: + shell: bash + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install toolchain + vmie (external dependency) + env: + SERVER: ${{ github.server_url }} + OWNER: ${{ github.repository_owner }} + run: | + set -euo pipefail + apt-get update + apt-get install -y --no-install-recommends \ + cmake make gcc libc6-dev dpkg-dev file ca-certificates curl + echo "deb [trusted=yes] ${SERVER}/api/packages/${OWNER}/debian stable main" \ + > /etc/apt/sources.list.d/gitea.list + apt-get update + apt-get install -y libvmie-dev + + - name: Build package + env: + TAG: ${{ github.ref_name }} + run: make deb VERSION="${TAG#v}" + + - name: Publish to Debian registry + env: + TOKEN: ${{ secrets.PUBLISH_TOKEN }} # requires scope: package:write + SERVER: ${{ github.server_url }} + OWNER: ${{ github.repository_owner }} + DISTRIBUTION: stable + COMPONENT: main + run: | + set -euo pipefail + url="${SERVER}/api/packages/${OWNER}/debian/pool/${DISTRIBUTION}/${COMPONENT}/upload" + auth="Authorization: token ${TOKEN}" + for deb in dist/*.deb; do + # 201 Created = uploaded; 409 Conflict = this version already present (re-run). 
+ code=$(curl -s -o /dev/null -w '%{http_code}' -X PUT -H "$auth" -T "$deb" "$url") + echo "$deb -> HTTP $code" + if [ "$code" != 201 ] && [ "$code" != 409 ]; then + echo "upload failed: $deb (HTTP $code)" >&2 + exit 1 + fi + done diff --git a/.gitignore b/.gitignore index bad1588..0ac2f23 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ compile* Testing/ CLAUDE.md dist/ +!.gitea/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ba2128..630bc2f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.16) -project(vmsig VERSION 0.3.0 LANGUAGES C) +project(vmsig VERSION 0.3.1 LANGUAGES C) set(CMAKE_C_STANDARD 17) set(CMAKE_C_STANDARD_REQUIRED ON) @@ -16,6 +16,37 @@ option(VMSIG_WITH_VMIE "Link real vmie (libvmie.a, PIC) for armed memctx" OFF) # The input driver (vmctl) is ABSORBED in-tree (src/si/input/) — no external flag. set(LIBVMIE_PATH "" CACHE PATH "Path to the vmie library sources (for VMSIG_WITH_VMIE)") +# ---- in-guest vgpu producer (Windows agent, cross-compiled) ----------------- +# The host signaling stack below is Linux-only (epoll/eventfd/timerfd), so a Windows-targeted +# build (mingw toolchain, CMAKE_SYSTEM_NAME=Windows) produces ONLY this agent. Producer and +# host consumer share the ABI header include/vgpu_stream.h, so they version together in one tree. +# cmake -S . 
-B .build-win -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-w64.cmake +if(WIN32) + add_executable(vgpu-streamer + src/si/vgpu-stream/win32/main.c + src/si/vgpu-stream/publish.c + src/si/vgpu-stream/win32/region.c + src/si/vgpu-stream/win32/present.c + src/si/vgpu-stream/win32/cursor.c + src/si/vgpu-stream/win32/geometry.c + src/si/vgpu-stream/win32/capture.c + src/si/vgpu-stream/win32/capture_nvfbc.c + src/si/vgpu-stream/win32/capture_dda.c + src/si/vgpu-stream/win32/capture_gdi.c) + target_include_directories(vgpu-streamer PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/src/si/vgpu-stream/include + ${CMAKE_CURRENT_SOURCE_DIR}/src/si/vgpu-stream/win32 + ${CMAKE_CURRENT_SOURCE_DIR}/third_party) # vendor NvFBC + Windows.h shim + target_compile_definitions(vgpu-streamer PRIVATE CINTERFACE WIN32_LEAN_AND_MEAN=) + target_compile_options(vgpu-streamer PRIVATE + $<$:-O2;-Wall;-Wextra> + $<$:/O2;/W3>) + target_link_libraries(vgpu-streamer PRIVATE d3d11 dxgi dxguid uuid user32 gdi32) + target_link_options(vgpu-streamer PRIVATE $<$:-static;-s>) + return() # a Windows-targeted build is the agent ONLY; the host stack below is skipped +endif() + find_package(Threads REQUIRED) # ---- signaling library ------------------------------------------------------ @@ -58,9 +89,19 @@ target_link_libraries(vmsig PRIVATE Threads::Threads) # package Depends on libvmie). Headers + symbols come from the imported target. 
if(VMSIG_WITH_VMIE)
   add_library(vmie SHARED IMPORTED)
-  set_target_properties(vmie PROPERTIES
-    IMPORTED_LOCATION ${LIBVMIE_PATH}/.build/libvmie.so
-    INTERFACE_INCLUDE_DIRECTORIES ${LIBVMIE_PATH}/include)
+  if(LIBVMIE_PATH)
+    # dev: link against an in-place source-tree build
+    set_target_properties(vmie PROPERTIES
+      IMPORTED_LOCATION ${LIBVMIE_PATH}/.build/libvmie.so
+      INTERFACE_INCLUDE_DIRECTORIES ${LIBVMIE_PATH}/include)
+  else()
+    # CI/system: the installed libvmie-dev package (/usr, or via CMAKE_PREFIX_PATH).
+    # The REQUIRED keyword of find_library()/find_path() only exists since CMake 3.18,
+    # while this project declares cmake_minimum_required(VERSION 3.16); on 3.16/3.17 the
+    # keyword is not recognised and a missing package would not stop the configure step.
+    # Check the results explicitly so the failure is fatal on every supported version.
+    find_library(VMIE_LIBRARY NAMES vmie)
+    find_path(VMIE_INCLUDE_DIR NAMES memmodel.h PATH_SUFFIXES vmie)
+    if(NOT VMIE_LIBRARY OR NOT VMIE_INCLUDE_DIR)
+      message(FATAL_ERROR
+        "libvmie not found: install libvmie-dev, or set LIBVMIE_PATH / CMAKE_PREFIX_PATH")
+    endif()
+    set_target_properties(vmie PROPERTIES
+      IMPORTED_LOCATION ${VMIE_LIBRARY}
+      INTERFACE_INCLUDE_DIRECTORIES ${VMIE_INCLUDE_DIR})
+  endif()
   target_link_libraries(vmsig PRIVATE vmie)
   target_compile_definitions(vmsig PRIVATE VMSIG_WITH_VMIE)
 endif()
diff --git a/Makefile b/Makefile
index 4455490..275d9d8 100644
--- a/Makefile
+++ b/Makefile
@@ -20,12 +20,12 @@ DIST ?= $(CURDIR)/dist
 .PHONY: deb clean
 
 # Armed package: the shipped daemon needs vmie for memctx. vmie stays an external dependency
-# (the package Depends on its runtime; pass DEPENDS to add it).
+# (package Depends on its runtime). vmie is found from a source tree (LIBVMIE_PATH) or, when
+# that is empty, from the installed libvmie-dev (system / CMAKE_PREFIX_PATH) — the CI path.
 deb:
-	@test -n "$(LIBVMIE_PATH)" || { echo "set LIBVMIE_PATH=/path/to/vmie sources (armed memctx)"; exit 1; }
 	rm -rf $(STAGE)
-	cmake -S . -B $(BUILD_DIR) -DCMAKE_BUILD_TYPE=Release -DVMSIG_INSTALL=ON \
-		-DVMSIG_WITH_VMIE=ON -DLIBVMIE_PATH=$(LIBVMIE_PATH)
+	cmake -S . 
-B $(BUILD_DIR) -DCMAKE_BUILD_TYPE=Release -DVMSIG_INSTALL=ON -DVMSIG_WITH_VMIE=ON \ + $(if $(LIBVMIE_PATH),-DLIBVMIE_PATH=$(LIBVMIE_PATH),) cmake --build $(BUILD_DIR) -j DESTDIR=$(STAGE) cmake --install $(BUILD_DIR) --prefix /usr mkdir -p $(STAGE)/DEBIAN diff --git a/cmake/toolchain-mingw-w64.cmake b/cmake/toolchain-mingw-w64.cmake new file mode 100644 index 0000000..315a1b1 --- /dev/null +++ b/cmake/toolchain-mingw-w64.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_PROCESSOR x86_64) +set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) +set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres) +set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32) +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/src/si/vgpu-stream/include/atomic-shim.h b/src/si/vgpu-stream/include/atomic-shim.h new file mode 100644 index 0000000..2576514 --- /dev/null +++ b/src/si/vgpu-stream/include/atomic-shim.h @@ -0,0 +1,52 @@ +#ifndef VGPU_ATOMIC_SHIM_H +#define VGPU_ATOMIC_SHIM_H + +/* atomic-shim.h — x86-TSO memory-order accessors (arch, not OS). + * + * x86-TSO memory-order shim. NO _Atomic in the shared region type: the consumer + * maps the region as raw bytes. Synchronization lives entirely in the producer's + * accessors here. Per-compiler implementation, never exposed in the contract. + * + * On x86_64 every naturally-aligned MOV up to 8 bytes is atomic and stores are + * already release / loads already acquire at the hardware level; the only things + * we must prevent are (1) compiler reordering across the sync point and + * (2) store-buffer visibility delay between the data writes and the publish + * store, for which an explicit SFENCE is used at publish boundaries. 
+ */ + +#include + +#if defined(_MSC_VER) + +#include + +static inline void vgpu_compiler_barrier(void) { _ReadWriteBarrier(); } +static inline void vgpu_sfence(void) { _mm_sfence(); } + +static inline void vgpu_store_release32(volatile uint32_t* p, uint32_t v) { + _ReadWriteBarrier(); + *p = v; +} + +static inline uint32_t vgpu_load_acquire32(const volatile uint32_t* p) { + uint32_t v = *p; + _ReadWriteBarrier(); + return v; +} + +#else /* gcc / mingw / clang */ + +static inline void vgpu_compiler_barrier(void) { __asm__ __volatile__("" ::: "memory"); } +static inline void vgpu_sfence(void) { __asm__ __volatile__("sfence" ::: "memory"); } + +static inline void vgpu_store_release32(volatile uint32_t* p, uint32_t v) { + __atomic_store_n(p, v, __ATOMIC_RELEASE); +} + +static inline uint32_t vgpu_load_acquire32(const volatile uint32_t* p) { + return __atomic_load_n(p, __ATOMIC_ACQUIRE); +} + +#endif + +#endif /* VGPU_ATOMIC_SHIM_H */ diff --git a/src/si/vgpu-stream/include/capture.h b/src/si/vgpu-stream/include/capture.h new file mode 100644 index 0000000..fa0be57 --- /dev/null +++ b/src/si/vgpu-stream/include/capture.h @@ -0,0 +1,28 @@ +#ifndef VGPU_CAPTURE_H +#define VGPU_CAPTURE_H + +/* capture.h — extension seam for capture backends. + * A backend produces desktop frames and submits them to the presenter. This + * header is OS-agnostic: it names backends through an opaque vgpu_ctx* and a + * uniform start contract. A platform layer defines vgpu_ctx and any private + * backend plumbing (see src/stream/win32/capture-win32.h). A future Linux layer + * implements the same seam against its own vgpu_ctx + region/sync/clock. */ + +/* Opaque runtime context, defined by the platform layer (win32: ctx.h). */ +typedef struct vgpu_ctx vgpu_ctx; + +/* Start a capture backend. Returns 1 on success; on success the backend has + * spawned its capture thread(s) (which received ctx) and set ctx->backend / + * ctx->draw_cursor_cap. 
The submit contract: each captured desktop frame is + * handed to the presenter via vgpu_present_submit(). */ +typedef int (*capture_start_fn)(vgpu_ctx* ctx, int fps); + +typedef struct { + const char* name; + capture_start_fn start; +} capture_backend; + +/* Data-driven backend table; the entry point selects by env or availability. */ +const capture_backend* capture_backends(int* count); + +#endif /* VGPU_CAPTURE_H */ diff --git a/src/si/vgpu-stream/include/stream.h b/src/si/vgpu-stream/include/stream.h new file mode 100644 index 0000000..9ea7e29 --- /dev/null +++ b/src/si/vgpu-stream/include/stream.h @@ -0,0 +1,88 @@ +#ifndef VGPU_STREAM_ENGINE_H +#define VGPU_STREAM_ENGINE_H + +/* stream.h — OS-agnostic streaming protocol over the shared contract. + * Declares the neutral region-view handle (resolved contract pointers) and the + * seqlock publish / control-reconcile API. No platform headers: the engine + * operates purely on the contract; a platform layer (e.g. src/stream/win32/) + * builds the region and hands its pointers in as a vgpu_region_view. */ + +#include +#include "vgpu_stream.h" /* contract: producer/control types, slot geometry */ + +/* Neutral view of the live contract: the three resolved blocks the engine + * publishes into / reconciles against. The platform region owns the backing + * memory; this is a borrowed view (no ownership). */ +typedef struct { + vgpu_producer_t* producer; + vgpu_control_t* control; + uint8_t* ring; +} vgpu_region_view; + +/* Resolved view of the control block after a clean generation read. */ +typedef struct { + uint32_t gen; /* even generation that was read (for ctrl_ack) */ + uint32_t desired_state; /* VGPU_CMD_* */ + uint32_t target_fps; + uint32_t draw_cursor; + uint32_t full_frame_req; + uint32_t consumer_tick; + uint32_t attached; +} vgpu_control_view; + +/* Seqlock-publish a tight BGRA frame into the next ring slot. + * Clamps by SLOT_STRIDE (rejects frames that do not fit). 
Writes desc[], + * bumps frame_id, release-stores latest. Returns 0 on publish, 1 if dropped + * (frame too large for a slot). */ +int vgpu_publish_frame(const vgpu_region_view* rv, const uint8_t* tight_bgra, + uint32_t width, uint32_t height, uint64_t timestamp_ns); + +/* Read control block under its generation seqlock (bounded retry). Returns 1 + * on a clean read (view filled), 0 if the writer kept it busy past the limit. */ +int vgpu_control_read(const vgpu_region_view* rv, vgpu_control_view* out); + +/* Echo the applied generation back to the host. */ +void vgpu_publish_ctrl_ack(const vgpu_region_view* rv, uint32_t gen); + +/* Status / lifecycle helpers (cold line). */ +void vgpu_set_status(const vgpu_region_view* rv, uint32_t status); +void vgpu_set_backend(const vgpu_region_view* rv, uint32_t backend); +void vgpu_set_error(const vgpu_region_view* rv, uint32_t error_code); +void vgpu_set_applied_fps(const vgpu_region_view* rv, uint32_t fps); +void vgpu_bump_run_epoch(const vgpu_region_view* rv); +void vgpu_tick_heartbeat(const vgpu_region_view* rv); +void vgpu_publish_full_frame_ack(const vgpu_region_view* rv, uint32_t req); + +/* Publish the on-screen cursor position (host-RO). Position is sensor data and is + * reported independent of control.draw_cursor (host may draw its own overlay even when the + * producer does not composite the cursor). x,y are screen coords (signed; multi-monitor may + * be negative); visible!=0 when the cursor is shown. Packs x|y into one 8-aligned 64-bit + * field (single atomic store) and bumps cursor_seq last. */ +void vgpu_publish_cursor(const vgpu_region_view* rv, int32_t x, int32_t y, uint32_t visible); + +/* Publish Tier-1 cursor shape data (host-RO), written under the same cursor_seq gate as + * vgpu_publish_cursor: call this BEFORE vgpu_publish_cursor so the position publish bumps + * cursor_seq last and gates the whole cursor line consistently. 
hot_x/hot_y are the glyph + * hotspot; gw/gh are glyph dims; cursor_id is a VGPU_CURSOR_ID_* shape identity. */ +void vgpu_publish_cursor_shape(const vgpu_region_view* rv, + uint32_t hot_x, uint32_t hot_y, + uint32_t gw, uint32_t gh, uint32_t cursor_id); + +/* Publish the monotonic timestamp (ns) of the last scene-content change. Single 8-aligned + * atomic store (heartbeat pattern). The producer reports the raw stamp only; the host derives + * "ms idle" by subtracting from its own clock — no behavioural distillation in the producer. */ +void vgpu_publish_content_change(const vgpu_region_view* rv, uint64_t change_ns); + +/* Publish display geometry under the geom_seq seqlock (odd/even, like the frame seqlock). + * Sampled rarely (session start + reactive resample on desc-size delta / backend recreate), + * read by the host with bounded retry. virt_* is the virtual-desktop bbox (interprets negative + * cursor_pos); cap_x/cap_y is the captured output's origin in virtual-desktop coords (the + * captured surface SIZE comes from desc.width/height, not from here). dpi/refresh_mhz describe + * the captured output (96=100% / milli-Hz; 0=unknown). */ +void vgpu_publish_geometry(const vgpu_region_view* rv, + int32_t virt_x, int32_t virt_y, + uint32_t virt_w, uint32_t virt_h, + int32_t cap_x, int32_t cap_y, + uint32_t dpi, uint32_t refresh_mhz); + +#endif /* VGPU_STREAM_ENGINE_H */ diff --git a/src/si/vgpu-stream/publish.c b/src/si/vgpu-stream/publish.c new file mode 100644 index 0000000..d1cf2d1 --- /dev/null +++ b/src/si/vgpu-stream/publish.c @@ -0,0 +1,163 @@ +/* publish.c — OS-agnostic implementation of the streaming protocol. + * Operates purely on the contract through a borrowed vgpu_region_view; no + * platform headers, no runtime context. The x86-TSO ordering lives in the + * atomic shim. 
*/ + +#include +#include "vgpu_stream.h" /* contract types / slot geometry */ +#include "atomic-shim.h" /* x86-TSO memory-order accessors */ +#include "stream.h" /* region-view handle + this API */ + +#define VGPU_CTRL_READ_TRIES 16u + +int vgpu_publish_frame(const vgpu_region_view* rv, const uint8_t* tight_bgra, + uint32_t width, uint32_t height, uint64_t timestamp_ns) { + vgpu_producer_t* p = rv->producer; + + const uint32_t stride = width * 4u; /* tight invariant */ + const uint64_t need = (uint64_t)height * stride; + if (need > VGPU_SLOT_STRIDE) /* clamp by slot size */ + return 1; + + uint32_t cur = vgpu_load_acquire32(&p->latest); + uint32_t S = (cur == VGPU_LATEST_NONE) ? 0u : ((cur + 1u) % VGPU_SLOT_COUNT); + + uint8_t* dst = rv->ring + (size_t)S * VGPU_SLOT_STRIDE; + + /* seqlock: even -> odd (writing) */ + vgpu_store_release32(&p->seq[S], p->seq[S] + 1u); + vgpu_compiler_barrier(); + + /* descriptor (self-describing slot) */ + p->desc[S].width = width; + p->desc[S].height = height; + p->desc[S].stride = stride; + p->desc[S].format = VGPU_FMT_BGRA8888; + p->desc[S].frame_id = p->frame_id + 1u; + p->desc[S].timestamp_ns = timestamp_ns; + + /* pixels (source is already tight) */ + memcpy(dst, tight_bgra, (size_t)need); + + vgpu_sfence(); + /* seqlock: odd -> even (stable) */ + vgpu_store_release32(&p->seq[S], p->seq[S] + 1u); + vgpu_sfence(); + + p->frame_id += 1u; + vgpu_store_release32(&p->latest, S); + return 0; +} + +int vgpu_control_read(const vgpu_region_view* rv, vgpu_control_view* out) { + volatile vgpu_control_t* c = rv->control; + + for (uint32_t t = 0; t < VGPU_CTRL_READ_TRIES; t++) { + uint32_t g0 = vgpu_load_acquire32(&c->ctrl_gen); + if (g0 & 1u) + continue; /* writer in progress */ + vgpu_compiler_barrier(); + + uint32_t desired = c->desired_state; + uint32_t fps = c->target_fps; + uint32_t cursor = c->draw_cursor; + uint32_t ffreq = c->full_frame_req; + uint32_t ctick = c->consumer_tick; + uint32_t att = c->attached; + + 
vgpu_compiler_barrier(); + uint32_t g1 = vgpu_load_acquire32(&c->ctrl_gen); + if (g0 != g1) + continue; /* torn read, retry */ + + out->gen = g0; + out->desired_state = desired; + out->target_fps = fps; + out->draw_cursor = cursor; + out->full_frame_req = ffreq; + out->consumer_tick = ctick; + out->attached = att; + return 1; + } + return 0; +} + +void vgpu_publish_ctrl_ack(const vgpu_region_view* rv, uint32_t gen) { + vgpu_store_release32(&rv->producer->ctrl_ack, gen); +} + +void vgpu_set_status(const vgpu_region_view* rv, uint32_t status) { + vgpu_store_release32(&rv->producer->status, status); +} + +void vgpu_set_backend(const vgpu_region_view* rv, uint32_t backend) { + vgpu_store_release32(&rv->producer->backend, backend); +} + +void vgpu_set_error(const vgpu_region_view* rv, uint32_t error_code) { + vgpu_store_release32(&rv->producer->error_code, error_code); +} + +void vgpu_set_applied_fps(const vgpu_region_view* rv, uint32_t fps) { + vgpu_store_release32(&rv->producer->applied_fps, fps); +} + +void vgpu_bump_run_epoch(const vgpu_region_view* rv) { + vgpu_producer_t* p = rv->producer; + vgpu_store_release32(&p->run_epoch, p->run_epoch + 1u); +} + +void vgpu_tick_heartbeat(const vgpu_region_view* rv) { + /* 64-bit aligned single MOV is atomic on x86_64; barrier orders it */ + rv->producer->heartbeat += 1u; + vgpu_compiler_barrier(); +} + +void vgpu_publish_full_frame_ack(const vgpu_region_view* rv, uint32_t req) { + vgpu_store_release32(&rv->producer->full_frame_ack, req); +} + +void vgpu_publish_cursor(const vgpu_region_view* rv, int32_t x, int32_t y, uint32_t visible) { + vgpu_producer_t* p = rv->producer; + /* pack: low 32 = x, high 32 = y (signed → two's-complement bits) */ + uint64_t packed = ((uint64_t)(uint32_t)y << 32) | (uint64_t)(uint32_t)x; + /* 64-bit aligned single MOV is atomic on x86_64; barrier orders it (heartbeat pattern) */ + p->cursor_pos = packed; + vgpu_store_release32(&p->cursor_visible, visible); + /* publish seq last: its release-store 
gates the pos/visible writes above for the host */ + vgpu_store_release32(&p->cursor_seq, p->cursor_seq + 1u); +} + +void vgpu_publish_cursor_shape(const vgpu_region_view* rv, uint32_t hot_x, uint32_t hot_y, + uint32_t gw, uint32_t gh, uint32_t cursor_id) { + vgpu_producer_t* p = rv->producer; + /* pack 16|16 strictly unsigned (mask low half so no sign bits bleed into the high half). + * No own seq: the following vgpu_publish_cursor bumps cursor_seq last and gates this line. */ + vgpu_store_release32(&p->cursor_hotspot, (hot_y << 16) | (hot_x & 0xFFFFu)); + vgpu_store_release32(&p->cursor_glyph, (gh << 16) | (gw & 0xFFFFu)); + vgpu_store_release32(&p->cursor_id, cursor_id); +} + +void vgpu_publish_content_change(const vgpu_region_view* rv, uint64_t change_ns) { + /* 64-bit aligned single MOV is atomic on x86_64; barrier orders it (heartbeat pattern) */ + rv->producer->content_change_ns = change_ns; + vgpu_compiler_barrier(); +} + +void vgpu_publish_geometry(const vgpu_region_view* rv, int32_t virt_x, int32_t virt_y, + uint32_t virt_w, uint32_t virt_h, + int32_t cap_x, int32_t cap_y, + uint32_t dpi, uint32_t refresh_mhz) { + vgpu_producer_t* p = rv->producer; + /* seqlock: even -> odd (writing) */ + vgpu_store_release32(&p->geom_seq, p->geom_seq + 1u); + vgpu_compiler_barrier(); + p->virt_x = virt_x; p->virt_y = virt_y; + p->virt_w = virt_w; p->virt_h = virt_h; + p->cap_x = cap_x; p->cap_y = cap_y; + p->dpi = dpi; p->refresh_mhz = refresh_mhz; + vgpu_sfence(); + /* seqlock: odd -> even (stable) */ + vgpu_store_release32(&p->geom_seq, p->geom_seq + 1u); + vgpu_sfence(); +} diff --git a/src/si/vgpu-stream/win32/capture-win32.h b/src/si/vgpu-stream/win32/capture-win32.h new file mode 100644 index 0000000..a1939a0 --- /dev/null +++ b/src/si/vgpu-stream/win32/capture-win32.h @@ -0,0 +1,19 @@ +#ifndef VGPU_CAPTURE_WIN32_H +#define VGPU_CAPTURE_WIN32_H + +/* capture-win32.h — private win32 plumbing shared by the capture backends. 
+ * Not part of the OS-agnostic capture seam (see src/stream/include/capture.h): + * it depends on the win32 vgpu_ctx and the thread-handoff convention. */ + +#include "ctx.h" /* win32 vgpu_ctx (full definition) */ + +/* Thread argument passed to capture threads via LPVOID. Heap-allocated by the + * backend's *_start, owned and freed by the thread. Carries the explicit ctx + * (no global state) plus per-backend state pointer. */ +typedef struct { + vgpu_ctx* ctx; + int fps; + void* backend_state; /* opaque per-backend handle block */ +} capture_thread_arg; + +#endif /* VGPU_CAPTURE_WIN32_H */ diff --git a/src/si/vgpu-stream/win32/capture.c b/src/si/vgpu-stream/win32/capture.c new file mode 100644 index 0000000..3efab8a --- /dev/null +++ b/src/si/vgpu-stream/win32/capture.c @@ -0,0 +1,19 @@ +/* capture.c — win32 registration of the capture backends into the neutral + * capture seam's backend table (data-driven; no per-backend branching). */ + +#include "capture.h" /* neutral seam: capture_backend / capture_backends */ +#include "capture_nvfbc.h" +#include "capture_dda.h" +#include "capture_gdi.h" + +/* data-driven backend table; main selects by EYES env or first available */ +static const capture_backend g_backends[] = { + { "nvfbc", nvfbc_start }, + { "dda", dda_start }, + { "gdi", gdi_start }, +}; + +const capture_backend* capture_backends(int* count) { + *count = (int)(sizeof g_backends / sizeof g_backends[0]); + return g_backends; +} diff --git a/src/si/vgpu-stream/win32/capture_dda.c b/src/si/vgpu-stream/win32/capture_dda.c new file mode 100644 index 0000000..74e6d24 --- /dev/null +++ b/src/si/vgpu-stream/win32/capture_dda.c @@ -0,0 +1,198 @@ +#define WIN32_LEAN_AND_MEAN +#define COBJMACROS +#include +#include +#include +#include +#include +#include +#include "capture_dda.h" +#include "capture-win32.h" /* capture_thread_arg (win32-private) */ +#include "present.h" +#include "cursor.h" /* cursor_resolve_id + ctx->cursor compose state */ +#include "geometry.h" /* 
reactive geometry resample on recreate */ +#include "stream.h" /* vgpu_publish_cursor / vgpu_publish_cursor_shape */ + +typedef struct { + ID3D11Device* dev; + ID3D11DeviceContext* dctx; + IDXGIOutput1* out1; + IDXGIOutputDuplication* dup; + ID3D11Texture2D* staging; + UINT W, H; + int32_t cap_x, cap_y; /* captured output origin (virt coords) */ + UINT64 last_mouse_update; /* shape-gate by fi.LastMouseUpdateTime */ + int seeded; /* cold-start position seed done */ +} dda_state; + +/* Source the cursor from the already-fetched frame info (0 syscalls for position) and publish + * it under the cursor_seq gate. Position/visibility come from fi.PointerPosition; the shape is + * re-extracted only when fi.LastMouseUpdateTime changed (shape-gate). Cold start: fi is invalid + * until the mouse first moves (LastMouseUpdateTime==0) — seed the position once via one + * GetCursorInfo, then rely on fi. ctx->cursor compose fields are written under ctx->lock; the + * producer-block publish uses release/seq, no lock. */ +static void dda_source_cursor(vgpu_ctx* ctx, dda_state* st, + const DXGI_OUTDUPL_FRAME_INFO* fi) { + int vis = fi->PointerPosition.Visible ? 
1 : 0; + int x, y; + UINT64 upd = (UINT64)fi->LastMouseUpdateTime.QuadPart; + + if (!st->seeded && upd == 0) { + CURSORINFO ci; ci.cbSize = sizeof ci; + if (GetCursorInfo(&ci)) { + vis = (ci.flags & CURSOR_SHOWING) != 0; + x = ci.ptScreenPos.x; y = ci.ptScreenPos.y; + } else { + x = ctx->cursor.x; y = ctx->cursor.y; + } + st->seeded = 1; + } else { + x = fi->PointerPosition.Position.x; + y = fi->PointerPosition.Position.y; + if (upd != 0) st->seeded = 1; + } + + /* shape-gate: re-extract only when the mouse-update stamp advanced */ + if (upd != 0 && upd != st->last_mouse_update) { + CURSORINFO ci; ci.cbSize = sizeof ci; + if (GetCursorInfo(&ci) && ci.hCursor && ci.hCursor != ctx->cursor.handle) { + EnterCriticalSection(&ctx->lock); + cursor_apply_shape(ctx, ci.hCursor); + LeaveCriticalSection(&ctx->lock); + } + st->last_mouse_update = upd; + } + + EnterCriticalSection(&ctx->lock); + ctx->cursor.visible = vis; + ctx->cursor.x = x; ctx->cursor.y = y; + uint32_t hx = (uint32_t)ctx->cursor.hot_x, hy = (uint32_t)ctx->cursor.hot_y; + uint32_t gw = (uint32_t)ctx->cursor.gw, gh = (uint32_t)ctx->cursor.gh; + uint32_t cid = (uint32_t)ctx->cursor.cursor_id; + LeaveCriticalSection(&ctx->lock); + + vgpu_publish_cursor_shape(&ctx->view, hx, hy, gw, gh, cid); + vgpu_publish_cursor(&ctx->view, (int32_t)x, (int32_t)y, (uint32_t)vis); +} + +static DWORD WINAPI dda_thread(LPVOID param) { + capture_thread_arg* arg = (capture_thread_arg*)param; + vgpu_ctx* ctx = arg->ctx; + dda_state* st = (dda_state*)arg->backend_state; + free(arg); + + for (;;) { + DXGI_OUTDUPL_FRAME_INFO fi; + IDXGIResource* res = NULL; + HRESULT hr = st->dup->lpVtbl->AcquireNextFrame(st->dup, 1000, &fi, &res); + if (hr == DXGI_ERROR_WAIT_TIMEOUT) continue; + if (hr == DXGI_ERROR_ACCESS_LOST) { + if (st->dup) { st->dup->lpVtbl->Release(st->dup); st->dup = NULL; } + if (FAILED(st->out1->lpVtbl->DuplicateOutput(st->out1, + (IUnknown*)st->dev, &st->dup))) { + Sleep(200); + } else { + /* display config may have changed 
across the access loss → resample geometry */ + geometry_sample_and_publish(ctx, st->cap_x, st->cap_y); + } + continue; + } + if (FAILED(hr)) { Sleep(50); continue; } + + dda_source_cursor(ctx, st, &fi); + + ID3D11Texture2D* tex = NULL; + res->lpVtbl->QueryInterface(res, &IID_ID3D11Texture2D, (void**)&tex); + if (tex) { + st->dctx->lpVtbl->CopyResource(st->dctx, + (ID3D11Resource*)st->staging, (ID3D11Resource*)tex); + D3D11_MAPPED_SUBRESOURCE m; + if (SUCCEEDED(st->dctx->lpVtbl->Map(st->dctx, + (ID3D11Resource*)st->staging, 0, D3D11_MAP_READ, 0, &m))) { + vgpu_present_submit(ctx, (const uint8_t*)m.pData, st->W, st->H, m.RowPitch); + st->dctx->lpVtbl->Unmap(st->dctx, (ID3D11Resource*)st->staging, 0); + } + tex->lpVtbl->Release(tex); + } + if (res) res->lpVtbl->Release(res); + st->dup->lpVtbl->ReleaseFrame(st->dup); + } + return 0; /* unreachable; satisfies -Wreturn-type */ +} + +int dda_start(vgpu_ctx* ctx, int fps) { + (void)fps; + dda_state* st = (dda_state*)calloc(1, sizeof *st); + if (!st) return 0; + + D3D_FEATURE_LEVEL fl; + if (FAILED(D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, 0, NULL, 0, + D3D11_SDK_VERSION, &st->dev, &fl, &st->dctx))) { + fprintf(stderr, "eyes(dda): D3D11CreateDevice failed\n"); + goto fail; + } + + IDXGIDevice* dxgiDev = NULL; + IDXGIAdapter* adapter = NULL; + IDXGIOutput* output = NULL; + st->dev->lpVtbl->QueryInterface(st->dev, &IID_IDXGIDevice, (void**)&dxgiDev); + if (dxgiDev) dxgiDev->lpVtbl->GetAdapter(dxgiDev, &adapter); + if (adapter) adapter->lpVtbl->EnumOutputs(adapter, 0, &output); + if (output) { + DXGI_OUTPUT_DESC od; + if (SUCCEEDED(output->lpVtbl->GetDesc(output, &od))) { + st->cap_x = (int32_t)od.DesktopCoordinates.left; + st->cap_y = (int32_t)od.DesktopCoordinates.top; + } + output->lpVtbl->QueryInterface(output, &IID_IDXGIOutput1, (void**)&st->out1); + } + + if (output) output->lpVtbl->Release(output); + if (adapter) adapter->lpVtbl->Release(adapter); + if (dxgiDev) dxgiDev->lpVtbl->Release(dxgiDev); + + if 
(!st->out1 || FAILED(st->out1->lpVtbl->DuplicateOutput(st->out1, + (IUnknown*)st->dev, &st->dup))) { + fprintf(stderr, "eyes(dda): DuplicateOutput failed\n"); + goto fail; + } + + DXGI_OUTDUPL_DESC dd; + st->dup->lpVtbl->GetDesc(st->dup, &dd); + st->W = dd.ModeDesc.Width; + st->H = dd.ModeDesc.Height; + + D3D11_TEXTURE2D_DESC td; memset(&td, 0, sizeof td); + td.Width = st->W; td.Height = st->H; td.MipLevels = 1; td.ArraySize = 1; + td.Format = DXGI_FORMAT_B8G8R8A8_UNORM; td.SampleDesc.Count = 1; + td.Usage = D3D11_USAGE_STAGING; td.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + if (FAILED(st->dev->lpVtbl->CreateTexture2D(st->dev, &td, NULL, &st->staging))) { + fprintf(stderr, "eyes(dda): CreateTexture2D failed\n"); + goto fail; + } + + capture_thread_arg* arg = (capture_thread_arg*)malloc(sizeof *arg); + if (!arg) goto fail; + arg->ctx = ctx; arg->fps = fps; arg->backend_state = st; + + ctx->backend = VGPU_BK_DDA; + ctx->draw_cursor_cap = 1; /* DDA frames are content-only → presenter draws cursor */ + + HANDLE t = CreateThread(NULL, 0, dda_thread, arg, 0, NULL); + if (!t) { free(arg); goto fail; } + CloseHandle(t); + + fprintf(stderr, "eyes(dda): desktop %ux%u (content-only; cursor by presenter)\n", + st->W, st->H); + return 1; + +fail: + /* release any COM objects created before the failure (no ref leaks) */ + if (st->staging) st->staging->lpVtbl->Release(st->staging); + if (st->dup) st->dup->lpVtbl->Release(st->dup); + if (st->out1) st->out1->lpVtbl->Release(st->out1); + if (st->dctx) st->dctx->lpVtbl->Release(st->dctx); + if (st->dev) st->dev->lpVtbl->Release(st->dev); + free(st); + return 0; +} diff --git a/src/si/vgpu-stream/win32/capture_dda.h b/src/si/vgpu-stream/win32/capture_dda.h new file mode 100644 index 0000000..8031c26 --- /dev/null +++ b/src/si/vgpu-stream/win32/capture_dda.h @@ -0,0 +1,10 @@ +#ifndef VGPU_CAPTURE_DDA_H +#define VGPU_CAPTURE_DDA_H + +/* capture_dda.h — DXGI Desktop Duplication capture backend (win32). 
*/ + +#include "ctx.h" /* win32 vgpu_ctx */ + +int dda_start(vgpu_ctx* ctx, int fps); + +#endif /* VGPU_CAPTURE_DDA_H */ diff --git a/src/si/vgpu-stream/win32/capture_gdi.c b/src/si/vgpu-stream/win32/capture_gdi.c new file mode 100644 index 0000000..71df921 --- /dev/null +++ b/src/si/vgpu-stream/win32/capture_gdi.c @@ -0,0 +1,79 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include +#include +#include +#include "capture_gdi.h" +#include "capture-win32.h" /* capture_thread_arg (win32-private) */ +#include "present.h" +#include "cursor.h" /* cursor_sample (position+shape+id) for compose+publish */ +#include "geometry.h" /* reactive geometry resample on capture-size change */ +#include "stream.h" /* vgpu_publish_cursor / vgpu_publish_cursor_shape */ + +static DWORD WINAPI gdi_thread(LPVOID param) { + capture_thread_arg* arg = (capture_thread_arg*)param; + vgpu_ctx* ctx = arg->ctx; + int fps = arg->fps > 0 ? arg->fps : 30; + free(arg); + + HDC screen = GetDC(NULL); + HDC mem = CreateCompatibleDC(screen); + HBITMAP dib = NULL; + void* bits = NULL; + int W = 0, H = 0; + const DWORD interval = (DWORD)(1000 / fps); + + for (;;) { + int w = GetSystemMetrics(SM_CXSCREEN), h = GetSystemMetrics(SM_CYSCREEN); + if (w <= 0 || h <= 0) { Sleep(200); continue; } + if (w != W || h != H || !dib) { + if (dib) DeleteObject(dib); + BITMAPINFO bi; memset(&bi, 0, sizeof bi); + bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + bi.bmiHeader.biWidth = w; bi.bmiHeader.biHeight = -h; + bi.bmiHeader.biPlanes = 1; bi.bmiHeader.biBitCount = 32; + bi.bmiHeader.biCompression = BI_RGB; + dib = CreateDIBSection(screen, &bi, DIB_RGB_COLORS, &bits, NULL, 0); + if (!dib) { + fprintf(stderr, "eyes(gdi): CreateDIBSection %dx%d failed\n", w, h); + Sleep(200); continue; + } + SelectObject(mem, dib); + W = w; H = h; + fprintf(stderr, "eyes(gdi): desktop %dx%d (BitBlt; cursor by presenter)\n", W, H); + /* capture size changed (primary at origin (0,0)) → resample geometry */ + geometry_sample_and_publish(ctx, 0, 
0); + } + if (BitBlt(mem, 0, 0, W, H, screen, 0, 0, SRCCOPY)) + vgpu_present_submit(ctx, (const uint8_t*)bits, + (uint32_t)W, (uint32_t)H, (uint32_t)W * 4u); + + /* source the cursor for present's compositing (under ctx->lock) and publish it */ + EnterCriticalSection(&ctx->lock); + cursor_sample(ctx); + uint32_t hx = (uint32_t)ctx->cursor.hot_x, hy = (uint32_t)ctx->cursor.hot_y; + uint32_t gw = (uint32_t)ctx->cursor.gw, gh = (uint32_t)ctx->cursor.gh; + uint32_t cid = (uint32_t)ctx->cursor.cursor_id; + int32_t cx = (int32_t)ctx->cursor.x, cy = (int32_t)ctx->cursor.y; + uint32_t cvis = (uint32_t)(ctx->cursor.visible != 0); + LeaveCriticalSection(&ctx->lock); + vgpu_publish_cursor_shape(&ctx->view, hx, hy, gw, gh, cid); + vgpu_publish_cursor(&ctx->view, cx, cy, cvis); + + Sleep(interval); + } + return 0; /* unreachable; satisfies -Wreturn-type */ +} + +int gdi_start(vgpu_ctx* ctx, int fps) { + ctx->backend = VGPU_BK_GDI; + ctx->draw_cursor_cap = 1; /* GDI BitBlt excludes cursor → presenter draws it */ + + capture_thread_arg* arg = (capture_thread_arg*)malloc(sizeof *arg); + if (!arg) return 0; + arg->ctx = ctx; arg->fps = fps; arg->backend_state = NULL; + HANDLE t = CreateThread(NULL, 0, gdi_thread, arg, 0, NULL); + if (!t) { free(arg); return 0; } + CloseHandle(t); + return 1; +} diff --git a/src/si/vgpu-stream/win32/capture_gdi.h b/src/si/vgpu-stream/win32/capture_gdi.h new file mode 100644 index 0000000..dae936a --- /dev/null +++ b/src/si/vgpu-stream/win32/capture_gdi.h @@ -0,0 +1,10 @@ +#ifndef VGPU_CAPTURE_GDI_H +#define VGPU_CAPTURE_GDI_H + +/* capture_gdi.h — GDI BitBlt capture backend (win32, universal fallback). 
*/ + +#include "ctx.h" /* win32 vgpu_ctx */ + +int gdi_start(vgpu_ctx* ctx, int fps); + +#endif /* VGPU_CAPTURE_GDI_H */ diff --git a/src/si/vgpu-stream/win32/capture_nvfbc.c b/src/si/vgpu-stream/win32/capture_nvfbc.c new file mode 100644 index 0000000..0086c22 --- /dev/null +++ b/src/si/vgpu-stream/win32/capture_nvfbc.c @@ -0,0 +1,162 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include +#include +#include +#include "capture_nvfbc.h" +#include "capture-win32.h" /* capture_thread_arg (win32-private) */ +#include "present.h" +#include "cursor.h" /* cursor_apply_shape / ctx->cursor */ +#include "geometry.h" /* reactive geometry resample on recreate */ +#include "stream.h" /* vgpu_publish_cursor / vgpu_publish_cursor_shape */ +#include "nvfbc_tosys_c.h" + +typedef struct { + NvFBCToSys_c* fbc; + void* buf; + NvFBC_CreateFunctionExType create; + HCURSOR last_handle; /* shape-gate by HCURSOR change */ +} nvfbc_state; + +/* Source the cursor for an NvFBC grab and publish it under the cursor_seq gate. NvFBC reports + * only HW-cursor visibility (gi.bHWMouseVisible); position is not exposed, so one GetCursorInfo + * per frame supplies x/y (the minimum possible). Shape is re-extracted only on HCURSOR change. + * NvFBC composites the cursor itself (draw_cursor_cap==0) → present never reads ctx->cursor for + * drawing, so no ctx->lock is required around the compose fields here. + * gi.bProtectedContent / gi.dwSourcePID are available but out of scope (not in the contract). */ +static void nvfbc_source_cursor(vgpu_ctx* ctx, nvfbc_state* st, + const NvFBCFrameGrabInfo* gi) { + CURSORINFO ci; ci.cbSize = sizeof ci; + int vis = gi->bHWMouseVisible ? 
1 : 0; + int x = ctx->cursor.x, y = ctx->cursor.y; + if (GetCursorInfo(&ci)) { + x = ci.ptScreenPos.x; y = ci.ptScreenPos.y; + if (ci.hCursor && ci.hCursor != st->last_handle) { + cursor_apply_shape(ctx, ci.hCursor); + st->last_handle = ci.hCursor; + } + } + ctx->cursor.visible = vis; ctx->cursor.x = x; ctx->cursor.y = y; + + vgpu_publish_cursor_shape(&ctx->view, + (uint32_t)ctx->cursor.hot_x, (uint32_t)ctx->cursor.hot_y, + (uint32_t)ctx->cursor.gw, (uint32_t)ctx->cursor.gh, + (uint32_t)ctx->cursor.cursor_id); + vgpu_publish_cursor(&ctx->view, (int32_t)x, (int32_t)y, (uint32_t)vis); +} + +static NvFBCToSys_c* nvfbc_create(NvFBC_CreateFunctionExType pCreate, void** ppBuf) { + NvFBCCreateParams cp; memset(&cp, 0, sizeof cp); + cp.dwVersion = NVFBC_CREATE_PARAMS_VER; + cp.dwInterfaceType = NVFBC_TO_SYS_C; + cp.dwAdapterIdx = 0; + if (pCreate(&cp) != NVFBC_SUCCESS || !cp.pNvFBC) return NULL; + + NvFBCToSys_c* fbc = (NvFBCToSys_c*)cp.pNvFBC; + *ppBuf = NULL; + + NVFBC_TOSYS_SETUP_PARAMS_C sp; memset(&sp, 0, sizeof sp); + sp.dwVersion = NVFBC_TOSYS_SETUP_PARAMS_VER_C; + sp.bits = 1u; /* bWithHWCursor = 1 (bit 0) */ + sp.eMode = NVFBC_TOSYS_ARGB; + sp.ppBuffer = ppBuf; + if (fbc->lpVtbl->NvFBCToSysSetUp(fbc, &sp) != NVFBC_SUCCESS || !*ppBuf) { + fbc->lpVtbl->NvFBCToSysRelease(fbc); + return NULL; + } + return fbc; +} + +static DWORD WINAPI nvfbc_thread(LPVOID param) { + capture_thread_arg* arg = (capture_thread_arg*)param; + vgpu_ctx* ctx = arg->ctx; + nvfbc_state* st = (nvfbc_state*)arg->backend_state; + free(arg); + + NvFBCToSys_c* fbc = st->fbc; + void* buf = st->buf; + + for (;;) { + NvFBCFrameGrabInfo gi; memset(&gi, 0, sizeof gi); + NVFBC_TOSYS_GRAB_FRAME_PARAMS_C gp; memset(&gp, 0, sizeof gp); + gp.dwVersion = NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER_C; + gp.dwFlags = NVFBC_TOSYS_WAIT_WITH_TIMEOUT_C; + gp.dwWaitTime = 1000; + gp.eGMode = NVFBC_TOSYS_SOURCEMODE_FULL; + gp.pNvFBCFrameGrabInfo = &gi; + + NVFBCRESULT r = fbc->lpVtbl->NvFBCToSysGrabFrame(fbc, &gp); + if (r != 
NVFBC_SUCCESS) { + if (r == NVFBC_ERROR_INVALIDATED_SESSION || gi.bMustRecreate) { + fprintf(stderr, "eyes(nvfbc): session invalidated (r=%d), recreating\n", (int)r); + fbc->lpVtbl->NvFBCToSysRelease(fbc); + fbc = NULL; + while (!(fbc = nvfbc_create(st->create, &buf))) Sleep(200); + st->fbc = fbc; st->buf = buf; + /* grab session was recreated → display config may have changed: resample */ + geometry_sample_and_publish(ctx, 0, 0); + } else { + Sleep(50); + } + continue; + } + if (gi.dwWidth && gi.dwHeight) + vgpu_present_submit(ctx, (const uint8_t*)buf, + gi.dwWidth, gi.dwHeight, gi.dwBufferWidth * 4u); + nvfbc_source_cursor(ctx, st, &gi); + } + return 0; /* unreachable; satisfies -Wreturn-type */ +} + +int nvfbc_start(vgpu_ctx* ctx, int fps) { + (void)fps; + HMODULE lib = LoadLibraryA("NvFBC64.dll"); + if (!lib) { + fprintf(stderr, "eyes(nvfbc): LoadLibrary NvFBC64.dll failed (%lu)\n", GetLastError()); + return 0; + } + NvFBC_SetGlobalFlagsType pSetFlags = (NvFBC_SetGlobalFlagsType)(void*)GetProcAddress(lib, "NvFBC_SetGlobalFlags"); + NvFBC_EnableFunctionType pEnable = (NvFBC_EnableFunctionType)(void*)GetProcAddress(lib, "NvFBC_Enable"); + NvFBC_CreateFunctionExType pCreate = (NvFBC_CreateFunctionExType)(void*)GetProcAddress(lib, "NvFBC_CreateEx"); + NvFBC_GetStatusExFunctionType pStatus = (NvFBC_GetStatusExFunctionType)(void*)GetProcAddress(lib, "NvFBC_GetStatusEx"); + if (!pEnable || !pCreate || !pStatus) { + fprintf(stderr, "eyes(nvfbc): missing exports\n"); + return 0; + } + if (pSetFlags) pSetFlags(NVFBC_GLOBAL_FLAGS_NO_INITIAL_REFRESH); + if (pEnable(NVFBC_STATE_ENABLE) != NVFBC_SUCCESS) { + fprintf(stderr, "eyes(nvfbc): NvFBC_Enable failed\n"); + return 0; + } + NvFBCStatusEx stx; memset(&stx, 0, sizeof stx); + stx.dwVersion = NVFBC_STATUS_VER; stx.dwAdapterIdx = 0; + if (pStatus(&stx) != NVFBC_SUCCESS || !stx.bIsCapturePossible) { + fprintf(stderr, "eyes(nvfbc): capture NOT possible on this GPU/license\n"); + return 0; + } + void* buf = NULL; + 
NvFBCToSys_c* fbc = nvfbc_create(pCreate, &buf); + if (!fbc) { + fprintf(stderr, "eyes(nvfbc): CreateEx/ToSysSetUp failed\n"); + return 0; + } + + nvfbc_state* st = (nvfbc_state*)malloc(sizeof *st); + if (!st) { fbc->lpVtbl->NvFBCToSysRelease(fbc); return 0; } + st->fbc = fbc; st->buf = buf; st->create = pCreate; st->last_handle = NULL; + + capture_thread_arg* arg = (capture_thread_arg*)malloc(sizeof *arg); + if (!arg) { fbc->lpVtbl->NvFBCToSysRelease(fbc); free(st); return 0; } + arg->ctx = ctx; arg->fps = fps; arg->backend_state = st; + + ctx->backend = VGPU_BK_NVFBC; + ctx->draw_cursor_cap = 0; /* NvFBC composites HW cursor itself */ + + HANDLE t = CreateThread(NULL, 0, nvfbc_thread, arg, 0, NULL); + if (!t) { fbc->lpVtbl->NvFBCToSysRelease(fbc); free(st); free(arg); return 0; } + CloseHandle(t); + + fprintf(stderr, "eyes(nvfbc): session up (ToSys ARGB/BGRA), iface=0x%lx\n", + (unsigned long)stx.dwNvFBCVersion); + return 1; +} diff --git a/src/si/vgpu-stream/win32/capture_nvfbc.h b/src/si/vgpu-stream/win32/capture_nvfbc.h new file mode 100644 index 0000000..d1a5dcf --- /dev/null +++ b/src/si/vgpu-stream/win32/capture_nvfbc.h @@ -0,0 +1,10 @@ +#ifndef VGPU_CAPTURE_NVFBC_H +#define VGPU_CAPTURE_NVFBC_H + +/* capture_nvfbc.h — NVIDIA NvFBC ToSys capture backend (win32). */ + +#include "ctx.h" /* win32 vgpu_ctx */ + +int nvfbc_start(vgpu_ctx* ctx, int fps); + +#endif /* VGPU_CAPTURE_NVFBC_H */ diff --git a/src/si/vgpu-stream/win32/ctx.h b/src/si/vgpu-stream/win32/ctx.h new file mode 100644 index 0000000..977604c --- /dev/null +++ b/src/si/vgpu-stream/win32/ctx.h @@ -0,0 +1,66 @@ +#ifndef VGPU_CTX_H +#define VGPU_CTX_H + +/* ctx.h — win32 runtime context. Embeds the neutral region-view (the engine's + * borrowed handle onto the contract) alongside win32-owned staging/cursor/sync + * state. Object = memory: ctx owns the staging arena and cursor state. 
/* Cursor sample/compose state (GDI). Fixed buffers, no heap.
 * Filled by the cursor sampling/extraction code and read by cursor_draw; the
 * three pixel buffers point into the ctx arena carved out in vgpu_present_init. */
typedef struct {
    HCURSOR  handle;      /* HCURSOR whose shape was last extracted (shape-change gate) */
    int      visible;     /* non-zero when the cursor should be shown/composited */
    int      x, y;        /* cursor position as reported by GetCursorInfo (ptScreenPos) */
    int      hot_x, hot_y;/* hotspot offset inside the glyph (from ICONINFO) */
    int      gw, gh;      /* glyph dims; gw == 0 means "no glyph available" */
    int      cursor_id;   /* VGPU_CURSOR_ID_* resolved on shape change */
    int      mono;        /* 1 = AND/XOR monochrome cursor */
    uint8_t* bgra;        /* color cursor BGRA (arena), 4 bytes per glyph pixel */
    uint8_t* and_mask;    /* mono AND (arena), one 0/1 byte per glyph pixel */
    uint8_t* xor_mask;    /* mono XOR (arena), one 0/1 byte per glyph pixel */
} vgpu_cursor_t;
*/ + uint32_t backend; /* VGPU_BK_* chosen */ + int draw_cursor_cap; /* backend capability: does it need SW cursor */ +} vgpu_ctx; + +#endif /* VGPU_CTX_H */ diff --git a/src/si/vgpu-stream/win32/cursor.c b/src/si/vgpu-stream/win32/cursor.c new file mode 100644 index 0000000..5941cb1 --- /dev/null +++ b/src/si/vgpu-stream/win32/cursor.c @@ -0,0 +1,175 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include +#include "cursor.h" +#include "vgpu_stream.h" /* VGPU_CURSOR_ID_* */ + +/* Max supported cursor glyph; buffers are pre-arena'd in ctx (no heap here). */ +#define VGPU_CURSOR_MAX 256 + +static void read_mono(HBITMAP hbm, int w, int h, uint8_t* out /* w*h */) { + int stride = ((w + 31) / 32) * 4; + /* bounded scratch on stack: max (256/32*4)=32 bytes/row * 512 rows */ + static const int kMaxRows = VGPU_CURSOR_MAX * 2; + uint8_t raw[(VGPU_CURSOR_MAX / 32 * 4) * (VGPU_CURSOR_MAX * 2)]; + if (h > kMaxRows) h = kMaxRows; + if ((size_t)stride * h > sizeof raw) return; + + struct { BITMAPINFOHEADER hdr; RGBQUAD pal[2]; } bi; + memset(&bi, 0, sizeof bi); + bi.hdr.biSize = sizeof(BITMAPINFOHEADER); + bi.hdr.biWidth = w; bi.hdr.biHeight = -h; + bi.hdr.biPlanes = 1; bi.hdr.biBitCount = 1; bi.hdr.biCompression = BI_RGB; + HDC dc = GetDC(NULL); + GetDIBits(dc, hbm, 0, h, raw, (BITMAPINFO*)&bi, DIB_RGB_COLORS); + ReleaseDC(NULL, dc); + + memset(out, 0, (size_t)w * h); + for (int y = 0; y < h; y++) + for (int x = 0; x < w; x++) { + int bit = 7 - (x & 7); + out[(size_t)y * w + x] = (raw[(size_t)y * stride + (x >> 3)] >> bit) & 1u; + } +} + +static void extract(vgpu_ctx* ctx, HCURSOR hc) { + vgpu_cursor_t* cur = &ctx->cursor; + cur->gw = cur->gh = 0; + cur->mono = 0; + + ICONINFO ii; + if (!GetIconInfo(hc, &ii)) return; + cur->hot_x = (int)ii.xHotspot; + cur->hot_y = (int)ii.yHotspot; + + if (ii.hbmColor) { + BITMAP bm; GetObject(ii.hbmColor, sizeof bm, &bm); + int w = bm.bmWidth, h = bm.bmHeight; + if (w > VGPU_CURSOR_MAX) w = VGPU_CURSOR_MAX; + if (h > VGPU_CURSOR_MAX) h = 
VGPU_CURSOR_MAX; + BITMAPINFO bi; memset(&bi, 0, sizeof bi); + bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + bi.bmiHeader.biWidth = w; bi.bmiHeader.biHeight = -h; + bi.bmiHeader.biPlanes = 1; bi.bmiHeader.biBitCount = 32; + bi.bmiHeader.biCompression = BI_RGB; + memset(cur->bgra, 0, (size_t)w * h * 4); + HDC dc = GetDC(NULL); + GetDIBits(dc, ii.hbmColor, 0, h, cur->bgra, &bi, DIB_RGB_COLORS); + ReleaseDC(NULL, dc); + cur->gw = w; cur->gh = h; cur->mono = 0; + + int has_alpha = 0; + for (size_t i = 0; i < (size_t)w * h; i++) + if (cur->bgra[i * 4 + 3]) { has_alpha = 1; break; } + if (!has_alpha && ii.hbmMask) { + read_mono(ii.hbmMask, w, h, cur->and_mask); + for (size_t i = 0; i < (size_t)w * h; i++) + cur->bgra[i * 4 + 3] = cur->and_mask[i] ? 0 : 255; + } + } else if (ii.hbmMask) { + BITMAP bm; GetObject(ii.hbmMask, sizeof bm, &bm); + int w = bm.bmWidth, h = bm.bmHeight / 2; + if (w > VGPU_CURSOR_MAX) w = VGPU_CURSOR_MAX; + if (h > VGPU_CURSOR_MAX) h = VGPU_CURSOR_MAX; + /* read both halves into a scratch laid over xor_mask region: reuse + * and_mask for AND and xor_mask for XOR; read full into a stack pass */ + static uint8_t both[VGPU_CURSOR_MAX * VGPU_CURSOR_MAX * 2]; + read_mono(ii.hbmMask, w, bm.bmHeight, both); + for (int y = 0; y < h; y++) + for (int x = 0; x < w; x++) { + cur->and_mask[(size_t)y * w + x] = both[(size_t)y * w + x]; + cur->xor_mask[(size_t)y * w + x] = both[(size_t)(y + h) * w + x]; + } + cur->gw = w; cur->gh = h; cur->mono = 1; + } + if (ii.hbmColor) DeleteObject(ii.hbmColor); + if (ii.hbmMask) DeleteObject(ii.hbmMask); +} + +int cursor_resolve_id(HCURSOR hc) { + /* System-cursor table loaded once (IDC_* are stable per session). Lazy: built on first + * call, then a linear handle compare. UNKNOWN for custom/unrecognized cursors. 
*/ + static const struct { LPCTSTR idc; int id; } kSpec[] = { + { IDC_ARROW, VGPU_CURSOR_ID_ARROW }, + { IDC_IBEAM, VGPU_CURSOR_ID_IBEAM }, + { IDC_WAIT, VGPU_CURSOR_ID_WAIT }, + { IDC_CROSS, VGPU_CURSOR_ID_CROSS }, + { IDC_HAND, VGPU_CURSOR_ID_HAND }, + { IDC_SIZENS, VGPU_CURSOR_ID_SIZENS }, + { IDC_SIZEWE, VGPU_CURSOR_ID_SIZEWE }, + { IDC_SIZENWSE, VGPU_CURSOR_ID_SIZENWSE }, + { IDC_SIZENESW, VGPU_CURSOR_ID_SIZENESW }, + { IDC_SIZEALL, VGPU_CURSOR_ID_SIZEALL }, + { IDC_NO, VGPU_CURSOR_ID_NO }, + { IDC_APPSTARTING, VGPU_CURSOR_ID_APPSTARTING }, + }; + enum { N = (int)(sizeof kSpec / sizeof kSpec[0]) }; + static HCURSOR cache[N]; + static int loaded = 0; + if (!loaded) { + for (int i = 0; i < N; i++) cache[i] = LoadCursor(NULL, kSpec[i].idc); + loaded = 1; + } + if (!hc) return VGPU_CURSOR_ID_UNKNOWN; + for (int i = 0; i < N; i++) + if (cache[i] == hc) return kSpec[i].id; + return VGPU_CURSOR_ID_UNKNOWN; +} + +void cursor_apply_shape(vgpu_ctx* ctx, HCURSOR hc) { + extract(ctx, hc); + ctx->cursor.cursor_id = cursor_resolve_id(hc); + ctx->cursor.handle = hc; +} + +int cursor_sample(vgpu_ctx* ctx) { + vgpu_cursor_t* cur = &ctx->cursor; + CURSORINFO ci; ci.cbSize = sizeof ci; + if (!GetCursorInfo(&ci)) { + int changed = cur->visible; + cur->visible = 0; + return changed; + } + int vis = (ci.flags & CURSOR_SHOWING) != 0; + int x = ci.ptScreenPos.x, y = ci.ptScreenPos.y; + int changed = (vis != cur->visible) || (x != cur->x) || (y != cur->y) + || (ci.hCursor != cur->handle); + if (vis && ci.hCursor && ci.hCursor != cur->handle) { + extract(ctx, ci.hCursor); + cur->cursor_id = cursor_resolve_id(ci.hCursor); + cur->handle = ci.hCursor; + } + cur->visible = vis; cur->x = x; cur->y = y; + return changed; +} + +void cursor_draw(vgpu_ctx* ctx, uint8_t* dst, uint32_t W, uint32_t H) { + vgpu_cursor_t* cur = &ctx->cursor; + if (!cur->visible || cur->gw == 0) return; + int ox = cur->x - cur->hot_x, oy = cur->y - cur->hot_y; + for (int gy = 0; gy < cur->gh; gy++) { + int dy = oy + 
gy; + if (dy < 0 || dy >= (int)H) continue; + for (int gx = 0; gx < cur->gw; gx++) { + int dx = ox + gx; + if (dx < 0 || dx >= (int)W) continue; + uint8_t* d = dst + ((size_t)dy * W + dx) * 4; + if (!cur->mono) { + const uint8_t* s = &cur->bgra[((size_t)gy * cur->gw + gx) * 4]; + uint32_t a = s[3]; + if (!a) continue; + d[0] = (uint8_t)((s[0] * a + d[0] * (255 - a)) / 255); + d[1] = (uint8_t)((s[1] * a + d[1] * (255 - a)) / 255); + d[2] = (uint8_t)((s[2] * a + d[2] * (255 - a)) / 255); + } else { + int a = cur->and_mask[(size_t)gy * cur->gw + gx]; + int xr = cur->xor_mask[(size_t)gy * cur->gw + gx]; + if (a == 0 && xr == 0) { d[0] = d[1] = d[2] = 0; } + else if (a == 0 && xr == 1) { d[0] = d[1] = d[2] = 255; } + else if (a == 1 && xr == 1) { d[0] = (uint8_t)(255 - d[0]); + d[1] = (uint8_t)(255 - d[1]); + d[2] = (uint8_t)(255 - d[2]); } + } + } + } +} diff --git a/src/si/vgpu-stream/win32/cursor.h b/src/si/vgpu-stream/win32/cursor.h new file mode 100644 index 0000000..81d4842 --- /dev/null +++ b/src/si/vgpu-stream/win32/cursor.h @@ -0,0 +1,26 @@ +#ifndef VGPU_CURSOR_H +#define VGPU_CURSOR_H + +/* cursor.h — win32 GDI cursor sample/compose onto a tight BGRA frame. */ + +#include +#include "ctx.h" /* win32 vgpu_ctx (cursor state) */ + +/* Sample the current cursor (position/shape) into ctx->cursor. + * Returns 1 if anything changed since last sample, else 0. */ +int cursor_sample(vgpu_ctx* ctx); + +/* Resolve a HCURSOR to a VGPU_CURSOR_ID_* by comparing against the system cursor table + * (LoadCursor(NULL, IDC_*) loaded once on first use). Returns VGPU_CURSOR_ID_UNKNOWN for + * custom cursors. Not hot-path: called only under the shape-change gate. */ +int cursor_resolve_id(HCURSOR hc); + +/* Extract glyph/hotspot/dims for hc into ctx->cursor, resolve its cursor_id, and record it as + * the current handle. For backends that source position elsewhere (DDA from frame info) and + * only need the shape on a shape-change gate. Caller serializes ctx->cursor writes. 
*/ +void cursor_apply_shape(vgpu_ctx* ctx, HCURSOR hc); + +/* Alpha/AND-XOR compose the sampled cursor onto a tight BGRA frame. */ +void cursor_draw(vgpu_ctx* ctx, uint8_t* bgra, uint32_t width, uint32_t height); + +#endif /* VGPU_CURSOR_H */ diff --git a/src/si/vgpu-stream/win32/geometry.c b/src/si/vgpu-stream/win32/geometry.c new file mode 100644 index 0000000..39bfa2d --- /dev/null +++ b/src/si/vgpu-stream/win32/geometry.c @@ -0,0 +1,52 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include "geometry.h" +#include "stream.h" /* vgpu_publish_geometry */ + +/* GetDpiForMonitor lives in Shcore.dll (per-monitor DPI awareness API). Loaded dynamically so + * the binary does not hard-depend on it; absence degrades dpi to "unknown" (0). */ +typedef HRESULT (WINAPI *GetDpiForMonitor_t)(HMONITOR, int /*MDT_*/, UINT*, UINT*); +#define VGPU_MDT_EFFECTIVE_DPI 0 + +static UINT monitor_dpi(HMONITOR mon) { + static GetDpiForMonitor_t fn = NULL; + static int tried = 0; + if (!tried) { + HMODULE lib = LoadLibraryA("Shcore.dll"); + if (lib) fn = (GetDpiForMonitor_t)(void*)GetProcAddress(lib, "GetDpiForMonitor"); + tried = 1; + } + if (!fn || !mon) return 0u; + UINT dx = 0, dy = 0; + if (fn(mon, VGPU_MDT_EFFECTIVE_DPI, &dx, &dy) != S_OK || dx == 0u) + return 0u; + return dx; +} + +static uint32_t monitor_refresh_mhz(HMONITOR mon) { + MONITORINFOEXW mi; mi.cbSize = sizeof mi; + if (!mon || !GetMonitorInfoW(mon, (MONITORINFO*)&mi)) + return 0u; + DEVMODEW dm; ZeroMemory(&dm, sizeof dm); dm.dmSize = sizeof dm; + if (!EnumDisplaySettingsW(mi.szDevice, ENUM_CURRENT_SETTINGS, &dm)) + return 0u; + if (dm.dmDisplayFrequency <= 1u) /* 0/1 = hardware default, not a real rate */ + return 0u; + return (uint32_t)dm.dmDisplayFrequency * 1000u; /* whole Hz -> milli-Hz */ +} + +void geometry_sample_and_publish(vgpu_ctx* ctx, int32_t cap_x, int32_t cap_y) { + int32_t virt_x = (int32_t)GetSystemMetrics(SM_XVIRTUALSCREEN); + int32_t virt_y = (int32_t)GetSystemMetrics(SM_YVIRTUALSCREEN); + uint32_t virt_w 
= (uint32_t)GetSystemMetrics(SM_CXVIRTUALSCREEN); + uint32_t virt_h = (uint32_t)GetSystemMetrics(SM_CYVIRTUALSCREEN); + + POINT origin = { cap_x, cap_y }; + HMONITOR mon = MonitorFromPoint(origin, MONITOR_DEFAULTTOPRIMARY); + + uint32_t dpi = monitor_dpi(mon); + uint32_t refresh = monitor_refresh_mhz(mon); + + vgpu_publish_geometry(&ctx->view, virt_x, virt_y, virt_w, virt_h, + cap_x, cap_y, dpi, refresh); +} diff --git a/src/si/vgpu-stream/win32/geometry.h b/src/si/vgpu-stream/win32/geometry.h new file mode 100644 index 0000000..02d339f --- /dev/null +++ b/src/si/vgpu-stream/win32/geometry.h @@ -0,0 +1,18 @@ +#ifndef VGPU_GEOMETRY_H +#define VGPU_GEOMETRY_H + +/* geometry.h — win32 display-geometry sampler. Samples the virtual-desktop bbox plus the + * captured output's origin / DPI / refresh and publishes them under the geom_seq seqlock. + * Not per-frame: called once at session start and reactively on backend recreate / capture- + * size change (the captured surface SIZE itself travels in desc.width/height, not here). */ + +#include +#include "ctx.h" /* win32 vgpu_ctx (region-view) */ + +/* Sample display geometry for the captured output whose top-left origin is (cap_x,cap_y) in + * virtual-desktop coordinates, and publish it. cap_x/cap_y is (0,0) for primary/full-screen + * backends and the duplicated output's DesktopCoordinates for DDA. The captured size is taken + * from desc.width/height and is not sampled here. 
*/ +void geometry_sample_and_publish(vgpu_ctx* ctx, int32_t cap_x, int32_t cap_y); + +#endif /* VGPU_GEOMETRY_H */ diff --git a/src/si/vgpu-stream/win32/main.c b/src/si/vgpu-stream/win32/main.c new file mode 100644 index 0000000..09477fe --- /dev/null +++ b/src/si/vgpu-stream/win32/main.c @@ -0,0 +1,55 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include +#include +#include + +#include "region.h" /* win32 pinned region */ +#include "ctx.h" /* win32 vgpu_ctx (embeds region-view) */ +#include "present.h" /* present/pump lifecycle */ +#include "stream.h" /* OS-agnostic status/error/backend setters */ +#include "capture.h" /* backend table */ + +int main(int argc, char** argv) { + int fps = argc > 1 ? atoi(argv[1]) : 30; + if (fps <= 0) fps = 30; + + vgpu_region_t region; + if (vgpu_region_create(®ion) != 0) { + fprintf(stderr, "main: region_create failed\n"); + return 1; + } + + vgpu_ctx ctx; + if (vgpu_present_init(&ctx, ®ion, (uint32_t)fps) != 0) { + fprintf(stderr, "main: present_init failed\n"); + vgpu_region_destroy(®ion); + return 1; + } + + const char* eyes = getenv("EYES"); + int n = 0; + const capture_backend* bks = capture_backends(&n); + int started = 0; + for (int i = 0; i < n && !started; i++) { + if (eyes && _stricmp(eyes, bks[i].name) != 0) continue; + fprintf(stderr, "eyes: trying %s\n", bks[i].name); + started = bks[i].start(&ctx, fps); + if (!started) fprintf(stderr, "eyes: %s unavailable\n", bks[i].name); + } + if (!started) { + fprintf(stderr, "eyes: no capture backend available\n"); + vgpu_set_status(&ctx.view, VGPU_ST_ERROR); + vgpu_set_error(&ctx.view, 2u); + vgpu_present_deinit(&ctx); + vgpu_region_destroy(®ion); + return 1; + } + + vgpu_set_backend(&ctx.view, ctx.backend); + vgpu_present_run(&ctx); /* never returns */ + + vgpu_present_deinit(&ctx); + vgpu_region_destroy(®ion); + return 0; +} diff --git a/src/si/vgpu-stream/win32/nvfbc_tosys_c.h b/src/si/vgpu-stream/win32/nvfbc_tosys_c.h new file mode 100644 index 0000000..3446f4b --- /dev/null 
+++ b/src/si/vgpu-stream/win32/nvfbc_tosys_c.h @@ -0,0 +1,93 @@ +#ifndef VGPU_NVFBC_TOSYS_C_H +#define VGPU_NVFBC_TOSYS_C_H + +/* + * C mirror of NvFBC's ToSys interface. The vendor header + * third_party/NvFBC/nvFBCToSys.h declares INvFBCToSys_v3 as a C++ abstract + * class (vtable of 5 pure-virtual + * __stdcall methods). We do NOT edit the vendor header; instead we replicate its + * single-inheritance vtable ABI as a COM-in-C interface so the producer stays + * pure C. Slot order MUST match declaration order in nvFBCToSys.h: + * 0 NvFBCToSysSetUp + * 1 NvFBCToSysGrabFrame + * 2 NvFBCToSysCursorCapture + * 3 NvFBCToSysGPUBasedCPUSleep + * 4 NvFBCToSysRelease + * On x64 (mingw/MSVC) `this` is the implicit first integer argument; __stdcall + * is a no-op for x64 so a plain pointer arg matches the vtable slot. + */ + +#include "NvFBC/nvFBC.h" /* vendor (third_party/): NVFBCRESULT, NvU32, param structs */ + +/* SetUp / GrabFrame param structs come from nvFBCToSys.h, but that header is C++. + * Redeclare the two we use here (layout-identical, C-clean). 
/* COM-in-C interface mirror of the vendor's C++ ToSys class.
 * The function-pointer order below is the vtable slot order and must stay in
 * step with the declaration order in the vendor header (SetUp, GrabFrame,
 * CursorCapture, GPUBasedCPUSleep, Release); reordering or inserting a member
 * would make every call dispatch to the wrong method. Each entry takes the
 * interface pointer explicitly as its first argument (the C++ `this`). */
typedef struct NvFBCToSys_c NvFBCToSys_c;
typedef struct {
    /* slot 0: configure the session (buffer format, cursor inclusion, output buffer) */
    NVFBCRESULT (__stdcall *NvFBCToSysSetUp)(NvFBCToSys_c*, NVFBC_TOSYS_SETUP_PARAMS_C*);
    /* slot 1: grab one frame into the buffer registered at setup */
    NVFBCRESULT (__stdcall *NvFBCToSysGrabFrame)(NvFBCToSys_c*, NVFBC_TOSYS_GRAB_FRAME_PARAMS_C*);
    /* slot 2: not called by this producer; parameter kept opaque (void*) */
    NVFBCRESULT (__stdcall *NvFBCToSysCursorCapture)(NvFBCToSys_c*, void*);
    /* slot 3: not called by this producer */
    NVFBCRESULT (__stdcall *NvFBCToSysGPUBasedCPUSleep)(NvFBCToSys_c*, __int64);
    /* slot 4: destroy the session */
    NVFBCRESULT (__stdcall *NvFBCToSysRelease)(NvFBCToSys_c*);
} NvFBCToSys_c_vtbl;
/* The object itself is just its vtable pointer, as seen from C. */
struct NvFBCToSys_c {
    const NvFBCToSys_c_vtbl* lpVtbl;
};
a/src/si/vgpu-stream/win32/present.c b/src/si/vgpu-stream/win32/present.c new file mode 100644 index 0000000..c9513ec --- /dev/null +++ b/src/si/vgpu-stream/win32/present.c @@ -0,0 +1,212 @@ +#define WIN32_LEAN_AND_MEAN +#include +#include +#include +#include "present.h" +#include "stream.h" /* OS-agnostic publish / control API + region-view */ +#include "cursor.h" +#include "geometry.h" /* one-shot display-geometry sample at session start */ + +/* cursor arena sizing */ +#define VGPU_CUR_MAX 256u +#define VGPU_CUR_BGRA (VGPU_CUR_MAX * VGPU_CUR_MAX * 4u) +#define VGPU_CUR_MASK (VGPU_CUR_MAX * VGPU_CUR_MAX) + +static uint64_t now_ns(void) { + static LARGE_INTEGER freq = { .QuadPart = 0 }; + if (freq.QuadPart == 0) QueryPerformanceFrequency(&freq); + LARGE_INTEGER c; QueryPerformanceCounter(&c); + return (uint64_t)((double)c.QuadPart * 1e9 / (double)freq.QuadPart); +} + +int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps) { + memset(ctx, 0, sizeof *ctx); + ctx->view.producer = region->producer; + ctx->view.control = region->control; + ctx->view.ring = region->ring; + ctx->default_fps = default_fps ? 
default_fps : 30u; + ctx->backend = VGPU_BK_NONE; + ctx->draw_cursor_cap = 1; + + /* one arena: content + frame + cursor buffers */ + size_t bytes = VGPU_STAGING_BYTES /* content */ + + VGPU_STAGING_BYTES /* frame */ + + VGPU_CUR_BGRA /* cursor bgra */ + + VGPU_CUR_MASK /* and */ + + VGPU_CUR_MASK; /* xor */ + uint8_t* a = (uint8_t*)VirtualAlloc(NULL, bytes, MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); + if (!a) { + fprintf(stderr, "present: arena VirtualAlloc %zu MiB failed (%lu)\n", + bytes / (1024 * 1024), GetLastError()); + return 1; + } + ctx->arena = a; + ctx->arena_bytes = bytes; + + size_t off = 0; + ctx->content_buf = a + off; off += VGPU_STAGING_BYTES; + ctx->frame_buf = a + off; off += VGPU_STAGING_BYTES; + ctx->cursor.bgra = a + off; off += VGPU_CUR_BGRA; + ctx->cursor.and_mask = a + off; off += VGPU_CUR_MASK; + ctx->cursor.xor_mask = a + off; off += VGPU_CUR_MASK; + + InitializeCriticalSection(&ctx->lock); + ctx->submit_event = CreateEvent(NULL, FALSE, FALSE, NULL); + ctx->content_seq = 0; + ctx->content_w = ctx->content_h = 0; + return 0; +} + +void vgpu_present_deinit(vgpu_ctx* ctx) { + if (ctx->submit_event) { CloseHandle(ctx->submit_event); ctx->submit_event = NULL; } + DeleteCriticalSection(&ctx->lock); + if (ctx->arena) { VirtualFree(ctx->arena, 0, MEM_RELEASE); ctx->arena = NULL; } +} + +void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* src, + uint32_t W, uint32_t H, uint32_t src_pitch) { + if (W > VGPU_MAX_WIDTH) W = VGPU_MAX_WIDTH; + if (H > VGPU_MAX_HEIGHT) H = VGPU_MAX_HEIGHT; + if (W == 0 || H == 0) return; + + EnterCriticalSection(&ctx->lock); + uint8_t* d = ctx->content_buf; + const uint32_t row = W * 4u; + for (uint32_t y = 0; y < H; y++) + memcpy(d + (size_t)y * row, src + (size_t)y * src_pitch, row); + ctx->content_w = W; + ctx->content_h = H; + ctx->content_seq++; + LeaveCriticalSection(&ctx->lock); + /* static-idle: stamp the moment the source delivered new content (the raw perception; + * the host derives "ms idle" from its own 
clock). Single 8-aligned MOV, off the lock. */ + vgpu_publish_content_change(&ctx->view, now_ns()); + SetEvent(ctx->submit_event); +} + +void vgpu_present_run(vgpu_ctx* ctx) { + const vgpu_region_view* rv = &ctx->view; /* neutral handle for the engine */ + const DWORD poll_ms = 8; + int64_t last_seq = -1; + uint32_t prev_state = VGPU_CMD_STOP; + uint32_t last_ff_ack = rv->producer->full_frame_ack; + DWORD last_beat = GetTickCount(); + uint64_t last_publish_ns = 0; /* 0 → first eligible frame publishes immediately */ + int last_cur_x = 0, last_cur_y = 0, last_cur_vis = 0; + HCURSOR last_cur_handle = NULL; + + /* one-shot display geometry: publish once before the loop (flat pull contract). The + * captured-output origin is (0,0) for the primary/full-screen capture path; backends + * resample reactively on recreate / capture-size change. No periodic poll in the loop. */ + geometry_sample_and_publish(ctx, 0, 0); + + for (;;) { + WaitForSingleObject(ctx->submit_event, poll_ms); + + /* --- heartbeat: always ticks, independent of desired_state --- */ + DWORD nowt = GetTickCount(); + if (nowt - last_beat >= VGPU_HEARTBEAT_PERIOD_MS) { + vgpu_tick_heartbeat(rv); + last_beat = nowt; + } + + /* --- reconcile control (gen-seqlock -> apply -> ack) --- */ + vgpu_control_view cv; + uint32_t desired = prev_state; + uint32_t draw_cursor = 1; + int force_full = 0; + uint32_t fps = ctx->default_fps; /* publish-rate cap (applied) */ + uint32_t ff_req = last_ff_ack; /* full_frame_req value to honor */ + if (vgpu_control_read(rv, &cv)) { + desired = cv.desired_state; + draw_cursor = cv.draw_cursor; + fps = cv.target_fps ? 
cv.target_fps : ctx->default_fps; + vgpu_set_applied_fps(rv, fps); + vgpu_publish_ctrl_ack(rv, cv.gen); + + ff_req = cv.full_frame_req; + if ((ff_req - last_ff_ack) != 0u) + force_full = 1; /* edge pending, wrap-tolerant */ + } + + /* --- lifecycle transitions --- */ + if (desired != prev_state) { + if (desired == VGPU_CMD_RUN && prev_state != VGPU_CMD_RUN) { + vgpu_bump_run_epoch(rv); + vgpu_set_status(rv, VGPU_ST_CAPTURING); + force_full = 1; /* fresh frame on start */ + } else if (desired == VGPU_CMD_PAUSE) { + vgpu_set_status(rv, VGPU_ST_PAUSED); + } else if (desired == VGPU_CMD_STOP) { + vgpu_set_status(rv, VGPU_ST_STOPPED); + } + prev_state = desired; + } else if (last_seq < 0 && desired == VGPU_CMD_RUN) { + vgpu_set_status(rv, VGPU_ST_CAPTURING); + } + + if (desired != VGPU_CMD_RUN) { + /* PAUSED/STOPPED: no new frames; heartbeat still ticks. We do NOT + * ack a pending full_frame here — acking without publishing would + * be a false "honored". A pending request is honored on the next + * transition to RUN (force_full=1 there → publish + ack). */ + continue; + } + + /* --- compose + publish on content change OR forced full frame, but + * rate-limited to the applied fps cap (the single publish point → + * contract-level cap, independent of the capture backend). A + * force_full bypasses the cap (due=1). present does NOT sample the + * cursor (capture threads source it); it only reads ctx->cursor under + * ctx->lock for compositing, and detects cursor motion via a delta so + * a pure cursor move over static desktop still recomposes. --- */ + uint64_t interval_ns = fps > 0 ? 
(1000000000ull / fps) : 0; + uint64_t now = now_ns(); + int due = force_full || interval_ns == 0 + || (now - last_publish_ns) >= interval_ns; + + int compose_cursor = (ctx->draw_cursor_cap && draw_cursor); + + EnterCriticalSection(&ctx->lock); + int64_t seq = ctx->content_seq; + uint32_t W = ctx->content_w, H = ctx->content_h; + int cur_changed = compose_cursor + && ((ctx->cursor.visible != last_cur_vis) + || (ctx->cursor.x != last_cur_x) + || (ctx->cursor.y != last_cur_y) + || (ctx->cursor.handle != last_cur_handle)); + int have = (W && H); + int content_new = have && (seq != last_seq || cur_changed || force_full); + /* take the frame ONLY when due — so we never drop the latest content; + * if not due, last_seq is left untouched and it publishes next due. */ + int dirty = content_new && due; + if (dirty) { + memcpy(ctx->frame_buf, ctx->content_buf, (size_t)W * H * 4u); + last_seq = seq; + if (compose_cursor) + cursor_draw(ctx, ctx->frame_buf, W, H); + last_cur_vis = ctx->cursor.visible; + last_cur_x = ctx->cursor.x; last_cur_y = ctx->cursor.y; + last_cur_handle = ctx->cursor.handle; + } + LeaveCriticalSection(&ctx->lock); + + if (!dirty) { + /* not due, or nothing to publish. A force_full with content has + * due=1 → dirty=1, so it never lands here while have is true; thus + * no spurious ack edge. 
*/ + continue; + } + + if (vgpu_publish_frame(rv, ctx->frame_buf, W, H, now) == 0) { + last_publish_ns = now; + if (force_full) { + vgpu_publish_full_frame_ack(rv, ff_req); + last_ff_ack = ff_req; + } + } else { + vgpu_set_error(rv, 1u); /* frame too large for slot (mode > max) */ + } + } +} diff --git a/src/si/vgpu-stream/win32/present.h b/src/si/vgpu-stream/win32/present.h new file mode 100644 index 0000000..4ae9473 --- /dev/null +++ b/src/si/vgpu-stream/win32/present.h @@ -0,0 +1,24 @@ +#ifndef VGPU_PRESENT_H +#define VGPU_PRESENT_H + +/* present.h — win32 present/pump lifecycle: staging arena, submit handoff, and + * the publish loop driving the OS-agnostic engine over ctx's region-view. */ + +#include <stdint.h> +#include "ctx.h" /* win32 vgpu_ctx + vgpu_region_t */ + +/* Initialize present/staging state inside ctx over an already-created region. + * Allocates the staging+cursor arena. Returns 0 on success. */ +int vgpu_present_init(vgpu_ctx* ctx, vgpu_region_t* region, uint32_t default_fps); +void vgpu_present_deinit(vgpu_ctx* ctx); + +/* Capture backends submit a freshly captured desktop frame (any source pitch). + * Repacked tight into ctx->content_buf, clamped to max mode. Thread-safe. */ +void vgpu_present_submit(vgpu_ctx* ctx, const uint8_t* bgra, + uint32_t width, uint32_t height, uint32_t src_pitch); + +/* Run the publish pump: reconcile control, tick heartbeat, compose cursor, + * publish on change / on full_frame_req. Never returns (process lifetime). 
*/ +void vgpu_present_run(vgpu_ctx* ctx); + +#endif /* VGPU_PRESENT_H */ diff --git a/src/si/vgpu-stream/win32/region.c b/src/si/vgpu-stream/win32/region.c new file mode 100644 index 0000000..1181402 --- /dev/null +++ b/src/si/vgpu-stream/win32/region.c @@ -0,0 +1,172 @@ +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <stdio.h> +#include <string.h> +#include "region.h" +#include "atomic-shim.h" /* x86-TSO ordering for contract init publish */ + +#define VGPU_2MB (2u * 1024u * 1024u) + +/* Page-segregated init of the contract over an already-pinned region base. + * Init-ordering per contract: status=INIT, latest=NONE, backend, supported_formats, + * release-barrier; heartbeat starts later (in the run pump). */ +static void region_init_contract(vgpu_region_t* r) { + vgpu_producer_t* p = r->producer; + vgpu_control_t* c = r->control; + + memset(p, 0, sizeof *p); + memset(c, 0, sizeof *c); + + p->status = VGPU_ST_INIT; + p->backend = VGPU_BK_NONE; + p->error_code = 0; + p->applied_fps = 0; + p->supported_formats = (1u << VGPU_FMT_BGRA8888); + p->run_epoch = 0; + p->heartbeat = 0; + p->frame_id = 0; + p->ctrl_ack = 0; + p->full_frame_ack = 0; + for (uint32_t i = 0; i < VGPU_SLOT_COUNT; i++) + p->seq[i] = 0; + + /* control starts RUN: producer captures immediately; host may STOP/PAUSE */ + c->ctrl_gen = 0; + c->desired_state = VGPU_CMD_RUN; + c->target_fps = 0; + c->draw_cursor = 1; + c->full_frame_req = 0; + c->consumer_tick = 0; + c->attached = 0; + + /* publish latest last with a release store gating all of the above */ + vgpu_sfence(); + vgpu_store_release32(&p->latest, VGPU_LATEST_NONE); +} + +static int adjust_lock_memory_privilege(void) { + HANDLE tok; + if (!OpenProcessToken(GetCurrentProcess(), + TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &tok)) + return 0; + TOKEN_PRIVILEGES tp; + memset(&tp, 0, sizeof tp); + tp.PrivilegeCount = 1; + if (!LookupPrivilegeValueA(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid)) { + CloseHandle(tok); + return 0; + } + tp.Privileges[0].Attributes = 
SE_PRIVILEGE_ENABLED; + int ok = AdjustTokenPrivileges(tok, FALSE, &tp, sizeof tp, NULL, NULL) + && GetLastError() == ERROR_SUCCESS; + CloseHandle(tok); + return ok; +} + +int vgpu_region_create(vgpu_region_t* out) { + memset(out, 0, sizeof *out); + + const uint64_t bytes = VGPU_REGION_BYTES; + + void* os_base = NULL; + uint8_t* base = NULL; + uint64_t os_total = 0; + + if (adjust_lock_memory_privilege()) { + SIZE_T large_min = GetLargePageMinimum(); + if (large_min && large_min <= VGPU_2MB) { + SIZE_T rounded = (SIZE_T)((bytes + VGPU_2MB - 1) & ~(uint64_t)(VGPU_2MB - 1)); + void* p = VirtualAlloc(NULL, rounded, + MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, + PAGE_READWRITE); + if (p) { + /* large pages are >= 2 MiB → base is already 2 MiB-aligned */ + os_base = p; + base = (uint8_t*)p; + os_total = rounded; + fprintf(stderr, "region: MEM_LARGE_PAGES %llu MiB at %p\n", + (unsigned long long)(rounded / (1024 * 1024)), p); + } else { + fprintf(stderr, "region: MEM_LARGE_PAGES failed (%lu), fallback\n", + GetLastError()); + } + } + } else { + fprintf(stderr, "region: SE_LOCK_MEMORY unavailable, fallback\n"); + } + + if (!base) { + uint64_t total = bytes + VGPU_2MB; + void* p = VirtualAlloc(NULL, (SIZE_T)total, MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); + if (!p) { + fprintf(stderr, "region: VirtualAlloc %llu MiB failed (%lu)\n", + (unsigned long long)(total / (1024 * 1024)), GetLastError()); + return 1; + } + uintptr_t addr = (uintptr_t)p; + uintptr_t aligned = (addr + VGPU_2MB - 1) & ~(uintptr_t)(VGPU_2MB - 1); + + /* The region must be RESIDENT, not merely committed: the host reads it out + * of guest RAM and only PRESENT pages are visible to it — a committed but + * demand-zero page has no PTE, so it is unreadable from the host. VirtualLock + * pins the pages into the working set, but it can lock at most the process + * MINIMUM working set, and the default quota is far below the region size + * (so a bare VirtualLock fails with ERROR_WORKING_SET_QUOTA). 
Raise the + * minimum first. NB: VirtualLock / SetProcessWorkingSetSize do NOT need + * SE_LOCK_MEMORY — that privilege is only for large pages / AWE. */ + SIZE_T ws_min = (SIZE_T)(bytes + 64ull * 1024 * 1024); /* region + headroom */ + SIZE_T ws_max = ws_min + 128ull * 1024 * 1024; + SIZE_T cur_min = 0, cur_max = 0; + if (GetProcessWorkingSetSize(GetCurrentProcess(), &cur_min, &cur_max)) { + if (cur_min > ws_min) ws_min = cur_min; /* never shrink an existing quota */ + if (cur_max > ws_max) ws_max = cur_max; + } + if (!SetProcessWorkingSetSize(GetCurrentProcess(), ws_min, ws_max)) + fprintf(stderr, "region: SetProcessWorkingSetSize(%llu MiB) failed (%lu)\n", + (unsigned long long)(ws_min / (1024 * 1024)), GetLastError()); + + if (!VirtualLock((void*)aligned, (SIZE_T)bytes)) { + fprintf(stderr, "region: VirtualLock failed (%lu) — pre-faulting region\n", + GetLastError()); + /* Last resort: fault every page so it is at least PRESENT now. Without + * the lock the trimmer may evict it under pressure, but the raised + * minimum working set above makes eviction far less likely. 
*/ + volatile uint8_t* q = (volatile uint8_t*)aligned; + for (uint64_t off = 0; off < bytes; off += 4096u) q[off] = q[off]; + } + + os_base = p; + base = (uint8_t*)aligned; + os_total = total; + fprintf(stderr, "region: fallback VirtualAlloc+lock %llu MiB, aligned at %p\n", + (unsigned long long)(bytes / (1024 * 1024)), (void*)aligned); + } + + if (((uintptr_t)base & (VGPU_2MB - 1)) != 0) { + fprintf(stderr, "region: base %p not 2 MiB aligned\n", (void*)base); + VirtualFree(os_base, 0, MEM_RELEASE); + return 1; + } + + out->os_base = os_base; + out->base = base; + out->os_total = os_total; + out->producer = (vgpu_producer_t*)(base + VGPU_PRODUCER_OFFSET); + out->control = (vgpu_control_t*)(base + VGPU_CONTROL_OFFSET); + out->ring = base + VGPU_RING_OFFSET; + + region_init_contract(out); + + fprintf(stderr, "region: contract ready (producer=%p control=%p ring=%p)\n", + (void*)out->producer, (void*)out->control, (void*)out->ring); + return 0; +} + +void vgpu_region_destroy(vgpu_region_t* r) { + if (r && r->os_base) { + VirtualUnlock(r->base, (SIZE_T)VGPU_REGION_BYTES); + VirtualFree(r->os_base, 0, MEM_RELEASE); + memset(r, 0, sizeof *r); + } +} diff --git a/src/si/vgpu-stream/win32/region.h b/src/si/vgpu-stream/win32/region.h new file mode 100644 index 0000000..113eb32 --- /dev/null +++ b/src/si/vgpu-stream/win32/region.h @@ -0,0 +1,28 @@ +#ifndef VGPU_REGION_H +#define VGPU_REGION_H + +/* region.h — win32 pinned contract region (resolves blocks for the region-view). */ + +#include <stdint.h> +#include "vgpu_stream.h" /* public contract: blocks, offsets, slot geometry */ + +/* + * One contiguous 2 MiB-aligned pinned region holding the full contract: + * producer block (page 0), control block (page 1), then SLOT_COUNT frame slots + * starting at VGPU_RING_OFFSET. Object = memory: the region owns the mapping, + * its lifetime is the mapping's lifetime. No hidden global state. 
+ */ +typedef struct { + void* os_base; /* raw allocation base (for free) */ + uint8_t* base; /* 2 MiB-aligned region base (== contract origin) */ + uint64_t os_total; /* bytes reserved at os_base */ + vgpu_producer_t* producer; /* base + VGPU_PRODUCER_OFFSET */ + vgpu_control_t* control; /* base + VGPU_CONTROL_OFFSET */ + uint8_t* ring; /* base + VGPU_RING_OFFSET */ +} vgpu_region_t; + +/* Returns 0 on success, non-zero on failure (region zeroed on failure). */ +int vgpu_region_create(vgpu_region_t* out); +void vgpu_region_destroy(vgpu_region_t* r); + +#endif /* VGPU_REGION_H */ diff --git a/third_party/NvFBC/nvFBC.h b/third_party/NvFBC/nvFBC.h new file mode 100644 index 0000000..63d50ff --- /dev/null +++ b/third_party/NvFBC/nvFBC.h @@ -0,0 +1,275 @@ +/** + * \file This file contains definitions for NVFBC API. + * \copyright + * + * Copyright 1993-2016 NVIDIA Corporation. All rights reserved. + * NOTICE TO LICENSEE: This source code and/or documentation ("Licensed Deliverables") + * are subject to the applicable NVIDIA license agreement + * that governs the use of the Licensed Deliverables. + * + */ + +#pragma once +#include <Windows.h> + +typedef unsigned char NvU8; +typedef unsigned long NvU32; +typedef unsigned long long NvU64; + +/** + * \defgroup NVFBC The NVIDIA Frame Buffer Capture API. + * \brief Defines a set of interfaces for high performance Capture of desktop content. + */ + +/** + * \defgroup NVFBC_ENUMS Enums + * \ingroup NVFBC + * \brief Enumerations to be used with NVFBC API + */ + +/** + * \defgroup NVFBC_STRUCTS Structs + * \ingroup NVFBC + * \brief Defines Parameter Structures to be used with NVFBC APIs. + */ + +/** + * \defgroup NVFBC_ENTRYPOINTS Entrypoints + * \ingroup NVFBC + * \brief Declarations for NVFBC Entrypoint functions + */ + +/** + * \ingroup NVFBC + * Macro to define the NVFBC API version corresponding to this distribution. 
+ */ +#define NVFBC_DLL_VERSION 0x50 + +/** + * \ingroup NVFBC + * Macro to construct version numbers for parameter structs. + */ +#define NVFBC_STRUCT_VERSION(typeName, ver) (NvU32)(sizeof(typeName) | ((ver)<<16) | (NVFBC_DLL_VERSION << 24)) + +/** + * \ingroup NVFBC + * Calling Convention + */ +#define NVFBCAPI __stdcall + +/** + * \ingroup NVFBC + * Indicates that there are no global overrides specified for NVFBC. To be used with NVFBC_SetGlobalFlags API + */ +#define NVFBC_GLOBAL_FLAGS_NONE 0x00000000 + +/** + * \ingroup NVFBC + * Indicates to NVFBC that stereo rendering is enabled. Currently unsupported. To be used with NVFBC_SetGlobalFlags API. + */ +#define NVFBC_GLOBAL_FLAGS_STEREO_BUFFER 0x00000001 + +/** + * \ingroup NVFBC + * Indicates that NVFBC should not request a repaint of the desktop when initiating NVFBC capture. To be used with NVFBC_SetGlobalFlags API. + */ +#define NVFBC_GLOBAL_FLAGS_NO_INITIAL_REFRESH 0x00000002 + +/** + * \ingroup NVFBC + * Indicates that NVFBC should not reset the graphics driver while servicing subsequent NVFBC_Enable API requests. +*/ + +#define NVFBC_GLOBAL_FLAGS_NO_DEVICE_RESET_TOGGLE 0x00000004 + +/** + * \ingroup NVFBC_ENUMS + * \brief Enumerates status codes returned by NVFBC APIs. + */ +typedef enum _NVFBCRESULT +{ + NVFBC_SUCCESS = 0, + NVFBC_ERROR_GENERIC = -1, /**< Unexpected failure in NVFBC. */ + NVFBC_ERROR_INVALID_PARAM = -2, /**< One or more of the paramteres passed to NvFBC are invalid [This include NULL pointers]. */ + NVFBC_ERROR_INVALIDATED_SESSION = -3, /**< NvFBC session is invalid. Client needs to recreate session. */ + NVFBC_ERROR_PROTECTED_CONTENT = -4, /**< Protected content detected. Capture failed. */ + NVFBC_ERROR_DRIVER_FAILURE = -5, /**< GPU driver returned failure to process NvFBC command. */ + NVFBC_ERROR_CUDA_FAILURE = -6, /**< CUDA driver returned failure to process NvFBC command. */ + NVFBC_ERROR_UNSUPPORTED = -7, /**< API Unsupported on this version of NvFBC. 
*/ + NVFBC_ERROR_HW_ENC_FAILURE = -8, /**< HW Encoder returned failure to process NVFBC command. */ + NVFBC_ERROR_INCOMPATIBLE_DRIVER = -9, /**< NVFBC is not compatible with this version of the GPU driver. */ + NVFBC_ERROR_UNSUPPORTED_PLATFORM = -10, /**< NVFBC is not supported on this platform. */ + NVFBC_ERROR_OUT_OF_MEMORY = -11, /**< Failed to allocate memory. */ + NVFBC_ERROR_INVALID_PTR = -12, /**< A NULL pointer was passed. */ + NVFBC_ERROR_INCOMPATIBLE_VERSION = -13, /**< An API was called with a parameter struct that has an incompatible version. Check dwVersion field of paramter struct. */ + NVFBC_ERROR_OPT_CAPTURE_FAILURE = -14, /**< Desktop Capture failed. */ + NVFBC_ERROR_INSUFFICIENT_PRIVILEGES = -15, /**< User doesn't have appropriate previlages. */ + NVFBC_ERROR_INVALID_CALL = -16, /**< NVFBC APIs called in wrong sequence. */ + NVFBC_ERROR_SYSTEM_ERROR = -17, /**< Win32 error. */ + NVFBC_ERROR_INVALID_TARGET = -18, /**< The target adapter idx can not be used for NVFBC capture. It may not correspond to an NVIDIA GPU, or may not be attached to desktop. */ + NVFBC_ERROR_DYNAMIC_DISABLE = -20, /**< NvFBC is dynamically disabled. Cannot continue to capture */ +} NVFBCRESULT; + +/** + * \ingroup NVFBC_ENUMS + * \brief Enumerates NVFBC states. To be used with NvFBC_Enable API + */ +typedef enum _NVFBC_STATE +{ + NVFBC_STATE_DISABLE = 0, /** Disables NvFBC. */ + NVFBC_STATE_ENABLE , /** Enables NvFBC. */ + NVFBC_STATE_LAST , /** Sentinel value. Shouldn't be used. */ +} NVFBC_STATE; + +/** + * \ingroup NVFBC_STRUCTS + * \brief Defines parameters that describe the grabbed data, and provides detailed information about status of the NVFBC session. + */ +typedef struct _NvFBCFrameGrabInfo +{ + DWORD dwWidth; /**< [out] Indicates the current width of captured buffer. */ + DWORD dwHeight; /**< [out] Indicates the current height of captured buffer. */ + DWORD dwBufferWidth; /**< [out] Indicates the current width of the pixel buffer(padded width). 
*/ + DWORD dwReserved; /**< [in] Reserved, do not use. */ + BOOL bOverlayActive; /**< [out] Is set to 1 if overlay was active. */ + BOOL bMustRecreate; /**< [out] Is set to 1 if the compressor must call NvBFC_Create again. */ + BOOL bFirstBuffer; /**< [out] Is set to 1 is this was the first capture call, or first call after a desktop mode change. + Relevant only for XOR and diff modes supported by NVFBCToSys interface. */ + BOOL bHWMouseVisible; /**< [out] Is set to 1 if HW cursor was enabled by OS at the time of the grab. */ + BOOL bProtectedContent; /**< [out] Is set to 1 if protected content was active (DXVA encryption Session). */ + DWORD dwDriverInternalError; /**< [out] Indicates the status code from lower layers. 0 or 0xFBCA11F9 indicates no error was returned. */ + BOOL bStereoOn; /**< [out] Is set to 1 if stereo was on. */ + BOOL bIGPUCapture; /**< [out] Is set to 1 if the captured frame is from iGPU. 0 if capture fails or if captured from dGPU*/ + DWORD dwSourcePID; /**< [out] Indicates which process caused the last screen update that got grabbed*/ + DWORD dwReserved3; /**< [in] Reserved, do not use. */ + NvU32 dwReserved2[13]; /**< [in] Resereved, should be set to 0. */ +} NvFBCFrameGrabInfo; + +/** + * \ingroup NVFBC_STRUCTS + * \brief Deines the parameters to be used with NvFBC_GetStatusEx API + */ +typedef struct _NvFBCStatusEx +{ + NvU32 dwVersion; /**< [in] Struct version. Set to NVFBC_STATUS_VER. */ + NvU32 bIsCapturePossible :1; /**< [out] Indicates if NvFBC feature is enabled. */ + NvU32 bCurrentlyCapturing:1; /**< [out] Indicates if NVFBC is currently capturing for the Adapter ordinal specified in dwAdapterIdx. */ + NvU32 bCanCreateNow :1; /**< [out] Deprecated. Do not use. */ + NvU32 bSupportMultiHead :1; /**< [out] MultiHead grab supported. */ + NvU32 bSupportMultiClient:1; /**< [out] Multiple capture clients on same display adapter supported. */ + NvU32 bReservedBits :27; /**< [in] Reserved, do not use. 
*/ + NvU32 dwNvFBCVersion; /**< [out] Indicates the highest NvFBC interface version supported by the loaded NVFBC library. */ + NvU32 dwAdapterIdx; /**< [in] Adapter Ordinal corresponding to the display to be grabbed. IGNORED if bCapturePID is set */ + void* pPrivateData; /**< [in] optional **/ + NvU32 dwPrivateDataSize; /**< [in] optional **/ + NvU32 dwReserved[59]; /**< [in] Reserved. Should be set to 0. */ + void* pReserved[31]; /**< [in] Reserved. Should be set to NULL. */ +} NvFBCStatusEx; +#define NVFBC_STATUS_VER_1 NVFBC_STRUCT_VERSION(NvFBCStatusEx, 1) +#define NVFBC_STATUS_VER_2 NVFBC_STRUCT_VERSION(NvFBCStatusEx, 2) +#define NVFBC_STATUS_VER NVFBC_STATUS_VER_2 + +/** + * \ingroup NVFBC_STRUCTS + * \brief Defines the parameters to be used with NvFBC_CreateEx API. + */ +typedef struct _NvFBCCreateParams +{ + NvU32 dwVersion; /**< [in] Struct version. Set to NVFBC_CREATE_PARAMS_VER. */ + NvU32 dwInterfaceType; /**< [in] ID of the NVFBC interface Type being requested. */ + NvU32 dwMaxDisplayWidth; /**< [out] Max. display width allowed. */ + NvU32 dwMaxDisplayHeight; /**< [out] Max. display height allowed. */ + void* pDevice; /**< [in] Device pointer. */ + void* pPrivateData; /**< [in] Private data [optional]. */ + NvU32 dwPrivateDataSize; /**< [in] Size of private data. */ + NvU32 dwInterfaceVersion; /**< [in] Version of the capture interface. */ + void* pNvFBC; /**< [out] A pointer to the requested NVFBC object. */ + NvU32 dwAdapterIdx; /**< [in] Adapter Ordinal corresponding to the display to be grabbed. If pDevice is set, this parameter is ignored. */ + NvU32 dwNvFBCVersion; /**< [out] Indicates the highest NvFBC interface version supported by the loaded NVFBC library. */ + void* cudaCtx; /**< [in] CUDA context created using cuD3D9CtxCreate with the D3D9 device passed as pDevice. Only used for NvFBCCuda interface. + It is mandatory to pass a valid D3D9 device if cudaCtx is passed. The call will fail otherwise. 
+ Client must release NvFBCCuda object before destroying the cudaCtx. */ + void* pPrivateData2; /**< [in] Private data [optional]. */ + NvU32 dwPrivateData2Size; /**< [in] Size of private data. */ + NvU32 dwReserved[55]; /**< [in] Reserved. Should be set to 0. */ + void* pReserved[27]; /**< [in] Reserved. Should be set to NULL. */ +}NvFBCCreateParams; +#define NVFBC_CREATE_PARAMS_VER_1 NVFBC_STRUCT_VERSION(NvFBCCreateParams, 1) +#define NVFBC_CREATE_PARAMS_VER_2 NVFBC_STRUCT_VERSION(NvFBCCreateParams, 2) +#define NVFBC_CREATE_PARAMS_VER NVFBC_CREATE_PARAMS_VER_2 + +/** +* \ingroup NVFBC_STRUCTS +* \brief Defines parameters for a Grab\Capture call to get HW cursor data in the NVFBCToSys capture session. +*/ +typedef struct +{ + NvU32 dwVersion; /**< [in]: Struct version. Set to NVFBC_MOUSE_GRAB_INFO_VER.*/ + NvU32 dwWidth; /**< [out]: Width of mouse glyph captured.*/ + NvU32 dwHeight; /**< [out]: Height of mouse glyph captured.*/ + NvU32 dwPitch; /**< [out]: Pitch of mouse glyph captured.*/ + NvU32 bIsHwCursor : 1; /**< [out]: Tells if cursor is HW cursor or SW cursor. If set to 0, ignore height, width, pitch and pBits.*/ + NvU32 bReserved : 32; /**< [in]: Reserved.*/ + NvU32 dwPointerFlags; /**< [out]: Maps to DXGK_POINTERFLAGS::Value.*/ + NvU32 dwXHotSpot; /**< [out]: Maps to DXGKARG_SETPOINTERSHAPE::XHot.*/ + NvU32 dwYHotSpot; /**< [out]: Maps to DXGKARG_SETPOINTERSHAPE::YHot.*/ + NvU32 dwUpdateCounter; /**< [out]: Cursor update Counter. */ + NvU32 dwBufferSize; /**< [out]: Size of the buffer contaiing the captured cursor glyph. */ + void * pBits; /**< [out]: pointer to buffer containing the captured cursor glyph.*/ + NvU32 dwReservedA[22]; /**< [in]: Reserved. Set to 0.*/ + void * pReserved[15]; /**< [in]: Reserved. 
Set to 0.*/ +}NVFBC_CURSOR_CAPTURE_PARAMS; +#define NVFBC_CURSOR_CAPTURE_PARAMS_VER NVFBC_STRUCT_VERSION(NVFBC_CURSOR_CAPTURE_PARAMS, 1) + +/** + * \ingroup NVFBC_ENTRYPOINTS + * \brief NVFBC API to set global overrides + * \param [in] dwFlags Global overrides for NVFBC. Use ::NVFBC_GLOBAL_FLAGS value. + */ +void NVFBCAPI NvFBC_SetGlobalFlags(DWORD dwFlags); + +/** + * \ingroup NVFBC_ENTRYPOINTS + * \brief NVFBC API to create an NVFBC capture session. + * Instantiates an interface identified by NvFBCCreateParams::dwInterfaceType. + * \param [inout] pCreateParams Pointer to a struct of type ::NvFBCCreateParams, typecast to void* + * \return An applicable ::NVFBCRESULT value. + */ +NVFBCRESULT NVFBCAPI NvFBC_CreateEx(void * pCreateParams); + +/** + * \ingroup NVFBC_ENTRYPOINTS + * \brief NVFBC API to query Current NVFBC status. + * Queries the status for the adapter pointed to by the NvFBCStatusEx::dwAdapterIdx parameter. + * \param [inout] pCreateParams Pointer to a struct of type ::NvFBCStatusEx. + * \return An applicable ::NVFBCRESULT value. + */ +NVFBCRESULT NVFBCAPI NvFBC_GetStatusEx(NvFBCStatusEx *pNvFBCStatusEx); + +/** + * \ingroup NVFBC_ENTRYPOINTS + * \brief NVFBC API to enable \ disable NVFBC feature. + * \param [in] nvFBCState Refer ::NVFBC_STATE + * \return An applicable ::NVFBCRESULT value. + */ +NVFBCRESULT NVFBCAPI NvFBC_Enable(NVFBC_STATE nvFBCState); + +/** + * \ingroup NVFBC_ENTRYPOINTS + * \brief NVFBC API to query highest GRID SDK version supported by the loaded NVFBC library. + * \param [out] pVersion Pointer to a 32-bit integer to hold the supported GRID SDK version. + * \return An applicable ::NVFBCRESULT value. 
+ */ +NVFBCRESULT NVFBCAPI NvFBC_GetSDKVersion(NvU32 * pVersion); + +/** + * \cond API_PFN + */ +typedef void (NVFBCAPI * NvFBC_SetGlobalFlagsType) (DWORD dwFlags); +typedef NVFBCRESULT (NVFBCAPI * NvFBC_CreateFunctionExType) (void * pCreateParams); +typedef NVFBCRESULT (NVFBCAPI * NvFBC_GetStatusExFunctionType) (void * pNvFBCStatus); +typedef NVFBCRESULT (NVFBCAPI * NvFBC_EnableFunctionType) (NVFBC_STATE nvFBCState); +typedef NVFBCRESULT (NVFBCAPI * NvFBC_GetSDKVersionFunctionType) (NvU32 * pVersion); +/** + * \endcond API_PFN +*/ diff --git a/third_party/NvFBC/nvFBCToSys.h b/third_party/NvFBC/nvFBCToSys.h new file mode 100644 index 0000000..67fb9e8 --- /dev/null +++ b/third_party/NvFBC/nvFBCToSys.h @@ -0,0 +1,176 @@ +/** + * \file This file contains defintions for NVFBCToSys + * + * Copyright 1993-2016 NVIDIA Corporation. All rights reserved. + * NOTICE TO LICENSEE: This source code and/or documentation ("Licensed Deliverables") + * are subject to the applicable NVIDIA license agreement + * that governs the use of the Licensed Deliverables. + * + */ + +#ifndef NVFBC_TO_SYS_H_ +#define NVFBC_TO_SYS_H_ +/** + * \defgroup NVFBC_TOSYS NVFBCToSys Interface + * \brief Interface for grabbing Desktop images and generating output in system memory. + */ + +/** + * \defgroup NVFBC_TOSYS_ENUMS Enums + * \ingroup NVFBC_TOSYS + * \brief Enumerations used with NVFBCToSys interface. + */ + +/** + * \defgroup NVFBC_TOSYS_STRUCTS Structs + * \ingroup NVFBC_TOSYS + * \brief Parameter Structs Defined for use with NVFBCToSys interface. + */ + +/** + * \defgroup NVFBC_TOSYS_INTERFACE Object Interface + * \ingroup NVFBC_TOSYS + * \brief Interface class definition for NVFBCToSys Capture API + */ + +/** + * \ingroup NVFBC_TOSYS + * \brief Macro to define the interface ID to be passed as NvFBCCreateParams::dwInterfaceType + * for creating an NVFBCToSys capture session object. 
+ */ +#define NVFBC_TO_SYS (0x1204) + +/** + * \ingroup NVFBC_TOSYS_ENUMS + * Enumerates output buffer pixel data formats supported by NVFBCToSys. + */ +typedef enum +{ + NVFBC_TOSYS_ARGB = 0, /**< Output Pixels in ARGB format: 32bpp, one byte per channel. */ + NVFBC_TOSYS_RGB , /**< Output Pixels in RGB format: 24bpp, one byte per channel. */ + NVFBC_TOSYS_YYYYUV420p , /**< Output Pixels in YUV420 format: 12bpp, + the Y' channel at full resolution, U channel at half resolution (1 byte for four pixels), V channel at half resolution. */ + NVFBC_TOSYS_RGB_PLANAR , /**< Output Pixels in planar RGB format: 24bpp, + stored sequentially in memory as complete red channel, complete green channel, complete blue channel. */ + NVFBC_TOSYS_XOR , /**< Output Pixels in RGB format: 24bpp XOR'd with the prior frame. */ + NVFBC_TOSYS_YUV444p , /**< Output Pixels in YUV444 planar format, i.e. separate 8-bpp Y, U, V planes with no subsampling.*/ + NVFBC_TOSYS_BUF_FMT_LAST , /**< Sentinel value. Do not use.*/ +} NVFBCToSysBufferFormat; + +/** + * \ingroup NVFBC_TOSYS_ENUMS + * Enumerates Capture\Grab modes supported by NVFBCToSys. + */ +typedef enum +{ + NVFBC_TOSYS_SOURCEMODE_FULL = 0, /**< Grab full res */ + NVFBC_TOSYS_SOURCEMODE_SCALE , /**< Will convert current res to supplied resolution (dwTargetWidth and dwTargetHeight) */ + NVFBC_TOSYS_SOURCEMODE_CROP , /**< Native res, crops a subwindow, of dwTargetWidth and dwTargetHeight sizes, starting at dwStartX and dwStartY */ + NVFBC_TOSYS_SOURCEMODE_LAST , /**< Sentinel value. Do not use. */ +}NVFBCToSysGrabMode; + +/** + * \ingroup NVFBC_TOSYS_ENUMS + * \enum NVFBC_TOSYS_GRAB_FLAGS Enumerates special commands for grab\capture supported by NVFBCToSys. + */ +typedef enum +{ + NVFBC_TOSYS_NOFLAGS = 0x0, /**< Default (no flags set). Grabbing will wait for a new frame or HW mouse move. */ + NVFBC_TOSYS_NOWAIT = 0x1, /**< Grabbing will not wait for a new frame nor a HW cursor move. 
*/ + NVFBC_TOSYS_WAIT_WITH_TIMEOUT = 0x10, /**< Grabbing will wait for a new frame or HW mouse move with a maximum wait time of NVFBC_TOSYS_GRAB_FRAME_PARAMS::dwWaitTime millisecond*/ +} NVFBC_TOSYS_GRAB_FLAGS; + +/** + * \ingroup NVFBC_TOSYS_STRUCTS + * \brief Defines parameters used to configure NVFBCToSys capture session. + */ +typedef struct +{ + NvU32 dwVersion; /**< [in]: Struct version. Set to NVFBC_TOSYS_SETUP_PARAMS_VER.*/ + NvU32 bWithHWCursor :1; /**< [in]: The client should set this to 1 if it requires the HW cursor to be composited on the captured image.*/ + NvU32 bDiffMap :1; /**< [in]: The client should set this to use the DiffMap feature.*/ + NvU32 bEnableSeparateCursorCapture : 1; /**< [in]: The client should set this to 1 if it wants to enable mouse capture in separate stream.*/ + NvU32 bReservedBits :29; /**< [in]: Reserved. Set to 0.*/ + NVFBCToSysBufferFormat eMode; /**< [in]: Output image format.*/ + NvU32 dwReserved1; /**< [in]: Reserved. Set to 0.*/ + void **ppBuffer; /**< [out]: Container to hold NvFBC output buffers.*/ + void **ppDiffMap; /**< [out]: Container to hold NvFBC output diffmap buffers.*/ + void *hCursorCaptureEvent; /**< [out]: Client should wait for mouseEventHandle event before calling MouseGrab function. */ + NvU32 dwReserved[58]; /**< [in]: Reserved. Set to 0.*/ + void *pReserved[29]; /**< [in]: Reserved. Set to 0.*/ +} NVFBC_TOSYS_SETUP_PARAMS_V2; +#define NVFBC_TOSYS_SETUP_PARAMS_VER2 NVFBC_STRUCT_VERSION(NVFBC_TOSYS_SETUP_PARAMS, 2) +typedef NVFBC_TOSYS_SETUP_PARAMS_V2 NVFBC_TOSYS_SETUP_PARAMS; +#define NVFBC_TOSYS_SETUP_PARAMS_VER NVFBC_TOSYS_SETUP_PARAMS_VER2 + +/** + * \ingroup NVFBC_TOSYS_STRUCTS + * \brief Defines parameters for a Grab\Capture call in the NVFBCToSys capture session. + * Also holds information regarding the grabbed data. + */ +typedef struct +{ + NvU32 dwVersion; /**< [in]: Struct version. Set to NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER.*/ + NvU32 dwFlags; /**< [in]: Special grabbing requests. 
This should be a bit-mask of NVFBC_TOSYS_GRAB_FLAGS values.*/ + NvU32 dwTargetWidth; /**< [in]: Target image width. NvFBC will scale the captured image to fit taret width and height. Used with NVFBC_TOSYS_SOURCEMODE_SCALE and NVFBC_TOSYS_SOURCEMODE_CROP. */ + NvU32 dwTargetHeight; /**< [in]: Target image height. NvFBC will scale the captured image to fit taret width and height. Used with NVFBC_TOSYS_SOURCEMODE_SCALE and NVFBC_TOSYS_SOURCEMODE_CROP. */ + NvU32 dwStartX; /**< [in]: x-coordinate of starting pixel for cropping. Used with NVFBC_TOSYS_SOURCEMODE_CROP. */ + NvU32 dwStartY; /**< [in]: y-coordinate of starting pixel for cropping. Used with NVFBC_TOSYS_SOURCEMODE_CROP. .*/ + NVFBCToSysGrabMode eGMode; /**< [in]: Frame grab mode.*/ + NvU32 dwWaitTime; /**< [in]: Time limit for NvFBCToSysGrabFrame() to wait until a new frame is available or a HW mouse moves. Use with NVFBC_TOSYS_WAIT_WITH_TIMEOUT */ + NvFBCFrameGrabInfo *pNvFBCFrameGrabInfo; /**< [in/out]: Frame grab information and feedback from NvFBC driver.*/ + NvU32 dwReserved[56]; /**< [in]: Reserved. Set to 0.*/ + void *pReserved[31]; /**< [in]: Reserved. Set to NULL.*/ +} NVFBC_TOSYS_GRAB_FRAME_PARAMS_V1; +#define NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER1 NVFBC_STRUCT_VERSION(NVFBC_TOSYS_GRAB_FRAME_PARAMS, 1) +typedef NVFBC_TOSYS_GRAB_FRAME_PARAMS_V1 NVFBC_TOSYS_GRAB_FRAME_PARAMS; +#define NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER NVFBC_TOSYS_GRAB_FRAME_PARAMS_VER1 + + +/** + * \ingroup NVFBC_TOSYS_INTERFACE + * Interface class definition for NVFBCToSys Capture API + */ +class INvFBCToSys_v3 +{ +public: + /** + * \brief Sets up NVFBC System Memory capture according to the provided parameters. + * \param [in] pParam Pointer to a struct of type ::NVFBC_TOSYS_SETUP_PARAMS. + * \return An applicable ::NVFBCRESULT value. + */ + virtual NVFBCRESULT NVFBCAPI NvFBCToSysSetUp (NVFBC_TOSYS_SETUP_PARAMS_V2 *pParam) = 0; + + /** + * \brief Captures the desktop and dumps the captured data to a System memory buffer. 
+ * If the API returns a failure, the client should check the return codes and ::NvFBCFrameGrabInfo output fields to determine if the session needs to be re-created. + * \param [inout] pParam Pointer to a struct of type ::NVFBC_TOSYS_GRAB_FRAME_PARAMS. + * \return An applicable ::NVFBCRESULT value. + */ + virtual NVFBCRESULT NVFBCAPI NvFBCToSysGrabFrame (NVFBC_TOSYS_GRAB_FRAME_PARAMS *pParam) = 0; + + /** + * \brief Captures HW cursor data whenever shape of mouse is changed + * \param [inout] pParam Pointer to a struct of type ::NVFBC_CURSOR_CAPTURE_PARAMS. + * \return An applicable ::NVFBCRESULT value. + */ + virtual NVFBCRESULT NVFBCAPI NvFBCToSysCursorCapture (NVFBC_CURSOR_CAPTURE_PARAMS *pParam) = 0; + + /** + * \brief A high precision implementation of Sleep(). + * Can provide sub quantum (usually 16ms) sleep that does not burn CPU cycles. + * \param [in] qwMicroSeconds The number of microseconds that the thread should sleep for. + * \return An applicable ::NVFBCRESULT value. + */ + virtual NVFBCRESULT NVFBCAPI NvFBCToSysGPUBasedCPUSleep (__int64 qwMicroSeconds) = 0; + + /** + * \brief Destroys the NVFBCToSys capture session. + * \return An applicable ::NVFBCRESULT value. + */ + virtual NVFBCRESULT NVFBCAPI NvFBCToSysRelease () = 0; +}; + +typedef INvFBCToSys_v3 NvFBCToSys; + +#endif // NVFBC_TO_SYS_H_ diff --git a/third_party/Windows.h b/third_party/Windows.h new file mode 100644 index 0000000..6d21fea --- /dev/null +++ b/third_party/Windows.h @@ -0,0 +1,2 @@ +/* Windows.h — case-compat shim for the vendor NvFBC header, not our API. */ +#include <windows.h>