vgpu in-guest producer in-tree, release CI, flexible vmie discovery

- src/si/vgpu-stream: in-guest vgpu producer built as a Windows cross-compiled target (if(WIN32))
- .gitea: release workflow — cross-build the agent and build/publish the deb against system vmie
- cmake/makefile: resolve vmie from a source tree (LIBVMIE_PATH) or installed libvmie-dev
2026-06-26 04:36:37 +03:00 · 2026-06-22 18:35:12 +03:00
parent 9bde398b6c
commit bd8b966017
31 changed files with 2393 additions and 8 deletions
@@ -0,0 +1,52 @@
+#ifndef VGPU_ATOMIC_SHIM_H
+#define VGPU_ATOMIC_SHIM_H
+
+/* atomic-shim.h — x86-TSO memory-order accessors (arch, not OS).
+ *
+ * There is NO _Atomic in the shared region type: the consumer maps the region
+ * as raw bytes, so synchronization lives entirely in the producer's accessors
+ * defined here. The implementation is per-compiler and is never exposed in the
+ * contract.
+ *
+ * On x86_64 every naturally-aligned MOV up to 8 bytes is atomic and stores are
+ * already release / loads already acquire at the hardware level; the only things
+ * we must prevent are (1) compiler reordering across the sync point and
+ * (2) store-buffer visibility delay between the data writes and the publish
+ * store, for which an explicit SFENCE is used at publish boundaries.
+ */
+
+#include <stdint.h>
+
+#if defined(_MSC_VER)
+
+#include <intrin.h>
+
+/* Compiler-level barrier (MSVC flavour).
+ *
+ * Wraps the _ReadWriteBarrier() intrinsic; it is meant to keep the compiler
+ * from moving memory accesses across this point. Takes no arguments and
+ * returns nothing.
+ */
+static inline void vgpu_compiler_barrier(void)
+{
+    _ReadWriteBarrier();
+}
+/* Store fence (MSVC flavour).
+ *
+ * Issues SFENCE through the _mm_sfence() intrinsic. The file header describes
+ * it as the fence used at publish boundaries.
+ */
+static inline void vgpu_sfence(void)
+{
+    _mm_sfence();
+}
+
+/* Release-style 32-bit store (MSVC flavour).
+ *
+ *   p - word to write (accessed through a volatile lvalue)
+ *   v - value to store
+ *
+ * The compiler barrier is placed BEFORE the store so that earlier accesses are
+ * not moved below it by the compiler; per the file header, hardware ordering
+ * is left to x86-TSO.
+ */
+static inline void vgpu_store_release32(volatile uint32_t* p, uint32_t v)
+{
+    _ReadWriteBarrier();
+    p[0] = v;
+}
+
+/* Acquire-style 32-bit load (MSVC flavour).
+ *
+ *   p - word to read (accessed through a volatile lvalue)
+ *
+ * Returns the value read. The compiler barrier is placed AFTER the load so
+ * that later accesses are not hoisted above it by the compiler.
+ */
+static inline uint32_t vgpu_load_acquire32(const volatile uint32_t* p)
+{
+    const uint32_t observed = p[0];
+
+    _ReadWriteBarrier();
+    return observed;
+}
+
+#else /* gcc / mingw / clang */
+
+/* Compiler-level barrier (gcc / mingw / clang flavour).
+ *
+ * An empty volatile asm statement with a "memory" clobber: no instruction is
+ * emitted, but the compiler must assume memory may have changed and therefore
+ * does not move memory accesses across this point.
+ */
+static inline void vgpu_compiler_barrier(void)
+{
+    __asm__ __volatile__(
+        ""
+        :
+        :
+        : "memory");
+}
+/* Store fence (gcc / mingw / clang flavour).
+ *
+ * Emits an SFENCE instruction; the "memory" clobber additionally makes the
+ * statement act as a compiler barrier. The file header describes it as the
+ * fence used at publish boundaries.
+ */
+static inline void vgpu_sfence(void)
+{
+    __asm__ __volatile__(
+        "sfence"
+        :
+        :
+        : "memory");
+}
+
+/* Release-ordered 32-bit store (gcc / mingw / clang flavour).
+ *
+ *   p - word to write
+ *   v - value to store
+ *
+ * Delegates to the __atomic_store_n builtin with __ATOMIC_RELEASE ordering.
+ */
+static inline void vgpu_store_release32(volatile uint32_t* p, uint32_t v)
+{
+    __atomic_store_n(p, v, __ATOMIC_RELEASE);
+}
+
+/* Acquire-ordered 32-bit load (gcc / mingw / clang flavour).
+ *
+ *   p - word to read
+ *
+ * Returns the value obtained from the __atomic_load_n builtin with
+ * __ATOMIC_ACQUIRE ordering.
+ */
+static inline uint32_t vgpu_load_acquire32(const volatile uint32_t* p)
+{
+    const uint32_t observed = __atomic_load_n(p, __ATOMIC_ACQUIRE);
+
+    return observed;
+}
+
+#endif
+
+#endif /* VGPU_ATOMIC_SHIM_H */