mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-26 04:36:37 +03:00
vmsig: management daemon, runtime endpoint lifecycle, roster, discovery, in-tree drivers, packaging
- core: runtime attach/detach of a per-endpoint adapter trio (runtime-safe add_adapter + vmsig_core_detach_endpoint, deferred reap)
- roster: VMSIG_EV_ROSTER + CAP_ROSTER, retained per-endpoint and replayed to late subscribers
- discovery: inotify trigger dir, vmid/endpoint slot allocator, host probe; vmsigd daemon with config + per-uid admission
- input driver and vgpu perception built in-tree; vgpu perception as a separate library
- memctx: owns the supplied ro_fd (closed at detach)
- deb packaging: install rules, systemd unit, tmpfiles, default config
This commit is contained in:
+49
-10
@@ -68,10 +68,22 @@ vmsig_core* vmsig_core_new(vmsig_ctx* ctx) {
|
||||
|
||||
int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
|
||||
const void* cfg, uint32_t endpoint) {
|
||||
if (!c || !ops || c->nadapters >= VMSIG_MAX_ADAPTERS) return -1;
|
||||
if (!c || !ops) return -1;
|
||||
|
||||
/* Reuse a reaped (inactive) adapter entry so runtime detach/re-attach churn does
|
||||
* not exhaust the fixed table; otherwise grow up to the ceiling. */
|
||||
int id = -1;
|
||||
for (int i = 0; i < c->nadapters; i++)
|
||||
if (!c->adapters[i].active) { id = i; break; }
|
||||
if (id < 0) {
|
||||
if (c->nadapters >= VMSIG_MAX_ADAPTERS) return -1;
|
||||
id = c->nadapters++;
|
||||
}
|
||||
core_adapter_ent* e = &c->adapters[id];
|
||||
uint16_t gen = e->gen; /* generation survives the memset below */
|
||||
|
||||
vmsig_adapter* a = ops->open(cfg, endpoint);
|
||||
if (!a) return -1;
|
||||
if (!a) return -1; /* entry stays inactive (reusable) */
|
||||
|
||||
vmsig_emit emit = { core_emit_up, core_register_memctx, core_unregister_memctx, c };
|
||||
vmsig_fd_reg reg[VMSIG_ADAPTER_FDS];
|
||||
@@ -80,23 +92,48 @@ int vmsig_core_add_adapter(vmsig_core* c, const vmsig_adapter_ops* ops,
|
||||
int n = ops->attach(a, &emit, reg, VMSIG_ADAPTER_FDS);
|
||||
if (n < 0) { ops->close(a); return -1; }
|
||||
|
||||
memset(e, 0, sizeof *e);
|
||||
e->ops = ops;
|
||||
e->a = a;
|
||||
e->endpoint = endpoint;
|
||||
e->active = 1;
|
||||
e->gen = (uint16_t)(gen + 1);
|
||||
e->nslot = 0;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
uint32_t events = reg[i].epoll_events ? reg[i].epoll_events : (uint32_t)EPOLLIN;
|
||||
core_slot* s = core_register_fd(c, reg[i].fd, events, SLOT_ADAPTER);
|
||||
if (!s) { ops->close(a); return -1; }
|
||||
if (!s) {
|
||||
/* roll back: deregister the fds enrolled so far, then close + free the entry. */
|
||||
for (int k = 0; k < e->nslot; k++) {
|
||||
epoll_ctl(c->epfd, EPOLL_CTL_DEL, e->slots[k]->fd, NULL);
|
||||
e->slots[k]->role = SLOT_DEAD;
|
||||
}
|
||||
ops->close(a);
|
||||
e->active = 0; e->a = NULL; e->nslot = 0;
|
||||
return -1;
|
||||
}
|
||||
s->ops = ops;
|
||||
s->adapter = a;
|
||||
s->cookie = reg[i].cookie;
|
||||
if (e->nslot < VMSIG_ADAPTER_FDS) e->slots[e->nslot++] = s;
|
||||
}
|
||||
|
||||
int id = c->nadapters;
|
||||
c->adapters[c->nadapters].ops = ops;
|
||||
c->adapters[c->nadapters].a = a;
|
||||
c->adapters[c->nadapters].endpoint = endpoint;
|
||||
c->nadapters++;
|
||||
return id;
|
||||
}
|
||||
|
||||
/* Request runtime detach of every adapter on `endpoint` (deferred reap after the batch,
|
||||
* mirrors core_request_drop). The teardown itself (epoch settle, SEAM_DOWN, lease release,
|
||||
* epoll DEL, ops->close) runs in core_reap_adapters on the loop thread. */
|
||||
void vmsig_core_detach_endpoint(vmsig_core* c, uint32_t endpoint) {
|
||||
if (!c || endpoint >= 64) return;
|
||||
int any = 0;
|
||||
for (int i = 0; i < c->nadapters; i++) {
|
||||
core_adapter_ent* e = &c->adapters[i];
|
||||
if (e->active && e->endpoint == endpoint) { e->reap = 1; any = 1; }
|
||||
}
|
||||
if (any) core_wake(c);
|
||||
}
|
||||
|
||||
int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ctl,
|
||||
const vmsig_grant* grant) {
|
||||
if (!c || !ops) return -1;
|
||||
@@ -137,6 +174,7 @@ int vmsig_core_add_control(vmsig_core* c, const vmsig_control_ops* ops, void* ct
|
||||
* this control is qualified). For a control added BEFORE the first publication,
|
||||
* the cell is not yet valid — it receives MEMCTX via the normal multicast in pump_up. */
|
||||
core_memctx_replay(c, id);
|
||||
core_roster_replay(c, id); /* late subscriber: retained VM roster (CAP_ROSTER) */
|
||||
|
||||
return id; /* ncontrols already bumped when picking id (on growth); reuse does not grow it */
|
||||
}
|
||||
@@ -205,7 +243,8 @@ void vmsig_core_free(vmsig_core* c) {
|
||||
* FIRST: their close stops off-loop workers and unregisters their seams (e.g.
|
||||
* memctx) BEFORE destruction. */
|
||||
for (int i = 0; i < c->nadapters; i++)
|
||||
if (c->adapters[i].ops->close) c->adapters[i].ops->close(c->adapters[i].a);
|
||||
if (c->adapters[i].active && c->adapters[i].ops->close)
|
||||
c->adapters[i].ops->close(c->adapters[i].a);
|
||||
for (int i = 0; i < c->ncontrols; i++)
|
||||
if (c->controls[i].active && c->controls[i].ops->close)
|
||||
c->controls[i].ops->close(c->controls[i].ctl);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#ifndef VMSIG_CORE_INTERNAL_H
|
||||
#define VMSIG_CORE_INTERNAL_H
|
||||
#include "vmsig_core.h"
|
||||
#include "vmsig_roster.h"
|
||||
#include <signal.h>
|
||||
|
||||
/* Private internals of the epoll core. Each registered fd carries a
|
||||
@@ -41,6 +42,11 @@ typedef struct {
|
||||
const vmsig_adapter_ops* ops;
|
||||
vmsig_adapter* a;
|
||||
uint32_t endpoint;
|
||||
int active; /* 0 = free/reaped slot (reusable) */
|
||||
int reap; /* deferred runtime detach requested */
|
||||
uint16_t gen; /* +1 on each (re)use (ABA guard / debug) */
|
||||
core_slot* slots[VMSIG_ADAPTER_FDS]; /* epoll slots we registered */
|
||||
int nslot;
|
||||
} core_adapter_ent;
|
||||
|
||||
|
||||
@@ -57,6 +63,15 @@ typedef struct {
|
||||
vmsig_memctx_reg reg; /* valid when registered */
|
||||
} core_memctx_cell;
|
||||
|
||||
/* ===== Retained VM roster (inventory coherence; daemon-published) =====
|
||||
* One value snapshot per endpoint: the last published roster datum. Simpler than the
|
||||
* MEMCTX cell — roster carries no fd and no borrowed buffer, so the cell is pure POD and
|
||||
* delivery is the ordinary broadcast (no re-describe / re-share). valid=0 on DETACH. */
|
||||
typedef struct {
|
||||
int valid; /* a roster entry is published for this endpoint */
|
||||
vmsig_roster entry; /* last published {vmid,state,action,name} (by value) */
|
||||
} core_roster_cell;
|
||||
|
||||
/* ===== Lease layer (arbitration of exclusive ownership of destructive resources) =====
|
||||
* One cell per (endpoint, lease-class): who owns it (origin) + a snapshot of arb_prio at
|
||||
* acquisition time. owner=0 => free. The snapshot (rather than the live grant) makes the
|
||||
@@ -108,6 +123,7 @@ struct vmsig_core {
|
||||
|
||||
uint32_t epoch[64]; /* per-endpoint VM session epoch */
|
||||
core_memctx_cell memctx[64]; /* per-endpoint retained context */
|
||||
core_roster_cell roster[64]; /* per-endpoint retained roster */
|
||||
|
||||
core_lease_cell lease[64][VMSIG_LEASE_CLASSES]; /* lease per (endpoint, class) */
|
||||
vmsig_arb_policy arb_cb; /* preemption policy (NULL=default) */
|
||||
@@ -150,6 +166,14 @@ void core_memctx_route(vmsig_core* c, const vmsig_event* trigger);
|
||||
* defined in loop.c). */
|
||||
void core_memctx_replay(vmsig_core* c, int ctl_id);
|
||||
|
||||
/* ===== VM roster (inventory coherence; defined in loop.c alongside the memctx seam) ===== */
|
||||
/* Publish a roster transition for `endpoint`: retain the datum (valid=0 on DETACH) and
|
||||
* broadcast VMSIG_EV_ROSTER to qualified subscribers (CAP_ROSTER + source + endpoint). */
|
||||
void core_roster_publish(vmsig_core* c, uint32_t endpoint, const vmsig_roster* entry);
|
||||
|
||||
/* Replay the retained roster to a single (late) subscriber (from vmsig_core_add_control). */
|
||||
void core_roster_replay(vmsig_core* c, int ctl_id);
|
||||
|
||||
/* Bump the endpoint's epoch on a destructive lifecycle transition: epoch++, invalidate
|
||||
* the retain cell, emit MEMCTX_INVALIDATED, request re-bootstrap from the adapter.
|
||||
* Observed by the core in pump_up on UP VM_LIFECYCLE (defined in loop.c). */
|
||||
|
||||
+115
-4
@@ -105,6 +105,7 @@ static uint32_t source_mask_for_lease_class(int cls) {
|
||||
* input actor (INPUT); otherwise CAP_OBSERVE (frames/SEAM/generic). The grant_allows_up
|
||||
* gate checks intersection, so OBSERVE|INPUT means "either of the two". */
|
||||
static uint32_t cap_for_up(const vmsig_event* ev) {
|
||||
if (ev->kind == VMSIG_EV_ROSTER) return VMSIG_CAP_ROSTER; /* host-wide inventory */
|
||||
if (ev->kind == VMSIG_EV_CURSOR_STATE) return VMSIG_CAP_OBSERVE | VMSIG_CAP_INPUT;
|
||||
return (ev->source == VMSIG_SRC_MEMCTX) ? VMSIG_CAP_MEMCTX : VMSIG_CAP_OBSERVE;
|
||||
}
|
||||
@@ -129,7 +130,7 @@ static core_adapter_ent* core_find_adapter(vmsig_core* c, uint32_t endpoint,
|
||||
vmsig_source source) {
|
||||
for (int i = 0; i < c->nadapters; i++) {
|
||||
core_adapter_ent* e = &c->adapters[i];
|
||||
if (e->ops->source == source && e->endpoint == endpoint) return e;
|
||||
if (e->active && e->ops->source == source && e->endpoint == endpoint) return e;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@@ -316,6 +317,22 @@ void core_lease_reap_control(vmsig_core* c, int ctl_id) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Release ALL lease classes held on `endpoint` (from endpoint detach, BEFORE the adapters
|
||||
* close). Symmetric to core_lease_reap_control but keyed by endpoint, not owner: when a VM
|
||||
* disappears its leases must not survive to auto-transfer onto whatever VM later reuses the
|
||||
* same endpoint bit. The owner principal is recorded for the audit. */
|
||||
static void core_lease_reap_endpoint(vmsig_core* c, uint32_t endpoint) {
|
||||
if (endpoint >= 64) return;
|
||||
for (int cls = 0; cls < VMSIG_LEASE_CLASSES; cls++) {
|
||||
core_lease_cell* cell = &c->lease[endpoint][cls];
|
||||
if (!cell->owner) continue;
|
||||
uint32_t principal = lease_owner_principal(c, cell->owner);
|
||||
cell->owner = 0; cell->owner_prio = 0;
|
||||
vmsig_audit a = { VMSIG_AUDIT_LEASE_RECLAIMED, principal, endpoint, (uint32_t)cls, 0 };
|
||||
core_audit(c, &a);
|
||||
}
|
||||
}
|
||||
|
||||
/* DOWN emit from a control: enforcement against THIS control's grant. */
|
||||
int core_emit_down(void* token, vmsig_event* ev) {
|
||||
core_down_ctx* d = token;
|
||||
@@ -472,7 +489,57 @@ void core_memctx_replay(vmsig_core* c, int ctl_id) {
|
||||
}
|
||||
}
|
||||
|
||||
void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
|
||||
/* ===== VM roster (inventory coherence): retain + broadcast + replay-to-late ===== *
|
||||
* Mirrors the MEMCTX retain cell, but the datum is a pure inline POD (no fd, no borrowed
|
||||
* buffer): delivery is the ordinary broadcast (ops->deliver), with NO interception in
|
||||
* pump_up. Publish is SYNCHRONOUS (like core_memctx_route) so a control gets the datum
|
||||
* exactly once: current subscribers via this broadcast, a late one via core_roster_replay. */
|
||||
/* Fill `*ev` with a roster event for endpoint `ep`: the event is zeroed, then set to
 * kind VMSIG_EV_ROSTER, source VMSIG_SRC_CORE, direction UP, priority URGENT, with the
 * payload flagged inline and `*r` copied by value into the event's inline area. */
static void core_roster_build(uint32_t ep, const vmsig_roster* r, vmsig_event* ev) {
    memset(ev, 0, sizeof *ev);

    ev->endpoint      = ep;
    ev->dir           = VMSIG_DIR_UP;
    ev->source        = VMSIG_SRC_CORE;
    ev->kind          = VMSIG_EV_ROSTER;
    ev->prio          = VMSIG_PRIO_URGENT;
    ev->payload.flags = VMSIG_PL_INLINE;

    /* NOTE(review): assumes sizeof(vmsig_roster) fits inside ev->inln; nothing here
     * checks it — confirm against the vmsig_event definition. */
    memcpy(ev->inln, r, sizeof(vmsig_roster));
}
|
||||
|
||||
/* Publish a roster transition for `endpoint`.
 *
 * The datum is first retained in the per-endpoint roster cell (copied by value). A DETACH
 * action marks the cell invalid, so it will not be replayed to a late subscriber; any other
 * action marks it valid. The event is then delivered synchronously, in this call, to every
 * active control that has a deliver hook and passes both the grant gate and its own
 * subscription filter. DETACH is delivered too, so current subscribers can drop the VM.
 *
 * A NULL core, a NULL entry or an endpoint outside [0, 64) is ignored. */
void core_roster_publish(vmsig_core* c, uint32_t endpoint, const vmsig_roster* entry) {
    if (c == NULL || entry == NULL || endpoint >= 64) return;

    core_roster_cell* retained = &c->roster[endpoint];
    retained->entry = *entry;
    retained->valid = (entry->action == VMSIG_ROSTER_DETACH) ? 0 : 1;

    vmsig_event ev;
    core_roster_build(endpoint, entry, &ev);

    for (int idx = 0; idx < c->ncontrols; idx++) {
        core_control_ent* target = &c->controls[idx];
        if (target->active && target->ops->deliver
            && grant_allows_up(&target->grant, &ev)
            && sub_match(&target->sub, &ev)) {
            target->ops->deliver(target->ctl, &ev);
        }
    }
}
|
||||
|
||||
/* Replay the retained roster to one control (`ctl_id`), e.g. a late subscriber.
 *
 * Walks all 64 endpoint cells and, for each cell still marked valid, rebuilds the roster
 * event and delivers it to that control if it passes the grant gate and the control's
 * subscription filter. Does nothing for a NULL core, an out-of-range id, an inactive
 * control, or a control without a deliver hook. */
void core_roster_replay(vmsig_core* c, int ctl_id) {
    if (c == NULL) return;
    if (ctl_id < 0 || ctl_id >= c->ncontrols) return;

    core_control_ent* target = &c->controls[ctl_id];
    if (!target->active) return;
    if (!target->ops->deliver) return;

    for (uint32_t ep = 0; ep < 64; ep++) {
        core_roster_cell* retained = &c->roster[ep];
        if (retained->valid) {
            vmsig_event ev;
            core_roster_build(ep, &retained->entry, &ev);
            if (grant_allows_up(&target->grant, &ev) && sub_match(&target->sub, &ev)) {
                target->ops->deliver(target->ctl, &ev);
            }
        }
    }
}
|
||||
|
||||
/* Bump the endpoint epoch and broadcast MEMCTX_INVALIDATED to holders. When `rebootstrap`
|
||||
* is set, ask the adapter to re-bootstrap (it re-emits MEMCTX{epoch+1} when ready) — the
|
||||
* normal destructive-lifecycle path. On endpoint TEARDOWN (detach) `rebootstrap` is 0: the
|
||||
* adapter is about to be closed, so kicking a re-bootstrap on a worker we are joining would
|
||||
* be wasted; holders still settle via the INVALIDATED broadcast + the bumped epoch. */
|
||||
static void core_epoch_invalidate_emit(vmsig_core* c, uint32_t endpoint, int rebootstrap) {
|
||||
if (endpoint >= 64) return;
|
||||
c->epoch[endpoint]++;
|
||||
core_memctx_cell* cell = &c->memctx[endpoint];
|
||||
@@ -486,11 +553,14 @@ void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
|
||||
memcpy(up.inln, &inv, sizeof inv);
|
||||
core_emit_up(c, &up); /* broadcast to holders (CAP_MEMCTX gate) */
|
||||
|
||||
/* request re-bootstrap from the adapter: it re-emits MEMCTX{epoch+1} when ready. */
|
||||
if (cell->registered && cell->reg.invalidate)
|
||||
if (rebootstrap && cell->registered && cell->reg.invalidate)
|
||||
cell->reg.invalidate(cell->reg.ctx, c->epoch[endpoint]);
|
||||
}
|
||||
|
||||
/* Destructive-lifecycle epoch bump for `endpoint`: forwards to
 * core_epoch_invalidate_emit() with the re-bootstrap request enabled. */
void core_epoch_bump(vmsig_core* c, uint32_t endpoint) {
    const int request_rebootstrap = 1;
    core_epoch_invalidate_emit(c, endpoint, request_rebootstrap);
}
|
||||
|
||||
/* UP: drain the context queue and dispatch to subscribed controls */
|
||||
static void pump_up(vmsig_core* c) {
|
||||
vmsig_event ev;
|
||||
@@ -575,6 +645,46 @@ static void core_reap(vmsig_core* c) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Deferred reap of runtime-detached adapters (after the batch). Two passes:
|
||||
* 1) per-endpoint coherence settle ONCE: release leases + bump epoch / broadcast
|
||||
* MEMCTX_INVALIDATED (no re-bootstrap — we are tearing down). Done while the memctx
|
||||
* cell is still registered.
|
||||
* 2) per-adapter teardown: SEAM_DOWN (close is silent on administrative detach), epoll
|
||||
* DEL + mark slots dead (so the loop never dispatches a half-closed adapter), then
|
||||
* ops->close (joins the worker, closes the SI handle AFTER the join).
|
||||
* Deferred (reap flag set elsewhere) so no live slot is flipped to DEAD inside the batch. */
|
||||
static void core_reap_adapters(vmsig_core* c) {
|
||||
uint64_t settled = 0; /* endpoints already coherence-settled this pass */
|
||||
for (int i = 0; i < c->nadapters; i++) {
|
||||
core_adapter_ent* e = &c->adapters[i];
|
||||
if (!e->reap || !e->active) continue;
|
||||
uint32_t ep = e->endpoint;
|
||||
if (ep < 64 && !(settled & (1ull << ep))) {
|
||||
settled |= (1ull << ep);
|
||||
core_lease_reap_endpoint(c, ep);
|
||||
core_epoch_invalidate_emit(c, ep, 0); /* settle holders; no re-bootstrap */
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < c->nadapters; i++) {
|
||||
core_adapter_ent* e = &c->adapters[i];
|
||||
if (!e->reap || !e->active) continue;
|
||||
|
||||
vmsig_event sd;
|
||||
memset(&sd, 0, sizeof sd);
|
||||
sd.kind = VMSIG_EV_SEAM_DOWN; sd.source = e->ops->source; sd.dir = VMSIG_DIR_UP;
|
||||
sd.prio = VMSIG_PRIO_URGENT; sd.endpoint = e->endpoint;
|
||||
core_emit_up(c, &sd);
|
||||
|
||||
for (int k = 0; k < e->nslot; k++) {
|
||||
if (!e->slots[k]) continue;
|
||||
epoll_ctl(c->epfd, EPOLL_CTL_DEL, e->slots[k]->fd, NULL);
|
||||
e->slots[k]->role = SLOT_DEAD;
|
||||
}
|
||||
if (e->ops->close) e->ops->close(e->a);
|
||||
e->a = NULL; e->nslot = 0; e->active = 0; e->reap = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int vmsig_core_run(vmsig_core* c) {
|
||||
if (!c) return -1;
|
||||
struct epoll_event evs[VMSIG_MAX_EVENTS];
|
||||
@@ -609,6 +719,7 @@ int vmsig_core_run(vmsig_core* c) {
|
||||
pump_up(c);
|
||||
pump_down(c);
|
||||
core_reap(c);
|
||||
core_reap_adapters(c);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user