Files
vatrog-vm-signaling/src/discovery/linux/host_probe.c
T

245 lines
9.8 KiB
C
Raw Normal View History

/* host_probe.c — the default Proxmox host-probe (see host_probe.h). The ONLY TU that knows
* /etc/pve/qemu-server, the QMP socket path convention, and `info mtree`. Pure libc +
* AF_UNIX + files; no vmie/vmctl. config() is cheap+local; live() does a bounded blocking
* QMP round-trip (query-status + info mtree) and is fail-closed: anything it cannot confirm
* leaves ok=0 (the VM is not brought up rather than guessed). */
#define _GNU_SOURCE
#include "host_probe.h"
#include "vmsig_event.h" /* VMSIG_VM_* */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stddef.h>
#include <errno.h>
typedef struct {
const char* watch_dir; /* /dev/shm/vmsig */
const char* pve_conf; /* /etc/pve/qemu-server */
const char* qmp_dir; /* /var/run/qemu-server */
} hp_cfg;
/* ---- /etc/pve config (stage 1) ----------------------------------------------- */
/* Slurp at most `cap` bytes of `path` into a freshly malloc'd, NUL-terminated buffer that the
 * caller must free(). Returns NULL when the file cannot be opened, the allocation fails, or a
 * read fails (EINTR is retried). A file longer than `cap` is NOT rejected: reading stops once
 * `cap` bytes are in and that truncated prefix is returned. */
static char* read_file(const char* path, size_t cap) {
    int fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd < 0) return NULL;

    char* data = malloc(cap + 1); /* +1 reserves the terminator slot */
    if (data == NULL) {
        close(fd);
        return NULL;
    }

    size_t used = 0;
    for (;;) {
        ssize_t chunk = read(fd, data + used, cap - used);
        if (chunk > 0) {
            used += (size_t)chunk;
            if (used >= cap) break; /* buffer full: stop, keep what we have */
        } else if (chunk == 0) {
            break; /* end of file */
        } else if (errno != EINTR) {
            free(data); /* hard read error: discard the partial content */
            data = NULL;
            break;
        }
    }
    close(fd);

    if (data != NULL) data[used] = 0;
    return data;
}
/* Look up a top-level "key: value" line in a Proxmox VM config and copy the value into `out`.
 *
 * conf  NUL-terminated config text (NULL is treated as empty).
 * key   key name without the trailing ':'; must match from column 0 of the line.
 * out   receives the value with leading blanks and trailing blanks/'\r' removed; values that
 *       do not fit are truncated to cap-1 bytes. Always NUL-terminated on success.
 * cap   size of `out` in bytes; 0 cannot hold even the terminator and yields "not found".
 *
 * Returns 1 if the key was found, 0 otherwise. Only the part of the file before the first
 * "[section]" header is searched: later sections hold snapshot/pending state, and their keys
 * must not be mistaken for the VM's current configuration. */
static int conf_val(const char* conf, const char* key, char* out, size_t cap) {
    if (!conf || !key || !out || cap == 0) return 0;

    size_t klen = strlen(key);
    const char* line = conf;
    while (*line) {
        if (*line == '[') break; /* first section header ends the top-level block */

        const char* nl = strchr(line, '\n');
        size_t llen = nl ? (size_t)(nl - line) : strlen(line);

        /* llen > klen guarantees line[klen] is still inside this line */
        if (llen > klen && line[klen] == ':' && strncmp(line, key, klen) == 0) {
            const char* v = line + klen + 1;
            const char* eol = line + llen;
            while (v < eol && (*v == ' ' || *v == '\t')) v++;
            size_t vlen = (size_t)(eol - v);
            while (vlen && (v[vlen - 1] == ' ' || v[vlen - 1] == '\t' || v[vlen - 1] == '\r'))
                vlen--;
            if (vlen >= cap) vlen = cap - 1; /* truncate; cap >= 1 was checked above */
            memcpy(out, v, vlen);
            out[vlen] = 0;
            return 1;
        }

        if (!nl) break;
        line = nl + 1;
    }
    return 0;
}
/* 1 if `needle` occurs on a line of `conf` that does not start with '#', else 0.
 * '#' lines are free-form description text and must not satisfy a config check. */
static int hp_conf_has(const char* conf, const char* needle) {
    for (const char* hit = strstr(conf, needle); hit; hit = strstr(hit + 1, needle)) {
        const char* bol = hit;
        while (bol > conf && bol[-1] != '\n') bol--;
        if (*bol != '#') return 1;
    }
    return 0;
}

/* Stage 1 probe: derive the per-VM paths and read the VM's Proxmox config file.
 *
 * p     probe handle; p->ud is the hp_cfg holding the directory roots.
 * vmid  VM id used to build "<watch_dir>/vm-<vmid>-ram", "<qmp_dir>/<vmid>.qmp" and
 *       "<pve_conf>/<vmid>.conf".
 * out   zeroed, then filled with vmid, paths, name, cfg_ram_bytes, share_on and ok.
 *
 * Always returns 0; the verdict is out->ok. ok stays 0 when a path does not fit its buffer,
 * when the config file cannot be read, or when share=on is absent. Only the top-level part
 * of the config (before the first "[section]" header) is considered, so values recorded in
 * snapshot/pending sections cannot produce a pass. */
static int hp_config(const struct vmsig_host_probe* p, uint32_t vmid, vmsig_host_facts* out) {
    const hp_cfg* c = p->ud;
    memset(out, 0, sizeof *out); /* ok = 0 until every check below has passed */
    out->vmid = vmid;

    char path[VMSIG_HF_PATH_MAX + 32];
    int n_ram = snprintf(out->ram_path, sizeof out->ram_path, "%s/vm-%u-ram", c->watch_dir, vmid);
    int n_qmp = snprintf(out->qmp_path, sizeof out->qmp_path, "%s/%u.qmp", c->qmp_dir, vmid);
    int n_cfg = snprintf(path, sizeof path, "%s/%u.conf", c->pve_conf, vmid);
    /* fail closed on truncation: a clipped path names some other file or socket */
    if (n_ram < 0 || (size_t)n_ram >= sizeof out->ram_path ||
        n_qmp < 0 || (size_t)n_qmp >= sizeof out->qmp_path ||
        n_cfg < 0 || (size_t)n_cfg >= sizeof path)
        return 0;

    char* conf = read_file(path, 64 * 1024);
    if (!conf) { out->ok = 0; return 0; } /* no host config => not a known VM */

    /* Keep only the top-level block: cut the buffer at the first section header. */
    if (conf[0] == '[') {
        conf[0] = 0;
    } else {
        char* hdr = strstr(conf, "\n[");
        if (hdr) hdr[1] = 0;
    }

    char tmp[VMSIG_HF_NAME_MAX];
    if (conf_val(conf, "name", out->name, sizeof out->name) == 0)
        snprintf(out->name, sizeof out->name, "vm-%u", vmid);
    if (conf_val(conf, "memory", tmp, sizeof tmp)) /* value is scaled as MiB -> bytes */
        out->cfg_ram_bytes = (uint64_t)strtoull(tmp, NULL, 10) * 1024ull * 1024ull;
    /* share=on is mandatory: without it the host mmap is a private copy, not guest RAM. */
    out->share_on = hp_conf_has(conf, "share=on");
    free(conf);

    out->ok = out->share_on ? 1 : 0; /* config-level pass; liveness is stage 2 */
    return 0;
}
/* ---- QMP liveness + mtree low (stage 2) -------------------------------------- */
/* Open a connected AF_UNIX stream socket to the QMP endpoint at `path`.
 *
 * A leading '@' selects the abstract namespace (the '@' is replaced by a NUL byte and the
 * address length covers exactly the name); any other path is a filesystem socket. Both send
 * and receive get a 250 ms timeout so later I/O on the socket is bounded.
 *
 * Returns the connected fd (close-on-exec), or -1 if the path does not fit sockaddr_un, the
 * socket cannot be created, the timeouts cannot be installed, or connect() fails. Refusing a
 * socket without timeouts keeps the probe from blocking indefinitely on a wedged peer. */
static int qmp_connect(const char* path) {
    struct sockaddr_un a;
    socklen_t alen;
    size_t n = strlen(path);

    memset(&a, 0, sizeof a);
    a.sun_family = AF_UNIX;
    if (path[0] == '@') { /* abstract namespace */
        if (n > sizeof a.sun_path) return -1;
        a.sun_path[0] = 0;
        memcpy(a.sun_path + 1, path + 1, n - 1);
        alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n);
    } else {
        if (n >= sizeof a.sun_path) return -1; /* must leave room for the NUL */
        memcpy(a.sun_path, path, n);
        alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n + 1);
    }

    int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
    if (fd < 0) return -1;

    const struct timeval tv = { .tv_sec = 0, .tv_usec = 250000 }; /* 250ms bound per call */
    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv) < 0 ||
        setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof tv) < 0 ||
        connect(fd, (struct sockaddr*)&a, alen) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}
/* Read from `fd` into `buf` until a '\n' has arrived (QMP frames each JSON object on a line;
 * an HMP string return keeps its newlines escaped, so it is still a single line).
 *
 * buf/cap  destination and its size; at most cap-1 bytes are stored.
 * out_len  receives the number of bytes stored.
 *
 * On every return path `buf` is NUL-terminated and *out_len is set, so callers may scan `buf`
 * as a C string. Returns 1 when data is available (a newline was seen, the buffer filled up,
 * or the peer closed after sending a partial line), 0 on EOF with no data, -1 on read
 * error/timeout or cap == 0.
 *
 * NOTE: read() may deliver bytes beyond the first newline; they are kept in `buf` and are not
 * pushed back, so `buf` can hold more than one message. */
static int qmp_read_line(int fd, char* buf, size_t cap, size_t* out_len) {
    size_t got = 0;
    int eof = 0;

    *out_len = 0;
    if (cap == 0) return -1; /* no room even for the terminator */
    buf[0] = 0;

    while (got + 1 < cap) {
        ssize_t r = read(fd, buf + got, cap - 1 - got);
        if (r < 0) {
            if (errno == EINTR) continue;
            buf[got] = 0;
            *out_len = got;
            return -1; /* timeout/error */
        }
        if (r == 0) { eof = 1; break; }
        /* only the bytes that just arrived can contain a new '\n' */
        int has_nl = memchr(buf + got, '\n', (size_t)r) != NULL;
        got += (size_t)r;
        if (has_nl) break;
    }

    buf[got] = 0;
    *out_len = got;
    /* a line longer than cap is truncated but still usable for substring scans */
    return (eof && got == 0) ? 0 : 1;
}
/* Read QMP messages until one is a command response, giving up after 64 reads.
 *
 * Returns 1 for a "return" response, 0 for an "error" response or a closed connection, and
 * -1 on a transport error or when no response showed up within the read budget. The buffer
 * that produced the verdict is left in `buf` for the caller to scan.
 *
 * Classification is by quoted top-level key. Keys quoted inside a JSON string value are
 * escaped (\"...\") and therefore never match. "return" is tested first, so a response that
 * arrives in the same read as an event is still recognised. "error" only counts when the
 * buffer holds no "event": asynchronous events may carry their own "error" member, and such
 * an event is not a failure of our command. */
static int qmp_await_return(int fd, char* buf, size_t cap) {
    for (int i = 0; i < 64; i++) {
        size_t len = 0;
        int r = qmp_read_line(fd, buf, cap, &len);
        if (r <= 0) return r;
        if (strstr(buf, "\"return\"")) return 1;
        if (strstr(buf, "\"error\"") && !strstr(buf, "\"event\"")) return 0;
        /* greeting {"QMP":...} or async {"event":...} -> keep reading */
    }
    return -1;
}
/* Send one QMP command line and wait for its response.
 *
 * fd    connected QMP socket.
 * json  complete command text, including its trailing newline.
 * buf   receives the response text (see qmp_await_return); cap is its size.
 *
 * Returns what qmp_await_return() returns (1 return, 0 error/closed, -1 transport), or -1 if
 * the command could not be sent in full. send() with MSG_NOSIGNAL is used instead of write()
 * so that a peer that already closed the socket produces an error return rather than SIGPIPE;
 * short sends and EINTR are retried. */
static int qmp_cmd(int fd, const char* json, char* buf, size_t cap) {
    const char* at = json;
    size_t left = strlen(json);

    while (left > 0) {
        ssize_t w = send(fd, at, left, MSG_NOSIGNAL);
        if (w < 0) {
            if (errno == EINTR) continue;
            return -1; /* includes the send timeout (EAGAIN) and EPIPE */
        }
        if (w == 0) return -1; /* no progress: do not spin */
        at += w;
        left -= (size_t)w;
    }
    return qmp_await_return(fd, buf, cap);
}
/* 1 if the text at `s` begins with `word`. */
static int qmp_status_is(const char* s, const char* word) {
    return strncmp(s, word, strlen(word)) == 0;
}

/* Translate the "status" value of a query-status response into a VMSIG_VM_* code.
 * The value is located as the first '"' after the first ':' after the "status" key, then
 * matched by prefix: running -> RUNNING; paused and prelaunch -> PAUSED; shutdown ->
 * SHUTDOWN; guest-panicked and internal-error -> CRASHED. A missing key or any other word
 * gives VMSIG_VM_UNKNOWN. */
static int qmp_status_word(const char* buf) {
    const char* s = strstr(buf, "\"status\"");
    if (s) s = strchr(s, ':');
    if (s) s = strchr(s, '"');
    if (!s) return VMSIG_VM_UNKNOWN;
    s++; /* step over the opening quote of the value */

    if (qmp_status_is(s, "running")) return VMSIG_VM_RUNNING;
    if (qmp_status_is(s, "paused") || qmp_status_is(s, "prelaunch")) return VMSIG_VM_PAUSED;
    if (qmp_status_is(s, "shutdown")) return VMSIG_VM_SHUTDOWN;
    if (qmp_status_is(s, "guest-panicked") || qmp_status_is(s, "internal-error"))
        return VMSIG_VM_CRASHED;
    return VMSIG_VM_UNKNOWN;
}
/* 1 if the descriptor text that follows a range contains "ram)". The scan stops at the next
 * escaped line break (the two characters '\' 'n'), or after 64 bytes when none follows, and
 * never runs past the terminating NUL. */
static int mtree_desc_is_ram(const char* desc) {
    const char* eol = strstr(desc, "\\n");
    size_t span = eol ? (size_t)(eol - desc) : 64;
    for (size_t k = 0; k < span && desc[k]; k++) {
        if (strncmp(desc + k, "ram)", 4) == 0) return 1;
    }
    return 0;
}

/* Extract the below-4G RAM split from an `info mtree` response.
 *
 * `ret` is the QMP response text; the HMP output sits inside a JSON string, so its line
 * breaks appear escaped. Every range that starts at guest-physical address 0
 * ("0000000000000000-<end>") is examined in order, and the first one whose descriptor marks
 * it as RAM and whose end is neither 0 nor all-ones yields end + 1.
 *
 * FAIL-CLOSED: returns 0 when no such range exists.
 * NOTE: this parses HMP text, not a stable QMP schema — verify against real `info mtree`
 * output. */
static uint64_t mtree_low(const char* ret) {
    static const char origin[] = "0000000000000000-"; /* 16 zeros + '-' */

    const char* at = strstr(ret, origin);
    while (at != NULL) {
        const char* hex = at + (sizeof origin - 1);
        char* after = NULL;
        unsigned long long last = strtoull(hex, &after, 16);

        /* [0, last] => low = last + 1; all-ones is the address-space root, not RAM */
        if (last != 0 && last != ~0ull && mtree_desc_is_ram(after)) return last + 1ull;

        at = strstr(hex, origin);
    }
    return 0;
}
/* Stage 2 probe: ask the VM's QMP socket whether the guest is alive and where its low-RAM
 * split is.
 *
 * p   unused.
 * io  in: qmp_path (filled by the config stage). out: retry, vm_state (when query-status
 *     succeeded), low, ok.
 *
 * Always returns 0; the verdict is io->ok, which is 1 only when the VM is running or paused
 * AND a non-zero split was parsed during THIS call. io->retry is set to 1 when the socket
 * could not be reached, memory ran out, or the capabilities/status handshake failed, i.e.
 * when trying again later may succeed. */
static int hp_live(const struct vmsig_host_probe* p, vmsig_host_facts* io) {
    enum { HP_QMP_BUF = 256 * 1024 }; /* response buffer size */
    (void)p;
    io->retry = 0;
    io->low = 0; /* fail-closed: a split left over from an earlier probe must not count */

    int fd = qmp_connect(io->qmp_path);
    if (fd < 0) { io->retry = 1; io->ok = 0; return 0; } /* QMP not up yet => transient */
    char* buf = malloc(HP_QMP_BUF);
    if (!buf) { close(fd); io->retry = 1; io->ok = 0; return 0; }

    int alive = 0;
    if (qmp_cmd(fd, "{\"execute\":\"qmp_capabilities\"}\n", buf, HP_QMP_BUF) == 1 &&
        qmp_cmd(fd, "{\"execute\":\"query-status\"}\n", buf, HP_QMP_BUF) == 1) {
        io->vm_state = qmp_status_word(buf);
        alive = (io->vm_state == VMSIG_VM_RUNNING || io->vm_state == VMSIG_VM_PAUSED);
    } else {
        io->retry = 1; /* handshake failed mid-way => transient */
    }

    if (alive) {
        /* NOTE(review): "-f" requests the flattened view. There the range starting at
         * address 0 may end at a legacy low-memory hole instead of at the below-4G
         * boundary; confirm against real output that the parsed value is the intended
         * split. */
        if (qmp_cmd(fd,
                    "{\"execute\":\"human-monitor-command\","
                    "\"arguments\":{\"command-line\":\"info mtree -f\"}}\n",
                    buf, HP_QMP_BUF) == 1) {
            io->low = mtree_low(buf);
        }
    }
    free(buf);
    close(fd);

    /* fail-closed: alive AND a parsed split => bring up; else not (stale / unparsable). */
    io->ok = (alive && io->low != 0) ? 1 : 0;
    return 0;
}
vmsig_host_probe host_probe_proxmox(const char* watch_dir, const char* pve_conf,
const char* qmp_dir) {
static hp_cfg cfg; /* single daemon-wide probe; paths are process-lifetime strings */
cfg.watch_dir = watch_dir;
cfg.pve_conf = pve_conf;
cfg.qmp_dir = qmp_dir;
vmsig_host_probe p = { hp_config, hp_live, &cfg };
return p;
}