/* host_probe.c — the default Proxmox host-probe (see host_probe.h). The ONLY TU that knows * /etc/pve/qemu-server, the QMP socket path convention, and `info mtree`. Pure libc + * AF_UNIX + files; no vmie/vmctl. config() is cheap+local; live() does a bounded blocking * QMP round-trip (query-status + info mtree) and is fail-closed: anything it cannot confirm * leaves ok=0 (the VM is not brought up rather than guessed). */ #define _GNU_SOURCE #include "host_probe.h" #include "vmsig_event.h" /* VMSIG_VM_* */ #include #include #include #include #include #include #include #include #include #include #include typedef struct { const char* watch_dir; /* /dev/shm/vmsig */ const char* pve_conf; /* /etc/pve/qemu-server */ const char* qmp_dir; /* /var/run/qemu-server */ } hp_cfg; /* ---- /etc/pve config (stage 1) ----------------------------------------------- */ /* Read a whole small file into a heap buffer (NUL-terminated). NULL on error/oversize. */ static char* read_file(const char* path, size_t cap) { int fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) return NULL; char* buf = malloc(cap + 1); if (!buf) { close(fd); return NULL; } size_t got = 0; for (;;) { ssize_t n = read(fd, buf + got, cap - got); if (n < 0) { if (errno == EINTR) continue; free(buf); close(fd); return NULL; } if (n == 0) break; got += (size_t)n; if (got >= cap) break; } close(fd); buf[got] = 0; return buf; } /* Value of a top-level "key:" line (Proxmox ini), copied trimmed into out. 1 if found. */ static int conf_val(const char* conf, const char* key, char* out, size_t cap) { size_t klen = strlen(key); const char* p = conf; while (p && *p) { const char* line = p; const char* nl = strchr(p, '\n'); size_t llen = nl ? (size_t)(nl - line) : strlen(line); if (llen > klen && strncmp(line, key, klen) == 0 && line[klen] == ':') { const char* v = line + klen + 1; while (*v == ' ' || *v == '\t') v++; size_t vlen = (size_t)((line + llen) - v); while (vlen && (v[vlen-1] == ' ' || v[vlen-1] == '\t' || v[vlen-1] == '\r')) vlen--; if (vlen >= cap) vlen = cap - 1; memcpy(out, v, vlen); out[vlen] = 0; return 1; } p = nl ? nl + 1 : NULL; } return 0; } static int hp_config(const struct vmsig_host_probe* p, uint32_t vmid, vmsig_host_facts* out) { const hp_cfg* c = p->ud; memset(out, 0, sizeof *out); out->vmid = vmid; snprintf(out->ram_path, sizeof out->ram_path, "%s/vm-%u-ram", c->watch_dir, vmid); snprintf(out->qmp_path, sizeof out->qmp_path, "%s/%u.qmp", c->qmp_dir, vmid); char path[VMSIG_HF_PATH_MAX + 32]; snprintf(path, sizeof path, "%s/%u.conf", c->pve_conf, vmid); char* conf = read_file(path, 64 * 1024); if (!conf) { out->ok = 0; return 0; } /* no host config => not a known VM */ char tmp[VMSIG_HF_NAME_MAX]; if (conf_val(conf, "name", out->name, sizeof out->name) == 0) snprintf(out->name, sizeof out->name, "vm-%u", vmid); if (conf_val(conf, "memory", tmp, sizeof tmp)) out->cfg_ram_bytes = (uint64_t)strtoull(tmp, NULL, 10) * 1024ull * 1024ull; /* share=on is mandatory: without it the host mmap is a private copy, not guest RAM. */ out->share_on = (strstr(conf, "share=on") != NULL) ? 1 : 0; free(conf); out->ok = out->share_on ? 1 : 0; /* config-level pass; liveness is stage 2 */ return 0; } /* ---- QMP liveness + mtree low (stage 2) -------------------------------------- */ static int qmp_connect(const char* path) { int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); if (fd < 0) return -1; struct timeval tv = { .tv_sec = 0, .tv_usec = 250000 }; /* 250ms bound on each recv */ setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv); setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof tv); struct sockaddr_un a; memset(&a, 0, sizeof a); a.sun_family = AF_UNIX; size_t n = strlen(path); socklen_t alen; if (path[0] == '@') { /* abstract namespace */ if (n > sizeof a.sun_path) { close(fd); return -1; } a.sun_path[0] = 0; memcpy(a.sun_path + 1, path + 1, n - 1); alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n); } else { if (n >= sizeof a.sun_path) { close(fd); return -1; } memcpy(a.sun_path, path, n); alen = (socklen_t)(offsetof(struct sockaddr_un, sun_path) + n + 1); } if (connect(fd, (struct sockaddr*)&a, alen) < 0) { close(fd); return -1; } return fd; } /* Read ONE '\n'-terminated QMP message into buf (QMP frames each JSON object on a line; * an HMP string return keeps its newlines escaped, so it is still a single line). 1 / 0 / -1. */ static int qmp_read_line(int fd, char* buf, size_t cap, size_t* out_len) { size_t got = 0; while (got + 1 < cap) { ssize_t r = read(fd, buf + got, cap - 1 - got); if (r < 0) { if (errno == EINTR) continue; return -1; } /* timeout/error */ if (r == 0) return (got > 0) ? 1 : 0; got += (size_t)r; char* nl = memchr(buf, '\n', got); if (nl) { *out_len = got; buf[got] = 0; return 1; } } *out_len = got; buf[got] = 0; return 1; /* line longer than cap: truncated but usable for our scans */ } /* Read messages until one carries "return"/"error", skipping async "event"s. 1 if a return, * 0 if an error/closed, -1 on transport error. The matched message is left in buf. */ static int qmp_await_return(int fd, char* buf, size_t cap) { for (int i = 0; i < 64; i++) { size_t len = 0; int r = qmp_read_line(fd, buf, cap, &len); if (r <= 0) return r; if (strstr(buf, "\"error\"")) return 0; if (strstr(buf, "\"return\"")) return 1; /* greeting {"QMP":...} or async {"event":...} -> keep reading */ } return -1; } static int qmp_cmd(int fd, const char* json, char* buf, size_t cap) { size_t n = strlen(json); if (write(fd, json, n) != (ssize_t)n) return -1; return qmp_await_return(fd, buf, cap); } /* Map a QEMU query-status "status" word to VMSIG_VM_*. Alive = running|paused. */ static int qmp_status_word(const char* buf) { const char* s = strstr(buf, "\"status\""); if (!s) return VMSIG_VM_UNKNOWN; s = strchr(s, ':'); if (!s) return VMSIG_VM_UNKNOWN; s = strchr(s, '"'); if (!s) return VMSIG_VM_UNKNOWN; s++; if (!strncmp(s, "running", 7)) return VMSIG_VM_RUNNING; if (!strncmp(s, "paused", 6)) return VMSIG_VM_PAUSED; if (!strncmp(s, "prelaunch", 9)) return VMSIG_VM_PAUSED; if (!strncmp(s, "shutdown", 8)) return VMSIG_VM_SHUTDOWN; if (!strncmp(s, "guest-panicked", 14) || !strncmp(s, "internal-error", 14)) return VMSIG_VM_CRASHED; return VMSIG_VM_UNKNOWN; } /* Derive the below-4G split from `info mtree` text: the size of the RAM region whose guest * physical range starts at address 0. Standard QEMU split-RAM layout puts low RAM at * [0, low) and high RAM above 4G at file offset @low. FAIL-CLOSED: 0 if not found. * NOTE: parses HMP text (not a stable QMP schema) — verify against real `info mtree` output. */ static uint64_t mtree_low(const char* ret) { /* The return is a JSON string; lines inside are escaped "\n". Scan for the GPA-0 ram run: * " 0000000000000000- (prio N, ram): ..." */ const char* p = ret; while ((p = strstr(p, "0000000000000000-")) != NULL) { const char* end_hex = p + 17; /* 16 zeros + '-' */ char* stop = NULL; unsigned long long end = strtoull(end_hex, &stop, 16); /* the descriptor after the range must mark it RAM (not the i/o "system" root) */ const char* tail = stop ? stop : end_hex; const char* nl = strstr(tail, "\\n"); const char* lim = nl ? nl : (tail + 64); int is_ram = 0; for (const char* q = tail; q < lim && *q; q++) if (!strncmp(q, "ram)", 4)) { is_ram = 1; break; } if (is_ram && end > 0 && end != ~0ull) return end + 1ull; /* [0, end] => low=end+1 */ p = end_hex; } return 0; } static int hp_live(const struct vmsig_host_probe* p, vmsig_host_facts* io) { (void)p; io->retry = 0; int fd = qmp_connect(io->qmp_path); if (fd < 0) { io->retry = 1; io->ok = 0; return 0; } /* QMP not up yet => transient */ char* buf = malloc(256 * 1024); if (!buf) { close(fd); io->retry = 1; io->ok = 0; return 0; } int alive = 0; if (qmp_cmd(fd, "{\"execute\":\"qmp_capabilities\"}\n", buf, 256 * 1024) == 1 && qmp_cmd(fd, "{\"execute\":\"query-status\"}\n", buf, 256 * 1024) == 1) { io->vm_state = qmp_status_word(buf); alive = (io->vm_state == VMSIG_VM_RUNNING || io->vm_state == VMSIG_VM_PAUSED); } else { io->retry = 1; /* handshake failed mid-way => transient */ } if (alive) { if (qmp_cmd(fd, "{\"execute\":\"human-monitor-command\"," "\"arguments\":{\"command-line\":\"info mtree -f\"}}\n", buf, 256 * 1024) == 1) { io->low = mtree_low(buf); } } free(buf); close(fd); /* fail-closed: alive AND a parsed split => bring up; else not (stale / unparsable). */ io->ok = (alive && io->low != 0) ? 1 : 0; return 0; } vmsig_host_probe host_probe_proxmox(const char* watch_dir, const char* pve_conf, const char* qmp_dir) { static hp_cfg cfg; /* single daemon-wide probe; paths are process-lifetime strings */ cfg.watch_dir = watch_dir; cfg.pve_conf = pve_conf; cfg.qmp_dir = qmp_dir; vmsig_host_probe p = { hp_config, hp_live, &cfg }; return p; }