mirror of
https://dev.lirent.ru/Vatrog/vm-automation-signaling.git
synced 2026-06-26 04:36:37 +03:00
fix(memctx): retry cold bootstrap with backoff while guest boots
The cold address-space bootstrap (host_bootstrap -> System DTB) ran once and was terminal. When the adapter attached before the guest had finished booting, no System process was found; the adapter emitted a single ERROR and never retried, so the memctx datum was never published. Make it self-healing: on bootstrap failure, arm a one-shot backoff timerfd (a second adapter fd, demuxed by cookie) that re-kicks the bootstrap until it succeeds, then reset and disarm on success. Replace the per-failure URGENT ERROR (a still-booting guest is transient, not a fault) with a single diagnostic line on the first failure. Add a stub fail-injection (cfg fail_boots) and test_retry. Bump version to 0.3.5.
This commit is contained in:
+37
-1
@@ -42,7 +42,7 @@ struct holder {
|
||||
holder* peer; /* multi-VM: stop when both are ready (or NULL) */
|
||||
int is_driver; /* stops the loop on a condition */
|
||||
uint32_t expect_ep;
|
||||
int memctx, invalidated, ticks, bad_ep;
|
||||
int memctx, invalidated, ticks, bad_ep, errors;
|
||||
uint64_t last_kcr3, kcr3_e0;
|
||||
uint32_t last_epoch, last_nseg;
|
||||
int ro_ok, rw_eacces, seg0_ok;
|
||||
@@ -66,6 +66,7 @@ static int h_on_ev(void* u, const vmsig_event* ev) {
|
||||
holder* h = u;
|
||||
if (ev->kind == VMSIG_EV_VM_LIFECYCLE) h->ticks++;
|
||||
else if (ev->kind == VMSIG_EV_MEMCTX_INVALIDATED) h->invalidated++;
|
||||
else if (ev->kind == VMSIG_EV_ERROR) h->errors++; /* no boot-retry ERROR spam */
|
||||
maybe_stop(h);
|
||||
return 0;
|
||||
}
|
||||
@@ -393,6 +394,40 @@ static void test_ro_fd_ownership(void) {
|
||||
if (fcntl(ro, F_GETFD) >= 0) close(ro); /* belt-and-braces if the assert failed */
|
||||
}
|
||||
|
||||
/* ---- 7. cold-bootstrap retry: stub fails N times, then publishes via backoff ----- *
|
||||
* Regression for the cold-bootstrap-while-guest-boots bug: a failed bootstrap must NOT be
|
||||
* terminal nor emit URGENT ERROR — it arms a one-shot backoff timerfd that re-kicks the
|
||||
* bootstrap until it succeeds. fail_boots=3 makes the first three stub bootstraps fail
|
||||
* deterministically (no timing dependence); the real timerfd fires at ~50/100/200ms, so the
|
||||
* 4th kick succeeds sub-second. vmhost is added (as test_multicast) for the ticks failsafe
|
||||
* and a realistic loop; stop on memctx>=1 (stop_epoch=-1). */
|
||||
static void test_retry(void) {
|
||||
printf("test_retry\n");
|
||||
vmsig_ctx* ctx = vmsig_ctx_new();
|
||||
vmsig_core* core = vmsig_core_new(ctx);
|
||||
|
||||
holder h; memset(&h, 0, sizeof h);
|
||||
h.core = core; h.is_driver = 1; h.expect_ep = 0; h.stop_epoch = -1;
|
||||
/* OBSERVE so vmhost lifecycle ticks reach maybe_stop (ticks>30 failsafe) and ERROR
|
||||
* (if any) is counted; MEMCTX cap to receive the published context. */
|
||||
add_holder(core, &h, VMSIG_CAP_MEMCTX | VMSIG_CAP_OBSERVE, 0xFFFFFFFFu, 1ull << 0);
|
||||
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_vmhost_ops(), NULL, 0) >= 0, "add vmhost (watchdog)");
|
||||
|
||||
vmsig_memctx_cfg mc; memset(&mc, 0, sizeof mc);
|
||||
mc.stub = 1; mc.ram_path = NULL; mc.low = 0; mc.ro_fd = -1; mc.fail_boots = 3;
|
||||
CHECK(vmsig_core_add_adapter(core, vmsig_memctx_ops(), &mc, 0) >= 0, "add memctx (fail_boots=3)");
|
||||
|
||||
vmsig_core_run(core);
|
||||
|
||||
CHECK(h.memctx >= 1, "MEMCTX published after a series of bootstrap failures (retry worked)");
|
||||
CHECK(h.last_kcr3 != 0, "valid kcr3 after the successful retry");
|
||||
CHECK(h.errors == 0, "no ERROR spam during boot retries");
|
||||
|
||||
vmsig_core_free(core);
|
||||
vmsig_ctx_free(ctx);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
test_multicast();
|
||||
test_epoch();
|
||||
@@ -400,6 +435,7 @@ int main(void) {
|
||||
test_multivm();
|
||||
test_socket();
|
||||
test_ro_fd_ownership();
|
||||
test_retry();
|
||||
printf("memctx tests: %s\n", g_fail ? "FAIL" : "PASS");
|
||||
return g_fail ? 1 : 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user