add temporary QMP race fix
same as the initial version sent to qemu-devel, it won't be the final fix we plan to upstream but it should be enough band-aid to workaround how PVE uses the QMP. Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> [ Thomas: add a bit reasoning to commit message body ] Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
parent
277d33454f
commit
26eee146bc
205
debian/patches/extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
vendored
Normal file
205
debian/patches/extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
vendored
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Reiter <s.reiter@proxmox.com>
|
||||||
|
Date: Mon, 23 Aug 2021 11:28:32 +0200
|
||||||
|
Subject: [PATCH] monitor/qmp: fix race with clients disconnecting early
|
||||||
|
|
||||||
|
The following sequence can produce a race condition that results in
|
||||||
|
responses meant for different clients being sent to the wrong one:
|
||||||
|
|
||||||
|
(QMP, no OOB)
|
||||||
|
1) client A connects
|
||||||
|
2) client A sends 'qmp_capabilities'
|
||||||
|
3) 'qmp_dispatch' runs in coroutine, schedules out to
|
||||||
|
'do_qmp_dispatch_bh' and yields
|
||||||
|
4) client A disconnects (i.e. aborts, crashes, etc...)
|
||||||
|
5) client B connects
|
||||||
|
6) 'do_qmp_dispatch_bh' runs 'qmp_capabilities' and wakes calling coroutine
|
||||||
|
7) capabilities are now set and 'mon->commands' is set to '&qmp_commands'
|
||||||
|
8) 'qmp_dispatch' returns to 'monitor_qmp_dispatch'
|
||||||
|
9) success message is sent to client B *without it ever having sent
|
||||||
|
'qmp_capabilities' itself*
|
||||||
|
9a) even if client B ignores it, it will now presumably send it's own
|
||||||
|
greeting, which will error because caps are already set
|
||||||
|
|
||||||
|
The fix proposed here uses an atomic, sequential connection number
|
||||||
|
stored in the MonitorQMP struct, which is incremented everytime a new
|
||||||
|
client connects. Since it is not changed on CHR_EVENT_CLOSED, the
|
||||||
|
behaviour of allowing a client to disconnect only one side of the
|
||||||
|
connection is retained.
|
||||||
|
|
||||||
|
The connection_nr needs to be exposed outside of the monitor subsystem,
|
||||||
|
since qmp_dispatch lives in qapi code. It needs to be checked twice,
|
||||||
|
once for actually running the command in the BH (fixes 7), and once for
|
||||||
|
sending back a response (fixes 9).
|
||||||
|
|
||||||
|
This satisfies my local reproducer - using multiple clients constantly
|
||||||
|
looping to open a connection, send the greeting, then exiting no longer
|
||||||
|
crashes other, normally behaving clients with unrelated responses.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
||||||
|
---
|
||||||
|
include/monitor/monitor.h | 1 +
|
||||||
|
monitor/monitor-internal.h | 7 +++++++
|
||||||
|
monitor/monitor.c | 15 +++++++++++++++
|
||||||
|
monitor/qmp.c | 15 ++++++++++++++-
|
||||||
|
qapi/qmp-dispatch.c | 21 +++++++++++++++++----
|
||||||
|
stubs/monitor-core.c | 5 +++++
|
||||||
|
6 files changed, 59 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
|
||||||
|
index af3887bb71..ff6db1448b 100644
|
||||||
|
--- a/include/monitor/monitor.h
|
||||||
|
+++ b/include/monitor/monitor.h
|
||||||
|
@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
|
||||||
|
Monitor *monitor_cur(void);
|
||||||
|
Monitor *monitor_set_cur(Coroutine *co, Monitor *mon);
|
||||||
|
bool monitor_cur_is_qmp(void);
|
||||||
|
+int monitor_get_connection_nr(const Monitor *mon);
|
||||||
|
|
||||||
|
void monitor_init_globals(void);
|
||||||
|
void monitor_init_globals_core(void);
|
||||||
|
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
|
||||||
|
index 9c3a09cb01..a92be8c3f7 100644
|
||||||
|
--- a/monitor/monitor-internal.h
|
||||||
|
+++ b/monitor/monitor-internal.h
|
||||||
|
@@ -144,6 +144,13 @@ typedef struct {
|
||||||
|
QemuMutex qmp_queue_lock;
|
||||||
|
/* Input queue that holds all the parsed QMP requests */
|
||||||
|
GQueue *qmp_requests;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * A sequential number that gets incremented on every new CHR_EVENT_OPENED.
|
||||||
|
+ * Used to avoid leftover responses in BHs from being sent to the wrong
|
||||||
|
+ * client. Access with atomics.
|
||||||
|
+ */
|
||||||
|
+ int connection_nr;
|
||||||
|
} MonitorQMP;
|
||||||
|
|
||||||
|
/**
|
||||||
|
diff --git a/monitor/monitor.c b/monitor/monitor.c
|
||||||
|
index 636bcc81c5..ee5ac26c37 100644
|
||||||
|
--- a/monitor/monitor.c
|
||||||
|
+++ b/monitor/monitor.c
|
||||||
|
@@ -136,6 +136,21 @@ bool monitor_cur_is_qmp(void)
|
||||||
|
return cur_mon && monitor_is_qmp(cur_mon);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * If @mon is a QMP monitor, return the connection_nr, otherwise -1.
|
||||||
|
+ */
|
||||||
|
+int monitor_get_connection_nr(const Monitor *mon)
|
||||||
|
+{
|
||||||
|
+ MonitorQMP *qmp_mon;
|
||||||
|
+
|
||||||
|
+ if (!monitor_is_qmp(mon)) {
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ qmp_mon = container_of(mon, MonitorQMP, common);
|
||||||
|
+ return qatomic_read(&qmp_mon->connection_nr);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/**
|
||||||
|
* Is @mon is using readline?
|
||||||
|
* Note: not all HMP monitors use readline, e.g., gdbserver has a
|
||||||
|
diff --git a/monitor/qmp.c b/monitor/qmp.c
|
||||||
|
index 0ef7cebb78..3ec67e32d3 100644
|
||||||
|
--- a/monitor/qmp.c
|
||||||
|
+++ b/monitor/qmp.c
|
||||||
|
@@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
|
||||||
|
QDict *rsp;
|
||||||
|
QDict *error;
|
||||||
|
|
||||||
|
+ int conn_nr_before = qatomic_read(&mon->connection_nr);
|
||||||
|
+
|
||||||
|
rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
|
||||||
|
&mon->common);
|
||||||
|
|
||||||
|
@@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- monitor_qmp_respond(mon, rsp);
|
||||||
|
+ /*
|
||||||
|
+ * qmp_dispatch might have yielded and waited for a BH, in which case there
|
||||||
|
+ * is a chance a new client connected in the meantime - if this happened,
|
||||||
|
+ * the command will not have been executed, but we also need to ensure that
|
||||||
|
+ * we don't send back a corresponding response on a line that no longer
|
||||||
|
+ * belongs to this request.
|
||||||
|
+ */
|
||||||
|
+ if (conn_nr_before == qatomic_read(&mon->connection_nr)) {
|
||||||
|
+ monitor_qmp_respond(mon, rsp);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
qobject_unref(rsp);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
|
||||||
|
|
||||||
|
switch (event) {
|
||||||
|
case CHR_EVENT_OPENED:
|
||||||
|
+ qatomic_inc_fetch(&mon->connection_nr);
|
||||||
|
mon->commands = &qmp_cap_negotiation_commands;
|
||||||
|
monitor_qmp_caps_reset(mon);
|
||||||
|
data = qmp_greeting(mon);
|
||||||
|
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
|
||||||
|
index 59600210ce..95602446eb 100644
|
||||||
|
--- a/qapi/qmp-dispatch.c
|
||||||
|
+++ b/qapi/qmp-dispatch.c
|
||||||
|
@@ -120,16 +120,28 @@ typedef struct QmpDispatchBH {
|
||||||
|
QObject **ret;
|
||||||
|
Error **errp;
|
||||||
|
Coroutine *co;
|
||||||
|
+ int conn_nr;
|
||||||
|
} QmpDispatchBH;
|
||||||
|
|
||||||
|
static void do_qmp_dispatch_bh(void *opaque)
|
||||||
|
{
|
||||||
|
QmpDispatchBH *data = opaque;
|
||||||
|
|
||||||
|
- assert(monitor_cur() == NULL);
|
||||||
|
- monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
|
||||||
|
- data->cmd->fn(data->args, data->ret, data->errp);
|
||||||
|
- monitor_set_cur(qemu_coroutine_self(), NULL);
|
||||||
|
+ /*
|
||||||
|
+ * A QMP monitor tracks it's client with a connection number, if this
|
||||||
|
+ * changes during the scheduling delay of this BH, we must not execute the
|
||||||
|
+ * command. Otherwise a badly placed 'qmp_capabilities' might affect the
|
||||||
|
+ * connection state of a client it was never meant for.
|
||||||
|
+ */
|
||||||
|
+ if (data->conn_nr == monitor_get_connection_nr(data->cur_mon)) {
|
||||||
|
+ assert(monitor_cur() == NULL);
|
||||||
|
+ monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
|
||||||
|
+ data->cmd->fn(data->args, data->ret, data->errp);
|
||||||
|
+ monitor_set_cur(qemu_coroutine_self(), NULL);
|
||||||
|
+ } else {
|
||||||
|
+ error_setg(data->errp, "active monitor connection changed");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
aio_co_wake(data->co);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -243,6 +255,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
|
||||||
|
.ret = &ret,
|
||||||
|
.errp = &err,
|
||||||
|
.co = qemu_coroutine_self(),
|
||||||
|
+ .conn_nr = monitor_get_connection_nr(cur_mon),
|
||||||
|
};
|
||||||
|
aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
|
||||||
|
&data);
|
||||||
|
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
|
||||||
|
index d058a2a00d..3290b58120 100644
|
||||||
|
--- a/stubs/monitor-core.c
|
||||||
|
+++ b/stubs/monitor-core.c
|
||||||
|
@@ -13,6 +13,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+int monitor_get_connection_nr(const Monitor *mon)
|
||||||
|
+{
|
||||||
|
+ return -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
|
||||||
|
{
|
||||||
|
}
|
1
debian/patches/series
vendored
1
debian/patches/series
vendored
@ -1,5 +1,6 @@
|
|||||||
extra/0001-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch
|
extra/0001-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch
|
||||||
extra/0002-block-io_uring-resubmit-when-result-is-EAGAIN.patch
|
extra/0002-block-io_uring-resubmit-when-result-is-EAGAIN.patch
|
||||||
|
extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
|
||||||
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
|
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
|
||||||
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
|
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
|
||||||
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
|
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
|
||||||
|
Loading…
Reference in New Issue
Block a user