add upstream fixes for qmp_block_resize

Cherry-picked cleanly from the 6.0 development tree. Fixes an issue with resizing RBD drives (and reportedly also krbd, and potentially other storage backends) when iothreads are in use.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
commit e79be6c6c4, parent 0be2cab670

debian/patches/extra/0011-block-Fix-locking-in-qmp_block_resize.patch (vendored, new file, 42 lines)
@@ -0,0 +1,42 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 3 Dec 2020 18:23:10 +0100
Subject: [PATCH] block: Fix locking in qmp_block_resize()

The drain functions assume that we hold the AioContext lock of the
drained block node. Make sure to actually take the lock.

Cc: qemu-stable@nongnu.org
Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20201203172311.68232-3-kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 blockdev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index fe6fb5dc1d..9a86e9fb4b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2481,14 +2481,17 @@ void coroutine_fn qmp_block_resize(bool has_device, const char *device,
         goto out;
     }
 
+    bdrv_co_lock(bs);
     bdrv_drained_begin(bs);
+    bdrv_co_unlock(bs);
+
     old_ctx = bdrv_co_enter(bs);
     blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
     bdrv_co_leave(bs, old_ctx);
-    bdrv_drained_end(bs);
 
 out:
     bdrv_co_lock(bs);
+    bdrv_drained_end(bs);
     blk_unref(blk);
     bdrv_co_unlock(bs);
 }
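For readability, the locking order this patch establishes can be summarized as the C sketch below. It is only an illustration of the hunk above, not the full qmp_block_resize() body: error handling and the remaining parameters are elided, the function name is hypothetical, and the fragment assumes the headers already pulled in by blockdev.c.

/* Hypothetical, condensed view of the fixed qmp_block_resize() flow. */
static void coroutine_fn resize_locking_sketch(BlockBackend *blk,
                                               BlockDriverState *bs,
                                               int64_t size, Error **errp)
{
    AioContext *old_ctx;

    bdrv_co_lock(bs);        /* the drain functions assume this lock is held */
    bdrv_drained_begin(bs);
    bdrv_co_unlock(bs);      /* released again before entering the node's context */

    old_ctx = bdrv_co_enter(bs);
    blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
    bdrv_co_leave(bs, old_ctx);

    bdrv_co_lock(bs);
    bdrv_drained_end(bs);    /* the drain is also ended under the lock */
    blk_unref(blk);
    bdrv_co_unlock(bs);
}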
debian/patches/extra/0012-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch (vendored, new file, 118 lines)
@@ -0,0 +1,118 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 3 Dec 2020 18:23:11 +0100
Subject: [PATCH] block: Fix deadlock in bdrv_co_yield_to_drain()

If bdrv_co_yield_to_drain() is called for draining a block node that
runs in a different AioContext, it keeps that AioContext locked while it
yields and schedules a BH in the AioContext to do the actual drain.

As long as executing the BH is the very next thing that the event loop
of the node's AioContext does, this actually happens to work, but when
it tries to execute something else that wants to take the AioContext
lock, it will deadlock. (In the bug report, this other thing is a
virtio-scsi device running virtio_scsi_data_plane_handle_cmd().)

Instead, always drop the AioContext lock across the yield and reacquire
it only when the coroutine is reentered. The BH needs to unconditionally
take the lock for itself now.

This fixes the 'block_resize' QMP command on a block node that runs in
an iothread.

Cc: qemu-stable@nongnu.org
Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1903511
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20201203172311.68232-4-kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 block/io.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/block/io.c b/block/io.c
index ec5e152bb7..a9f56a9ab1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -306,17 +306,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 
     if (bs) {
         AioContext *ctx = bdrv_get_aio_context(bs);
-        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
-
-        /*
-         * When the coroutine yielded, the lock for its home context was
-         * released, so we need to re-acquire it here. If it explicitly
-         * acquired a different context, the lock is still held and we don't
-         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
-         */
-        if (ctx == co_ctx) {
-            aio_context_acquire(ctx);
-        }
+        aio_context_acquire(ctx);
         bdrv_dec_in_flight(bs);
         if (data->begin) {
             assert(!data->drained_end_counter);
@@ -328,9 +318,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
                                 data->ignore_bds_parents,
                                 data->drained_end_counter);
         }
-        if (ctx == co_ctx) {
-            aio_context_release(ctx);
-        }
+        aio_context_release(ctx);
     } else {
         assert(data->begin);
         bdrv_drain_all_begin();
@@ -348,13 +336,16 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
                                                 int *drained_end_counter)
 {
     BdrvCoDrainData data;
+    Coroutine *self = qemu_coroutine_self();
+    AioContext *ctx = bdrv_get_aio_context(bs);
+    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
 
     /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
      * other coroutines run if they were queued by aio_co_enter(). */
 
     assert(qemu_in_coroutine());
     data = (BdrvCoDrainData) {
-        .co = qemu_coroutine_self(),
+        .co = self,
         .bs = bs,
         .done = false,
         .begin = begin,
@@ -368,13 +359,29 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     if (bs) {
         bdrv_inc_in_flight(bs);
     }
-    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),
-                                     bdrv_co_drain_bh_cb, &data);
+
+    /*
+     * Temporarily drop the lock across yield or we would get deadlocks.
+     * bdrv_co_drain_bh_cb() reaquires the lock as needed.
+     *
+     * When we yield below, the lock for the current context will be
+     * released, so if this is actually the lock that protects bs, don't drop
+     * it a second time.
+     */
+    if (ctx != co_ctx) {
+        aio_context_release(ctx);
+    }
+    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
 
     qemu_coroutine_yield();
     /* If we are resumed from some other event (such as an aio completion or a
      * timer callback), it is a bug in the caller that should be fixed. */
     assert(data.done);
+
+    /* Reaquire the AioContext of bs if we dropped it */
+    if (ctx != co_ctx) {
+        aio_context_acquire(ctx);
+    }
 }
 
 void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
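The resulting protocol between the coroutine and the BH can be read off the hunks above; the fragment below is only a condensed, annotated restatement (same identifiers as in the patch, surrounding code elided), not additional code from the patch.

/* Coroutine side, bdrv_co_yield_to_drain(): give up the node's lock before
 * yielding, unless it is also the coroutine's own home context (whose lock
 * the yield releases anyway). */
if (ctx != co_ctx) {
    aio_context_release(ctx);
}
replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
qemu_coroutine_yield();
assert(data.done);
if (ctx != co_ctx) {
    aio_context_acquire(ctx);   /* restore the lock the caller expects to hold */
}

/* BH side, bdrv_co_drain_bh_cb(): now takes the lock unconditionally around
 * the actual drain before reentering the coroutine. */
aio_context_acquire(ctx);
/* ... bdrv_do_drained_begin()/bdrv_do_drained_end() ... */
aio_context_release(ctx);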
debian/patches/series (vendored, 2 lines added)
@@ -8,6 +8,8 @@ extra/0007-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch
 extra/0008-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch
 extra/0009-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch
 extra/0010-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch
+extra/0011-block-Fix-locking-in-qmp_block_resize.patch
+extra/0012-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch
 bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
 bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
 bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch