add patch to disable graph locking

There are still some issues with graph locking, e.g. deadlocks during
backup canceling [0] and initial attempts to fix it didn't work [1].
Because the AioContext locks still exist, it should still be safe to
disable graph locking.

[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
[1]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg06905.html

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
Fiona Ebner 2023-10-17 14:10:10 +02:00 committed by Thomas Lamprecht
parent 10e1093325
commit 03ff63aa61
2 changed files with 141 additions and 0 deletions

View File

@ -0,0 +1,140 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Thu, 28 Sep 2023 10:07:03 +0200
Subject: [PATCH] Revert "Revert "graph-lock: Disable locking for now""
This reverts commit 3cce22defb4b0e47cf135444e30cc673cff5ebad.
There are still some issues with graph locking, e.g. deadlocks during
backup canceling [0]. Because the AioContext locks still exist, it
should be safe to disable locking again.
From the original 80fc5d2600 ("graph-lock: Disable locking for now"):
> We don't currently rely on graph locking yet. It is supposed to replace
> the AioContext lock eventually to enable multiqueue support, but as long
> as we still have the AioContext lock, it is sufficient without the graph
> lock. Once the AioContext lock goes away, the deadlock doesn't exist any
> more either and this commit can be reverted. (Of course, it can also be
> reverted while the AioContext lock still exists if the callers have been
> fixed.)
[0]: https://lists.nongnu.org/archive/html/qemu-devel/2023-09/msg00729.html
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
block/graph-lock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 5e66f01ae8..5c2873262a 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
+#if 0
/* Written and read with atomic operations. */
static int has_writer;
+#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
+#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,12 +108,19 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
+#endif
void bdrv_graph_wrlock(BlockDriverState *bs)
{
+#if 0
AioContext *ctx = NULL;
GLOBAL_STATE_CODE();
+ /*
+ * TODO Some callers hold an AioContext lock when this is called, which
+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
+ * AioContext locks are gone).
+ */
assert(!qatomic_read(&has_writer));
/*
@@ -158,11 +168,13 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
if (ctx) {
aio_context_acquire(bdrv_get_aio_context(bs));
}
+#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
+#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -174,10 +186,13 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -237,10 +252,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
+#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
+#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
@@ -258,6 +275,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
+#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -275,13 +293,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to aio_context_list_lock lock contention */
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
+#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
+ /* TODO Reenable when wrlock is reenabled */
+#if 0
assert(qatomic_read(&has_writer));
+#endif
}

View File

@ -3,6 +3,7 @@ extra/0002-scsi-megasas-Internal-cdbs-have-16-byte-length.patch
extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
extra/0004-migration-block-dirty-bitmap-fix-loading-bitmap-when.patch
extra/0005-hw-ide-reset-cancel-async-DMA-operation-before-reset.patch
extra/0006-Revert-Revert-graph-lock-Disable-locking-for-now.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch