commit 5919ec1446

Not difficult to run into: just have a drive with iothread, take a PBS
backup and then take a snapshot or hibernate. Resuming will fail with

    qemu: qemu_mutex_unlock_impl: Operation not permitted

because the correct AioContext is not acquired first. Migration is not
affected, because it runs in coroutine context.

Reported in the community forum: https://forum.proxmox.com/threads/129899/

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 13:39:53 +0200
Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks

In spirit, this is a partial revert of commit 9b09503752 ("migration:
run setup callbacks out of big lock"), but only for the snapshot case.

For snapshots, the bdrv_writev_vmstate() function is used during setup
(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
while calling it could lead to an assertion failure. To understand
how, first note the following:

1. Generated coroutine wrappers for block layer functions spawn the
coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it (see the
sketch below).
2. If the host OS switches threads at an inconvenient time, it can
happen that a bottom half scheduled for the main thread's AioContext
is executed as part of a vCPU thread's aio_poll().
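
As a rough sketch, such a generated wrapper has the following shape
(simplified from the output of QEMU's block-coroutine-wrapper generator;
the argument-struct and entry-point names below are illustrative, not the
generated ones):

    int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
                            int64_t pos)
    {
        /* Illustrative name; the generator emits its own argument struct. */
        BdrvVmstateCo s = {
            .bs = bs, .qiov = qiov, .pos = pos, .ret = -EINPROGRESS,
        };
        Coroutine *co = qemu_coroutine_create(bdrv_writev_vmstate_entry, &s);

        /* Spawn the coroutine ... */
        bdrv_coroutine_enter(bs, co);
        /* ... and poll until it has set s.ret. AIO_WAIT_WHILE() is where
         * the assertion discussed below lives. */
        AIO_WAIT_WHILE(bdrv_get_aio_context(bs), s.ret == -EINPROGRESS);
        return s.ret;
    }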

An example leading to the assertion failure is as follows:

main thread:
1. A snapshot-save QMP command gets issued.
2. snapshot_save_job_bh() is scheduled.

vCPU thread:
3. aio_poll() for the main thread's AioContext is called (e.g. when
the guest writes to a pflash device, as part of blk_pwrite which is a
generated coroutine wrapper).
4. snapshot_save_job_bh() is executed as part of aio_poll().
5. qemu_savevm_state() is called.
6. qemu_mutex_unlock_iothread() is called. Now
qemu_get_current_aio_context() returns 0x0.
7. bdrv_writev_vmstate() is executed during the usual savevm setup.
But this function is a generated coroutine wrapper, so it uses
AIO_WAIT_WHILE. There, the assertion
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
will fail.
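
Why step 6 yields a NULL context can be seen from the lookup in
util/async.c, which reads approximately as follows in this QEMU version
(get_my_aiocontext() returns a thread-local value that vCPU threads never
set):

    AioContext *qemu_get_current_aio_context(void)
    {
        AioContext *ctx = get_my_aiocontext();

        if (ctx) {
            return ctx;
        }
        if (qemu_mutex_iothread_locked()) {
            /* Possibly in a vCPU thread. */
            return qemu_get_aio_context();
        }
        /* BQL not held and no thread-local context: step 6 sees 0x0. */
        return NULL;
    }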

To fix it, ensure that the BQL is held during setup. To avoid changing
the behavior for migration too, introduce conditionals for the setup
callbacks that need the BQL and only take the lock if it's not already
held.
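
The resulting lock-handling pattern, shared by all three setup callbacks
touched in the diff below, is:

    bool release_lock = false;

    /* For snapshots, the BQL is held during setup. */
    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        release_lock = true;
    }

    /* ... setup work that needs the BQL ... */

    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }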

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 include/migration/register.h   |  2 +-
 migration/block-dirty-bitmap.c | 15 ++++++++++++---
 migration/block.c              | 15 ++++++++++++---
 migration/ram.c                | 16 +++++++++++++---
 migration/savevm.c             |  2 --
 5 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index a8dfd8fefd..fa9b0b0f10 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
      * by other locks.
      */
     int (*save_live_iterate)(QEMUFile *f, void *opaque);
+    int (*save_setup)(QEMUFile *f, void *opaque);
 
     /* This runs outside the iothread lock! */
-    int (*save_setup)(QEMUFile *f, void *opaque);
     /* Note for save_live_pending:
      * must_precopy:
      * - must be migrated in precopy or in stopped state
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 509f3df0a6..42dc4a8d61 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1220,10 +1220,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
 {
     DBMSaveState *s = &((DBMState *)opaque)->save;
     SaveBitmapState *dbms = NULL;
+    bool release_lock = false;
 
-    qemu_mutex_lock_iothread();
+    /* For snapshots, the BQL is held during setup. */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     if (init_dirty_bitmap_migration(s) < 0) {
-        qemu_mutex_unlock_iothread();
+        if (release_lock) {
+            qemu_mutex_unlock_iothread();
+        }
         return -1;
     }
 
@@ -1231,7 +1238,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
         send_bitmap_start(f, s, dbms);
     }
     qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
     return 0;
 }
 
diff --git a/migration/block.c b/migration/block.c
index b2497bbd32..c9d55be642 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -716,21 +716,30 @@ static void block_migration_cleanup(void *opaque)
 static int block_save_setup(QEMUFile *f, void *opaque)
 {
     int ret;
+    bool release_lock = false;
 
     trace_migration_block_save("setup", block_mig_state.submitted,
                                block_mig_state.transferred);
 
-    qemu_mutex_lock_iothread();
+    /* For snapshots, the BQL is held during setup. */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     ret = init_blk_migration(f);
     if (ret < 0) {
-        qemu_mutex_unlock_iothread();
+        if (release_lock) {
+            qemu_mutex_unlock_iothread();
+        }
         return ret;
     }
 
     /* start track dirty blocks */
     ret = set_dirty_tracking();
 
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 
     if (ret) {
         return ret;
diff --git a/migration/ram.c b/migration/ram.c
index 79d881f735..0ecbbc3202 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3117,8 +3117,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static void ram_init_bitmaps(RAMState *rs)
 {
-    /* For memory_global_dirty_log_start below. */
-    qemu_mutex_lock_iothread();
+    bool release_lock = false;
+
+    /*
+     * For memory_global_dirty_log_start below.
+     * For snapshots, the BQL is held during setup.
+     */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     qemu_mutex_lock_ramlist();
 
     WITH_RCU_READ_LOCK_GUARD() {
@@ -3130,7 +3138,9 @@ static void ram_init_bitmaps(RAMState *rs)
         }
     }
     qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 
     /*
      * After an eventual first bitmap sync, fixup the initial bitmap
diff --git a/migration/savevm.c b/migration/savevm.c
index aa54a67fda..fc6a82a555 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1621,10 +1621,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     memset(&compression_counters, 0, sizeof(compression_counters));
     ms->to_dst_file = f;
 
-    qemu_mutex_unlock_iothread();
     qemu_savevm_state_header(f);
     qemu_savevm_state_setup(f);
-    qemu_mutex_lock_iothread();
 
     while (qemu_file_get_error(f) == 0) {
         if (qemu_savevm_state_iterate(f, false) > 0) {