191 lines
6.4 KiB
Diff
191 lines
6.4 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Fiona Ebner <f.ebner@proxmox.com>
|
||
|
Date: Fri, 5 May 2023 13:39:53 +0200
|
||
|
Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks
|
||
|
|
||
|
In spirit, this is a partial revert of commit 9b09503752 ("migration:
|
||
|
run setup callbacks out of big lock"), but only for the snapshot case.
|
||
|
|
||
|
For snapshots, the bdrv_writev_vmstate() function is used during setup
|
||
|
(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
|
||
|
while calling it could lead to an assertion failure. To understand
|
||
|
how, first note the following:
|
||
|
|
||
|
1. Generated coroutine wrappers for block layer functions spawn the
|
||
|
coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it.
|
||
|
2. If the host OS switches threads at an inconvenient time, it can
|
||
|
happen that a bottom half scheduled for the main thread's AioContext
|
||
|
is executed as part of a vCPU thread's aio_poll().
|
||
|
|
||
|
An example leading to the assertion failure is as follows:
|
||
|
|
||
|
main thread:
|
||
|
1. A snapshot-save QMP command gets issued.
|
||
|
2. snapshot_save_job_bh() is scheduled.
|
||
|
|
||
|
vCPU thread:
|
||
|
3. aio_poll() for the main thread's AioContext is called (e.g. when
|
||
|
the guest writes to a pflash device, as part of blk_pwrite which is a
|
||
|
generated coroutine wrapper).
|
||
|
4. snapshot_save_job_bh() is executed as part of aio_poll().
|
||
|
3. qemu_savevm_state() is called.
|
||
|
4. qemu_mutex_unlock_iothread() is called. Now
|
||
|
qemu_get_current_aio_context() returns 0x0.
|
||
|
5. bdrv_writev_vmstate() is executed during the usual savevm setup.
|
||
|
But this function is a generated coroutine wrapper, so it uses
|
||
|
AIO_WAIT_WHILE. There, the assertion
|
||
|
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
|
||
|
will fail.
|
||
|
|
||
|
To fix it, ensure that the BQL is held during setup. To avoid changing
|
||
|
the behavior for migration too, introduce conditionals for the setup
|
||
|
callbacks that need the BQL and only take the lock if it's not already
|
||
|
held.
|
||
|
|
||
|
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||
|
---
|
||
|
include/migration/register.h | 2 +-
|
||
|
migration/block-dirty-bitmap.c | 15 ++++++++++++---
|
||
|
migration/block.c | 15 ++++++++++++---
|
||
|
migration/ram.c | 16 +++++++++++++---
|
||
|
migration/savevm.c | 2 --
|
||
|
5 files changed, 38 insertions(+), 12 deletions(-)
|
||
|
|
||
|
diff --git a/include/migration/register.h b/include/migration/register.h
|
||
|
index a8dfd8fefd..fa9b0b0f10 100644
|
||
|
--- a/include/migration/register.h
|
||
|
+++ b/include/migration/register.h
|
||
|
@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
|
||
|
* by other locks.
|
||
|
*/
|
||
|
int (*save_live_iterate)(QEMUFile *f, void *opaque);
|
||
|
+ int (*save_setup)(QEMUFile *f, void *opaque);
|
||
|
|
||
|
/* This runs outside the iothread lock! */
|
||
|
- int (*save_setup)(QEMUFile *f, void *opaque);
|
||
|
/* Note for save_live_pending:
|
||
|
* must_precopy:
|
||
|
* - must be migrated in precopy or in stopped state
|
||
|
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
|
||
|
index a6440929fa..69fab3275c 100644
|
||
|
--- a/migration/block-dirty-bitmap.c
|
||
|
+++ b/migration/block-dirty-bitmap.c
|
||
|
@@ -1214,10 +1214,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
|
||
|
{
|
||
|
DBMSaveState *s = &((DBMState *)opaque)->save;
|
||
|
SaveBitmapState *dbms = NULL;
|
||
|
+ bool release_lock = false;
|
||
|
|
||
|
- qemu_mutex_lock_iothread();
|
||
|
+ /* For snapshots, the BQL is held during setup. */
|
||
|
+ if (!qemu_mutex_iothread_locked()) {
|
||
|
+ qemu_mutex_lock_iothread();
|
||
|
+ release_lock = true;
|
||
|
+ }
|
||
|
if (init_dirty_bitmap_migration(s) < 0) {
|
||
|
- qemu_mutex_unlock_iothread();
|
||
|
+ if (release_lock) {
|
||
|
+ qemu_mutex_unlock_iothread();
|
||
|
+ }
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
@@ -1225,7 +1232,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
|
||
|
send_bitmap_start(f, s, dbms);
|
||
|
}
|
||
|
qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
|
||
|
- qemu_mutex_unlock_iothread();
|
||
|
+ if (release_lock) {
|
||
|
+ qemu_mutex_unlock_iothread();
|
||
|
+ }
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
diff --git a/migration/block.c b/migration/block.c
|
||
|
index b2497bbd32..c9d55be642 100644
|
||
|
--- a/migration/block.c
|
||
|
+++ b/migration/block.c
|
||
|
@@ -716,21 +716,30 @@ static void block_migration_cleanup(void *opaque)
|
||
|
static int block_save_setup(QEMUFile *f, void *opaque)
|
||
|
{
|
||
|
int ret;
|
||
|
+ bool release_lock = false;
|
||
|
|
||
|
trace_migration_block_save("setup", block_mig_state.submitted,
|
||
|
block_mig_state.transferred);
|
||
|
|
||
|
- qemu_mutex_lock_iothread();
|
||
|
+ /* For snapshots, the BQL is held during setup. */
|
||
|
+ if (!qemu_mutex_iothread_locked()) {
|
||
|
+ qemu_mutex_lock_iothread();
|
||
|
+ release_lock = true;
|
||
|
+ }
|
||
|
ret = init_blk_migration(f);
|
||
|
if (ret < 0) {
|
||
|
- qemu_mutex_unlock_iothread();
|
||
|
+ if (release_lock) {
|
||
|
+ qemu_mutex_unlock_iothread();
|
||
|
+ }
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/* start track dirty blocks */
|
||
|
ret = set_dirty_tracking();
|
||
|
|
||
|
- qemu_mutex_unlock_iothread();
|
||
|
+ if (release_lock) {
|
||
|
+ qemu_mutex_unlock_iothread();
|
||
|
+ }
|
||
|
|
||
|
if (ret) {
|
||
|
return ret;
|
||
|
diff --git a/migration/ram.c b/migration/ram.c
|
||
|
index 79d881f735..0ecbbc3202 100644
|
||
|
--- a/migration/ram.c
|
||
|
+++ b/migration/ram.c
|
||
|
@@ -3117,8 +3117,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
|
||
|
|
||
|
static void ram_init_bitmaps(RAMState *rs)
|
||
|
{
|
||
|
- /* For memory_global_dirty_log_start below. */
|
||
|
- qemu_mutex_lock_iothread();
|
||
|
+ bool release_lock = false;
|
||
|
+
|
||
|
+ /*
|
||
|
+ * For memory_global_dirty_log_start below.
|
||
|
+ * For snapshots, the BQL is held during setup.
|
||
|
+ */
|
||
|
+ if (!qemu_mutex_iothread_locked()) {
|
||
|
+ qemu_mutex_lock_iothread();
|
||
|
+ release_lock = true;
|
||
|
+ }
|
||
|
qemu_mutex_lock_ramlist();
|
||
|
|
||
|
WITH_RCU_READ_LOCK_GUARD() {
|
||
|
@@ -3130,7 +3138,9 @@ static void ram_init_bitmaps(RAMState *rs)
|
||
|
}
|
||
|
}
|
||
|
qemu_mutex_unlock_ramlist();
|
||
|
- qemu_mutex_unlock_iothread();
|
||
|
+ if (release_lock) {
|
||
|
+ qemu_mutex_unlock_iothread();
|
||
|
+ }
|
||
|
|
||
|
/*
|
||
|
* After an eventual first bitmap sync, fixup the initial bitmap
|
||
|
diff --git a/migration/savevm.c b/migration/savevm.c
|
||
|
index aa54a67fda..fc6a82a555 100644
|
||
|
--- a/migration/savevm.c
|
||
|
+++ b/migration/savevm.c
|
||
|
@@ -1621,10 +1621,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
|
||
|
memset(&compression_counters, 0, sizeof(compression_counters));
|
||
|
ms->to_dst_file = f;
|
||
|
|
||
|
- qemu_mutex_unlock_iothread();
|
||
|
qemu_savevm_state_header(f);
|
||
|
qemu_savevm_state_setup(f);
|
||
|
- qemu_mutex_lock_iothread();
|
||
|
|
||
|
while (qemu_file_get_error(f) == 0) {
|
||
|
if (qemu_savevm_state_iterate(f, false) > 0) {
|