10e1093325
Bigger notable changes:

* Commit 1a30b0f5d7 ("block: .bdrv_open is non-coroutine and unlocked")
  broke the PVE backup patches, in particular setting up the backup dump
  block driver, because bdrv_new_open_driver() cannot be called from a
  coroutine. To fix it, bdrv_co_open() is used instead, and while it's a
  much more involved function, the result should be essentially the same.
  The only difference I noticed is that the BDRV_O_ALLOW_RDWR flag is also
  set in the resulting bds (block driver state), but that shouldn't hurt.

Smaller notable changes:

* aio_set_fd_handler() dropped its 'is_external' parameter in 60f782b6b7
  ("aio: remove aio_disable_external() API"), which notes that all callers
  now pass false. The calls in the PVE patches also passed false, so just
  drop the parameter too.

* global_state_store() does not have a return value anymore, so the user
  in the PVE savevm-async patch was adapted. For context, see c33f1829f8
  ("migration: never fail in global_state_store()").

* Renames affecting the PVE savevm-async patch:
  migrate_use_block() -> migrate_block() and ram_counters -> mig_stats
  9d4b1e5f22 ("migration: Move migrate_use_block() to options.c")
  aff3f6606d ("migration: Rename ram_counters to mig_stats")

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
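The patch below applies one and the same locking pattern in each affected setup callback (dirty_bitmap_save_setup(), block_save_setup(), ram_init_bitmaps()): take the BQL only if the caller does not already hold it, and remember whether it has to be released again. The following is a minimal sketch of that pattern, not the patch itself; example_save_setup() and do_setup_work() are made-up names for illustration, while the locking helpers are exactly the ones visible in the diff.

#include "qemu/osdep.h"
#include "qemu/main-loop.h"

/* Hypothetical stand-in for the callback's real setup work. */
static int do_setup_work(void)
{
    return 0;
}

static int example_save_setup(void)
{
    bool release_lock = false;
    int ret;

    /*
     * For snapshots, the caller already holds the BQL; only take it in
     * the migration case, and note that we have to drop it again.
     */
    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        release_lock = true;
    }

    ret = do_setup_work();

    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    return ret;
}

This keeps migration behavior unchanged (the lock is still taken and released inside the callback), while the snapshot path, which now calls the setup callbacks with the BQL held, neither double-locks nor drops a lock it does not own.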
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 13:39:53 +0200
Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks

In spirit, this is a partial revert of commit 9b09503752 ("migration:
run setup callbacks out of big lock"), but only for the snapshot case.

For snapshots, the bdrv_writev_vmstate() function is used during setup
(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
while calling it could lead to an assertion failure. To understand
how, first note the following:

1. Generated coroutine wrappers for block layer functions spawn the
coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it.
2. If the host OS switches threads at an inconvenient time, it can
happen that a bottom half scheduled for the main thread's AioContext
is executed as part of a vCPU thread's aio_poll().

An example leading to the assertion failure is as follows:

main thread:
1. A snapshot-save QMP command gets issued.
2. snapshot_save_job_bh() is scheduled.

vCPU thread:
3. aio_poll() for the main thread's AioContext is called (e.g. when
the guest writes to a pflash device, as part of blk_pwrite which is a
generated coroutine wrapper).
4. snapshot_save_job_bh() is executed as part of aio_poll().
3. qemu_savevm_state() is called.
4. qemu_mutex_unlock_iothread() is called. Now
qemu_get_current_aio_context() returns 0x0.
5. bdrv_writev_vmstate() is executed during the usual savevm setup.
But this function is a generated coroutine wrapper, so it uses
AIO_WAIT_WHILE. There, the assertion
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
will fail.

To fix it, ensure that the BQL is held during setup. To avoid changing
the behavior for migration too, introduce conditionals for the setup
callbacks that need the BQL and only take the lock if it's not already
held.

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 include/migration/register.h   |  2 +-
 migration/block-dirty-bitmap.c | 15 ++++++++++++---
 migration/block.c              | 15 ++++++++++++---
 migration/ram.c                | 16 +++++++++++++---
 migration/savevm.c             |  2 --
 5 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 90914f32f5..c728fd9120 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
      * by other locks.
      */
     int (*save_live_iterate)(QEMUFile *f, void *opaque);
+    int (*save_setup)(QEMUFile *f, void *opaque);
 
     /* This runs outside the iothread lock! */
-    int (*save_setup)(QEMUFile *f, void *opaque);
     /* Note for save_live_pending:
      * must_precopy:
      *  - must be migrated in precopy or in stopped state
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 285dd1d148..f7ee5a74d9 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1219,10 +1219,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
 {
     DBMSaveState *s = &((DBMState *)opaque)->save;
     SaveBitmapState *dbms = NULL;
+    bool release_lock = false;
 
-    qemu_mutex_lock_iothread();
+    /* For snapshots, the BQL is held during setup. */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     if (init_dirty_bitmap_migration(s) < 0) {
-        qemu_mutex_unlock_iothread();
+        if (release_lock) {
+            qemu_mutex_unlock_iothread();
+        }
         return -1;
     }
 
@@ -1230,7 +1237,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
         send_bitmap_start(f, s, dbms);
     }
     qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
     return 0;
 }
 
diff --git a/migration/block.c b/migration/block.c
index 86c2256a2b..8423e0c9f9 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -725,21 +725,30 @@ static void block_migration_cleanup(void *opaque)
 static int block_save_setup(QEMUFile *f, void *opaque)
 {
     int ret;
+    bool release_lock = false;
 
     trace_migration_block_save("setup", block_mig_state.submitted,
                               block_mig_state.transferred);
 
-    qemu_mutex_lock_iothread();
+    /* For snapshots, the BQL is held during setup. */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     ret = init_blk_migration(f);
     if (ret < 0) {
-        qemu_mutex_unlock_iothread();
+        if (release_lock) {
+            qemu_mutex_unlock_iothread();
+        }
         return ret;
     }
 
     /* start track dirty blocks */
     ret = set_dirty_tracking();
 
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 
     if (ret) {
         return ret;
diff --git a/migration/ram.c b/migration/ram.c
index 9040d66e61..01532c9fc9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2895,8 +2895,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static void ram_init_bitmaps(RAMState *rs)
 {
-    /* For memory_global_dirty_log_start below. */
-    qemu_mutex_lock_iothread();
+    bool release_lock = false;
+
+    /*
+     * For memory_global_dirty_log_start below.
+     * For snapshots, the BQL is held during setup.
+     */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     qemu_mutex_lock_ramlist();
 
     WITH_RCU_READ_LOCK_GUARD() {
@@ -2908,7 +2916,9 @@ static void ram_init_bitmaps(RAMState *rs)
         }
     }
     qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 
     /*
      * After an eventual first bitmap sync, fixup the initial bitmap
diff --git a/migration/savevm.c b/migration/savevm.c
index a2cb8855e2..ea8b30a630 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1625,10 +1625,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     reset_vfio_bytes_transferred();
     ms->to_dst_file = f;
 
-    qemu_mutex_unlock_iothread();
     qemu_savevm_state_header(f);
     qemu_savevm_state_setup(f);
-    qemu_mutex_lock_iothread();
 
     while (qemu_file_get_error(f) == 0) {
         if (qemu_savevm_state_iterate(f, false) > 0) {