db5d2a4b77
where there is no good reason to keep them separate. It's a pain during rebase if there are multiple patches changing the same code over and over again. This was especially bad for the backup-related patches. If the history of the patches really is needed, it can be extracted via git. Additionally, compilation with partial application of patches had been broken for a long time, because one of the master key changes became part of an earlier patch during a past rebase.

If only the same files were changed by a subsequent patch and the changes felt like they belonged together (obvious for later bug fixes, but also done for features, e.g. adding master key support for PBS), the patches were squashed together.

The PBS namespace support patch was split into the individual parts it changes, i.e. PBS block driver, pbs-restore binary and QMP backup infrastructure, and squashed into the respective patches.

No code change is intended; git diff in the submodule should not show any difference between applying all patches before this commit and applying all patches after this commit.

The query-proxmox-support QMP function has been left as part of the "PVE-Backup: Proxmox backup patches for QEMU" patch, because it's currently only used there. If it is ever used elsewhere too, it can be split out from there.

The recent alloc-track and BQL-related savevm-async changes have been left separate for now, because it's not yet 100% clear that they are the best approach. This depends on what upstream decides about the BQL changes, and on whether issues with the changes pop up, and of what kind.

The qemu-img dd snapshot patch has been re-ordered to after the other qemu-img dd patches.

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 5 May 2023 13:39:53 +0200
Subject: [PATCH] migration: for snapshots, hold the BQL during setup callbacks

In spirit, this is a partial revert of commit 9b09503752 ("migration:
run setup callbacks out of big lock"), but only for the snapshot case.

For snapshots, the bdrv_writev_vmstate() function is used during setup
(in QIOChannelBlock backing the QEMUFile), but not holding the BQL
while calling it could lead to an assertion failure. To understand
how, first note the following:

1. Generated coroutine wrappers for block layer functions spawn the
coroutine and use AIO_WAIT_WHILE()/aio_poll() to wait for it (see the
sketch below).
2. If the host OS switches threads at an inconvenient time, it can
happen that a bottom half scheduled for the main thread's AioContext
is executed as part of a vCPU thread's aio_poll().
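To illustrate point 1, such a wrapper behaves roughly as follows. This
is only an illustrative sketch, not the code actually emitted by QEMU's
wrapper generator; WrapperState and wrapper_entry are placeholder names:

/* Illustration only: placeholder state for the sketch below. */
typedef struct WrapperState {
    BlockDriverState *bs;
    QEMUIOVector *qiov;
    int64_t pos;
    bool done;
    int ret;
} WrapperState;

/* Coroutine entry: run the coroutine version and record the result. */
static void coroutine_fn wrapper_entry(void *opaque)
{
    WrapperState *s = opaque;
    s->ret = bdrv_co_writev_vmstate(s->bs, s->qiov, s->pos);
    s->done = true;
    aio_wait_kick();
}

/* Rough sketch of the generated non-coroutine wrapper. */
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    if (qemu_in_coroutine()) {
        /* Already in coroutine context, call the coroutine version. */
        return bdrv_co_writev_vmstate(bs, qiov, pos);
    } else {
        WrapperState s = { .bs = bs, .qiov = qiov, .pos = pos };
        Coroutine *co = qemu_coroutine_create(wrapper_entry, &s);
        aio_co_enter(bdrv_get_aio_context(bs), co);
        /*
         * AIO_WAIT_WHILE() contains the assertion quoted in the example
         * below; it fails when the calling thread has no current
         * AioContext set.
         */
        AIO_WAIT_WHILE(bdrv_get_aio_context(bs), !s.done);
        return s.ret;
    }
}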
An example leading to the assertion failure is as follows:

main thread:
1. A snapshot-save QMP command gets issued.
2. snapshot_save_job_bh() is scheduled.

vCPU thread:
3. aio_poll() for the main thread's AioContext is called (e.g. when
the guest writes to a pflash device, as part of blk_pwrite which is a
generated coroutine wrapper).
4. snapshot_save_job_bh() is executed as part of aio_poll().
5. qemu_savevm_state() is called.
6. qemu_mutex_unlock_iothread() is called. Now
qemu_get_current_aio_context() returns 0x0.
7. bdrv_writev_vmstate() is executed during the usual savevm setup.
But this function is a generated coroutine wrapper, so it uses
AIO_WAIT_WHILE. There, the assertion
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
will fail.

To fix it, ensure that the BQL is held during setup. To avoid changing
the behavior for migration too, introduce conditionals for the setup
callbacks that need the BQL and only take the lock if it's not already
held.
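Concretely, each setup callback touched by this patch ends up with the
following shape (a minimal sketch distilled from the hunks below;
example_save_setup is a placeholder name):

/* Minimal sketch of the conditional-locking pattern applied below. */
static int example_save_setup(QEMUFile *f, void *opaque)
{
    bool release_lock = false;

    /* For snapshots, the BQL is already held during setup. */
    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        release_lock = true;
    }

    /* ... the actual setup work requiring the BQL goes here ... */

    /* Only drop the lock if it was taken here. */
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    return 0;
}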
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 include/migration/register.h   |  2 +-
 migration/block-dirty-bitmap.c | 15 ++++++++++++---
 migration/block.c              | 15 ++++++++++++---
 migration/ram.c                | 16 +++++++++++++---
 migration/savevm.c             |  2 --
 5 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index a8dfd8fefd..fa9b0b0f10 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -43,9 +43,9 @@ typedef struct SaveVMHandlers {
      * by other locks.
      */
     int (*save_live_iterate)(QEMUFile *f, void *opaque);
+    int (*save_setup)(QEMUFile *f, void *opaque);
 
     /* This runs outside the iothread lock! */
-    int (*save_setup)(QEMUFile *f, void *opaque);
     /* Note for save_live_pending:
      * must_precopy:
      * - must be migrated in precopy or in stopped state
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index a6440929fa..69fab3275c 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1214,10 +1214,17 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
 {
     DBMSaveState *s = &((DBMState *)opaque)->save;
     SaveBitmapState *dbms = NULL;
+    bool release_lock = false;
 
-    qemu_mutex_lock_iothread();
+    /* For snapshots, the BQL is held during setup. */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     if (init_dirty_bitmap_migration(s) < 0) {
-        qemu_mutex_unlock_iothread();
+        if (release_lock) {
+            qemu_mutex_unlock_iothread();
+        }
         return -1;
     }
 
@@ -1225,7 +1232,9 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
         send_bitmap_start(f, s, dbms);
     }
     qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
     return 0;
 }
 
diff --git a/migration/block.c b/migration/block.c
index b2497bbd32..c9d55be642 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -716,21 +716,30 @@ static void block_migration_cleanup(void *opaque)
 static int block_save_setup(QEMUFile *f, void *opaque)
 {
     int ret;
+    bool release_lock = false;
 
     trace_migration_block_save("setup", block_mig_state.submitted,
                                block_mig_state.transferred);
 
-    qemu_mutex_lock_iothread();
+    /* For snapshots, the BQL is held during setup. */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     ret = init_blk_migration(f);
     if (ret < 0) {
-        qemu_mutex_unlock_iothread();
+        if (release_lock) {
+            qemu_mutex_unlock_iothread();
+        }
         return ret;
     }
 
     /* start track dirty blocks */
     ret = set_dirty_tracking();
 
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 
     if (ret) {
         return ret;
diff --git a/migration/ram.c b/migration/ram.c
index 79d881f735..0ecbbc3202 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3117,8 +3117,16 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static void ram_init_bitmaps(RAMState *rs)
 {
-    /* For memory_global_dirty_log_start below. */
-    qemu_mutex_lock_iothread();
+    bool release_lock = false;
+
+    /*
+     * For memory_global_dirty_log_start below.
+     * For snapshots, the BQL is held during setup.
+     */
+    if (!qemu_mutex_iothread_locked()) {
+        qemu_mutex_lock_iothread();
+        release_lock = true;
+    }
     qemu_mutex_lock_ramlist();
 
     WITH_RCU_READ_LOCK_GUARD() {
@@ -3130,7 +3138,9 @@ static void ram_init_bitmaps(RAMState *rs)
         }
     }
     qemu_mutex_unlock_ramlist();
-    qemu_mutex_unlock_iothread();
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
 
     /*
      * After an eventual first bitmap sync, fixup the initial bitmap
diff --git a/migration/savevm.c b/migration/savevm.c
index aa54a67fda..fc6a82a555 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1621,10 +1621,8 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     memset(&compression_counters, 0, sizeof(compression_counters));
     ms->to_dst_file = f;
 
-    qemu_mutex_unlock_iothread();
     qemu_savevm_state_header(f);
     qemu_savevm_state_setup(f);
-    qemu_mutex_lock_iothread();
 
     while (qemu_file_get_error(f) == 0) {
         if (qemu_savevm_state_iterate(f, false) > 0) {