fix vmstate-snapshots with iothread=1
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
commit f063a8aadb (parent 63be960208)

debian/patches/pve/0045-savevm-async-move-more-code-to-cleanup-and-rename-to.patch (vendored, new file, 188 lines)
@@ -0,0 +1,188 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 27 May 2020 11:33:19 +0200
Subject: [PATCH] savevm-async: move more code to cleanup and rename to
 finalize

process_savevm_cleanup is renamed to process_savevm_finalize to
accomodate more code that is not all cleanup related.

The benefit of this is that it allows us to call functions which need to
run in the main AIOContext directly. It doesn't majorly affect snapshot
performance, since the first instruction that is moved stops the VM,
so the downtime stays about the same.

The target bdrv is additionally moved to the IOHandler context before
process_savevm_co to make sure the coroutine can call functions that
require it to own the bdrv's context. process_savevm_finalize then moves
it back to the main context to run its part.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 savevm-async.c | 87 +++++++++++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/savevm-async.c b/savevm-async.c
index c3fe741c38..2894c94233 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -50,7 +50,7 @@ static struct SnapshotState {
    int saved_vm_running;
    QEMUFile *file;
    int64_t total_time;
-    QEMUBH *cleanup_bh;
+    QEMUBH *finalize_bh;
    Coroutine *co;
} snap_state;

@@ -196,12 +196,42 @@ static const QEMUFileOps block_file_ops = {
    .close = block_state_close,
};

-static void process_savevm_cleanup(void *opaque)
+static void process_savevm_finalize(void *opaque)
{
    int ret;
-    qemu_bh_delete(snap_state.cleanup_bh);
-    snap_state.cleanup_bh = NULL;
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+    MigrationState *ms = migrate_get_current();
+
+    qemu_bh_delete(snap_state.finalize_bh);
+    snap_state.finalize_bh = NULL;
    snap_state.co = NULL;
+
+    /* We need to own the target bdrv's context for the following functions,
+     * so move it back. It can stay in the main context and live out its live
+     * there, since we're done with it after this method ends anyway.
+     */
+    aio_context_acquire(iohandler_ctx);
+    blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+    aio_context_release(iohandler_ctx);
+
+    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+    if (ret < 0) {
+        save_snapshot_error("vm_stop_force_state error %d", ret);
+    }
+
+    (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+    ret = qemu_file_get_error(snap_state.file);
+    if (ret < 0) {
+        save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+    }
+
+    DPRINTF("state saving complete\n");
+
+    /* clear migration state */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+                      ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+    ms->to_dst_file = NULL;
+
    qemu_savevm_state_cleanup();

    ret = save_snapshot_cleanup();
@@ -219,16 +249,15 @@ static void process_savevm_cleanup(void *opaque)
    }
}

-static void process_savevm_coro(void *opaque)
+static void coroutine_fn process_savevm_co(void *opaque)
{
    int ret;
    int64_t maxlen;
-    MigrationState *ms = migrate_get_current();

    ret = qemu_file_get_error(snap_state.file);
    if (ret < 0) {
        save_snapshot_error("qemu_savevm_state_setup failed");
-        goto out;
+        return;
    }

    while (snap_state.state == SAVE_STATE_ACTIVE) {
@@ -245,7 +274,7 @@ static void process_savevm_coro(void *opaque)
                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
                break;
            }
-            DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
+            DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
        } else {
            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
            ret = global_state_store();
@@ -253,40 +282,20 @@ static void process_savevm_coro(void *opaque)
                save_snapshot_error("global_state_store error %d", ret);
                break;
            }
-            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-            if (ret < 0) {
-                save_snapshot_error("vm_stop_force_state error %d", ret);
-                break;
-            }
-            DPRINTF("savevm inerate finished\n");
-            /* upstream made the return value here inconsistent
-             * (-1 instead of 'ret' in one case and 0 after flush which can
-             * still set a file error...)
-             */
-            (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
-            ret = qemu_file_get_error(snap_state.file);
-            if (ret < 0) {
-                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
-                break;
-            }
-            DPRINTF("save complete\n");
+
+            DPRINTF("savevm iterate complete\n");
            break;
        }
    }

-    qemu_bh_schedule(snap_state.cleanup_bh);
-
-out:
-    /* set migration state accordingly and clear soon-to-be stale file */
-    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
-                      ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
-    ms->to_dst_file = NULL;
+    qemu_bh_schedule(snap_state.finalize_bh);
}

void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
{
    Error *local_err = NULL;
    MigrationState *ms = migrate_get_current();
+    AioContext *iohandler_ctx = iohandler_get_aio_context();

    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;

@@ -347,7 +356,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)

    /*
     * qemu_savevm_* paths use migration code and expect a migration state.
-     * State is cleared in process_savevm_thread, but has to be initialized
+     * State is cleared in process_savevm_co, but has to be initialized
     * here (blocking main thread, from QMP) to avoid race conditions.
     */
    migrate_init(ms);
@@ -358,13 +367,19 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
    blk_op_block_all(snap_state.target, snap_state.blocker);

    snap_state.state = SAVE_STATE_ACTIVE;
-    snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
-    snap_state.co = qemu_coroutine_create(&process_savevm_coro, NULL);
+    snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+    snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
    qemu_mutex_unlock_iothread();
    qemu_savevm_state_header(snap_state.file);
    qemu_savevm_state_setup(snap_state.file);
    qemu_mutex_lock_iothread();
+
+    /* Async processing from here on out happens in iohandler context, so let
+     * the target bdrv have its home there.
+     */
+    blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+    aio_co_schedule(iohandler_ctx, snap_state.co);

    return;
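For orientation, the split this patch introduces boils down to one pattern: a coroutine does the long-running work in the iohandler AioContext and then hands off to a bottom half that runs in the main loop, where the finalization steps must live. The sketch below is only an illustration of that pattern, not the savevm-async.c code itself; the names WorkState, work_co, finalize_bh_cb and start_work are made up, and it only builds inside the QEMU tree.

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "block/aio.h"
#include "qemu/coroutine.h"

typedef struct WorkState {
    QEMUBH *finalize_bh;
    Coroutine *co;
} WorkState;

static WorkState work_state;

/* Bottom half: runs in the main loop's AioContext once the coroutine is done. */
static void finalize_bh_cb(void *opaque)
{
    WorkState *s = opaque;

    qemu_bh_delete(s->finalize_bh);
    s->finalize_bh = NULL;
    s->co = NULL;
    /* ... stop the VM, complete the migration state, clean up ... */
}

/* Coroutine: runs in the iohandler AioContext, then hands off to the BH. */
static void coroutine_fn work_co(void *opaque)
{
    WorkState *s = opaque;

    /* ... iterate the state and write it out ... */
    qemu_bh_schedule(s->finalize_bh);
}

static void start_work(void)
{
    work_state.finalize_bh = qemu_bh_new(finalize_bh_cb, &work_state);
    work_state.co = qemu_coroutine_create(work_co, &work_state);
    aio_co_schedule(iohandler_get_aio_context(), work_state.co);
}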
debian/patches/pve/0046-util-async-Add-aio_co_reschedule_self.patch (vendored, new file, 86 lines)
@@ -0,0 +1,86 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 27 May 2020 11:33:20 +0200
Subject: [PATCH] util/async: Add aio_co_reschedule_self()

From: Kevin Wolf <kwolf@redhat.com>

Add a function that can be used to move the currently running coroutine
to a different AioContext (and therefore potentially a different
thread).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/aio.h | 10 ++++++++++
 util/async.c        | 30 ++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/include/block/aio.h b/include/block/aio.h
index 62ed954344..d5399c67d6 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
+#include "qemu/coroutine.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -654,6 +655,15 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
 */
void aio_co_schedule(AioContext *ctx, struct Coroutine *co);

+/**
+ * aio_co_reschedule_self:
+ * @new_ctx: the new context
+ *
+ * Move the currently running coroutine to new_ctx. If the coroutine is already
+ * running in new_ctx, do nothing.
+ */
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
+
/**
 * aio_co_wake:
 * @co: the coroutine
diff --git a/util/async.c b/util/async.c
index 3165a28f2f..4eba1e6f1b 100644
--- a/util/async.c
+++ b/util/async.c
@@ -558,6 +558,36 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co)
    aio_context_unref(ctx);
}

+typedef struct AioCoRescheduleSelf {
+    Coroutine *co;
+    AioContext *new_ctx;
+} AioCoRescheduleSelf;
+
+static void aio_co_reschedule_self_bh(void *opaque)
+{
+    AioCoRescheduleSelf *data = opaque;
+    aio_co_schedule(data->new_ctx, data->co);
+}
+
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
+{
+    AioContext *old_ctx = qemu_get_current_aio_context();
+
+    if (old_ctx != new_ctx) {
+        AioCoRescheduleSelf data = {
+            .co = qemu_coroutine_self(),
+            .new_ctx = new_ctx,
+        };
+        /*
+         * We can't directly schedule the coroutine in the target context
+         * because this would be racy: The other thread could try to enter the
+         * coroutine before it has yielded in this one.
+         */
+        aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
+        qemu_coroutine_yield();
+    }
+}
+
void aio_co_wake(struct Coroutine *co)
{
    AioContext *ctx;
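The contract of aio_co_reschedule_self() (hop to new_ctx, no-op if already there) makes context-crossing work easy to express from coroutine code. A minimal usage sketch, assuming the usual QEMU block-layer headers; flush_drive_from_main_co() is a made-up name, and the next patch in this series applies the same pattern inside process_savevm_co():

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "block/aio.h"
#include "block/block.h"

/* Flush one drive from a coroutine currently running in the main loop by
 * hopping into the drive's AioContext and back. Illustrative only. */
static void coroutine_fn flush_drive_from_main_co(BlockDriverState *bs)
{
    AioContext *main_ctx = qemu_get_aio_context();
    AioContext *bs_ctx = bdrv_get_aio_context(bs);

    if (bs_ctx != main_ctx) {
        aio_co_reschedule_self(bs_ctx);   /* now running in the drive's IOThread */
    }

    bdrv_flush(bs);                       /* may yield; resumes in bs_ctx */

    if (bs_ctx != main_ctx) {
        aio_co_reschedule_self(main_ctx); /* hop back to the main loop */
    }
}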
debian/patches/pve/0047-savevm-async-flush-IOThread-drives-async-before-ente.patch (vendored, new file, 58 lines)
@@ -0,0 +1,58 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 27 May 2020 11:33:21 +0200
Subject: [PATCH] savevm-async: flush IOThread-drives async before entering
 blocking part

By flushing all drives where its possible to so before entering the
blocking part (where the VM is stopped), we can reduce the time spent in
said part for every disk that has an IOThread (other drives cannot be
flushed async anyway).

Suggested-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 savevm-async.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/savevm-async.c b/savevm-async.c
index 2894c94233..4ce83a0691 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -253,6 +253,8 @@ static void coroutine_fn process_savevm_co(void *opaque)
{
    int ret;
    int64_t maxlen;
+    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;

    ret = qemu_file_get_error(snap_state.file);
    if (ret < 0) {
@@ -288,6 +290,27 @@ static void coroutine_fn process_savevm_co(void *opaque)
        }
    }

+    /* If a drive runs in an IOThread we can flush it async, and only
+     * need to sync-flush whatever IO happens between now and
+     * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+     * so move there now and after every flush.
+     */
+    aio_co_reschedule_self(qemu_get_aio_context());
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+        if (bs == blk_bs(snap_state.target)) {
+            continue;
+        }
+
+        AioContext *bs_ctx = bdrv_get_aio_context(bs);
+        if (bs_ctx != qemu_get_aio_context()) {
+            DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+            aio_co_reschedule_self(bs_ctx);
+            bdrv_flush(bs);
+            aio_co_reschedule_self(qemu_get_aio_context());
+        }
+    }
+
    qemu_bh_schedule(snap_state.finalize_bh);
}
debian/patches/pve/0048-savevm-async-add-debug-timing-prints.patch (vendored, new file, 80 lines)
@@ -0,0 +1,80 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 27 May 2020 11:33:22 +0200
Subject: [PATCH] savevm-async: add debug timing prints

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[ Thomas: guard variable declaration by DEBUG #ifdef ]
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
 savevm-async.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/savevm-async.c b/savevm-async.c
index 4ce83a0691..0388cebbe9 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -202,6 +202,10 @@ static void process_savevm_finalize(void *opaque)
    AioContext *iohandler_ctx = iohandler_get_aio_context();
    MigrationState *ms = migrate_get_current();

+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
    qemu_bh_delete(snap_state.finalize_bh);
    snap_state.finalize_bh = NULL;
    snap_state.co = NULL;
@@ -226,6 +230,8 @@ static void process_savevm_finalize(void *opaque)
    }

    DPRINTF("state saving complete\n");
+    DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);

    /* clear migration state */
    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
@@ -247,6 +253,9 @@ static void process_savevm_finalize(void *opaque)
        vm_start();
        snap_state.saved_vm_running = false;
    }
+
+    DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
}

static void coroutine_fn process_savevm_co(void *opaque)
@@ -256,6 +265,10 @@ static void coroutine_fn process_savevm_co(void *opaque)
    BdrvNextIterator it;
    BlockDriverState *bs = NULL;

+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
    ret = qemu_file_get_error(snap_state.file);
    if (ret < 0) {
        save_snapshot_error("qemu_savevm_state_setup failed");
@@ -290,6 +303,12 @@ static void coroutine_fn process_savevm_co(void *opaque)
        }
    }

+    DPRINTF("timing: process_savevm_co took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
    /* If a drive runs in an IOThread we can flush it async, and only
     * need to sync-flush whatever IO happens between now and
     * vm_stop_force_state. bdrv_next can only be called from main AioContext,
@@ -311,6 +330,9 @@ static void coroutine_fn process_savevm_co(void *opaque)
        }
    }

+    DPRINTF("timing: async flushing took %ld ms\n",
+            qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
    qemu_bh_schedule(snap_state.finalize_bh);
}
debian/patches/series (vendored, 4 insertions)
@@ -42,3 +42,7 @@ pve/0041-PVE-Backup-avoid-use-QemuRecMutex-inside-coroutines.patch
 pve/0042-PVE-Backup-use-QemuMutex-instead-of-QemuRecMutex.patch
 pve/0043-move-savevm-async-back-into-a-coroutine.patch
 pve/0044-add-optional-buffer-size-to-QEMUFile.patch
+pve/0045-savevm-async-move-more-code-to-cleanup-and-rename-to.patch
+pve/0046-util-async-Add-aio_co_reschedule_self.patch
+pve/0047-savevm-async-flush-IOThread-drives-async-before-ente.patch
+pve/0048-savevm-async-add-debug-timing-prints.patch