debian/patches: squash some followup patches and regroup a bit more together

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
Thomas Lamprecht 2020-07-02 13:07:28 +02:00
parent 8efe995b49
commit d7f4e01a34
37 changed files with 384 additions and 1460 deletions

View File

@ -5,21 +5,30 @@ Subject: [PATCH] PVE: internal snapshot async
Truncate at 1024 boundary (Fabian Ebner will send a patch for stable) Truncate at 1024 boundary (Fabian Ebner will send a patch for stable)
Put qemu_savevm_state_{header,setup} into the main loop and the rest
of the iteration into a coroutine. The former need to lock the
iothread (and we can't unlock it in the coroutine), and the latter
can't deal with being in a separate thread, so a coroutine it must
be.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com> Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
--- ---
Makefile.objs | 1 + Makefile.objs | 1 +
hmp-commands-info.hx | 13 + hmp-commands-info.hx | 13 +
hmp-commands.hx | 32 +++ hmp-commands.hx | 32 +++
include/block/aio.h | 10 +
include/migration/snapshot.h | 1 + include/migration/snapshot.h | 1 +
include/monitor/hmp.h | 5 + include/monitor/hmp.h | 5 +
monitor/hmp-cmds.c | 57 +++++ monitor/hmp-cmds.c | 57 ++++
qapi/migration.json | 34 +++ qapi/migration.json | 34 +++
qapi/misc.json | 32 +++ qapi/misc.json | 32 +++
qemu-options.hx | 12 + qemu-options.hx | 12 +
savevm-async.c | 464 +++++++++++++++++++++++++++++++++++ savevm-async.c | 542 +++++++++++++++++++++++++++++++++++
softmmu/vl.c | 10 + softmmu/vl.c | 10 +
11 files changed, 661 insertions(+) util/async.c | 30 ++
13 files changed, 779 insertions(+)
create mode 100644 savevm-async.c create mode 100644 savevm-async.c
diff --git a/Makefile.objs b/Makefile.objs diff --git a/Makefile.objs b/Makefile.objs
@ -98,6 +107,34 @@ index 7f0f3974ad..81fe305d07 100644
+ .help = "Resume VM after snaphot.", + .help = "Resume VM after snaphot.",
+ .cmd = hmp_savevm_end, + .cmd = hmp_savevm_end,
+ }, + },
diff --git a/include/block/aio.h b/include/block/aio.h
index 62ed954344..d5399c67d6 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
+#include "qemu/coroutine.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -654,6 +655,15 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
*/
void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
+/**
+ * aio_co_reschedule_self:
+ * @new_ctx: the new context
+ *
+ * Move the currently running coroutine to new_ctx. If the coroutine is already
+ * running in new_ctx, do nothing.
+ */
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
+
/**
* aio_co_wake:
* @co: the coroutine
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index c85b6ec75b..4411b7121d 100644 index c85b6ec75b..4411b7121d 100644
--- a/include/migration/snapshot.h --- a/include/migration/snapshot.h
@ -313,10 +350,10 @@ index 292d4e7c0c..55eef64ddf 100644
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL) "-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/savevm-async.c b/savevm-async.c diff --git a/savevm-async.c b/savevm-async.c
new file mode 100644 new file mode 100644
index 0000000000..54ceeae26c index 0000000000..7fd8a69374
--- /dev/null --- /dev/null
+++ b/savevm-async.c +++ b/savevm-async.c
@@ -0,0 +1,464 @@ @@ -0,0 +1,542 @@
+#include "qemu/osdep.h" +#include "qemu/osdep.h"
+#include "migration/migration.h" +#include "migration/migration.h"
+#include "migration/savevm.h" +#include "migration/savevm.h"
@ -369,8 +406,8 @@ index 0000000000..54ceeae26c
+ int saved_vm_running; + int saved_vm_running;
+ QEMUFile *file; + QEMUFile *file;
+ int64_t total_time; + int64_t total_time;
+ QEMUBH *cleanup_bh; + QEMUBH *finalize_bh;
+ QemuThread thread; + Coroutine *co;
+} snap_state; +} snap_state;
+ +
+SaveVMInfo *qmp_query_savevm(Error **errp) +SaveVMInfo *qmp_query_savevm(Error **errp)
@ -477,6 +514,7 @@ index 0000000000..54ceeae26c
+ BlkRwCo *rwco = opaque; + BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size, + rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
+ rwco->qiov, 0); + rwco->qiov, 0);
+ aio_wait_kick();
+} +}
+ +
+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov, +static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
@ -514,14 +552,50 @@ index 0000000000..54ceeae26c
+ .close = block_state_close, + .close = block_state_close,
+}; +};
+ +
+static void process_savevm_cleanup(void *opaque) +static void process_savevm_finalize(void *opaque)
+{ +{
+ int ret; + int ret;
+ qemu_bh_delete(snap_state.cleanup_bh); + AioContext *iohandler_ctx = iohandler_get_aio_context();
+ snap_state.cleanup_bh = NULL; + MigrationState *ms = migrate_get_current();
+ qemu_mutex_unlock_iothread(); +
+ qemu_thread_join(&snap_state.thread); +#ifdef DEBUG_SAVEVM_STATE
+ qemu_mutex_lock_iothread(); + int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+ qemu_bh_delete(snap_state.finalize_bh);
+ snap_state.finalize_bh = NULL;
+ snap_state.co = NULL;
+
+ /* We need to own the target bdrv's context for the following functions,
+ * so move it back. It can stay in the main context and live out its live
+ * there, since we're done with it after this method ends anyway.
+ */
+ aio_context_acquire(iohandler_ctx);
+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+ aio_context_release(iohandler_ctx);
+
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) {
+ save_snapshot_error("vm_stop_force_state error %d", ret);
+ }
+
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+
+ DPRINTF("state saving complete\n");
+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+ /* clear migration state */
+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+ ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ms->to_dst_file = NULL;
+
+ qemu_savevm_state_cleanup();
+
+ ret = save_snapshot_cleanup(); + ret = save_snapshot_cleanup();
+ if (ret < 0) { + if (ret < 0) {
+ save_snapshot_error("save_snapshot_cleanup error %d", ret); + save_snapshot_error("save_snapshot_cleanup error %d", ret);
@ -535,23 +609,26 @@ index 0000000000..54ceeae26c
+ vm_start(); + vm_start();
+ snap_state.saved_vm_running = false; + snap_state.saved_vm_running = false;
+ } + }
+
+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+} +}
+ +
+static void *process_savevm_thread(void *opaque) +static void coroutine_fn process_savevm_co(void *opaque)
+{ +{
+ int ret; + int ret;
+ int64_t maxlen; + int64_t maxlen;
+ BdrvNextIterator it;
+ BlockDriverState *bs = NULL;
+ +
+ rcu_register_thread(); +#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+ +
+ qemu_savevm_state_header(snap_state.file);
+ qemu_savevm_state_setup(snap_state.file);
+ ret = qemu_file_get_error(snap_state.file); + ret = qemu_file_get_error(snap_state.file);
+
+ if (ret < 0) { + if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_setup failed"); + save_snapshot_error("qemu_savevm_state_setup failed");
+ rcu_unregister_thread(); + return;
+ return NULL;
+ } + }
+ +
+ while (snap_state.state == SAVE_STATE_ACTIVE) { + while (snap_state.state == SAVE_STATE_ACTIVE) {
@ -563,54 +640,63 @@ index 0000000000..54ceeae26c
+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024; + maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+ +
+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) { + if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
+ qemu_mutex_lock_iothread();
+ ret = qemu_savevm_state_iterate(snap_state.file, false); + ret = qemu_savevm_state_iterate(snap_state.file, false);
+ if (ret < 0) { + if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret); + save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ break; + break;
+ } + }
+ qemu_mutex_unlock_iothread(); + DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+ DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
+ } else { + } else {
+ qemu_mutex_lock_iothread();
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ ret = global_state_store(); + ret = global_state_store();
+ if (ret) { + if (ret) {
+ save_snapshot_error("global_state_store error %d", ret); + save_snapshot_error("global_state_store error %d", ret);
+ break; + break;
+ } + }
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); +
+ if (ret < 0) { + DPRINTF("savevm iterate complete\n");
+ save_snapshot_error("vm_stop_force_state error %d", ret);
+ break; + break;
+ } + }
+ DPRINTF("savevm inerate finished\n"); + }
+ /* upstream made the return value here inconsistent +
+ * (-1 instead of 'ret' in one case and 0 after flush which can + DPRINTF("timing: process_savevm_co took %ld ms\n",
+ * still set a file error...) + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+ /* If a drive runs in an IOThread we can flush it async, and only
+ * need to sync-flush whatever IO happens between now and
+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+ * so move there now and after every flush.
+ */ + */
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false); + aio_co_reschedule_self(qemu_get_aio_context());
+ ret = qemu_file_get_error(snap_state.file); + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ if (ret < 0) { + /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret); + if (bs == blk_bs(snap_state.target)) {
+ break; + continue;
+ } + }
+ qemu_savevm_state_cleanup(); +
+ DPRINTF("save complete\n"); + AioContext *bs_ctx = bdrv_get_aio_context(bs);
+ break; + if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+ aio_co_reschedule_self(bs_ctx);
+ bdrv_flush(bs);
+ aio_co_reschedule_self(qemu_get_aio_context());
+ } + }
+ } + }
+ +
+ qemu_bh_schedule(snap_state.cleanup_bh); + DPRINTF("timing: async flushing took %ld ms\n",
+ qemu_mutex_unlock_iothread(); + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+ +
+ rcu_unregister_thread(); + qemu_bh_schedule(snap_state.finalize_bh);
+ return NULL;
+} +}
+ +
+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp) +void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+{ +{
+ Error *local_err = NULL; + Error *local_err = NULL;
+ MigrationState *ms = migrate_get_current();
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+ +
+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH; + int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+ +
@ -620,6 +706,17 @@ index 0000000000..54ceeae26c
+ return; + return;
+ } + }
+ +
+ if (migration_is_running(ms->state)) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+ return;
+ }
+
+ if (migrate_use_block()) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "Block migration and snapshots are incompatible");
+ return;
+ }
+
+ /* initialize snapshot info */ + /* initialize snapshot info */
+ snap_state.saved_vm_running = runstate_is_running(); + snap_state.saved_vm_running = runstate_is_running();
+ snap_state.bs_pos = 0; + snap_state.bs_pos = 0;
@ -658,14 +755,32 @@ index 0000000000..54ceeae26c
+ goto restart; + goto restart;
+ } + }
+ +
+ /*
+ * qemu_savevm_* paths use migration code and expect a migration state.
+ * State is cleared in process_savevm_co, but has to be initialized
+ * here (blocking main thread, from QMP) to avoid race conditions.
+ */
+ migrate_init(ms);
+ memset(&ram_counters, 0, sizeof(ram_counters));
+ ms->to_dst_file = snap_state.file;
+ +
+ error_setg(&snap_state.blocker, "block device is in use by savevm"); + error_setg(&snap_state.blocker, "block device is in use by savevm");
+ blk_op_block_all(snap_state.target, snap_state.blocker); + blk_op_block_all(snap_state.target, snap_state.blocker);
+ +
+ snap_state.state = SAVE_STATE_ACTIVE; + snap_state.state = SAVE_STATE_ACTIVE;
+ snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state); + snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+ qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread, + snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+ NULL, QEMU_THREAD_JOINABLE); + qemu_mutex_unlock_iothread();
+ qemu_savevm_state_header(snap_state.file);
+ qemu_savevm_state_setup(snap_state.file);
+ qemu_mutex_lock_iothread();
+
+ /* Async processing from here on out happens in iohandler context, so let
+ * the target bdrv have its home there.
+ */
+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+ aio_co_schedule(iohandler_ctx, snap_state.co);
+ +
+ return; + return;
+ +
@ -816,3 +931,44 @@ index 32c0047889..4b45eb0c37 100644
} }
if (replay_mode != REPLAY_MODE_NONE) { if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init(); replay_vmstate_init();
diff --git a/util/async.c b/util/async.c
index 3165a28f2f..4eba1e6f1b 100644
--- a/util/async.c
+++ b/util/async.c
@@ -558,6 +558,36 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co)
aio_context_unref(ctx);
}
+typedef struct AioCoRescheduleSelf {
+ Coroutine *co;
+ AioContext *new_ctx;
+} AioCoRescheduleSelf;
+
+static void aio_co_reschedule_self_bh(void *opaque)
+{
+ AioCoRescheduleSelf *data = opaque;
+ aio_co_schedule(data->new_ctx, data->co);
+}
+
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
+{
+ AioContext *old_ctx = qemu_get_current_aio_context();
+
+ if (old_ctx != new_ctx) {
+ AioCoRescheduleSelf data = {
+ .co = qemu_coroutine_self(),
+ .new_ctx = new_ctx,
+ };
+ /*
+ * We can't directly schedule the coroutine in the target context
+ * because this would be racy: The other thread could try to enter the
+ * coroutine before it has yielded in this one.
+ */
+ aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
+ qemu_coroutine_yield();
+ }
+}
+
void aio_co_wake(struct Coroutine *co)
{
AioContext *ctx;

View File

@ -160,10 +160,10 @@ index a9b6d6ccb7..8752d27c74 100644
int qemu_get_fd(QEMUFile *f); int qemu_get_fd(QEMUFile *f);
int qemu_fclose(QEMUFile *f); int qemu_fclose(QEMUFile *f);
diff --git a/savevm-async.c b/savevm-async.c diff --git a/savevm-async.c b/savevm-async.c
index af865b9a0a..c3fe741c38 100644 index 7fd8a69374..0388cebbe9 100644
--- a/savevm-async.c --- a/savevm-async.c
+++ b/savevm-async.c +++ b/savevm-async.c
@@ -338,7 +338,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp) @@ -392,7 +392,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
goto restart; goto restart;
} }
@ -172,7 +172,7 @@ index af865b9a0a..c3fe741c38 100644
if (!snap_state.file) { if (!snap_state.file) {
error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile); error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
@@ -454,7 +454,7 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp) @@ -514,7 +514,7 @@ int load_snapshot_from_blockdev(const char *filename, Error **errp)
blk_op_block_all(be, blocker); blk_op_block_all(be, blocker);
/* restore the VM state */ /* restore the VM state */

View File

@ -1,22 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:51 +0200
Subject: [PATCH] PVE: savevm-async: kick AIO wait on block state write
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
savevm-async.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/savevm-async.c b/savevm-async.c
index 54ceeae26c..393d55af2a 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -158,6 +158,7 @@ static void coroutine_fn block_state_write_entry(void *opaque) {
BlkRwCo *rwco = opaque;
rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
rwco->qiov, 0);
+ aio_wait_kick();
}
static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,

View File

@ -1,38 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:52 +0200
Subject: [PATCH] PVE: move snapshot cleanup into bottom half
as per:
(0ceccd858a8d) migration: qemu_savevm_state_cleanup() in cleanup
may affect held locks and therefore change assumptions made
by that function!
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
savevm-async.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/savevm-async.c b/savevm-async.c
index 393d55af2a..790e27ae37 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -201,6 +201,8 @@ static void process_savevm_cleanup(void *opaque)
int ret;
qemu_bh_delete(snap_state.cleanup_bh);
snap_state.cleanup_bh = NULL;
+ qemu_savevm_state_cleanup();
+
qemu_mutex_unlock_iothread();
qemu_thread_join(&snap_state.thread);
qemu_mutex_lock_iothread();
@@ -277,7 +279,6 @@ static void *process_savevm_thread(void *opaque)
save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
break;
}
- qemu_savevm_state_cleanup();
DPRINTF("save complete\n");
break;
}

View File

@ -7,10 +7,10 @@ Subject: [PATCH] PVE-Backup: add vma backup format code
Makefile | 3 +- Makefile | 3 +-
Makefile.objs | 1 + Makefile.objs | 1 +
vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++++++ vma-reader.c | 857 ++++++++++++++++++++++++++++++++++++++++++++++++++
vma-writer.c | 771 +++++++++++++++++++++++++++++++++++++++++++++ vma-writer.c | 790 ++++++++++++++++++++++++++++++++++++++++++++++
vma.c | 837 ++++++++++++++++++++++++++++++++++++++++++++++++ vma.c | 839 ++++++++++++++++++++++++++++++++++++++++++++++++
vma.h | 150 +++++++++ vma.h | 150 +++++++++
6 files changed, 2618 insertions(+), 1 deletion(-) 6 files changed, 2639 insertions(+), 1 deletion(-)
create mode 100644 vma-reader.c create mode 100644 vma-reader.c
create mode 100644 vma-writer.c create mode 100644 vma-writer.c
create mode 100644 vma.c create mode 100644 vma.c
@ -914,10 +914,10 @@ index 0000000000..2b1d1cdab3
+ +
diff --git a/vma-writer.c b/vma-writer.c diff --git a/vma-writer.c b/vma-writer.c
new file mode 100644 new file mode 100644
index 0000000000..fe86b18a60 index 0000000000..f5d2c5d23c
--- /dev/null --- /dev/null
+++ b/vma-writer.c +++ b/vma-writer.c
@@ -0,0 +1,771 @@ @@ -0,0 +1,790 @@
+/* +/*
+ * VMA: Virtual Machine Archive + * VMA: Virtual Machine Archive
+ * + *
@ -1553,17 +1553,33 @@ index 0000000000..fe86b18a60
+ +
+ DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num); + DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
+ +
+ uint64_t dev_size = vmaw->stream_info[dev_id].size;
+ uint16_t mask = 0; + uint16_t mask = 0;
+ +
+ if (buf) { + if (buf) {
+ int i; + int i;
+ int bit = 1; + int bit = 1;
+ uint64_t byte_offset = cluster_num * VMA_CLUSTER_SIZE;
+ for (i = 0; i < 16; i++) { + for (i = 0; i < 16; i++) {
+ const unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE); + const unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
+ if (!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) { +
+ // Note: If the source is not 64k-aligned, we might reach 4k blocks
+ // after the end of the device. Always mark these as zero in the
+ // mask, so the restore handles them correctly.
+ if (byte_offset < dev_size &&
+ !buffer_is_zero(vmablock, VMA_BLOCK_SIZE))
+ {
+ mask |= bit; + mask |= bit;
+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock, + memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock,
+ VMA_BLOCK_SIZE); + VMA_BLOCK_SIZE);
+
+ // prevent memory leakage on unaligned last block
+ if (byte_offset + VMA_BLOCK_SIZE > dev_size) {
+ uint64_t real_data_in_block = dev_size - byte_offset;
+ memset(vmaw->outbuf + vmaw->outbuf_pos + real_data_in_block,
+ 0, VMA_BLOCK_SIZE - real_data_in_block);
+ }
+
+ vmaw->outbuf_pos += VMA_BLOCK_SIZE; + vmaw->outbuf_pos += VMA_BLOCK_SIZE;
+ } else { + } else {
+ DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i); + DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
@ -1571,6 +1587,7 @@ index 0000000000..fe86b18a60
+ *zero_bytes += VMA_BLOCK_SIZE; + *zero_bytes += VMA_BLOCK_SIZE;
+ } + }
+ +
+ byte_offset += VMA_BLOCK_SIZE;
+ bit = bit << 1; + bit = bit << 1;
+ } + }
+ } else { + } else {
@ -1596,8 +1613,8 @@ index 0000000000..fe86b18a60
+ +
+ if (dev_id != vmaw->vmstate_stream) { + if (dev_id != vmaw->vmstate_stream) {
+ uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE; + uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
+ if (last > vmaw->stream_info[dev_id].size) { + if (last > dev_size) {
+ uint64_t diff = last - vmaw->stream_info[dev_id].size; + uint64_t diff = last - dev_size;
+ if (diff >= VMA_CLUSTER_SIZE) { + if (diff >= VMA_CLUSTER_SIZE) {
+ vma_writer_set_error(vmaw, "vma_writer_write: " + vma_writer_set_error(vmaw, "vma_writer_write: "
+ "read after last cluster"); + "read after last cluster");
@ -1687,14 +1704,16 @@ index 0000000000..fe86b18a60
+ g_checksum_free(vmaw->md5csum); + g_checksum_free(vmaw->md5csum);
+ } + }
+ +
+ qemu_vfree(vmaw->headerbuf);
+ qemu_vfree(vmaw->outbuf);
+ g_free(vmaw); + g_free(vmaw);
+} +}
diff --git a/vma.c b/vma.c diff --git a/vma.c b/vma.c
new file mode 100644 new file mode 100644
index 0000000000..a82752448a index 0000000000..2eea2fc281
--- /dev/null --- /dev/null
+++ b/vma.c +++ b/vma.c
@@ -0,0 +1,837 @@ @@ -0,0 +1,839 @@
+/* +/*
+ * VMA: Virtual Machine Archive + * VMA: Virtual Machine Archive
+ * + *
@ -2262,6 +2281,7 @@ index 0000000000..a82752448a
+ g_warning("vma_writer_close failed %s", error_get_pretty(err)); + g_warning("vma_writer_close failed %s", error_get_pretty(err));
+ } + }
+ } + }
+ qemu_vfree(buf);
+} +}
+ +
+static int create_archive(int argc, char **argv) +static int create_archive(int argc, char **argv)
@ -2429,6 +2449,7 @@ index 0000000000..a82752448a
+ g_error("creating vma archive failed"); + g_error("creating vma archive failed");
+ } + }
+ +
+ vma_writer_destroy(vmaw);
+ return 0; + return 0;
+} +}
+ +

View File

@ -71,7 +71,7 @@ index 8ed1eba95b..f453a95efc 100644
# Hardware support # Hardware support
ifeq ($(TARGET_NAME), sparc64) ifeq ($(TARGET_NAME), sparc64)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4c8c375172..4f03881286 100644 index 4c8c375172..d485c3ac79 100644
--- a/block/monitor/block-hmp-cmds.c --- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c
@@ -1011,3 +1011,36 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) @@ -1011,3 +1011,36 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)

View File

@ -1,39 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:17:00 +0200
Subject: [PATCH] PVE-Backup: aquire aio_context before calling
backup_job_create
And do not set target in same aoi_context as source, because
this is already done in bdrv_backup_top_append ...
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
---
pve-backup.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 9ae89fb679..38dd33e28b 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -757,17 +757,15 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l);
- // make sure target runs in same aoi_context as source
AioContext *aio_context = bdrv_get_aio_context(di->bs);
aio_context_acquire(aio_context);
- GSList *ignore = NULL;
- bdrv_set_aio_context_ignore(di->target, aio_context, &ignore);
- g_slist_free(ignore);
- aio_context_release(aio_context);
job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
+
+ aio_context_release(aio_context);
+
if (!job || local_err != NULL) {
qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
error_setg(&backup_state.stat.error, "backup_job_create failed");

View File

@ -1,6 +1,6 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com> From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:17:02 +0200 Date: Mon, 6 Apr 2020 12:17:00 +0200
Subject: [PATCH] PVE-Backup: avoid coroutines to fix AIO freeze, cleanups Subject: [PATCH] PVE-Backup: avoid coroutines to fix AIO freeze, cleanups
We observed various AIO pool loop freezes, so we decided to avoid We observed various AIO pool loop freezes, so we decided to avoid
@ -8,7 +8,8 @@ coroutines and restrict ourselfes using similar code as upstream
(see blockdev.c: do_backup_common). (see blockdev.c: do_backup_common).
* avoid coroutine for job related code (causes hangs with iothreads) * avoid coroutine for job related code (causes hangs with iothreads)
- this imply to use normal QemuRecMutex instead of CoMutex - We then acquire/release all mutexes outside coroutines now, so we can now
correctly use a normal mutex.
* split pvebackup_co_dump_cb into: * split pvebackup_co_dump_cb into:
- pvebackup_co_dump_pbs_cb and - pvebackup_co_dump_pbs_cb and
@ -25,33 +26,50 @@ coroutines and restrict ourselfes using similar code as upstream
There is progress on upstream to support running qmp commands inside There is progress on upstream to support running qmp commands inside
coroutines, see: coroutines, see:
https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04852.html https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04852.html
We should consider using that when it is available in upstream qemu. We should consider using that when it is available in upstream qemu.
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
--- ---
pve-backup.c | 611 +++++++++++++++++++++++++-------------------------- pve-backup.c | 638 ++++++++++++++++++++++++++-------------------------
1 file changed, 299 insertions(+), 312 deletions(-) 1 file changed, 320 insertions(+), 318 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c diff --git a/pve-backup.c b/pve-backup.c
index 38dd33e28b..169f0c68d0 100644 index 9ae89fb679..bb917ee972 100644
--- a/pve-backup.c --- a/pve-backup.c
+++ b/pve-backup.c +++ b/pve-backup.c
@@ -11,11 +11,10 @@ @@ -11,11 +11,27 @@
/* PVE backup state and related function */ /* PVE backup state and related function */
- +/*
+ * Note: A resume from a qemu_coroutine_yield can happen in a different thread,
+ * so you may not use normal mutexes within coroutines:
+ *
+ * ---bad-example---
+ * qemu_rec_mutex_lock(lock)
+ * ...
+ * qemu_coroutine_yield() // wait for something
+ * // we are now inside a different thread
+ * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!!
+ * ---end-bad-example--
+ *
+ * ==> Always use CoMutext inside coroutines.
+ * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex)
+ *
+ */
static struct PVEBackupState { static struct PVEBackupState {
struct { struct {
- // Everithing accessed from qmp command, protected using rwlock - // Everithing accessed from qmp command, protected using rwlock
- CoRwlock rwlock; - CoRwlock rwlock;
+ // Everithing accessed from qmp_backup_query command is protected using lock + // Everithing accessed from qmp_backup_query command is protected using lock
+ QemuRecMutex lock; + QemuMutex lock;
Error *error; Error *error;
time_t start_time; time_t start_time;
time_t end_time; time_t end_time;
@@ -25,19 +24,18 @@ static struct PVEBackupState { @@ -25,19 +41,20 @@ static struct PVEBackupState {
size_t total; size_t total;
size_t transferred; size_t transferred;
size_t zero_bytes; size_t zero_bytes;
@ -62,25 +80,27 @@ index 38dd33e28b..169f0c68d0 100644
ProxmoxBackupHandle *pbs; ProxmoxBackupHandle *pbs;
GList *di_list; GList *di_list;
- CoMutex backup_mutex; - CoMutex backup_mutex;
+ QemuRecMutex backup_mutex; + QemuMutex backup_mutex;
+ CoMutex dump_callback_mutex;
} backup_state; } backup_state;
static void pvebackup_init(void) static void pvebackup_init(void)
{ {
- qemu_co_rwlock_init(&backup_state.stat.rwlock); - qemu_co_rwlock_init(&backup_state.stat.rwlock);
- qemu_co_mutex_init(&backup_state.backup_mutex); - qemu_co_mutex_init(&backup_state.backup_mutex);
+ qemu_rec_mutex_init(&backup_state.stat.lock); + qemu_mutex_init(&backup_state.stat.lock);
+ qemu_rec_mutex_init(&backup_state.backup_mutex); + qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.dump_callback_mutex);
} }
// initialize PVEBackupState at startup // initialize PVEBackupState at startup
@@ -52,10 +50,54 @@ typedef struct PVEBackupDevInfo { @@ -52,10 +69,54 @@ typedef struct PVEBackupDevInfo {
BlockDriverState *target; BlockDriverState *target;
} PVEBackupDevInfo; } PVEBackupDevInfo;
-static void pvebackup_co_run_next_job(void); -static void pvebackup_co_run_next_job(void);
+static void pvebackup_run_next_job(void); +static void pvebackup_run_next_job(void);
+
+static BlockJob * +static BlockJob *
+lookup_active_block_job(PVEBackupDevInfo *di) +lookup_active_block_job(PVEBackupDevInfo *di)
+{ +{
@ -101,26 +121,26 @@ index 38dd33e28b..169f0c68d0 100644
+ +
+static void pvebackup_propagate_error(Error *err) +static void pvebackup_propagate_error(Error *err)
+{ +{
+ qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock);
+ error_propagate(&backup_state.stat.error, err); + error_propagate(&backup_state.stat.error, err);
+ qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock);
+} +}
+ +
+static bool pvebackup_error_or_canceled(void) +static bool pvebackup_error_or_canceled(void)
+{ +{
+ qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock);
+ bool error_or_canceled = !!backup_state.stat.error; + bool error_or_canceled = !!backup_state.stat.error;
+ qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock);
+ +
+ return error_or_canceled; + return error_or_canceled;
+} +}
+
+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes) +static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
+{ +{
+ qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.zero_bytes += zero_bytes; + backup_state.stat.zero_bytes += zero_bytes;
+ backup_state.stat.transferred += transferred; + backup_state.stat.transferred += transferred;
+ qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock);
+} +}
+ +
+// This may get called from multiple coroutines in multiple io-threads +// This may get called from multiple coroutines in multiple io-threads
@ -131,7 +151,7 @@ index 38dd33e28b..169f0c68d0 100644
void *opaque, void *opaque,
uint64_t start, uint64_t start,
uint64_t bytes, uint64_t bytes,
@@ -67,137 +109,129 @@ pvebackup_co_dump_cb( @@ -67,137 +128,127 @@ pvebackup_co_dump_cb(
const unsigned char *buf = pbuf; const unsigned char *buf = pbuf;
PVEBackupDevInfo *di = opaque; PVEBackupDevInfo *di = opaque;
@ -143,19 +163,19 @@ index 38dd33e28b..169f0c68d0 100644
+ Error *local_err = NULL; + Error *local_err = NULL;
+ int pbs_res = -1; + int pbs_res = -1;
+ +
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
- if (cancel) { - if (cancel) {
- return size; // return success - return size; // return success
+ // avoid deadlock if job is cancelled + // avoid deadlock if job is cancelled
+ if (pvebackup_error_or_canceled()) { + if (pvebackup_error_or_canceled()) {
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return -1; + return -1;
} }
- qemu_co_mutex_lock(&backup_state.backup_mutex); - qemu_co_mutex_lock(&backup_state.backup_mutex);
+ pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err); + pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
- int ret = -1; - int ret = -1;
+ if (pbs_res < 0) { + if (pbs_res < 0) {
@ -232,9 +252,10 @@ index 38dd33e28b..169f0c68d0 100644
+ PVEBackupDevInfo *di = opaque; + PVEBackupDevInfo *di = opaque;
- pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err); - pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+ int ret = -1;
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock); - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ int ret = -1; + assert(backup_state.vmaw);
- if (pbs_res < 0) { - if (pbs_res < 0) {
- error_propagate(&backup_state.stat.error, local_err); - error_propagate(&backup_state.stat.error, local_err);
@ -246,8 +267,6 @@ index 38dd33e28b..169f0c68d0 100644
- backup_state.stat.zero_bytes += size; - backup_state.stat.zero_bytes += size;
- } - }
- backup_state.stat.transferred += size; - backup_state.stat.transferred += size;
+ assert(backup_state.vmaw);
+
+ uint64_t remaining = size; + uint64_t remaining = size;
+ +
+ uint64_t cluster_num = start / VMA_CLUSTER_SIZE; + uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
@ -261,17 +280,17 @@ index 38dd33e28b..169f0c68d0 100644
+ } + }
+ +
+ while (remaining > 0) { + while (remaining > 0) {
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
+ // avoid deadlock if job is cancelled + // avoid deadlock if job is cancelled
+ if (pvebackup_error_or_canceled()) { + if (pvebackup_error_or_canceled()) {
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+ return -1; + return -1;
} }
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock); - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ size_t zero_bytes = 0; + size_t zero_bytes = 0;
+ ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes); + ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
- } else { - } else {
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock); - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
@ -307,18 +326,18 @@ index 38dd33e28b..169f0c68d0 100644
} }
-static void coroutine_fn pvebackup_co_cleanup(void) -static void coroutine_fn pvebackup_co_cleanup(void)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_co_cleanup(void *unused) +static void coroutine_fn pvebackup_co_cleanup(void *unused)
{ {
assert(qemu_in_coroutine()); assert(qemu_in_coroutine());
- qemu_co_mutex_lock(&backup_state.backup_mutex); - qemu_co_mutex_lock(&backup_state.backup_mutex);
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); -
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock); - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.end_time = time(NULL); backup_state.stat.end_time = time(NULL);
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock); - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) { if (backup_state.vmaw) {
Error *local_err = NULL; Error *local_err = NULL;
@ -353,12 +372,11 @@ index 38dd33e28b..169f0c68d0 100644
proxmox_backup_disconnect(backup_state.pbs); proxmox_backup_disconnect(backup_state.pbs);
backup_state.pbs = NULL; backup_state.pbs = NULL;
@@ -205,43 +239,14 @@ static void coroutine_fn pvebackup_co_cleanup(void) @@ -205,43 +256,14 @@ static void coroutine_fn pvebackup_co_cleanup(void)
g_list_free(backup_state.di_list); g_list_free(backup_state.di_list);
backup_state.di_list = NULL; backup_state.di_list = NULL;
- qemu_co_mutex_unlock(&backup_state.backup_mutex); - qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex);
} }
-typedef struct PVEBackupCompeteCallbackData { -typedef struct PVEBackupCompeteCallbackData {
@ -367,6 +385,7 @@ index 38dd33e28b..169f0c68d0 100644
-} PVEBackupCompeteCallbackData; -} PVEBackupCompeteCallbackData;
- -
-static void coroutine_fn pvebackup_co_complete_cb(void *opaque) -static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_complete_stream(void *opaque) +static void coroutine_fn pvebackup_complete_stream(void *opaque)
{ {
- assert(qemu_in_coroutine()); - assert(qemu_in_coroutine());
@ -401,7 +420,7 @@ index 38dd33e28b..169f0c68d0 100644
if (backup_state.vmaw) { if (backup_state.vmaw) {
vma_writer_close_stream(backup_state.vmaw, di->dev_id); vma_writer_close_stream(backup_state.vmaw, di->dev_id);
@@ -251,108 +256,96 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque) @@ -251,110 +273,101 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
Error *local_err = NULL; Error *local_err = NULL;
proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err); proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
if (local_err != NULL) { if (local_err != NULL) {
@ -419,19 +438,19 @@ index 38dd33e28b..169f0c68d0 100644
+static void pvebackup_complete_cb(void *opaque, int ret) +static void pvebackup_complete_cb(void *opaque, int ret)
+{ +{
+ assert(!qemu_in_coroutine()); + assert(!qemu_in_coroutine());
+
+ PVEBackupDevInfo *di = opaque;
- int pending_jobs = g_list_length(backup_state.di_list); - int pending_jobs = g_list_length(backup_state.di_list);
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); + PVEBackupDevInfo *di = opaque;
- qemu_co_mutex_unlock(&backup_state.backup_mutex); - qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ di->completed = true; + qemu_mutex_lock(&backup_state.backup_mutex);
- if (pending_jobs > 0) { - if (pending_jobs > 0) {
- pvebackup_co_run_next_job(); - pvebackup_co_run_next_job();
- } else { - } else {
- pvebackup_co_cleanup(); - pvebackup_co_cleanup();
+ di->completed = true;
+
+ if (ret < 0) { + if (ret < 0) {
+ Error *local_err = NULL; + Error *local_err = NULL;
+ error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret)); + error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
@ -475,7 +494,7 @@ index 38dd33e28b..169f0c68d0 100644
- qemu_co_mutex_unlock(&backup_state.backup_mutex); - qemu_co_mutex_unlock(&backup_state.backup_mutex);
- return; - return;
- } - }
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex);
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock); - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- if (!backup_state.stat.error) { - if (!backup_state.stat.error) {
@ -488,11 +507,13 @@ index 38dd33e28b..169f0c68d0 100644
+ +
+static void pvebackup_cancel(void) +static void pvebackup_cancel(void)
+{ +{
+ assert(!qemu_in_coroutine());
+
+ Error *cancel_err = NULL; + Error *cancel_err = NULL;
+ error_setg(&cancel_err, "backup canceled"); + error_setg(&cancel_err, "backup canceled");
+ pvebackup_propagate_error(cancel_err); + pvebackup_propagate_error(cancel_err);
+ +
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex);
if (backup_state.vmaw) { if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */ /* make sure vma writer does not block anymore */
@ -515,7 +536,7 @@ index 38dd33e28b..169f0c68d0 100644
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) { - if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
- continue; - continue;
- } - }
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex);
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common); - BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
- if (bjob && bjob->source_bs == di->bs) { - if (bjob && bjob->source_bs == di->bs) {
@ -531,7 +552,7 @@ index 38dd33e28b..169f0c68d0 100644
- } - }
+ BlockJob *next_job = NULL; + BlockJob *next_job = NULL;
+ +
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex);
+ +
+ GList *l = backup_state.di_list; + GList *l = backup_state.di_list;
+ while (l) { + while (l) {
@ -547,7 +568,7 @@ index 38dd33e28b..169f0c68d0 100644
- } - }
- qemu_co_mutex_unlock(&backup_state.backup_mutex); - qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex);
- if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler - if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler
+ if (next_job) { + if (next_job) {
@ -567,8 +588,11 @@ index 38dd33e28b..169f0c68d0 100644
+ pvebackup_cancel(); + pvebackup_cancel();
} }
+// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config( static int coroutine_fn pvebackup_co_add_config(
@@ -406,46 +399,97 @@ static int coroutine_fn pvebackup_co_add_config( const char *file,
const char *name,
@@ -406,46 +419,97 @@ static int coroutine_fn pvebackup_co_add_config(
bool job_should_pause(Job *job); bool job_should_pause(Job *job);
@ -579,7 +603,7 @@ index 38dd33e28b..169f0c68d0 100644
+ assert(!qemu_in_coroutine()); + assert(!qemu_in_coroutine());
- qemu_co_mutex_lock(&backup_state.backup_mutex); - qemu_co_mutex_lock(&backup_state.backup_mutex);
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex);
GList *l = backup_state.di_list; GList *l = backup_state.di_list;
while (l) { while (l) {
@ -613,7 +637,7 @@ index 38dd33e28b..169f0c68d0 100644
+ BlockJob *job = lookup_active_block_job(di); + BlockJob *job = lookup_active_block_job(di);
+ +
+ if (job) { + if (job) {
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex);
+ +
+ AioContext *aio_context = job->job.aio_context; + AioContext *aio_context = job->job.aio_context;
+ aio_context_acquire(aio_context); + aio_context_acquire(aio_context);
@ -628,13 +652,12 @@ index 38dd33e28b..169f0c68d0 100644
} }
+ aio_context_release(aio_context); + aio_context_release(aio_context);
+ return; + return;
} + }
} + }
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ +
+ block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup + block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
+
+ qemu_mutex_unlock(&backup_state.backup_mutex);
+} +}
+ +
+static bool create_backup_jobs(void) { +static bool create_backup_jobs(void) {
@ -687,23 +710,25 @@ index 38dd33e28b..169f0c68d0 100644
+ bdrv_unref(di->target); + bdrv_unref(di->target);
+ di->target = NULL; + di->target = NULL;
+ } + }
+ } }
+ } }
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ +
+ return errors; + return errors;
} }
typedef struct QmpBackupTask { typedef struct QmpBackupTask {
@@ -476,7 +520,7 @@ typedef struct QmpBackupTask { @@ -476,7 +540,8 @@ typedef struct QmpBackupTask {
UuidInfo *result; UuidInfo *result;
} QmpBackupTask; } QmpBackupTask;
-static void coroutine_fn pvebackup_co_start(void *opaque) -static void coroutine_fn pvebackup_co_start(void *opaque)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_co_prepare(void *opaque) +static void coroutine_fn pvebackup_co_prepare(void *opaque)
{ {
assert(qemu_in_coroutine()); assert(qemu_in_coroutine());
@@ -495,15 +539,14 @@ static void coroutine_fn pvebackup_co_start(void *opaque) @@ -495,16 +560,12 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
GList *di_list = NULL; GList *di_list = NULL;
GList *l; GList *l;
UuidInfo *uuid_info; UuidInfo *uuid_info;
@ -713,15 +738,15 @@ index 38dd33e28b..169f0c68d0 100644
const char *firewall_name = "qemu-server.fw"; const char *firewall_name = "qemu-server.fw";
- qemu_co_mutex_lock(&backup_state.backup_mutex); - qemu_co_mutex_lock(&backup_state.backup_mutex);
+ qemu_rec_mutex_lock(&backup_state.backup_mutex); -
if (backup_state.di_list) { if (backup_state.di_list) {
- qemu_co_mutex_unlock(&backup_state.backup_mutex); - qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); - error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, + error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished"); "previous backup not finished");
return; return;
@@ -631,7 +674,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque) }
@@ -631,7 +692,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
if (dev_id < 0) if (dev_id < 0)
goto err; goto err;
@ -730,7 +755,7 @@ index 38dd33e28b..169f0c68d0 100644
goto err; goto err;
} }
@@ -652,7 +695,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque) @@ -652,7 +713,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data; PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
l = g_list_next(l); l = g_list_next(l);
@ -739,27 +764,27 @@ index 38dd33e28b..169f0c68d0 100644
goto err; goto err;
} }
@@ -717,9 +760,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque) @@ -717,9 +778,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
} }
/* initialize global backup_state now */ /* initialize global backup_state now */
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock); - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- -
- backup_state.stat.cancel = false; - backup_state.stat.cancel = false;
+ qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock);
if (backup_state.stat.error) { if (backup_state.stat.error) {
error_free(backup_state.stat.error); error_free(backup_state.stat.error);
@@ -742,7 +783,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque) @@ -742,7 +801,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
backup_state.stat.transferred = 0; backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0; backup_state.stat.zero_bytes = 0;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock); - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock);
backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0; backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
@@ -751,45 +792,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque) @@ -751,48 +810,6 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
backup_state.di_list = di_list; backup_state.di_list = di_list;
@ -769,15 +794,17 @@ index 38dd33e28b..169f0c68d0 100644
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data; - PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
- l = g_list_next(l); - l = g_list_next(l);
- -
- // make sure target runs in same aoi_context as source
- AioContext *aio_context = bdrv_get_aio_context(di->bs); - AioContext *aio_context = bdrv_get_aio_context(di->bs);
- aio_context_acquire(aio_context); - aio_context_acquire(aio_context);
- GSList *ignore = NULL;
- bdrv_set_aio_context_ignore(di->target, aio_context, &ignore);
- g_slist_free(ignore);
- aio_context_release(aio_context);
- -
- job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL, - job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
- BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, - BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
- JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err); - JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
-
- aio_context_release(aio_context);
-
- if (!job || local_err != NULL) { - if (!job || local_err != NULL) {
- qemu_co_rwlock_wrlock(&backup_state.stat.rwlock); - qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- error_setg(&backup_state.stat.error, "backup_job_create failed"); - error_setg(&backup_state.stat.error, "backup_job_create failed");
@ -802,42 +829,45 @@ index 38dd33e28b..169f0c68d0 100644
- } else { - } else {
- pvebackup_co_cancel(NULL); - pvebackup_co_cancel(NULL);
- } - }
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); -
uuid_info = g_malloc0(sizeof(*uuid_info)); uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str; uuid_info->UUID = uuid_str;
@@ -833,7 +836,7 @@ err:
@@ -835,8 +852,6 @@ err:
rmdir(backup_dir); rmdir(backup_dir);
} }
- qemu_co_mutex_unlock(&backup_state.backup_mutex); - qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); -
task->result = NULL; task->result = NULL;
return; return;
@@ -878,32 +881,28 @@ UuidInfo *qmp_backup( }
@@ -880,32 +895,31 @@ UuidInfo *qmp_backup(
.errp = errp, .errp = errp,
}; };
- block_on_coroutine_fn(pvebackup_co_start, &task); - block_on_coroutine_fn(pvebackup_co_start, &task);
+ qemu_mutex_lock(&backup_state.backup_mutex);
- return task.result;
-}
+ block_on_coroutine_fn(pvebackup_co_prepare, &task); + block_on_coroutine_fn(pvebackup_co_prepare, &task);
+
+ if (*errp == NULL) { + if (*errp == NULL) {
+ qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ create_backup_jobs(); + create_backup_jobs();
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex);
+ pvebackup_run_next_job(); + pvebackup_run_next_job();
+ } else {
+ qemu_mutex_unlock(&backup_state.backup_mutex);
+ } + }
return task.result;
}
-
-typedef struct QmpQueryBackupTask { -typedef struct QmpQueryBackupTask {
- Error **errp; - Error **errp;
- BackupStatus *result; - BackupStatus *result;
-} QmpQueryBackupTask; -} QmpQueryBackupTask;
- + return task.result;
+}
-static void coroutine_fn pvebackup_co_query(void *opaque) -static void coroutine_fn pvebackup_co_query(void *opaque)
+BackupStatus *qmp_query_backup(Error **errp) +BackupStatus *qmp_query_backup(Error **errp)
{ {
@ -848,24 +878,24 @@ index 38dd33e28b..169f0c68d0 100644
BackupStatus *info = g_malloc0(sizeof(*info)); BackupStatus *info = g_malloc0(sizeof(*info));
- qemu_co_rwlock_rdlock(&backup_state.stat.rwlock); - qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock);
if (!backup_state.stat.start_time) { if (!backup_state.stat.start_time) {
/* not started, return {} */ /* not started, return {} */
- task->result = info; - task->result = info;
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock); - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- return; - return;
+ qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock);
+ return info; + return info;
} }
info->has_status = true; info->has_status = true;
@@ -939,19 +938,7 @@ static void coroutine_fn pvebackup_co_query(void *opaque) @@ -941,19 +955,7 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
info->has_transferred = true; info->has_transferred = true;
info->transferred = backup_state.stat.transferred; info->transferred = backup_state.stat.transferred;
- task->result = info; - task->result = info;
+ qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock);
- qemu_co_rwlock_unlock(&backup_state.stat.rwlock); - qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-} -}

View File

@ -1,88 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 8 Apr 2020 15:29:03 +0200
Subject: [PATCH] PVE: savevm-async: set up migration state
code mostly adapted from upstream savevm.c
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
savevm-async.c | 30 ++++++++++++++++++++++++++++--
1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/savevm-async.c b/savevm-async.c
index 790e27ae37..a38b15d652 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -225,6 +225,7 @@ static void *process_savevm_thread(void *opaque)
{
int ret;
int64_t maxlen;
+ MigrationState *ms = migrate_get_current();
rcu_register_thread();
@@ -234,8 +235,7 @@ static void *process_savevm_thread(void *opaque)
if (ret < 0) {
save_snapshot_error("qemu_savevm_state_setup failed");
- rcu_unregister_thread();
- return NULL;
+ goto out;
}
while (snap_state.state == SAVE_STATE_ACTIVE) {
@@ -287,6 +287,12 @@ static void *process_savevm_thread(void *opaque)
qemu_bh_schedule(snap_state.cleanup_bh);
qemu_mutex_unlock_iothread();
+out:
+ /* set migration state accordingly and clear soon-to-be stale file */
+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+ ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ms->to_dst_file = NULL;
+
rcu_unregister_thread();
return NULL;
}
@@ -294,6 +300,7 @@ static void *process_savevm_thread(void *opaque)
void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
{
Error *local_err = NULL;
+ MigrationState *ms = migrate_get_current();
int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
@@ -303,6 +310,17 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
return;
}
+ if (migration_is_running(ms->state)) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+ return;
+ }
+
+ if (migrate_use_block()) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "Block migration and snapshots are incompatible");
+ return;
+ }
+
/* initialize snapshot info */
snap_state.saved_vm_running = runstate_is_running();
snap_state.bs_pos = 0;
@@ -341,6 +359,14 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
goto restart;
}
+ /*
+ * qemu_savevm_* paths use migration code and expect a migration state.
+ * State is cleared in process_savevm_thread, but has to be initialized
+ * here (blocking main thread, from QMP) to avoid race conditions.
+ */
+ migrate_init(ms);
+ memset(&ram_counters, 0, sizeof(ram_counters));
+ ms->to_dst_file = snap_state.file;
error_setg(&snap_state.blocker, "block device is in use by savevm");
blk_op_block_all(snap_state.target, snap_state.blocker);

View File

@ -1,211 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Fri, 17 Apr 2020 08:57:47 +0200
Subject: [PATCH] PVE Backup: avoid use QemuRecMutex inside coroutines
---
pve-backup.c | 59 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 38 insertions(+), 21 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index 169f0c68d0..dddf430399 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -11,6 +11,23 @@
/* PVE backup state and related function */
+/*
+ * Note: A resume from a qemu_coroutine_yield can happen in a different thread,
+ * so you may not use normal mutexes within coroutines:
+ *
+ * ---bad-example---
+ * qemu_rec_mutex_lock(lock)
+ * ...
+ * qemu_coroutine_yield() // wait for something
+ * // we are now inside a different thread
+ * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!!
+ * ---end-bad-example--
+ *
+ * ==> Always use CoMutext inside coroutines.
+ * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex)
+ *
+ */
+
static struct PVEBackupState {
struct {
// Everithing accessed from qmp_backup_query command is protected using lock
@@ -30,12 +47,14 @@ static struct PVEBackupState {
ProxmoxBackupHandle *pbs;
GList *di_list;
QemuRecMutex backup_mutex;
+ CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
qemu_rec_mutex_init(&backup_state.stat.lock);
qemu_rec_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
// initialize PVEBackupState at startup
@@ -114,16 +133,16 @@ pvebackup_co_dump_pbs_cb(
Error *local_err = NULL;
int pbs_res = -1;
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
if (pvebackup_error_or_canceled()) {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
return -1;
}
pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
if (pbs_res < 0) {
pvebackup_propagate_error(local_err);
@@ -149,7 +168,6 @@ pvebackup_co_dump_vma_cb(
const unsigned char *buf = pbuf;
PVEBackupDevInfo *di = opaque;
-
int ret = -1;
assert(backup_state.vmaw);
@@ -167,16 +185,16 @@ pvebackup_co_dump_vma_cb(
}
while (remaining > 0) {
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
// avoid deadlock if job is cancelled
if (pvebackup_error_or_canceled()) {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
return -1;
}
size_t zero_bytes = 0;
ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
++cluster_num;
if (buf) {
@@ -203,12 +221,11 @@ pvebackup_co_dump_vma_cb(
return size;
}
+// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_cleanup(void *unused)
{
assert(qemu_in_coroutine());
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
-
qemu_rec_mutex_lock(&backup_state.stat.lock);
backup_state.stat.end_time = time(NULL);
qemu_rec_mutex_unlock(&backup_state.stat.lock);
@@ -239,9 +256,9 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
}
+// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_complete_stream(void *opaque)
{
PVEBackupDevInfo *di = opaque;
@@ -295,6 +312,8 @@ static void pvebackup_complete_cb(void *opaque, int ret)
static void pvebackup_cancel(void)
{
+ assert(!qemu_in_coroutine());
+
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);
@@ -348,6 +367,7 @@ void qmp_backup_cancel(Error **errp)
pvebackup_cancel();
}
+// assumes the caller holds backup_mutex
static int coroutine_fn pvebackup_co_add_config(
const char *file,
const char *name,
@@ -431,9 +451,9 @@ static void pvebackup_run_next_job(void)
}
}
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
+
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex);
}
static bool create_backup_jobs(void) {
@@ -520,6 +540,7 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;
+// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());
@@ -543,11 +564,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *config_name = "qemu-server.conf";
const char *firewall_name = "qemu-server.fw";
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
-
if (backup_state.di_list) {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
return;
}
@@ -792,8 +810,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.di_list = di_list;
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
uuid_info = g_malloc0(sizeof(*uuid_info));
uuid_info->UUID = uuid_str;
@@ -836,8 +852,6 @@ err:
rmdir(backup_dir);
}
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
-
task->result = NULL;
return;
}
@@ -881,13 +895,16 @@ UuidInfo *qmp_backup(
.errp = errp,
};
+ qemu_rec_mutex_lock(&backup_state.backup_mutex);
+
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
create_backup_jobs();
qemu_rec_mutex_unlock(&backup_state.backup_mutex);
pvebackup_run_next_job();
+ } else {
+ qemu_rec_mutex_unlock(&backup_state.backup_mutex);
}
return task.result;

View File

@ -1,227 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Fri, 17 Apr 2020 08:57:48 +0200
Subject: [PATCH] PVE Backup: use QemuMutex instead of QemuRecMutex
We acquire/release all mutexes outside coroutines now, so we can now
correctly use a normal mutex.
---
pve-backup.c | 58 ++++++++++++++++++++++++++--------------------------
1 file changed, 29 insertions(+), 29 deletions(-)
diff --git a/pve-backup.c b/pve-backup.c
index dddf430399..bb917ee972 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -31,7 +31,7 @@
static struct PVEBackupState {
struct {
// Everithing accessed from qmp_backup_query command is protected using lock
- QemuRecMutex lock;
+ QemuMutex lock;
Error *error;
time_t start_time;
time_t end_time;
@@ -46,14 +46,14 @@ static struct PVEBackupState {
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
- QemuRecMutex backup_mutex;
+ QemuMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;
static void pvebackup_init(void)
{
- qemu_rec_mutex_init(&backup_state.stat.lock);
- qemu_rec_mutex_init(&backup_state.backup_mutex);
+ qemu_mutex_init(&backup_state.stat.lock);
+ qemu_mutex_init(&backup_state.backup_mutex);
qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}
@@ -91,26 +91,26 @@ lookup_active_block_job(PVEBackupDevInfo *di)
static void pvebackup_propagate_error(Error *err)
{
- qemu_rec_mutex_lock(&backup_state.stat.lock);
+ qemu_mutex_lock(&backup_state.stat.lock);
error_propagate(&backup_state.stat.error, err);
- qemu_rec_mutex_unlock(&backup_state.stat.lock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
}
static bool pvebackup_error_or_canceled(void)
{
- qemu_rec_mutex_lock(&backup_state.stat.lock);
+ qemu_mutex_lock(&backup_state.stat.lock);
bool error_or_canceled = !!backup_state.stat.error;
- qemu_rec_mutex_unlock(&backup_state.stat.lock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
return error_or_canceled;
}
static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
{
- qemu_rec_mutex_lock(&backup_state.stat.lock);
+ qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.zero_bytes += zero_bytes;
backup_state.stat.transferred += transferred;
- qemu_rec_mutex_unlock(&backup_state.stat.lock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
}
// This may get called from multiple coroutines in multiple io-threads
@@ -226,9 +226,9 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
{
assert(qemu_in_coroutine());
- qemu_rec_mutex_lock(&backup_state.stat.lock);
+ qemu_mutex_lock(&backup_state.stat.lock);
backup_state.stat.end_time = time(NULL);
- qemu_rec_mutex_unlock(&backup_state.stat.lock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
if (backup_state.vmaw) {
Error *local_err = NULL;
@@ -284,7 +284,7 @@ static void pvebackup_complete_cb(void *opaque, int ret)
PVEBackupDevInfo *di = opaque;
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_mutex_lock(&backup_state.backup_mutex);
di->completed = true;
@@ -305,7 +305,7 @@ static void pvebackup_complete_cb(void *opaque, int ret)
g_free(di);
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
pvebackup_run_next_job();
}
@@ -318,7 +318,7 @@ static void pvebackup_cancel(void)
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_mutex_lock(&backup_state.backup_mutex);
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -329,13 +329,13 @@ static void pvebackup_cancel(void)
proxmox_backup_abort(backup_state.pbs, "backup canceled");
}
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
for(;;) {
BlockJob *next_job = NULL;
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_mutex_lock(&backup_state.backup_mutex);
GList *l = backup_state.di_list;
while (l) {
@@ -349,7 +349,7 @@ static void pvebackup_cancel(void)
}
}
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
if (next_job) {
AioContext *aio_context = next_job->job.aio_context;
@@ -423,7 +423,7 @@ static void pvebackup_run_next_job(void)
{
assert(!qemu_in_coroutine());
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_mutex_lock(&backup_state.backup_mutex);
GList *l = backup_state.di_list;
while (l) {
@@ -433,7 +433,7 @@ static void pvebackup_run_next_job(void)
BlockJob *job = lookup_active_block_job(di);
if (job) {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
AioContext *aio_context = job->job.aio_context;
aio_context_acquire(aio_context);
@@ -453,7 +453,7 @@ static void pvebackup_run_next_job(void)
block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
}
static bool create_backup_jobs(void) {
@@ -778,7 +778,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
/* initialize global backup_state now */
- qemu_rec_mutex_lock(&backup_state.stat.lock);
+ qemu_mutex_lock(&backup_state.stat.lock);
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
@@ -801,7 +801,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
- qemu_rec_mutex_unlock(&backup_state.stat.lock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
@@ -895,16 +895,16 @@ UuidInfo *qmp_backup(
.errp = errp,
};
- qemu_rec_mutex_lock(&backup_state.backup_mutex);
+ qemu_mutex_lock(&backup_state.backup_mutex);
block_on_coroutine_fn(pvebackup_co_prepare, &task);
if (*errp == NULL) {
create_backup_jobs();
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
pvebackup_run_next_job();
} else {
- qemu_rec_mutex_unlock(&backup_state.backup_mutex);
+ qemu_mutex_unlock(&backup_state.backup_mutex);
}
return task.result;
@@ -914,11 +914,11 @@ BackupStatus *qmp_query_backup(Error **errp)
{
BackupStatus *info = g_malloc0(sizeof(*info));
- qemu_rec_mutex_lock(&backup_state.stat.lock);
+ qemu_mutex_lock(&backup_state.stat.lock);
if (!backup_state.stat.start_time) {
/* not started, return {} */
- qemu_rec_mutex_unlock(&backup_state.stat.lock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
return info;
}
@@ -955,7 +955,7 @@ BackupStatus *qmp_query_backup(Error **errp)
info->has_transferred = true;
info->transferred = backup_state.stat.transferred;
- qemu_rec_mutex_unlock(&backup_state.stat.lock);
+ qemu_mutex_unlock(&backup_state.stat.lock);
return info;
}

View File

@ -1,111 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Thu, 30 Apr 2020 15:55:37 +0200
Subject: [PATCH] move savevm-async back into a coroutine
Move qemu_savevm_state_{header,setup} into the main loop and
the rest of the iteration into a coroutine. The former need
to lock the iothread (and we can't unlock it in the
coroutine), and the latter can't deal with being in a
separate thread, so a coroutine it must be.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
savevm-async.c | 28 +++++++++-------------------
1 file changed, 9 insertions(+), 19 deletions(-)
diff --git a/savevm-async.c b/savevm-async.c
index a38b15d652..af865b9a0a 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -51,7 +51,7 @@ static struct SnapshotState {
QEMUFile *file;
int64_t total_time;
QEMUBH *cleanup_bh;
- QemuThread thread;
+ Coroutine *co;
} snap_state;
SaveVMInfo *qmp_query_savevm(Error **errp)
@@ -201,11 +201,9 @@ static void process_savevm_cleanup(void *opaque)
int ret;
qemu_bh_delete(snap_state.cleanup_bh);
snap_state.cleanup_bh = NULL;
+ snap_state.co = NULL;
qemu_savevm_state_cleanup();
- qemu_mutex_unlock_iothread();
- qemu_thread_join(&snap_state.thread);
- qemu_mutex_lock_iothread();
ret = save_snapshot_cleanup();
if (ret < 0) {
save_snapshot_error("save_snapshot_cleanup error %d", ret);
@@ -221,18 +219,13 @@ static void process_savevm_cleanup(void *opaque)
}
}
-static void *process_savevm_thread(void *opaque)
+static void process_savevm_coro(void *opaque)
{
int ret;
int64_t maxlen;
MigrationState *ms = migrate_get_current();
- rcu_register_thread();
-
- qemu_savevm_state_header(snap_state.file);
- qemu_savevm_state_setup(snap_state.file);
ret = qemu_file_get_error(snap_state.file);
-
if (ret < 0) {
save_snapshot_error("qemu_savevm_state_setup failed");
goto out;
@@ -247,16 +240,13 @@ static void *process_savevm_thread(void *opaque)
maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
- qemu_mutex_lock_iothread();
ret = qemu_savevm_state_iterate(snap_state.file, false);
if (ret < 0) {
save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
break;
}
- qemu_mutex_unlock_iothread();
DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
} else {
- qemu_mutex_lock_iothread();
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
ret = global_state_store();
if (ret) {
@@ -285,16 +275,12 @@ static void *process_savevm_thread(void *opaque)
}
qemu_bh_schedule(snap_state.cleanup_bh);
- qemu_mutex_unlock_iothread();
out:
/* set migration state accordingly and clear soon-to-be stale file */
migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
ms->to_dst_file = NULL;
-
- rcu_unregister_thread();
- return NULL;
}
void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
@@ -373,8 +359,12 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
snap_state.state = SAVE_STATE_ACTIVE;
snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
- qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
- NULL, QEMU_THREAD_JOINABLE);
+ snap_state.co = qemu_coroutine_create(&process_savevm_coro, NULL);
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_header(snap_state.file);
+ qemu_savevm_state_setup(snap_state.file);
+ qemu_mutex_lock_iothread();
+ aio_co_schedule(iohandler_get_aio_context(), snap_state.co);
return;

View File

@ -1,188 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 27 May 2020 11:33:19 +0200
Subject: [PATCH] savevm-async: move more code to cleanup and rename to
finalize
process_savevm_cleanup is renamed to process_savevm_finalize to
accomodate more code that is not all cleanup related.
The benefit of this is that it allows us to call functions which need to
run in the main AIOContext directly. It doesn't majorly affect snapshot
performance, since the first instruction that is moved stops the VM,
so the downtime stays about the same.
The target bdrv is additionally moved to the IOHandler context before
process_savevm_co to make sure the coroutine can call functions that
require it to own the bdrv's context. process_savevm_finalize then moves
it back to the main context to run its part.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
savevm-async.c | 87 +++++++++++++++++++++++++++++---------------------
1 file changed, 51 insertions(+), 36 deletions(-)
diff --git a/savevm-async.c b/savevm-async.c
index c3fe741c38..2894c94233 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -50,7 +50,7 @@ static struct SnapshotState {
int saved_vm_running;
QEMUFile *file;
int64_t total_time;
- QEMUBH *cleanup_bh;
+ QEMUBH *finalize_bh;
Coroutine *co;
} snap_state;
@@ -196,12 +196,42 @@ static const QEMUFileOps block_file_ops = {
.close = block_state_close,
};
-static void process_savevm_cleanup(void *opaque)
+static void process_savevm_finalize(void *opaque)
{
int ret;
- qemu_bh_delete(snap_state.cleanup_bh);
- snap_state.cleanup_bh = NULL;
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+ MigrationState *ms = migrate_get_current();
+
+ qemu_bh_delete(snap_state.finalize_bh);
+ snap_state.finalize_bh = NULL;
snap_state.co = NULL;
+
+ /* We need to own the target bdrv's context for the following functions,
+ * so move it back. It can stay in the main context and live out its live
+ * there, since we're done with it after this method ends anyway.
+ */
+ aio_context_acquire(iohandler_ctx);
+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+ aio_context_release(iohandler_ctx);
+
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) {
+ save_snapshot_error("vm_stop_force_state error %d", ret);
+ }
+
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+
+ DPRINTF("state saving complete\n");
+
+ /* clear migration state */
+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+ ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ms->to_dst_file = NULL;
+
qemu_savevm_state_cleanup();
ret = save_snapshot_cleanup();
@@ -219,16 +249,15 @@ static void process_savevm_cleanup(void *opaque)
}
}
-static void process_savevm_coro(void *opaque)
+static void coroutine_fn process_savevm_co(void *opaque)
{
int ret;
int64_t maxlen;
- MigrationState *ms = migrate_get_current();
ret = qemu_file_get_error(snap_state.file);
if (ret < 0) {
save_snapshot_error("qemu_savevm_state_setup failed");
- goto out;
+ return;
}
while (snap_state.state == SAVE_STATE_ACTIVE) {
@@ -245,7 +274,7 @@ static void process_savevm_coro(void *opaque)
save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
break;
}
- DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
} else {
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
ret = global_state_store();
@@ -253,40 +282,20 @@ static void process_savevm_coro(void *opaque)
save_snapshot_error("global_state_store error %d", ret);
break;
}
- ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
- if (ret < 0) {
- save_snapshot_error("vm_stop_force_state error %d", ret);
- break;
- }
- DPRINTF("savevm inerate finished\n");
- /* upstream made the return value here inconsistent
- * (-1 instead of 'ret' in one case and 0 after flush which can
- * still set a file error...)
- */
- (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
- ret = qemu_file_get_error(snap_state.file);
- if (ret < 0) {
- save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
- break;
- }
- DPRINTF("save complete\n");
+
+ DPRINTF("savevm iterate complete\n");
break;
}
}
- qemu_bh_schedule(snap_state.cleanup_bh);
-
-out:
- /* set migration state accordingly and clear soon-to-be stale file */
- migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
- ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
- ms->to_dst_file = NULL;
+ qemu_bh_schedule(snap_state.finalize_bh);
}
void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
{
Error *local_err = NULL;
MigrationState *ms = migrate_get_current();
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
@@ -347,7 +356,7 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
/*
* qemu_savevm_* paths use migration code and expect a migration state.
- * State is cleared in process_savevm_thread, but has to be initialized
+ * State is cleared in process_savevm_co, but has to be initialized
* here (blocking main thread, from QMP) to avoid race conditions.
*/
migrate_init(ms);
@@ -358,13 +367,19 @@ void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
blk_op_block_all(snap_state.target, snap_state.blocker);
snap_state.state = SAVE_STATE_ACTIVE;
- snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
- snap_state.co = qemu_coroutine_create(&process_savevm_coro, NULL);
+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
qemu_mutex_unlock_iothread();
qemu_savevm_state_header(snap_state.file);
qemu_savevm_state_setup(snap_state.file);
qemu_mutex_lock_iothread();
- aio_co_schedule(iohandler_get_aio_context(), snap_state.co);
+
+ /* Async processing from here on out happens in iohandler context, so let
+ * the target bdrv have its home there.
+ */
+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+ aio_co_schedule(iohandler_ctx, snap_state.co);
return;

View File

@ -1,84 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 27 May 2020 11:33:20 +0200
Subject: [PATCH] util/async: Add aio_co_reschedule_self()
Add a function that can be used to move the currently running coroutine
to a different AioContext (and therefore potentially a different
thread).
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/aio.h | 10 ++++++++++
util/async.c | 30 ++++++++++++++++++++++++++++++
2 files changed, 40 insertions(+)
diff --git a/include/block/aio.h b/include/block/aio.h
index 62ed954344..d5399c67d6 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
+#include "qemu/coroutine.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -654,6 +655,15 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
*/
void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
+/**
+ * aio_co_reschedule_self:
+ * @new_ctx: the new context
+ *
+ * Move the currently running coroutine to new_ctx. If the coroutine is already
+ * running in new_ctx, do nothing.
+ */
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
+
/**
* aio_co_wake:
* @co: the coroutine
diff --git a/util/async.c b/util/async.c
index 3165a28f2f..4eba1e6f1b 100644
--- a/util/async.c
+++ b/util/async.c
@@ -558,6 +558,36 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co)
aio_context_unref(ctx);
}
+typedef struct AioCoRescheduleSelf {
+ Coroutine *co;
+ AioContext *new_ctx;
+} AioCoRescheduleSelf;
+
+static void aio_co_reschedule_self_bh(void *opaque)
+{
+ AioCoRescheduleSelf *data = opaque;
+ aio_co_schedule(data->new_ctx, data->co);
+}
+
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
+{
+ AioContext *old_ctx = qemu_get_current_aio_context();
+
+ if (old_ctx != new_ctx) {
+ AioCoRescheduleSelf data = {
+ .co = qemu_coroutine_self(),
+ .new_ctx = new_ctx,
+ };
+ /*
+ * We can't directly schedule the coroutine in the target context
+ * because this would be racy: The other thread could try to enter the
+ * coroutine before it has yielded in this one.
+ */
+ aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
+ qemu_coroutine_yield();
+ }
+}
+
void aio_co_wake(struct Coroutine *co)
{
AioContext *ctx;

View File

@ -1,58 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 27 May 2020 11:33:21 +0200
Subject: [PATCH] savevm-async: flush IOThread-drives async before entering
blocking part
By flushing all drives where its possible to so before entering the
blocking part (where the VM is stopped), we can reduce the time spent in
said part for every disk that has an IOThread (other drives cannot be
flushed async anyway).
Suggested-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
savevm-async.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/savevm-async.c b/savevm-async.c
index 2894c94233..4ce83a0691 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -253,6 +253,8 @@ static void coroutine_fn process_savevm_co(void *opaque)
{
int ret;
int64_t maxlen;
+ BdrvNextIterator it;
+ BlockDriverState *bs = NULL;
ret = qemu_file_get_error(snap_state.file);
if (ret < 0) {
@@ -288,6 +290,27 @@ static void coroutine_fn process_savevm_co(void *opaque)
}
}
+ /* If a drive runs in an IOThread we can flush it async, and only
+ * need to sync-flush whatever IO happens between now and
+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+ * so move there now and after every flush.
+ */
+ aio_co_reschedule_self(qemu_get_aio_context());
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+ if (bs == blk_bs(snap_state.target)) {
+ continue;
+ }
+
+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
+ if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+ aio_co_reschedule_self(bs_ctx);
+ bdrv_flush(bs);
+ aio_co_reschedule_self(qemu_get_aio_context());
+ }
+ }
+
qemu_bh_schedule(snap_state.finalize_bh);
}

View File

@ -1,80 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Wed, 27 May 2020 11:33:22 +0200
Subject: [PATCH] savevm-async: add debug timing prints
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[ Thomas: guard variable declaration by DEBUG #ifdef ]
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
savevm-async.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/savevm-async.c b/savevm-async.c
index 4ce83a0691..0388cebbe9 100644
--- a/savevm-async.c
+++ b/savevm-async.c
@@ -202,6 +202,10 @@ static void process_savevm_finalize(void *opaque)
AioContext *iohandler_ctx = iohandler_get_aio_context();
MigrationState *ms = migrate_get_current();
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
qemu_bh_delete(snap_state.finalize_bh);
snap_state.finalize_bh = NULL;
snap_state.co = NULL;
@@ -226,6 +230,8 @@ static void process_savevm_finalize(void *opaque)
}
DPRINTF("state saving complete\n");
+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
/* clear migration state */
migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
@@ -247,6 +253,9 @@ static void process_savevm_finalize(void *opaque)
vm_start();
snap_state.saved_vm_running = false;
}
+
+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
}
static void coroutine_fn process_savevm_co(void *opaque)
@@ -256,6 +265,10 @@ static void coroutine_fn process_savevm_co(void *opaque)
BdrvNextIterator it;
BlockDriverState *bs = NULL;
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
ret = qemu_file_get_error(snap_state.file);
if (ret < 0) {
save_snapshot_error("qemu_savevm_state_setup failed");
@@ -290,6 +303,12 @@ static void coroutine_fn process_savevm_co(void *opaque)
}
}
+ DPRINTF("timing: process_savevm_co took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
/* If a drive runs in an IOThread we can flush it async, and only
* need to sync-flush whatever IO happens between now and
* vm_stop_force_state. bdrv_next can only be called from main AioContext,
@@ -311,6 +330,9 @@ static void coroutine_fn process_savevm_co(void *opaque)
}
}
+ DPRINTF("timing: async flushing took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
qemu_bh_schedule(snap_state.finalize_bh);
}

View File

@ -1,44 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 22 Jun 2020 14:54:00 +0200
Subject: [PATCH] Add some qemu_vfree statements to prevent memory leaks
Suggested-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
vma-writer.c | 2 ++
vma.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/vma-writer.c b/vma-writer.c
index fe86b18a60..06cbc02b1e 100644
--- a/vma-writer.c
+++ b/vma-writer.c
@@ -767,5 +767,7 @@ void vma_writer_destroy(VmaWriter *vmaw)
g_checksum_free(vmaw->md5csum);
}
+ qemu_vfree(vmaw->headerbuf);
+ qemu_vfree(vmaw->outbuf);
g_free(vmaw);
}
diff --git a/vma.c b/vma.c
index a82752448a..2eea2fc281 100644
--- a/vma.c
+++ b/vma.c
@@ -565,6 +565,7 @@ out:
g_warning("vma_writer_close failed %s", error_get_pretty(err));
}
}
+ qemu_vfree(buf);
}
static int create_archive(int argc, char **argv)
@@ -732,6 +733,7 @@ static int create_archive(int argc, char **argv)
g_error("creating vma archive failed");
}
+ vma_writer_destroy(vmaw);
return 0;
}

View File

@ -1,80 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 22 Jun 2020 14:54:02 +0200
Subject: [PATCH] Fix backup for not 64k-aligned storages
Zero out clusters after the end of the device, this makes restore handle
it correctly (even if it may try to write those zeros, it won't fail and
just ignore the out-of-bounds write to disk).
For not even 4k-aligned disks, there is a potential buffer overrun in
the memcpy (since always 4k are copied), which causes host-memory
leakage into VMA archives. Fix this by always zeroing the affected area
in the output-buffer.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Reported-by: Roland Kammerer <roland.kammerer@linbit.com>
Suggested-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Tested-by: Roland Kammerer <roland.kammerer@linbit.com>
---
vma-writer.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/vma-writer.c b/vma-writer.c
index 06cbc02b1e..f5d2c5d23c 100644
--- a/vma-writer.c
+++ b/vma-writer.c
@@ -633,17 +633,33 @@ vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
+ uint64_t dev_size = vmaw->stream_info[dev_id].size;
uint16_t mask = 0;
if (buf) {
int i;
int bit = 1;
+ uint64_t byte_offset = cluster_num * VMA_CLUSTER_SIZE;
for (i = 0; i < 16; i++) {
const unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
- if (!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
+
+ // Note: If the source is not 64k-aligned, we might reach 4k blocks
+ // after the end of the device. Always mark these as zero in the
+ // mask, so the restore handles them correctly.
+ if (byte_offset < dev_size &&
+ !buffer_is_zero(vmablock, VMA_BLOCK_SIZE))
+ {
mask |= bit;
memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock,
VMA_BLOCK_SIZE);
+
+ // prevent memory leakage on unaligned last block
+ if (byte_offset + VMA_BLOCK_SIZE > dev_size) {
+ uint64_t real_data_in_block = dev_size - byte_offset;
+ memset(vmaw->outbuf + vmaw->outbuf_pos + real_data_in_block,
+ 0, VMA_BLOCK_SIZE - real_data_in_block);
+ }
+
vmaw->outbuf_pos += VMA_BLOCK_SIZE;
} else {
DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
@@ -651,6 +667,7 @@ vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
*zero_bytes += VMA_BLOCK_SIZE;
}
+ byte_offset += VMA_BLOCK_SIZE;
bit = bit << 1;
}
} else {
@@ -676,8 +693,8 @@ vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
if (dev_id != vmaw->vmstate_stream) {
uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
- if (last > vmaw->stream_info[dev_id].size) {
- uint64_t diff = last - vmaw->stream_info[dev_id].size;
+ if (last > dev_size) {
+ uint64_t diff = last - dev_size;
if (diff >= VMA_CLUSTER_SIZE) {
vma_writer_set_error(vmaw, "vma_writer_write: "
"read after last cluster");

55
debian/patches/series vendored
View File

@ -16,37 +16,24 @@ pve/0014-PVE-virtio-balloon-improve-query-balloon.patch
pve/0015-PVE-qapi-modify-query-machines.patch pve/0015-PVE-qapi-modify-query-machines.patch
pve/0016-PVE-qapi-modify-spice-query.patch pve/0016-PVE-qapi-modify-spice-query.patch
pve/0017-PVE-internal-snapshot-async.patch pve/0017-PVE-internal-snapshot-async.patch
pve/0018-PVE-block-add-the-zeroinit-block-driver-filter.patch pve/0018-add-optional-buffer-size-to-QEMUFile.patch
pve/0019-PVE-Add-dummy-id-command-line-parameter.patch pve/0019-PVE-block-add-the-zeroinit-block-driver-filter.patch
pve/0020-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
pve/0021-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch pve/0021-PVE-Config-Revert-target-i386-disable-LINT0-after-re.patch
pve/0022-PVE-savevm-async-kick-AIO-wait-on-block-state-write.patch pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
pve/0023-PVE-move-snapshot-cleanup-into-bottom-half.patch pve/0023-PVE-monitor-disable-oob-capability.patch
pve/0024-PVE-monitor-disable-oob-capability.patch pve/0024-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0025-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch pve/0025-PVE-Allow-version-code-in-machine-type.patch
pve/0026-PVE-Allow-version-code-in-machine-type.patch pve/0026-PVE-Backup-modify-job-api.patch
pve/0027-PVE-Backup-modify-job-api.patch pve/0027-PVE-Backup-add-vma-backup-format-code.patch
pve/0028-PVE-Backup-add-vma-backup-format-code.patch pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
pve/0029-PVE-Backup-add-backup-dump-block-driver.patch pve/0029-PVE-Backup-proxmox-backup-patches-for-qemu.patch
pve/0030-PVE-Backup-proxmox-backup-patches-for-qemu.patch pve/0030-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch
pve/0031-PVE-Backup-aquire-aio_context-before-calling-backup_.patch pve/0031-PVE-Backup-avoid-coroutines-to-fix-AIO-freeze-cleanu.patch
pve/0032-PVE-Backup-pbs-restore-new-command-to-restore-from-p.patch pve/0032-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
pve/0033-PVE-Backup-avoid-coroutines-to-fix-AIO-freeze-cleanu.patch pve/0033-drive-mirror-add-support-for-conditional-and-always-.patch
pve/0034-drive-mirror-add-support-for-sync-bitmap-mode-never.patch pve/0034-mirror-add-check-for-bitmap-mode-without-bitmap.patch
pve/0035-drive-mirror-add-support-for-conditional-and-always-.patch pve/0035-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch
pve/0036-mirror-add-check-for-bitmap-mode-without-bitmap.patch pve/0036-iotests-add-test-for-bitmap-mirror.patch
pve/0037-mirror-switch-to-bdrv_dirty_bitmap_merge_internal.patch pve/0037-mirror-move-some-checks-to-qmp.patch
pve/0038-iotests-add-test-for-bitmap-mirror.patch pve/0038-PVE-Backup-Add-dirty-bitmap-tracking-for-incremental.patch
pve/0039-mirror-move-some-checks-to-qmp.patch
pve/0040-PVE-savevm-async-set-up-migration-state.patch
pve/0041-PVE-Backup-avoid-use-QemuRecMutex-inside-coroutines.patch
pve/0042-PVE-Backup-use-QemuMutex-instead-of-QemuRecMutex.patch
pve/0043-move-savevm-async-back-into-a-coroutine.patch
pve/0044-add-optional-buffer-size-to-QEMUFile.patch
pve/0045-savevm-async-move-more-code-to-cleanup-and-rename-to.patch
pve/0046-util-async-Add-aio_co_reschedule_self.patch
pve/0047-savevm-async-flush-IOThread-drives-async-before-ente.patch
pve/0048-savevm-async-add-debug-timing-prints.patch
pve/0049-Add-some-qemu_vfree-statements-to-prevent-memory-lea.patch
pve/0050-Fix-backup-for-not-64k-aligned-storages.patch
pve/0051-PVE-Backup-Add-dirty-bitmap-tracking-for-incremental.patch