pve-qemu-qoup/debian/patches/pve/0017-PVE-internal-snapshot-async.patch
Stefan Reiter 60ae3775bf update to QEMU 5.1
No major semantic changes, mostly just deprecations and changed function
signatures. Drop the extra/ patches, as they have been applied upstream.

The added extra/ patch was accepted upstream[0] but has not been picked
up for 5.1. It is required for non-4M aligned backups to work with PBS.

[0] https://lists.gnu.org/archive/html/qemu-devel/2020-08/msg01671.html

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-08-20 13:40:36 +02:00

975 lines
29 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:46 +0200
Subject: [PATCH] PVE: internal snapshot async
Truncate at 1024 boundary (Fabian Ebner will send a patch for stable)
Put qemu_savevm_state_{header,setup} into the main loop and the rest
of the iteration into a coroutine. The former need to lock the
iothread (and we can't unlock it in the coroutine), and the latter
can't deal with being in a separate thread, so a coroutine it must
be.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
Makefile.objs | 1 +
hmp-commands-info.hx | 13 +
hmp-commands.hx | 32 +++
include/block/aio.h | 10 +
include/migration/snapshot.h | 1 +
include/monitor/hmp.h | 5 +
monitor/hmp-cmds.c | 57 ++++
qapi/migration.json | 34 +++
qapi/misc.json | 32 +++
qemu-options.hx | 12 +
savevm-async.c | 542 +++++++++++++++++++++++++++++++++++
softmmu/vl.c | 10 +
util/async.c | 30 ++
13 files changed, 779 insertions(+)
create mode 100644 savevm-async.c
diff --git a/Makefile.objs b/Makefile.objs
index d22b3b45d7..a1307c12a8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -46,6 +46,7 @@ common-obj-y += bootdevice.o iothread.o
common-obj-y += dump/
common-obj-y += job-qmp.o
common-obj-y += monitor/
+common-obj-y += savevm-async.o
common-obj-y += net/
common-obj-y += qdev-monitor.o
common-obj-$(CONFIG_WIN32) += os-win32.o
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 30209e3903..ae8ff21789 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -580,6 +580,19 @@ SRST
Show current migration xbzrle cache size.
ERST
+ {
+ .name = "savevm",
+ .args_type = "",
+ .params = "",
+ .help = "show savevm status",
+ .cmd = hmp_info_savevm,
+ },
+
+SRST
+ ``info savevm``
+ Show savevm status.
+ERST
+
{
.name = "balloon",
.args_type = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 60f395c276..2b58ac4a1c 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1829,3 +1829,35 @@ ERST
.flags = "p",
},
+
+ {
+ .name = "savevm-start",
+ .args_type = "statefile:s?",
+ .params = "[statefile]",
+ .help = "Prepare for snapshot and halt VM. Save VM state to statefile.",
+ .cmd = hmp_savevm_start,
+ },
+
+ {
+ .name = "snapshot-drive",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Create internal snapshot.",
+ .cmd = hmp_snapshot_drive,
+ },
+
+ {
+ .name = "delete-drive-snapshot",
+ .args_type = "device:s,name:s",
+ .params = "device name",
+ .help = "Delete internal snapshot.",
+ .cmd = hmp_delete_drive_snapshot,
+ },
+
+ {
+ .name = "savevm-end",
+ .args_type = "",
+ .params = "",
+ .help = "Resume VM after snaphot.",
+ .cmd = hmp_savevm_end,
+ },
diff --git a/include/block/aio.h b/include/block/aio.h
index b2f703fa3f..c37617b404 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
#ifdef CONFIG_LINUX_IO_URING
#include <liburing.h>
#endif
+#include "qemu/coroutine.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
@@ -654,6 +655,15 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
*/
void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
+/**
+ * aio_co_reschedule_self:
+ * @new_ctx: the new context
+ *
+ * Move the currently running coroutine to new_ctx. If the coroutine is already
+ * running in new_ctx, do nothing.
+ */
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
+
/**
* aio_co_wake:
* @co: the coroutine
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index c85b6ec75b..4411b7121d 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -17,5 +17,6 @@
int save_snapshot(const char *name, Error **errp);
int load_snapshot(const char *name, Error **errp);
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
#endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index c986cfd28b..243952d32f 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
void hmp_info_uuid(Monitor *mon, const QDict *qdict);
void hmp_info_chardev(Monitor *mon, const QDict *qdict);
void hmp_info_mice(Monitor *mon, const QDict *qdict);
+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
void hmp_info_migrate(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -83,6 +84,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
void hmp_netdev_del(Monitor *mon, const QDict *qdict);
void hmp_getfd(Monitor *mon, const QDict *qdict);
void hmp_closefd(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
void hmp_sendkey(Monitor *mon, const QDict *qdict);
void hmp_screendump(Monitor *mon, const QDict *qdict);
void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 6e26ea2cd0..280bb447a6 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1904,6 +1904,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+ qmp_savevm_start(statefile != NULL, statefile, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_snapshot_drive(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+ const char *name = qdict_get_str(qdict, "name");
+ const char *device = qdict_get_str(qdict, "device");
+
+ qmp_delete_drive_snapshot(device, name, &errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+ Error *errp = NULL;
+
+ qmp_savevm_end(&errp);
+ hmp_handle_error(mon, errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+ SaveVMInfo *info;
+ info = qmp_query_savevm(NULL);
+
+ if (info->has_status) {
+ monitor_printf(mon, "savevm status: %s\n", info->status);
+ monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+ info->total_time);
+ } else {
+ monitor_printf(mon, "savevm status: not running\n");
+ }
+ if (info->has_bytes) {
+ monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+ }
+ if (info->has_error) {
+ monitor_printf(mon, "Error: %s\n", info->error);
+ }
+}
+
void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
{
IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index ea53b23dca..c556257544 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -225,6 +225,40 @@
'*compression': 'CompressionStats',
'*socket-address': ['SocketAddress'] } }
+##
+# @SaveVMInfo:
+#
+# Information about current migration process.
+#
+# @status: string describing the current savevm status.
+# This can be 'active', 'completed', 'failed'.
+# If this field is not returned, no savevm process
+# has been initiated
+#
+# @error: string containing error message is status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+# If savevm has ended, it returns the total save time
+#
+# @bytes: total amount of data transfered
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+ 'data': {'*status': 'str', '*error': 'str',
+ '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
##
# @query-migrate:
#
diff --git a/qapi/misc.json b/qapi/misc.json
index 44b1fb6fa7..9895899f8b 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -1168,6 +1168,38 @@
##
{ 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete a drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end' }
+
##
# @AcpiTableOptions:
#
diff --git a/qemu-options.hx b/qemu-options.hx
index 708583b4ce..d32995cc50 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3866,6 +3866,18 @@ SRST
Start right away with a saved state (``loadvm`` in monitor)
ERST
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+ "-loadstate file\n" \
+ " start right away with a saved state\n",
+ QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+ Start right away with a saved state. This option does not rollback
+ disk state like @code{loadvm}, so user must make sure that disk
+ have correct state. @var{file} can be any valid device URL. See the section
+ for "Device URL Syntax" for more information.
+ERST
+
#ifndef _WIN32
DEF("daemonize", 0, QEMU_OPTION_daemonize, \
"-daemonize daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/savevm-async.c b/savevm-async.c
new file mode 100644
index 0000000000..f918e18dce
--- /dev/null
+++ b/savevm-async.c
@@ -0,0 +1,542 @@
+#include "qemu/osdep.h"
+#include "migration/migration.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+/* used while emulated sync operation in progress */
+#define NOT_DONE -EINPROGRESS
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+ do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+ do { } while (0)
+#endif
+
+enum {
+ SAVE_STATE_DONE,
+ SAVE_STATE_ERROR,
+ SAVE_STATE_ACTIVE,
+ SAVE_STATE_COMPLETED,
+ SAVE_STATE_CANCELLED
+};
+
+
+static struct SnapshotState {
+ BlockBackend *target;
+ size_t bs_pos;
+ int state;
+ Error *error;
+ Error *blocker;
+ int saved_vm_running;
+ QEMUFile *file;
+ int64_t total_time;
+ QEMUBH *finalize_bh;
+ Coroutine *co;
+} snap_state;
+
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+ SaveVMInfo *info = g_malloc0(sizeof(*info));
+ struct SnapshotState *s = &snap_state;
+
+ if (s->state != SAVE_STATE_DONE) {
+ info->has_bytes = true;
+ info->bytes = s->bs_pos;
+ switch (s->state) {
+ case SAVE_STATE_ERROR:
+ info->has_status = true;
+ info->status = g_strdup("failed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
+ if (s->error) {
+ info->has_error = true;
+ info->error = g_strdup(error_get_pretty(s->error));
+ }
+ break;
+ case SAVE_STATE_ACTIVE:
+ info->has_status = true;
+ info->status = g_strdup("active");
+ info->has_total_time = true;
+ info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+ - s->total_time;
+ break;
+ case SAVE_STATE_COMPLETED:
+ info->has_status = true;
+ info->status = g_strdup("completed");
+ info->has_total_time = true;
+ info->total_time = s->total_time;
+ break;
+ }
+ }
+
+ return info;
+}
+
+static int save_snapshot_cleanup(void)
+{
+ int ret = 0;
+
+ DPRINTF("save_snapshot_cleanup\n");
+
+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+ snap_state.total_time;
+
+ if (snap_state.file) {
+ ret = qemu_fclose(snap_state.file);
+ }
+
+ if (snap_state.target) {
+ /* try to truncate, but ignore errors (will fail on block devices).
+ * note1: bdrv_read() need whole blocks, so we need to round up
+ * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+ */
+ size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+ blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+ blk_op_unblock_all(snap_state.target, snap_state.blocker);
+ error_free(snap_state.blocker);
+ snap_state.blocker = NULL;
+ blk_unref(snap_state.target);
+ snap_state.target = NULL;
+ }
+
+ return ret;
+}
+
+static void save_snapshot_error(const char *fmt, ...)
+{
+ va_list ap;
+ char *msg;
+
+ va_start(ap, fmt);
+ msg = g_strdup_vprintf(fmt, ap);
+ va_end(ap);
+
+ DPRINTF("save_snapshot_error: %s\n", msg);
+
+ if (!snap_state.error) {
+ error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
+ }
+
+ g_free (msg);
+
+ snap_state.state = SAVE_STATE_ERROR;
+}
+
+static int block_state_close(void *opaque, Error **errp)
+{
+ snap_state.file = NULL;
+ return blk_flush(snap_state.target);
+}
+
+typedef struct BlkRwCo {
+ int64_t offset;
+ QEMUIOVector *qiov;
+ ssize_t ret;
+} BlkRwCo;
+
+static void coroutine_fn block_state_write_entry(void *opaque) {
+ BlkRwCo *rwco = opaque;
+ rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
+ rwco->qiov, 0);
+ aio_wait_kick();
+}
+
+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos, Error **errp)
+{
+ QEMUIOVector qiov;
+ BlkRwCo rwco;
+
+ assert(pos == snap_state.bs_pos);
+ rwco = (BlkRwCo) {
+ .offset = pos,
+ .qiov = &qiov,
+ .ret = NOT_DONE,
+ };
+
+ qemu_iovec_init_external(&qiov, iov, iovcnt);
+
+ if (qemu_in_coroutine()) {
+ block_state_write_entry(&rwco);
+ } else {
+ Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
+ bdrv_coroutine_enter(blk_bs(snap_state.target), co);
+ BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
+ }
+ if (rwco.ret < 0) {
+ return rwco.ret;
+ }
+
+ snap_state.bs_pos += qiov.size;
+ return qiov.size;
+}
+
+static const QEMUFileOps block_file_ops = {
+ .writev_buffer = block_state_writev_buffer,
+ .close = block_state_close,
+};
+
+static void process_savevm_finalize(void *opaque)
+{
+ int ret;
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+ MigrationState *ms = migrate_get_current();
+
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+ qemu_bh_delete(snap_state.finalize_bh);
+ snap_state.finalize_bh = NULL;
+ snap_state.co = NULL;
+
+ /* We need to own the target bdrv's context for the following functions,
+ * so move it back. It can stay in the main context and live out its live
+ * there, since we're done with it after this method ends anyway.
+ */
+ aio_context_acquire(iohandler_ctx);
+ blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+ aio_context_release(iohandler_ctx);
+
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) {
+ save_snapshot_error("vm_stop_force_state error %d", ret);
+ }
+
+ (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ }
+
+ DPRINTF("state saving complete\n");
+ DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+ /* clear migration state */
+ migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+ ret ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+ ms->to_dst_file = NULL;
+
+ qemu_savevm_state_cleanup();
+
+ ret = save_snapshot_cleanup();
+ if (ret < 0) {
+ save_snapshot_error("save_snapshot_cleanup error %d", ret);
+ } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_COMPLETED;
+ } else {
+ save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+ snap_state.state);
+ }
+ if (snap_state.saved_vm_running) {
+ vm_start();
+ snap_state.saved_vm_running = false;
+ }
+
+ DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+}
+
+static void coroutine_fn process_savevm_co(void *opaque)
+{
+ int ret;
+ int64_t maxlen;
+ BdrvNextIterator it;
+ BlockDriverState *bs = NULL;
+
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+ ret = qemu_file_get_error(snap_state.file);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_setup failed");
+ return;
+ }
+
+ while (snap_state.state == SAVE_STATE_ACTIVE) {
+ uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
+
+ qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
+ pending_size = pend_precopy + pend_compatible + pend_postcopy;
+
+ maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+
+ if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
+ ret = qemu_savevm_state_iterate(snap_state.file, false);
+ if (ret < 0) {
+ save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+ break;
+ }
+ DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+ } else {
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ ret = global_state_store();
+ if (ret) {
+ save_snapshot_error("global_state_store error %d", ret);
+ break;
+ }
+
+ DPRINTF("savevm iterate complete\n");
+ break;
+ }
+ }
+
+ DPRINTF("timing: process_savevm_co took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+ int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+ /* If a drive runs in an IOThread we can flush it async, and only
+ * need to sync-flush whatever IO happens between now and
+ * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+ * so move there now and after every flush.
+ */
+ aio_co_reschedule_self(qemu_get_aio_context());
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+ if (bs == blk_bs(snap_state.target)) {
+ continue;
+ }
+
+ AioContext *bs_ctx = bdrv_get_aio_context(bs);
+ if (bs_ctx != qemu_get_aio_context()) {
+ DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+ aio_co_reschedule_self(bs_ctx);
+ bdrv_flush(bs);
+ aio_co_reschedule_self(qemu_get_aio_context());
+ }
+ }
+
+ DPRINTF("timing: async flushing took %ld ms\n",
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
+ qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+{
+ Error *local_err = NULL;
+ MigrationState *ms = migrate_get_current();
+ AioContext *iohandler_ctx = iohandler_get_aio_context();
+
+ int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+ if (snap_state.state != SAVE_STATE_DONE) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "VM snapshot already started\n");
+ return;
+ }
+
+ if (migration_is_running(ms->state)) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+ return;
+ }
+
+ if (migrate_use_block()) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "Block migration and snapshots are incompatible");
+ return;
+ }
+
+ /* initialize snapshot info */
+ snap_state.saved_vm_running = runstate_is_running();
+ snap_state.bs_pos = 0;
+ snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ snap_state.blocker = NULL;
+
+ if (snap_state.error) {
+ error_free(snap_state.error);
+ snap_state.error = NULL;
+ }
+
+ if (!has_statefile) {
+ vm_stop(RUN_STATE_SAVE_VM);
+ snap_state.state = SAVE_STATE_COMPLETED;
+ return;
+ }
+
+ if (qemu_savevm_state_blocked(errp)) {
+ return;
+ }
+
+ /* Open the image */
+ QDict *options = NULL;
+ options = qdict_new();
+ qdict_put_str(options, "driver", "raw");
+ snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+ if (!snap_state.target) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+ goto restart;
+ }
+
+ snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+
+ if (!snap_state.file) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+ goto restart;
+ }
+
+ /*
+ * qemu_savevm_* paths use migration code and expect a migration state.
+ * State is cleared in process_savevm_co, but has to be initialized
+ * here (blocking main thread, from QMP) to avoid race conditions.
+ */
+ migrate_init(ms);
+ memset(&ram_counters, 0, sizeof(ram_counters));
+ ms->to_dst_file = snap_state.file;
+
+ error_setg(&snap_state.blocker, "block device is in use by savevm");
+ blk_op_block_all(snap_state.target, snap_state.blocker);
+
+ snap_state.state = SAVE_STATE_ACTIVE;
+ snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+ snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+ qemu_mutex_unlock_iothread();
+ qemu_savevm_state_header(snap_state.file);
+ qemu_savevm_state_setup(snap_state.file);
+ qemu_mutex_lock_iothread();
+
+ /* Async processing from here on out happens in iohandler context, so let
+ * the target bdrv have its home there.
+ */
+ blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+ aio_co_schedule(iohandler_ctx, snap_state.co);
+
+ return;
+
+restart:
+
+ save_snapshot_error("setup failed");
+
+ if (snap_state.saved_vm_running) {
+ vm_start();
+ }
+}
+
+void qmp_savevm_end(Error **errp)
+{
+ if (snap_state.state == SAVE_STATE_DONE) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+ "VM snapshot not started\n");
+ return;
+ }
+
+ if (snap_state.state == SAVE_STATE_ACTIVE) {
+ snap_state.state = SAVE_STATE_CANCELLED;
+ return;
+ }
+
+ if (snap_state.saved_vm_running) {
+ vm_start();
+ }
+
+ snap_state.state = SAVE_STATE_DONE;
+}
+
+// FIXME: Deprecated
+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
+{
+ // Compatibility to older qemu-server.
+ qmp_blockdev_snapshot_internal_sync(device, name, errp);
+}
+
+// FIXME: Deprecated
+void qmp_delete_drive_snapshot(const char *device, const char *name,
+ Error **errp)
+{
+ // Compatibility to older qemu-server.
+ (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
+ true, name, errp);
+}
+
+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
+ size_t size, Error **errp)
+{
+ BlockBackend *be = opaque;
+ int64_t maxlen = blk_getlength(be);
+ if (pos > maxlen) {
+ return -EIO;
+ }
+ if ((pos + size) > maxlen) {
+ size = maxlen - pos - 1;
+ }
+ if (size == 0) {
+ return 0;
+ }
+ return blk_pread(be, pos, buf, size);
+}
+
+static const QEMUFileOps loadstate_file_ops = {
+ .get_buffer = loadstate_get_buffer,
+};
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+ BlockBackend *be;
+ Error *local_err = NULL;
+ Error *blocker = NULL;
+
+ QEMUFile *f;
+ int ret = -EINVAL;
+
+ be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+ if (!be) {
+ error_setg(errp, "Could not open VM state file");
+ goto the_end;
+ }
+
+ error_setg(&blocker, "block device is in use by load state");
+ blk_op_block_all(be, blocker);
+
+ /* restore the VM state */
+ f = qemu_fopen_ops(be, &loadstate_file_ops);
+ if (!f) {
+ error_setg(errp, "Could not open VM state file");
+ goto the_end;
+ }
+
+ qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+ ret = qemu_loadvm_state(f);
+
+ qemu_fclose(f);
+ migration_incoming_state_destroy();
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Error while loading VM state");
+ goto the_end;
+ }
+
+ ret = 0;
+
+ the_end:
+ if (be) {
+ blk_op_unblock_all(be, blocker);
+ error_free(blocker);
+ blk_unref(be);
+ }
+ return ret;
+}
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 4eb9d1f7fd..670b7e427c 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2844,6 +2844,7 @@ void qemu_init(int argc, char **argv, char **envp)
int optind;
const char *optarg;
const char *loadvm = NULL;
+ const char *loadstate = NULL;
MachineClass *machine_class;
const char *cpu_option;
const char *vga_model = NULL;
@@ -3408,6 +3409,9 @@ void qemu_init(int argc, char **argv, char **envp)
case QEMU_OPTION_loadvm:
loadvm = optarg;
break;
+ case QEMU_OPTION_loadstate:
+ loadstate = optarg;
+ break;
case QEMU_OPTION_full_screen:
dpy.has_full_screen = true;
dpy.full_screen = true;
@@ -4464,6 +4468,12 @@ void qemu_init(int argc, char **argv, char **envp)
autostart = 0;
exit(1);
}
+ } else if (loadstate) {
+ Error *local_err = NULL;
+ if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+ error_report_err(local_err);
+ autostart = 0;
+ }
}
if (replay_mode != REPLAY_MODE_NONE) {
replay_vmstate_init();
diff --git a/util/async.c b/util/async.c
index 1319eee3bc..b68e73f488 100644
--- a/util/async.c
+++ b/util/async.c
@@ -559,6 +559,36 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co)
aio_context_unref(ctx);
}
+typedef struct AioCoRescheduleSelf {
+ Coroutine *co;
+ AioContext *new_ctx;
+} AioCoRescheduleSelf;
+
+static void aio_co_reschedule_self_bh(void *opaque)
+{
+ AioCoRescheduleSelf *data = opaque;
+ aio_co_schedule(data->new_ctx, data->co);
+}
+
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
+{
+ AioContext *old_ctx = qemu_get_current_aio_context();
+
+ if (old_ctx != new_ctx) {
+ AioCoRescheduleSelf data = {
+ .co = qemu_coroutine_self(),
+ .new_ctx = new_ctx,
+ };
+ /*
+ * We can't directly schedule the coroutine in the target context
+ * because this would be racy: The other thread could try to enter the
+ * coroutine before it has yielded in this one.
+ */
+ aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
+ qemu_coroutine_yield();
+ }
+}
+
void aio_co_wake(struct Coroutine *co)
{
AioContext *ctx;