pve-qemu-qoup/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:16:46 +0200
Subject: [PATCH] PVE: add savevm-async for background state snapshots

Put qemu_savevm_state_{header,setup} into the main loop and the rest
of the iteration into a coroutine. The former need to lock the
iothread (and we can't unlock it in the coroutine), and the latter
can't deal with being in a separate thread, so a coroutine it must
be.

Truncate output file at 1024 boundary.

Do not block the VM and save the state on aborting a snapshot, as the
snapshot will be invalid anyway.

Also, when aborting, wait for the target file to be closed, otherwise a
client might run into race-conditions when trying to remove the file
still opened by QEMU.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
[SR: improve aborting
     register yank before migration_incoming_state_destroy]
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
[FE: further improve aborting
     adapt to removal of QEMUFileOps
     improve condition for entering final stage
     adapt to QAPI and other changes for 8.2]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 hmp-commands-info.hx         |  13 +
 hmp-commands.hx              |  17 ++
 include/migration/snapshot.h |   2 +
 include/monitor/hmp.h        |   3 +
 migration/meson.build        |   1 +
 migration/savevm-async.c     | 534 +++++++++++++++++++++++++++++++++++
 monitor/hmp-cmds.c           |  38 +++
 qapi/migration.json          |  34 +++
 qapi/misc.json               |  16 ++
 qemu-options.hx              |  12 +
 system/vl.c                  |  10 +
 11 files changed, 680 insertions(+)
 create mode 100644 migration/savevm-async.c

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index f5b37eb74a..10fdd822e0 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -525,6 +525,19 @@ SRST
     Show current migration parameters.
 ERST

+    {
+        .name       = "savevm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show savevm status",
+        .cmd = hmp_info_savevm,
+    },
+
+SRST
+  ``info savevm``
+    Show savevm status.
+ERST
+
     {
         .name       = "balloon",
         .args_type  = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 765349ed14..893c3bd240 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1875,3 +1875,20 @@ SRST
   List event channels in the guest
 ERST
 #endif
+
+    {
+        .name       = "savevm-start",
+        .args_type  = "statefile:s?",
+        .params     = "[statefile]",
+        .help       = "Prepare for snapshot and halt VM. Save VM state to statefile.",
+        .cmd = hmp_savevm_start,
+    },
+
+    {
+        .name       = "savevm-end",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Resume VM after snaphot.",
+        .cmd        = hmp_savevm_end,
+        .coroutine  = true,
+    },
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b117..c846d37806 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -61,4 +61,6 @@ bool delete_snapshot(const char *name,
                     bool has_devices, strList *devices,
                     Error **errp);

+int load_snapshot_from_blockdev(const char *filename, Error **errp);
+
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 13f9a2dedb..7a7def7530 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -28,6 +28,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
 void hmp_info_mice(Monitor *mon, const QDict *qdict);
+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -94,6 +95,8 @@ void hmp_closefd(Monitor *mon, const QDict *qdict);
 void hmp_mouse_move(Monitor *mon, const QDict *qdict);
 void hmp_mouse_button(Monitor *mon, const QDict *qdict);
 void hmp_mouse_set(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
 void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
diff --git a/migration/meson.build b/migration/meson.build
index 0e689eac09..8f9d122187 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -27,6 +27,7 @@ system_ss.add(files(
   'options.c',
   'postcopy-ram.c',
   'savevm.c',
+  'savevm-async.c',
   'socket.c',
   'tls.c',
   'threadinfo.c',
diff --git a/migration/savevm-async.c b/migration/savevm-async.c
new file mode 100644
index 0000000000..8f63c4c637
--- /dev/null
+++ b/migration/savevm-async.c
@@ -0,0 +1,534 @@
+#include "qemu/osdep.h"
+#include "migration/channel-savevm-async.h"
+#include "migration/migration.h"
+#include "migration/migration-stats.h"
+#include "migration/options.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/timer.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+#include "qemu/yank.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+    do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+enum {
+    SAVE_STATE_DONE,
+    SAVE_STATE_ERROR,
+    SAVE_STATE_ACTIVE,
+    SAVE_STATE_COMPLETED,
+    SAVE_STATE_CANCELLED
+};
+
+
+static struct SnapshotState {
+    BlockBackend *target;
+    size_t bs_pos;
+    int state;
+    Error *error;
+    Error *blocker;
+    int saved_vm_running;
+    QEMUFile *file;
+    int64_t total_time;
+    QEMUBH *finalize_bh;
+    Coroutine *co;
+    QemuCoSleep target_close_wait;
+} snap_state;
+
+static bool savevm_aborted(void)
+{
+    return snap_state.state == SAVE_STATE_CANCELLED ||
+        snap_state.state == SAVE_STATE_ERROR;
+}
+
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+    SaveVMInfo *info = g_malloc0(sizeof(*info));
+    struct SnapshotState *s = &snap_state;
+
+    if (s->state != SAVE_STATE_DONE) {
+        info->has_bytes = true;
+        info->bytes = s->bs_pos;
+        switch (s->state) {
+        case SAVE_STATE_ERROR:
+            info->status = g_strdup("failed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            if (s->error) {
+                info->error = g_strdup(error_get_pretty(s->error));
+            }
+            break;
+        case SAVE_STATE_ACTIVE:
+            info->status = g_strdup("active");
+            info->has_total_time = true;
+            info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+                - s->total_time;
+            break;
+        case SAVE_STATE_COMPLETED:
+            info->status = g_strdup("completed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            break;
+        }
+    }
+
+    return info;
+}
+
+static int save_snapshot_cleanup(void)
+{
+    int ret = 0;
+
+    DPRINTF("save_snapshot_cleanup\n");
+
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+        snap_state.total_time;
+
+    if (snap_state.file) {
+        ret = qemu_fclose(snap_state.file);
+        snap_state.file = NULL;
+    }
+
+    if (snap_state.target) {
+        if (!savevm_aborted()) {
+            /* try to truncate, but ignore errors (will fail on block devices).
+            * note1: bdrv_read() need whole blocks, so we need to round up
+            * note2: PVE requires 1024 (BDRV_SECTOR_SIZE*2) alignment
+            */
+            size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE*2);
+            blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, 0, NULL);
+        }
+        blk_op_unblock_all(snap_state.target, snap_state.blocker);
+        error_free(snap_state.blocker);
+        snap_state.blocker = NULL;
+        blk_unref(snap_state.target);
+        snap_state.target = NULL;
+
+        qemu_co_sleep_wake(&snap_state.target_close_wait);
+    }
+
+    return ret;
+}
+
+static void G_GNUC_PRINTF(1, 2) save_snapshot_error(const char *fmt, ...)
+{
+    va_list ap;
+    char *msg;
+
+    va_start(ap, fmt);
+    msg = g_strdup_vprintf(fmt, ap);
+    va_end(ap);
+
+    DPRINTF("save_snapshot_error: %s\n", msg);
+
+    if (!snap_state.error) {
+        error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
+    }
+
+    g_free (msg);
+
+    snap_state.state = SAVE_STATE_ERROR;
+}
+
+static void process_savevm_finalize(void *opaque)
+{
+    int ret;
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+    MigrationState *ms = migrate_get_current();
+
+    bool aborted = savevm_aborted();
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    qemu_bh_delete(snap_state.finalize_bh);
+    snap_state.finalize_bh = NULL;
+    snap_state.co = NULL;
+
+    /* We need to own the target bdrv's context for the following functions,
+     * so move it back. It can stay in the main context and live out its live
+     * there, since we're done with it after this method ends anyway.
+     */
+    aio_context_acquire(iohandler_ctx);
+    blk_set_aio_context(snap_state.target, qemu_get_aio_context(), NULL);
+    aio_context_release(iohandler_ctx);
+
+    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+    if (ret < 0) {
+        save_snapshot_error("vm_stop_force_state error %d", ret);
+    }
+
+    if (!aborted) {
+        /* skip state saving if we aborted, snapshot will be invalid anyway */
+        (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+        ret = qemu_file_get_error(snap_state.file);
+        if (ret < 0) {
+            save_snapshot_error("qemu_savevm_state_complete_precopy error %d", ret);
+        }
+    }
+
+    DPRINTF("state saving complete\n");
+    DPRINTF("timing: process_savevm_finalize (state saving) took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+    /* clear migration state */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP,
+        ret || aborted ? MIGRATION_STATUS_FAILED : MIGRATION_STATUS_COMPLETED);
+    ms->to_dst_file = NULL;
+
+    qemu_savevm_state_cleanup();
+
+    ret = save_snapshot_cleanup();
+    if (ret < 0) {
+        save_snapshot_error("save_snapshot_cleanup error %d", ret);
+    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_COMPLETED;
+    } else if (aborted) {
+        /*
+         * If there was an error, there's no need to set a new one here.
+         * If the snapshot was canceled, leave setting the state to
+         * qmp_savevm_end(), which is waked by save_snapshot_cleanup().
+         */
+    } else {
+        save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+                            snap_state.state);
+    }
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    DPRINTF("timing: process_savevm_finalize (full) took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+}
+
+static void coroutine_fn process_savevm_co(void *opaque)
+{
+    int ret;
+    int64_t maxlen;
+    BdrvNextIterator it;
+    BlockDriverState *bs = NULL;
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+
+    ret = qemu_file_get_error(snap_state.file);
+    if (ret < 0) {
+        save_snapshot_error("qemu_savevm_state_setup failed");
+        return;
+    }
+
+    while (snap_state.state == SAVE_STATE_ACTIVE) {
+        uint64_t pending_size, pend_precopy, pend_postcopy;
+        uint64_t threshold = 400 * 1000;
+
+        /*
+         * pending_{estimate,exact} are expected to be called without iothread
+         * lock. Similar to what is done in migration.c, call the exact variant
+         * only once pend_precopy in the estimate is below the threshold.
+         */
+        qemu_mutex_unlock_iothread();
+        qemu_savevm_state_pending_estimate(&pend_precopy, &pend_postcopy);
+        if (pend_precopy <= threshold) {
+            qemu_savevm_state_pending_exact(&pend_precopy, &pend_postcopy);
+        }
+        qemu_mutex_lock_iothread();
+        pending_size = pend_precopy + pend_postcopy;
+
+        /*
+         * A guest reaching this cutoff is dirtying lots of RAM. It should be
+         * large enough so that the guest can't dirty this much between the
+         * check and the guest actually being stopped, but it should be small
+         * enough to avoid long downtimes for non-hibernation snapshots.
+         */
+        maxlen = blk_getlength(snap_state.target) - 100*1024*1024;
+
+        /* Note that there is no progress for pend_postcopy when iterating */
+        if (pend_precopy > threshold && snap_state.bs_pos + pending_size < maxlen) {
+            ret = qemu_savevm_state_iterate(snap_state.file, false);
+            if (ret < 0) {
+                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+                break;
+            }
+            DPRINTF("savevm iterate pending size %lu ret %d\n", pending_size, ret);
+        } else {
+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+            global_state_store();
+
+            DPRINTF("savevm iterate complete\n");
+            break;
+        }
+    }
+
+    DPRINTF("timing: process_savevm_co took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time);
+
+#ifdef DEBUG_SAVEVM_STATE
+    int64_t start_time_flush = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+#endif
+    /* If a drive runs in an IOThread we can flush it async, and only
+     * need to sync-flush whatever IO happens between now and
+     * vm_stop_force_state. bdrv_next can only be called from main AioContext,
+     * so move there now and after every flush.
+     */
+    aio_co_reschedule_self(qemu_get_aio_context());
+    bdrv_graph_co_rdlock();
+    bs = bdrv_first(&it);
+    bdrv_graph_co_rdunlock();
+    while (bs) {
+        /* target has BDRV_O_NO_FLUSH, no sense calling bdrv_flush on it */
+        if (bs != blk_bs(snap_state.target)) {
+            AioContext *bs_ctx = bdrv_get_aio_context(bs);
+            if (bs_ctx != qemu_get_aio_context()) {
+                DPRINTF("savevm: async flushing drive %s\n", bs->filename);
+                aio_co_reschedule_self(bs_ctx);
+                bdrv_graph_co_rdlock();
+                bdrv_flush(bs);
+                bdrv_graph_co_rdunlock();
+                aio_co_reschedule_self(qemu_get_aio_context());
+            }
+        }
+        bdrv_graph_co_rdlock();
+        bs = bdrv_next(&it);
+        bdrv_graph_co_rdunlock();
+    }
+
+    DPRINTF("timing: async flushing took %ld ms\n",
+        qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - start_time_flush);
+
+    qemu_bh_schedule(snap_state.finalize_bh);
+}
+
+void qmp_savevm_start(const char *statefile, Error **errp)
+{
+    Error *local_err = NULL;
+    MigrationState *ms = migrate_get_current();
+    AioContext *iohandler_ctx = iohandler_get_aio_context();
+
+    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+    if (snap_state.state != SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot already started\n");
+        return;
+    }
+
+    if (migration_is_running(ms->state)) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, QERR_MIGRATION_ACTIVE);
+        return;
+    }
+
+    if (migrate_block()) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "Block migration and snapshots are incompatible");
+        return;
+    }
+
+    /* initialize snapshot info */
+    snap_state.saved_vm_running = runstate_is_running();
+    snap_state.bs_pos = 0;
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    snap_state.blocker = NULL;
+    snap_state.target_close_wait = (QemuCoSleep){ .to_wake = NULL };
+
+    if (snap_state.error) {
+        error_free(snap_state.error);
+        snap_state.error = NULL;
+    }
+
+    if (!statefile) {
+        vm_stop(RUN_STATE_SAVE_VM);
+        snap_state.state = SAVE_STATE_COMPLETED;
+        return;
+    }
+
+    if (qemu_savevm_state_blocked(errp)) {
+        return;
+    }
+
+    /* Open the image */
+    QDict *options = NULL;
+    options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+    if (!snap_state.target) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    QIOChannel *ioc = QIO_CHANNEL(qio_channel_savevm_async_new(snap_state.target,
+                                                               &snap_state.bs_pos));
+    snap_state.file = qemu_file_new_output(ioc);
+
+    if (!snap_state.file) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    /*
+     * qemu_savevm_* paths use migration code and expect a migration state.
+     * State is cleared in process_savevm_co, but has to be initialized
+     * here (blocking main thread, from QMP) to avoid race conditions.
+     */
+    if (migrate_init(ms, errp)) {
+        return;
+    }
+    memset(&mig_stats, 0, sizeof(mig_stats));
+    ms->to_dst_file = snap_state.file;
+
+    error_setg(&snap_state.blocker, "block device is in use by savevm");
+    blk_op_block_all(snap_state.target, snap_state.blocker);
+
+    snap_state.state = SAVE_STATE_ACTIVE;
+    snap_state.finalize_bh = qemu_bh_new(process_savevm_finalize, &snap_state);
+    snap_state.co = qemu_coroutine_create(&process_savevm_co, NULL);
+    qemu_savevm_state_header(snap_state.file);
+    qemu_savevm_state_setup(snap_state.file);
+
+    /* Async processing from here on out happens in iohandler context, so let
+     * the target bdrv have its home there.
+     */
+    blk_set_aio_context(snap_state.target, iohandler_ctx, &local_err);
+
+    aio_co_schedule(iohandler_ctx, snap_state.co);
+
+    return;
+
+restart:
+
+    save_snapshot_error("setup failed");
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+}
+
+void coroutine_fn qmp_savevm_end(Error **errp)
+{
+    int64_t timeout;
+
+    if (snap_state.state == SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot not started\n");
+        return;
+    }
+
+    if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_CANCELLED;
+        goto wait_for_close;
+    }
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+
+    snap_state.state = SAVE_STATE_DONE;
+
+wait_for_close:
+    if (!snap_state.target) {
+        DPRINTF("savevm-end: no target file open\n");
+        return;
+    }
+
+    /* wait until cleanup is done before returning, this ensures that after this
+     * call exits the statefile will be closed and can be removed immediately */
+    DPRINTF("savevm-end: waiting for cleanup\n");
+    timeout = 30L * 1000 * 1000 * 1000;
+    qemu_co_sleep_ns_wakeable(&snap_state.target_close_wait,
+                              QEMU_CLOCK_REALTIME, timeout);
+    if (snap_state.target) {
+        save_snapshot_error("timeout waiting for target file close in "
+                            "qmp_savevm_end");
+        /* we cannot assume the snapshot finished in this case, so leave the
+         * state alone - caller has to figure something out */
+        return;
+    }
+
+    // File closed and no other error, so ensure next snapshot can be started.
+    if (snap_state.state != SAVE_STATE_ERROR) {
+        snap_state.state = SAVE_STATE_DONE;
+    }
+
+    DPRINTF("savevm-end: cleanup done\n");
+}
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+    BlockBackend *be;
+    Error *local_err = NULL;
+    Error *blocker = NULL;
+
+    QEMUFile *f;
+    size_t bs_pos = 0;
+    int ret = -EINVAL;
+
+    be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+    if (!be) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    error_setg(&blocker, "block device is in use by load state");
+    blk_op_block_all(be, blocker);
+
+    /* restore the VM state */
+    f = qemu_file_new_input(QIO_CHANNEL(qio_channel_savevm_async_new(be, &bs_pos)));
+    if (!f) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+    ret = qemu_loadvm_state(f);
+
+    /* dirty bitmap migration has a special case we need to trigger manually */
+    dirty_bitmap_mig_before_vm_start();
+
+    qemu_fclose(f);
+
+    /* state_destroy assumes a real migration which would have added a yank */
+    yank_register_instance(MIGRATION_YANK_INSTANCE, &error_abort);
+
+    migration_incoming_state_destroy();
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Error while loading VM state");
+        goto the_end;
+    }
+
+    ret = 0;
+
+ the_end:
+    if (be) {
+        blk_op_unblock_all(be, blocker);
+        error_free(blocker);
+        blk_unref(be);
+    }
+    return ret;
+}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 871898ac46..ef4634e5c1 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -22,6 +22,7 @@
 #include "monitor/monitor-internal.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-migration.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qapi/qmp/qdict.h"
 #include "qemu/cutils.h"
@@ -443,3 +444,40 @@ void hmp_info_mtree(Monitor *mon, const QDict *qdict)

     mtree_info(flatview, dispatch_tree, owner, disabled);
 }
+
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+    qmp_savevm_start(statefile, &errp);
+    hmp_handle_error(mon, errp);
+}
+
+void coroutine_fn hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+
+    qmp_savevm_end(&errp);
+    hmp_handle_error(mon, errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+    SaveVMInfo *info;
+    info = qmp_query_savevm(NULL);
+
+    if (info->status) {
+        monitor_printf(mon, "savevm status: %s\n", info->status);
+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+                       info->total_time);
+    } else {
+        monitor_printf(mon, "savevm status: not running\n");
+    }
+    if (info->has_bytes) {
+        monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+    }
+    if (info->error) {
+        monitor_printf(mon, "Error: %s\n", info->error);
+    }
+}
diff --git a/qapi/migration.json b/qapi/migration.json
index 197d3faa43..b41465fbe9 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -298,6 +298,40 @@
            '*dirty-limit-throttle-time-per-round': 'uint64',
            '*dirty-limit-ring-full-time': 'uint64'} }

+##
+# @SaveVMInfo:
+#
+# Information about current migration process.
+#
+# @status: string describing the current savevm status.
+#          This can be 'active', 'completed', 'failed'.
+#          If this field is not returned, no savevm process
+#          has been initiated
+#
+# @error: string containing error message is status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+#              If savevm has ended, it returns the total save time
+#
+# @bytes: total amount of data transfered
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+  'data': {'*status': 'str', '*error': 'str',
+           '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
 ##
 # @query-migrate:
 #
diff --git a/qapi/misc.json b/qapi/misc.json
index cda2effa81..94a58bb0bf 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -456,6 +456,22 @@
 ##
 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }

+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end', 'coroutine': true }
+
 ##
 # @CommandLineParameterType:
 #
diff --git a/qemu-options.hx b/qemu-options.hx
index b6b4ad9e67..881b0b3c43 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4590,6 +4590,18 @@ SRST
     Start right away with a saved state (``loadvm`` in monitor)
 ERST

+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+    "-loadstate file\n" \
+    "                start right away with a saved state\n",
+    QEMU_ARCH_ALL)
+SRST
+``-loadstate file``
+  Start right away with a saved state. This option does not rollback
+  disk state like @code{loadvm}, so user must make sure that disk
+  have correct state. @var{file} can be any valid device URL. See the section
+  for "Device URL Syntax" for more information.
+ERST
+
 #ifndef _WIN32
 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
     "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/system/vl.c b/system/vl.c
index d2a3b3f457..57f7ba0525 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -163,6 +163,7 @@ static const char *accelerators;
 static bool have_custom_ram_size;
 static const char *ram_memdev_id;
 static QDict *machine_opts_dict;
+static const char *loadstate;
 static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
 static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
 static int display_remote;
@@ -2715,6 +2716,12 @@ void qmp_x_exit_preconfig(Error **errp)

     if (loadvm) {
         load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+    } else if (loadstate) {
+        Error *local_err = NULL;
+        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+            error_report_err(local_err);
+            autostart = 0;
+        }
     }
     if (replay_mode != REPLAY_MODE_NONE) {
         replay_vmstate_init();
@@ -3265,6 +3272,9 @@ void qemu_init(int argc, char **argv)
             case QEMU_OPTION_loadvm:
                 loadvm = optarg;
                 break;
+            case QEMU_OPTION_loadstate:
+                loadstate = optarg;
+                break;
             case QEMU_OPTION_full_screen:
                 dpy.has_full_screen = true;
                 dpy.full_screen = true;