bf251437e9
Many changes were necessary this time around: * QAPI was changed to avoid redundant has_* variables, see commit 44ea9d9be3 ("qapi: Start to elide redundant has_FOO in generated C") for details. This affected many QMP commands added by Proxmox too. * Pending querying for migration got split into two functions, one to estimate, one for exact value, see commit c8df4a7aef ("migration: Split save_live_pending() into state_pending_*") for details. Relevant for savevm-async and PBS dirty bitmap. * Some block (driver) functions got converted to coroutines, so the Proxmox block drivers needed to be adapted. * Alloc track auto-detaching during PBS live restore got broken by AioContext-related changes resulting in a deadlock. The current, hacky method was replaced by a simpler one. Stefan apparently ran into a problem with that when he wrote the driver, but there were improvements in the stream job code since then and I didn't manage to reproduce the issue. It's a separate patch "alloc-track: fix deadlock during drop" for now, you can find the details there. * Async snapshot-related changes: - The pending querying got adapted to the above-mentioned split and a patch is added to optimize it/make it more similar to what upstream code does. - Added initialization of the compression counters (for future-proofing). - It's necessary to hold the BQL (big QEMU lock = iothread mutex) during the setup phase, because block layer functions are used there and not doing so leads to racy, hard-to-debug crashes or hangs. It's necessary to change some upstream code too for this, a version of the patch "migration: for snapshots, hold the BQL during setup callbacks" is intended to be upstreamed. - Need to take the bdrv graph read lock before flushing. * hmp_info_balloon was moved to a different file. * Needed to include new headers from time to time to still get the correct functions. Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
283 lines
8.1 KiB
Diff
283 lines
8.1 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Fiona Ebner <f.ebner@proxmox.com>
|
|
Date: Thu, 13 Oct 2022 11:33:50 +0200
|
|
Subject: [PATCH] PVE: add IOChannel implementation for savevm-async
|
|
|
|
based on migration/channel-block.c and the implementation that was
|
|
present in migration/savevm-async.c before QEMU 7.1.
|
|
|
|
Passes along read/write requests to the given BlockBackend, while
|
|
ensuring that a read request going beyond the end results in a
|
|
graceful short read.
|
|
|
|
Additionally, allows tracking the current position from the outside
|
|
(intended to be used for progress tracking).
|
|
|
|
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
---
|
|
migration/channel-savevm-async.c | 183 +++++++++++++++++++++++++++++++
|
|
migration/channel-savevm-async.h | 51 +++++++++
|
|
migration/meson.build | 1 +
|
|
3 files changed, 235 insertions(+)
|
|
create mode 100644 migration/channel-savevm-async.c
|
|
create mode 100644 migration/channel-savevm-async.h
|
|
|
|
diff --git a/migration/channel-savevm-async.c b/migration/channel-savevm-async.c
|
|
new file mode 100644
|
|
index 0000000000..aab081ce07
|
|
--- /dev/null
|
|
+++ b/migration/channel-savevm-async.c
|
|
@@ -0,0 +1,183 @@
|
|
+/*
|
|
+ * QIO Channel implementation to be used by savevm-async QMP calls
|
|
+ */
|
|
+#include "qemu/osdep.h"
|
|
+#include "migration/channel-savevm-async.h"
|
|
+#include "qapi/error.h"
|
|
+#include "sysemu/block-backend.h"
|
|
+#include "trace.h"
|
|
+
|
|
+QIOChannelSavevmAsync *
|
|
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos)
|
|
+{
|
|
+ QIOChannelSavevmAsync *ioc;
|
|
+
|
|
+ ioc = QIO_CHANNEL_SAVEVM_ASYNC(object_new(TYPE_QIO_CHANNEL_SAVEVM_ASYNC));
|
|
+
|
|
+ bdrv_ref(blk_bs(be));
|
|
+ ioc->be = be;
|
|
+ ioc->bs_pos = bs_pos;
|
|
+
|
|
+ return ioc;
|
|
+}
|
|
+
|
|
+
|
|
+static void
|
|
+qio_channel_savevm_async_finalize(Object *obj)
|
|
+{
|
|
+ QIOChannelSavevmAsync *ioc = QIO_CHANNEL_SAVEVM_ASYNC(obj);
|
|
+
|
|
+ if (ioc->be) {
|
|
+ bdrv_unref(blk_bs(ioc->be));
|
|
+ ioc->be = NULL;
|
|
+ }
|
|
+ ioc->bs_pos = NULL;
|
|
+}
|
|
+
|
|
+
|
|
+static ssize_t
|
|
+qio_channel_savevm_async_readv(QIOChannel *ioc,
|
|
+ const struct iovec *iov,
|
|
+ size_t niov,
|
|
+ int **fds,
|
|
+ size_t *nfds,
|
|
+ int flags,
|
|
+ Error **errp)
|
|
+{
|
|
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
|
|
+ BlockBackend *be = saioc->be;
|
|
+ int64_t maxlen = blk_getlength(be);
|
|
+ QEMUIOVector qiov;
|
|
+ size_t size;
|
|
+ int ret;
|
|
+
|
|
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
|
|
+
|
|
+ if (*saioc->bs_pos >= maxlen) {
|
|
+ error_setg(errp, "cannot read beyond maxlen");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (maxlen - *saioc->bs_pos < qiov.size) {
|
|
+ size = maxlen - *saioc->bs_pos;
|
|
+ } else {
|
|
+ size = qiov.size;
|
|
+ }
|
|
+
|
|
+ // returns 0 on success
|
|
+ ret = blk_preadv(be, *saioc->bs_pos, size, &qiov, 0);
|
|
+ if (ret < 0) {
|
|
+ error_setg_errno(errp, -ret, "blk_preadv failed");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ *saioc->bs_pos += size;
|
|
+ return size;
|
|
+}
|
|
+
|
|
+
|
|
+static ssize_t
|
|
+qio_channel_savevm_async_writev(QIOChannel *ioc,
|
|
+ const struct iovec *iov,
|
|
+ size_t niov,
|
|
+ int *fds,
|
|
+ size_t nfds,
|
|
+ int flags,
|
|
+ Error **errp)
|
|
+{
|
|
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
|
|
+ BlockBackend *be = saioc->be;
|
|
+ QEMUIOVector qiov;
|
|
+ int ret;
|
|
+
|
|
+ qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
|
|
+
|
|
+ if (qemu_in_coroutine()) {
|
|
+ ret = blk_co_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
|
|
+ aio_wait_kick();
|
|
+ } else {
|
|
+ ret = blk_pwritev(be, *saioc->bs_pos, qiov.size, &qiov, 0);
|
|
+ }
|
|
+
|
|
+ if (ret < 0) {
|
|
+ error_setg_errno(errp, -ret, "blk(_co)_pwritev failed");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ *saioc->bs_pos += qiov.size;
|
|
+ return qiov.size;
|
|
+}
|
|
+
|
|
+
|
|
+static int
|
|
+qio_channel_savevm_async_set_blocking(QIOChannel *ioc,
|
|
+ bool enabled,
|
|
+ Error **errp)
|
|
+{
|
|
+ if (!enabled) {
|
|
+ error_setg(errp, "Non-blocking mode not supported for savevm-async");
|
|
+ return -1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static int
|
|
+qio_channel_savevm_async_close(QIOChannel *ioc,
|
|
+ Error **errp)
|
|
+{
|
|
+ QIOChannelSavevmAsync *saioc = QIO_CHANNEL_SAVEVM_ASYNC(ioc);
|
|
+ int rv = bdrv_flush(blk_bs(saioc->be));
|
|
+
|
|
+ if (rv < 0) {
|
|
+ error_setg_errno(errp, -rv, "Unable to flush VMState");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ bdrv_unref(blk_bs(saioc->be));
|
|
+ saioc->be = NULL;
|
|
+ saioc->bs_pos = NULL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static void
|
|
+qio_channel_savevm_async_set_aio_fd_handler(QIOChannel *ioc,
|
|
+ AioContext *ctx,
|
|
+ IOHandler *io_read,
|
|
+ IOHandler *io_write,
|
|
+ void *opaque)
|
|
+{
|
|
+ // if channel-block starts doing something, check if this needs adaptation
|
|
+}
|
|
+
|
|
+
|
|
+static void
|
|
+qio_channel_savevm_async_class_init(ObjectClass *klass,
|
|
+ void *class_data G_GNUC_UNUSED)
|
|
+{
|
|
+ QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
|
|
+
|
|
+ ioc_klass->io_writev = qio_channel_savevm_async_writev;
|
|
+ ioc_klass->io_readv = qio_channel_savevm_async_readv;
|
|
+ ioc_klass->io_set_blocking = qio_channel_savevm_async_set_blocking;
|
|
+ ioc_klass->io_close = qio_channel_savevm_async_close;
|
|
+ ioc_klass->io_set_aio_fd_handler = qio_channel_savevm_async_set_aio_fd_handler;
|
|
+}
|
|
+
|
|
+static const TypeInfo qio_channel_savevm_async_info = {
|
|
+ .parent = TYPE_QIO_CHANNEL,
|
|
+ .name = TYPE_QIO_CHANNEL_SAVEVM_ASYNC,
|
|
+ .instance_size = sizeof(QIOChannelSavevmAsync),
|
|
+ .instance_finalize = qio_channel_savevm_async_finalize,
|
|
+ .class_init = qio_channel_savevm_async_class_init,
|
|
+};
|
|
+
|
|
+static void
|
|
+qio_channel_savevm_async_register_types(void)
|
|
+{
|
|
+ type_register_static(&qio_channel_savevm_async_info);
|
|
+}
|
|
+
|
|
+type_init(qio_channel_savevm_async_register_types);
|
|
diff --git a/migration/channel-savevm-async.h b/migration/channel-savevm-async.h
|
|
new file mode 100644
|
|
index 0000000000..17ae2cb261
|
|
--- /dev/null
|
|
+++ b/migration/channel-savevm-async.h
|
|
@@ -0,0 +1,51 @@
|
|
+/*
|
|
+ * QEMU I/O channels driver for savevm-async.c
|
|
+ *
|
|
+ * Copyright (c) 2022 Proxmox Server Solutions
|
|
+ *
|
|
+ * Authors:
|
|
+ * Fiona Ebner (f.ebner@proxmox.com)
|
|
+ *
|
|
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
+ * See the COPYING file in the top-level directory.
|
|
+ */
|
|
+
|
|
+#ifndef QIO_CHANNEL_SAVEVM_ASYNC_H
|
|
+#define QIO_CHANNEL_SAVEVM_ASYNC_H
|
|
+
|
|
+#include "io/channel.h"
|
|
+#include "qom/object.h"
|
|
+
|
|
+#define TYPE_QIO_CHANNEL_SAVEVM_ASYNC "qio-channel-savevm-async"
|
|
+OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelSavevmAsync, QIO_CHANNEL_SAVEVM_ASYNC)
|
|
+
|
|
+
|
|
+/**
|
|
+ * QIOChannelSavevmAsync:
|
|
+ *
|
|
+ * The QIOChannelSavevmAsync object provides a channel implementation that is
|
|
+ * able to perform I/O on any BlockBackend whose BlockDriverState directly
|
|
+ * contains a VMState (as opposed to indirectly, like qcow2). It allows
|
|
+ * tracking the current position from the outside.
|
|
+ */
|
|
+struct QIOChannelSavevmAsync {
|
|
+ QIOChannel parent;
|
|
+ BlockBackend *be;
|
|
+ size_t *bs_pos;
|
|
+};
|
|
+
|
|
+
|
|
+/**
|
|
+ * qio_channel_savevm_async_new:
|
|
+ * @be: the block backend
|
|
+ * @bs_pos: used to keep track of the IOChannel's current position
|
|
+ *
|
|
+ * Create a new IO channel object that can perform I/O on a BlockBackend object
|
|
+ * whose BlockDriverState directly contains a VMState.
|
|
+ *
|
|
+ * Returns: the new channel object
|
|
+ */
|
|
+QIOChannelSavevmAsync *
|
|
+qio_channel_savevm_async_new(BlockBackend *be, size_t *bs_pos);
|
|
+
|
|
+#endif /* QIO_CHANNEL_SAVEVM_ASYNC_H */
|
|
diff --git a/migration/meson.build b/migration/meson.build
|
|
index 0d1bb9f96e..8a142fc7a9 100644
|
|
--- a/migration/meson.build
|
|
+++ b/migration/meson.build
|
|
@@ -13,6 +13,7 @@ softmmu_ss.add(files(
|
|
'block-dirty-bitmap.c',
|
|
'channel.c',
|
|
'channel-block.c',
|
|
+ 'channel-savevm-async.c',
|
|
'colo-failover.c',
|
|
'colo.c',
|
|
'exec.c',
|