5f9cb29c3a
Reported in the community forum [0]. By default, there can be large amounts of memory left assigned to the QEMU process after backup. Likely because of fragmentation, it's necessary to explicitly call malloc_trim() to tell glibc that it shouldn't keep all that memory resident for the process. QEMU itself already does a malloc_trim() in the RCU thread, but that code path might not be reached (or not for a long time) under usual operation. The value of 4 MiB for the argument was also copied from there. Example with the following configuration: > agent: 1 > boot: order=scsi0 > cores: 4 > cpu: x86-64-v2-AES > ide2: none,media=cdrom > memory: 1024 > name: backup-mem > net0: virtio=DA:58:18:26:59:9F,bridge=vmbr0,firewall=1 > numa: 0 > ostype: l26 > scsi0: rbd:base-107-disk-0/vm-106-disk-1,size=4302M > scsihw: virtio-scsi-pci > smbios1: uuid=b2d4511e-8d01-44f1-afd6-9581b30c24a6 > sockets: 2 > startup: order=2 > virtio0: lvmthin:vm-106-disk-1,iothread=1,size=1G > virtio1: lvmthin:vm-106-disk-2,iothread=1,size=1G > virtio2: lvmthin:vm-106-disk-3,iothread=1,size=1G > vmgenid: 0a1d8751-5e02-449d-977e-c0160e900231 Before the change: > root@pve8a1 ~ # grep VmRSS /proc/$(cat /var/run/qemu-server/106.pid)/status > VmRSS: 370948 kB > root@pve8a1 ~ # vzdump 106 --storage pbs > (...) > INFO: Backup job finished successfully > root@pve8a1 ~ # grep VmRSS /proc/$(cat /var/run/qemu-server/106.pid)/status > VmRSS: 2114964 kB After the change: > root@pve8a1 ~ # grep VmRSS /proc/$(cat /var/run/qemu-server/106.pid)/status > VmRSS: 398788 kB > root@pve8a1 ~ # vzdump 106 --storage pbs > (...) > INFO: Backup job finished successfully > root@pve8a1 ~ # grep VmRSS /proc/$(cat /var/run/qemu-server/106.pid)/status > VmRSS: 424356 kB [0]: https://forum.proxmox.com/threads/131339/ Co-diagnosed-by: Friedrich Weber <f.weber@proxmox.com> Co-diagnosed-by: Dominik Csapak <d.csapak@proxmox.com> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Acked-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
213 lines
6.8 KiB
Diff
213 lines
6.8 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Stefan Reiter <s.reiter@proxmox.com>
|
|
Date: Thu, 22 Oct 2020 17:34:18 +0200
|
|
Subject: [PATCH] PVE: Migrate dirty bitmap state via savevm
|
|
|
|
QEMU provides 'savevm' registrations as a mechanism for arbitrary state
|
|
to be migrated along with a VM. Use this to send a serialized version of
|
|
dirty bitmap state data from proxmox-backup-qemu, and restore it on the
|
|
target node.
|
|
|
|
Also add a flag to query-proxmox-support so qemu-server can determine if
|
|
safe migration is possible and makes sense.
|
|
|
|
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
|
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
|
[FE: split up state_pending for 8.0]
|
|
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
---
|
|
include/migration/misc.h | 3 ++
|
|
migration/meson.build | 2 +
|
|
migration/migration.c | 1 +
|
|
migration/pbs-state.c | 104 +++++++++++++++++++++++++++++++++++++++
|
|
pve-backup.c | 1 +
|
|
qapi/block-core.json | 6 +++
|
|
6 files changed, 117 insertions(+)
|
|
create mode 100644 migration/pbs-state.c
|
|
|
|
diff --git a/include/migration/misc.h b/include/migration/misc.h
|
|
index 8b49841016..78f63ca400 100644
|
|
--- a/include/migration/misc.h
|
|
+++ b/include/migration/misc.h
|
|
@@ -77,4 +77,7 @@ bool migration_in_bg_snapshot(void);
|
|
/* migration/block-dirty-bitmap.c */
|
|
void dirty_bitmap_mig_init(void);
|
|
|
|
+/* migration/pbs-state.c */
|
|
+void pbs_state_mig_init(void);
|
|
+
|
|
#endif
|
|
diff --git a/migration/meson.build b/migration/meson.build
|
|
index a7824b5266..de6a271b58 100644
|
|
--- a/migration/meson.build
|
|
+++ b/migration/meson.build
|
|
@@ -6,8 +6,10 @@ migration_files = files(
|
|
'vmstate.c',
|
|
'qemu-file.c',
|
|
'yank_functions.c',
|
|
+ 'pbs-state.c',
|
|
)
|
|
softmmu_ss.add(migration_files)
|
|
+softmmu_ss.add(libproxmox_backup_qemu)
|
|
|
|
softmmu_ss.add(files(
|
|
'block-dirty-bitmap.c',
|
|
diff --git a/migration/migration.c b/migration/migration.c
|
|
index 99f86bd6c2..db229e72c9 100644
|
|
--- a/migration/migration.c
|
|
+++ b/migration/migration.c
|
|
@@ -245,6 +245,7 @@ void migration_object_init(void)
|
|
blk_mig_init();
|
|
ram_mig_init();
|
|
dirty_bitmap_mig_init();
|
|
+ pbs_state_mig_init();
|
|
}
|
|
|
|
void migration_cancel(const Error *error)
|
|
diff --git a/migration/pbs-state.c b/migration/pbs-state.c
|
|
new file mode 100644
|
|
index 0000000000..887e998b9e
|
|
--- /dev/null
|
|
+++ b/migration/pbs-state.c
|
|
@@ -0,0 +1,104 @@
|
|
+/*
|
|
+ * PBS (dirty-bitmap) state migration
|
|
+ */
|
|
+
|
|
+#include "qemu/osdep.h"
|
|
+#include "migration/misc.h"
|
|
+#include "qemu-file.h"
|
|
+#include "migration/vmstate.h"
|
|
+#include "migration/register.h"
|
|
+#include "proxmox-backup-qemu.h"
|
|
+
|
|
+typedef struct PBSState {
|
|
+ bool active;
|
|
+} PBSState;
|
|
+
|
|
+/* state is accessed via this static variable directly, 'opaque' is NULL */
|
|
+static PBSState pbs_state;
|
|
+
|
|
+static void pbs_state_pending(void *opaque, uint64_t *must_precopy,
|
|
+ uint64_t *can_postcopy)
|
|
+{
|
|
+ /* we send everything in save_setup, so nothing is ever pending */
|
|
+}
|
|
+
|
|
+/* receive PBS state via f and deserialize, called on target */
|
|
+static int pbs_state_load(QEMUFile *f, void *opaque, int version_id)
|
|
+{
|
|
+ /* safe cast, we cannot migrate to target with less bits than source */
|
|
+ size_t buf_size = (size_t)qemu_get_be64(f);
|
|
+
|
|
+ uint8_t *buf = (uint8_t *)malloc(buf_size);
|
|
+ size_t read = qemu_get_buffer(f, buf, buf_size);
|
|
+
|
|
+ if (read < buf_size) {
|
|
+ fprintf(stderr, "error receiving PBS state: not enough data\n");
|
|
+ return -EIO;
|
|
+ }
|
|
+
|
|
+ proxmox_import_state(buf, buf_size);
|
|
+
|
|
+ free(buf);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* serialize PBS state and send to target via f, called on source */
|
|
+static int pbs_state_save_setup(QEMUFile *f, void *opaque)
|
|
+{
|
|
+ size_t buf_size;
|
|
+ uint8_t *buf = proxmox_export_state(&buf_size);
|
|
+
|
|
+ /* LV encoding */
|
|
+ qemu_put_be64(f, buf_size);
|
|
+ qemu_put_buffer(f, buf, buf_size);
|
|
+
|
|
+ proxmox_free_state_buf(buf);
|
|
+ pbs_state.active = false;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static bool pbs_state_is_active(void *opaque)
|
|
+{
|
|
+ /* we need to return active exactly once, else .save_setup is never called,
|
|
+ * but if we'd just return true the migration doesn't make progress since
|
|
+ * it'd be waiting for us */
|
|
+ return pbs_state.active;
|
|
+}
|
|
+
|
|
+static bool pbs_state_is_active_iterate(void *opaque)
|
|
+{
|
|
+ /* we don't iterate, everything is sent in save_setup */
|
|
+ return pbs_state_is_active(opaque);
|
|
+}
|
|
+
|
|
+static bool pbs_state_has_postcopy(void *opaque)
|
|
+{
|
|
+ /* PBS state can't change during a migration (since that's blocking any
|
|
+ * potential backups), so we can copy everything before the VM is stopped */
|
|
+ return false;
|
|
+}
|
|
+
|
|
+static void pbs_state_save_cleanup(void *opaque)
|
|
+{
|
|
+ /* reset active after migration succeeds or fails */
|
|
+ pbs_state.active = false;
|
|
+}
|
|
+
|
|
+static SaveVMHandlers savevm_pbs_state_handlers = {
|
|
+ .save_setup = pbs_state_save_setup,
|
|
+ .has_postcopy = pbs_state_has_postcopy,
|
|
+ .state_pending_exact = pbs_state_pending,
|
|
+ .state_pending_estimate = pbs_state_pending,
|
|
+ .is_active_iterate = pbs_state_is_active_iterate,
|
|
+ .load_state = pbs_state_load,
|
|
+ .is_active = pbs_state_is_active,
|
|
+ .save_cleanup = pbs_state_save_cleanup,
|
|
+};
|
|
+
|
|
+void pbs_state_mig_init(void)
|
|
+{
|
|
+ pbs_state.active = true;
|
|
+ register_savevm_live("pbs-state", 0, 1,
|
|
+ &savevm_pbs_state_handlers,
|
|
+ NULL);
|
|
+}
|
|
diff --git a/pve-backup.c b/pve-backup.c
|
|
index 10ca8a0b1d..0a5ce2cab8 100644
|
|
--- a/pve-backup.c
|
|
+++ b/pve-backup.c
|
|
@@ -1102,6 +1102,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
|
|
ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version());
|
|
ret->pbs_dirty_bitmap = true;
|
|
ret->pbs_dirty_bitmap_savevm = true;
|
|
+ ret->pbs_dirty_bitmap_migration = true;
|
|
ret->query_bitmap_info = true;
|
|
ret->pbs_masterkey = true;
|
|
ret->backup_max_workers = true;
|
|
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
|
index 47118bf83e..809f3c61bc 100644
|
|
--- a/qapi/block-core.json
|
|
+++ b/qapi/block-core.json
|
|
@@ -984,6 +984,11 @@
|
|
# @pbs-dirty-bitmap-savevm: True if 'dirty-bitmaps' migration capability can
|
|
# safely be set for savevm-async.
|
|
#
|
|
+# @pbs-dirty-bitmap-migration: True if safe migration of dirty-bitmaps including
|
|
+# PBS state is supported. Enabling 'dirty-bitmaps'
|
|
+# migration cap if this is false/unset may lead
|
|
+# to crashes on migration!
|
|
+#
|
|
# @pbs-masterkey: True if the QMP backup call supports the 'master_keyfile'
|
|
# parameter.
|
|
#
|
|
@@ -994,6 +999,7 @@
|
|
'data': { 'pbs-dirty-bitmap': 'bool',
|
|
'query-bitmap-info': 'bool',
|
|
'pbs-dirty-bitmap-savevm': 'bool',
|
|
+ 'pbs-dirty-bitmap-migration': 'bool',
|
|
'pbs-masterkey': 'bool',
|
|
'pbs-library-version': 'str',
|
|
'backup-max-workers': 'bool' } }
|