From 0cffb504e7505193def620ca1429400ee0400e35 Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Wed, 6 Sep 2023 10:45:11 +0200 Subject: [PATCH] backup: create jobs in a drained section With the drive-backup QMP command, upstream QEMU uses a drained section for the source drive when creating the backup job. Do the same here to avoid subtle bugs. There, the drained section extends until after the job is started, but this cannot be done here for multi-disk backups (could at most start the first job). The important thing is that the cbw (copy-before-write) node is in place and the bcs (block-copy-state) bitmap is initialized, which both happen during job creation (ensured by the "block/backup: move bcs bitmap initialization to job creation" PVE patch). One such bug is one reported in the community forum [0], where using a drive with iothread can lead to an overlapping block-copy request and consequently an assertion failure. The block-copy code relies on the bcs bitmap to determine if a request for a certain range can be created. Each time a request is created, it resets the bcs bitmap at that range to indicate that it's being handled. The duplicate request can happen as follows: Thread A attaches the cbw node Thread B creates a request and resets the bitmap at that range Thread A clears the bitmap and merges it with the PBS bitmap The merging can lead to the bitmap being set again at the range of the previous request, so the block-copy code thinks it's fine to create a request there. Thread B creates another requests at an overlapping range before the other request is finished. The drained section ensures that nothing else can interfere with the bcs bitmap between attaching the copy-before-write block node and initialization of the bitmap. [0]: https://forum.proxmox.com/threads/133149/ Signed-off-by: Fiona Ebner --- ...E-Backup-Proxmox-backup-patches-for-QEMU.patch | 15 ++++++++++----- ...VE-Migrate-dirty-bitmap-state-via-savevm.patch | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch b/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch index d873601..0c69d85 100644 --- a/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch +++ b/debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch @@ -80,7 +80,8 @@ Signed-off-by: Wolfgang Bumiller adapt to QAPI changes improve canceling allow passing max-workers setting - use malloc_trim after backup] + use malloc_trim after backup + create jobs in a drained section] Signed-off-by: Fiona Ebner --- block/meson.build | 5 + @@ -93,11 +94,11 @@ Signed-off-by: Fiona Ebner monitor/hmp-cmds.c | 72 +++ proxmox-backup-client.c | 146 +++++ proxmox-backup-client.h | 60 ++ - pve-backup.c | 1109 ++++++++++++++++++++++++++++++++ + pve-backup.c | 1113 ++++++++++++++++++++++++++++++++ qapi/block-core.json | 226 +++++++ qapi/common.json | 13 + qapi/machine.json | 15 +- - 14 files changed, 1723 insertions(+), 13 deletions(-) + 14 files changed, 1727 insertions(+), 13 deletions(-) create mode 100644 proxmox-backup-client.c create mode 100644 proxmox-backup-client.h create mode 100644 pve-backup.c @@ -588,10 +589,10 @@ index 0000000000..8cbf645b2c +#endif /* PROXMOX_BACKUP_CLIENT_H */ diff --git a/pve-backup.c b/pve-backup.c new file mode 100644 -index 0000000000..10ca8a0b1d +index 0000000000..c5454e7acc --- /dev/null +++ b/pve-backup.c -@@ -0,0 +1,1109 @@ +@@ -0,0 +1,1113 @@ +#include "proxmox-backup-client.h" +#include "vma.h" + @@ -1116,12 +1117,16 @@ index 0000000000..10ca8a0b1d + AioContext *aio_context = bdrv_get_aio_context(di->bs); + aio_context_acquire(aio_context); + ++ bdrv_drained_begin(di->bs); ++ + BlockJob *job = backup_job_create( + NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap, + bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT, + BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, + &local_err); + ++ bdrv_drained_end(di->bs); ++ + aio_context_release(aio_context); + + di->job = job; diff --git a/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch b/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch index 7a906e9..cd7a613 100644 --- a/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch +++ b/debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch @@ -175,10 +175,10 @@ index 0000000000..887e998b9e + NULL); +} diff --git a/pve-backup.c b/pve-backup.c -index 10ca8a0b1d..0a5ce2cab8 100644 +index c5454e7acc..30bc6ff9ed 100644 --- a/pve-backup.c +++ b/pve-backup.c -@@ -1102,6 +1102,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp) +@@ -1106,6 +1106,7 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp) ret->pbs_library_version = g_strdup(proxmox_backup_qemu_version()); ret->pbs_dirty_bitmap = true; ret->pbs_dirty_bitmap_savevm = true;