bf251437e9
Many changes were necessary this time around: * QAPI was changed to avoid redundant has_* variables, see commit 44ea9d9be3 ("qapi: Start to elide redundant has_FOO in generated C") for details. This affected many QMP commands added by Proxmox too. * Pending querying for migration got split into two functions, one to estimate, one for exact value, see commit c8df4a7aef ("migration: Split save_live_pending() into state_pending_*") for details. Relevant for savevm-async and PBS dirty bitmap. * Some block (driver) functions got converted to coroutines, so the Proxmox block drivers needed to be adapted. * Alloc track auto-detaching during PBS live restore got broken by AioContext-related changes resulting in a deadlock. The current, hacky method was replaced by a simpler one. Stefan apparently ran into a problem with that when he wrote the driver, but there were improvements in the stream job code since then and I didn't manage to reproduce the issue. It's a separate patch "alloc-track: fix deadlock during drop" for now, you can find the details there. * Async snapshot-related changes: - The pending querying got adapted to the above-mentioned split and a patch is added to optimize it/make it more similar to what upstream code does. - Added initialization of the compression counters (for future-proofing). - It's necessary the hold the BQL (big QEMU lock = iothread mutex) during the setup phase, because block layer functions are used there and not doing so leads to racy, hard-to-debug crashes or hangs. It's necessary to change some upstream code too for this, a version of the patch "migration: for snapshots, hold the BQL during setup callbacks" is intended to be upstreamed. - Need to take the bdrv graph read lock before flushing. * hmp_info_balloon was moved to a different file. * Needed to include a new headers from time to time to still get the correct functions. Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
294 lines
9.3 KiB
Diff
294 lines
9.3 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Stefan Reiter <s.reiter@proxmox.com>
|
|
Date: Thu, 20 Aug 2020 14:25:00 +0200
|
|
Subject: [PATCH] PVE-Backup: Use a transaction to synchronize job states
|
|
|
|
By using a JobTxn, we can sync dirty bitmaps only when *all* jobs were
|
|
successful - meaning we don't need to remove them when the backup fails,
|
|
since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.
|
|
|
|
To keep the rate-limiting and IO impact from before, we use a sequential
|
|
transaction, so drives will still be backed up one after the other.
|
|
|
|
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
|
|
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
|
[FE: add new force parameter to job_cancel_sync calls
|
|
adapt for new job lock mechanism replacing AioContext locks]
|
|
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
|
---
|
|
pve-backup.c | 163 ++++++++++++++++-----------------------------------
|
|
1 file changed, 50 insertions(+), 113 deletions(-)
|
|
|
|
diff --git a/pve-backup.c b/pve-backup.c
|
|
index d4abe6e703..214c839c0b 100644
|
|
--- a/pve-backup.c
|
|
+++ b/pve-backup.c
|
|
@@ -54,6 +54,7 @@ static struct PVEBackupState {
|
|
VmaWriter *vmaw;
|
|
ProxmoxBackupHandle *pbs;
|
|
GList *di_list;
|
|
+ JobTxn *txn;
|
|
QemuMutex backup_mutex;
|
|
CoMutex dump_callback_mutex;
|
|
} backup_state;
|
|
@@ -73,34 +74,12 @@ typedef struct PVEBackupDevInfo {
|
|
size_t size;
|
|
uint64_t block_size;
|
|
uint8_t dev_id;
|
|
- bool completed;
|
|
char targetfile[PATH_MAX];
|
|
BdrvDirtyBitmap *bitmap;
|
|
BlockDriverState *target;
|
|
+ BlockJob *job;
|
|
} PVEBackupDevInfo;
|
|
|
|
-static void pvebackup_run_next_job(void);
|
|
-
|
|
-static BlockJob *
|
|
-lookup_active_block_job(PVEBackupDevInfo *di)
|
|
-{
|
|
- if (!di->completed && di->bs) {
|
|
- WITH_JOB_LOCK_GUARD() {
|
|
- for (BlockJob *job = block_job_next_locked(NULL); job; job = block_job_next_locked(job)) {
|
|
- if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
|
|
- continue;
|
|
- }
|
|
-
|
|
- BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
|
|
- if (bjob && bjob->source_bs == di->bs) {
|
|
- return job;
|
|
- }
|
|
- }
|
|
- }
|
|
- }
|
|
- return NULL;
|
|
-}
|
|
-
|
|
static void pvebackup_propagate_error(Error *err)
|
|
{
|
|
qemu_mutex_lock(&backup_state.stat.lock);
|
|
@@ -276,18 +255,6 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
|
|
if (local_err != NULL) {
|
|
pvebackup_propagate_error(local_err);
|
|
}
|
|
- } else {
|
|
- // on error or cancel we cannot ensure synchronization of dirty
|
|
- // bitmaps with backup server, so remove all and do full backup next
|
|
- GList *l = backup_state.di_list;
|
|
- while (l) {
|
|
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
|
|
- l = g_list_next(l);
|
|
-
|
|
- if (di->bitmap) {
|
|
- bdrv_release_dirty_bitmap(di->bitmap);
|
|
- }
|
|
- }
|
|
}
|
|
|
|
proxmox_backup_disconnect(backup_state.pbs);
|
|
@@ -326,8 +293,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
|
|
|
|
qemu_mutex_lock(&backup_state.backup_mutex);
|
|
|
|
- di->completed = true;
|
|
-
|
|
if (ret < 0) {
|
|
Error *local_err = NULL;
|
|
error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
|
|
@@ -340,20 +305,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
|
|
|
|
block_on_coroutine_fn(pvebackup_complete_stream, di);
|
|
|
|
- // remove self from job queue
|
|
+ // remove self from job list
|
|
backup_state.di_list = g_list_remove(backup_state.di_list, di);
|
|
|
|
- if (di->bitmap && ret < 0) {
|
|
- // on error or cancel we cannot ensure synchronization of dirty
|
|
- // bitmaps with backup server, so remove all and do full backup next
|
|
- bdrv_release_dirty_bitmap(di->bitmap);
|
|
- }
|
|
-
|
|
g_free(di);
|
|
|
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
+ /* call cleanup if we're the last job */
|
|
+ if (!g_list_first(backup_state.di_list)) {
|
|
+ block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
|
|
+ }
|
|
|
|
- pvebackup_run_next_job();
|
|
+ qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
}
|
|
|
|
static void pvebackup_cancel(void)
|
|
@@ -375,32 +337,28 @@ static void pvebackup_cancel(void)
|
|
proxmox_backup_abort(backup_state.pbs, "backup canceled");
|
|
}
|
|
|
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
-
|
|
- for(;;) {
|
|
-
|
|
- BlockJob *next_job = NULL;
|
|
+ /* it's enough to cancel one job in the transaction, the rest will follow
|
|
+ * automatically */
|
|
+ GList *bdi = g_list_first(backup_state.di_list);
|
|
+ BlockJob *cancel_job = bdi && bdi->data ?
|
|
+ ((PVEBackupDevInfo *)bdi->data)->job :
|
|
+ NULL;
|
|
|
|
- qemu_mutex_lock(&backup_state.backup_mutex);
|
|
-
|
|
- GList *l = backup_state.di_list;
|
|
- while (l) {
|
|
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
|
|
- l = g_list_next(l);
|
|
-
|
|
- BlockJob *job = lookup_active_block_job(di);
|
|
- if (job != NULL) {
|
|
- next_job = job;
|
|
- break;
|
|
- }
|
|
+ /* ref the job before releasing the mutex, just to be safe */
|
|
+ if (cancel_job) {
|
|
+ WITH_JOB_LOCK_GUARD() {
|
|
+ job_ref_locked(&cancel_job->job);
|
|
}
|
|
+ }
|
|
|
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
+ /* job_cancel_sync may enter the job, so we need to release the
|
|
+ * backup_mutex to avoid deadlock */
|
|
+ qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
|
|
- if (next_job) {
|
|
- job_cancel_sync(&next_job->job, true);
|
|
- } else {
|
|
- break;
|
|
+ if (cancel_job) {
|
|
+ WITH_JOB_LOCK_GUARD() {
|
|
+ job_cancel_sync_locked(&cancel_job->job, true);
|
|
+ job_unref_locked(&cancel_job->job);
|
|
}
|
|
}
|
|
}
|
|
@@ -460,49 +418,19 @@ static int coroutine_fn pvebackup_co_add_config(
|
|
goto out;
|
|
}
|
|
|
|
-bool job_should_pause_locked(Job *job);
|
|
-
|
|
-static void pvebackup_run_next_job(void)
|
|
-{
|
|
- assert(!qemu_in_coroutine());
|
|
-
|
|
- qemu_mutex_lock(&backup_state.backup_mutex);
|
|
-
|
|
- GList *l = backup_state.di_list;
|
|
- while (l) {
|
|
- PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
|
|
- l = g_list_next(l);
|
|
-
|
|
- BlockJob *job = lookup_active_block_job(di);
|
|
-
|
|
- if (job) {
|
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
-
|
|
- WITH_JOB_LOCK_GUARD() {
|
|
- if (job_should_pause_locked(&job->job)) {
|
|
- bool error_or_canceled = pvebackup_error_or_canceled();
|
|
- if (error_or_canceled) {
|
|
- job_cancel_sync_locked(&job->job, true);
|
|
- } else {
|
|
- job_resume_locked(&job->job);
|
|
- }
|
|
- }
|
|
- }
|
|
- return;
|
|
- }
|
|
- }
|
|
-
|
|
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
|
|
-
|
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
-}
|
|
-
|
|
static bool create_backup_jobs(void) {
|
|
|
|
assert(!qemu_in_coroutine());
|
|
|
|
Error *local_err = NULL;
|
|
|
|
+ /* create job transaction to synchronize bitmap commit and cancel all
|
|
+ * jobs in case one errors */
|
|
+ if (backup_state.txn) {
|
|
+ job_txn_unref(backup_state.txn);
|
|
+ }
|
|
+ backup_state.txn = job_txn_new_seq();
|
|
+
|
|
BackupPerf perf = { .max_workers = 16 };
|
|
|
|
/* create and start all jobs (paused state) */
|
|
@@ -525,7 +453,7 @@ static bool create_backup_jobs(void) {
|
|
BlockJob *job = backup_job_create(
|
|
NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
|
|
bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
|
|
- JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
|
|
+ JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
|
|
|
|
aio_context_release(aio_context);
|
|
|
|
@@ -537,7 +465,8 @@ static bool create_backup_jobs(void) {
|
|
pvebackup_propagate_error(create_job_err);
|
|
break;
|
|
}
|
|
- job_start(&job->job);
|
|
+
|
|
+ di->job = job;
|
|
|
|
bdrv_unref(di->target);
|
|
di->target = NULL;
|
|
@@ -555,6 +484,12 @@ static bool create_backup_jobs(void) {
|
|
bdrv_unref(di->target);
|
|
di->target = NULL;
|
|
}
|
|
+
|
|
+ if (di->job) {
|
|
+ WITH_JOB_LOCK_GUARD() {
|
|
+ job_unref_locked(&di->job->job);
|
|
+ }
|
|
+ }
|
|
}
|
|
}
|
|
|
|
@@ -937,10 +872,6 @@ err:
|
|
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
|
|
l = g_list_next(l);
|
|
|
|
- if (di->bitmap) {
|
|
- bdrv_release_dirty_bitmap(di->bitmap);
|
|
- }
|
|
-
|
|
if (di->target) {
|
|
bdrv_unref(di->target);
|
|
}
|
|
@@ -1021,9 +952,15 @@ UuidInfo *qmp_backup(
|
|
block_on_coroutine_fn(pvebackup_co_prepare, &task);
|
|
|
|
if (*errp == NULL) {
|
|
- create_backup_jobs();
|
|
+ bool errors = create_backup_jobs();
|
|
qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
- pvebackup_run_next_job();
|
|
+
|
|
+ if (!errors) {
|
|
+ /* start the first job in the transaction
|
|
+ * note: this might directly enter the job, so we need to do this
|
|
+ * after unlocking the backup_mutex */
|
|
+ job_txn_start_seq(backup_state.txn);
|
|
+ }
|
|
} else {
|
|
qemu_mutex_unlock(&backup_state.backup_mutex);
|
|
}
|