d03e1b3ce3
User-facing breaking change:

The slirp submodule for user networking got removed. To continue
building it, the --enable-slirp option would need to be added to the
build and/or the appropriate library installed. Since PVE does not
explicitly support it, it would require additionally installing the
libslirp0 package on all installations, and there is *very* little
mention of it on the community forum when searching for "slirp" or
"netdev user". The plan is therefore to only enable it again if there
is real demand for it.

Notable changes:

* The big change for this release is the rework of job locking, using
  a job mutex and introducing _locked() variants of job API functions,
  moving away from call-side AioContext locking. See (in the qemu
  submodule) commit 6f592e5aca ("job.c: enable job lock/unlock and
  remove Aiocontext locks") and previous commits for context.

  Changes required for the backup patches:

  * Use WITH_JOB_LOCK_GUARD() and call the _locked() variant of job
    API functions where appropriate (many are only available as a
    _locked() variant).

  * Remove acquiring/releasing the AioContext around functions that
    take the job mutex lock internally.

  The patch introducing sequential transaction support for jobs needs
  to temporarily unlock the job mutex to call job_start() when
  starting the next job in the transaction.

* The zeroinit block driver now marks its child as primary. The
  documentation in include/block/block-common.h states:

  > Filter node has exactly one FILTERED|PRIMARY child, and may have
  > other children which must not have these bits

  Without this, an assert will trigger when copying to a zeroinit
  target with qemu-img convert, because bdrv_child_cb_attach() expects
  any non-PRIMARY child to be not FILTERED:

  > qemu-img convert -n -p -f raw -O raw input.raw zeroinit:output.raw
  > qemu-img: ../block.c:1476: bdrv_child_cb_attach: Assertion
  > `!(child->role & BDRV_CHILD_FILTERED)' failed.

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
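
For reference, the locking pattern the backup patches now have to follow
(wrap job API calls in WITH_JOB_LOCK_GUARD() and use the _locked()
variants instead of taking the job's AioContext) looks roughly like the
sketch below. This is illustrative only, written against the public
QEMU 7.1+ job API; the helper name is not part of any patch:

#include "qemu/osdep.h"
#include "qemu/job.h"
#include "block/blockjob.h"

/* Force-cancel a block job under the job mutex (illustrative sketch). */
static void cancel_block_job_sketch(BlockJob *bjob)
{
    WITH_JOB_LOCK_GUARD() {
        job_ref_locked(&bjob->job);               /* keep the job alive while cancelling */
        job_cancel_sync_locked(&bjob->job, true); /* force-cancel, as the backup code does */
        job_unref_locked(&bjob->job);
    }
}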
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 20 Aug 2020 14:25:00 +0200
Subject: [PATCH] PVE-Backup: Use a transaction to synchronize job states

By using a JobTxn, we can sync dirty bitmaps only when *all* jobs were
successful - meaning we don't need to remove them when the backup fails,
since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.

To keep the rate-limiting and IO impact from before, we use a sequential
transaction, so drives will still be backed up one after the other.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: add new force parameter to job_cancel_sync calls
     adapt for new job lock mechanism replacing AioContext locks]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 pve-backup.c | 163 ++++++++++++++++-----------------------------------
 1 file changed, 50 insertions(+), 113 deletions(-)

diff --git a/pve-backup.c b/pve-backup.c
index c85b2ecd83..b5fb844434 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -52,6 +52,7 @@ static struct PVEBackupState {
     VmaWriter *vmaw;
     ProxmoxBackupHandle *pbs;
     GList *di_list;
+    JobTxn *txn;
     QemuMutex backup_mutex;
     CoMutex dump_callback_mutex;
 } backup_state;
@@ -71,34 +72,12 @@ typedef struct PVEBackupDevInfo {
     size_t size;
     uint64_t block_size;
     uint8_t dev_id;
-    bool completed;
     char targetfile[PATH_MAX];
     BdrvDirtyBitmap *bitmap;
     BlockDriverState *target;
+    BlockJob *job;
 } PVEBackupDevInfo;
 
-static void pvebackup_run_next_job(void);
-
-static BlockJob *
-lookup_active_block_job(PVEBackupDevInfo *di)
-{
-    if (!di->completed && di->bs) {
-        WITH_JOB_LOCK_GUARD() {
-            for (BlockJob *job = block_job_next_locked(NULL); job; job = block_job_next_locked(job)) {
-                if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
-                    continue;
-                }
-
-                BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
-                if (bjob && bjob->source_bs == di->bs) {
-                    return job;
-                }
-            }
-        }
-    }
-    return NULL;
-}
-
 static void pvebackup_propagate_error(Error *err)
 {
     qemu_mutex_lock(&backup_state.stat.lock);
@@ -274,18 +253,6 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
             if (local_err != NULL) {
                 pvebackup_propagate_error(local_err);
             }
-        } else {
-            // on error or cancel we cannot ensure synchronization of dirty
-            // bitmaps with backup server, so remove all and do full backup next
-            GList *l = backup_state.di_list;
-            while (l) {
-                PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-                l = g_list_next(l);
-
-                if (di->bitmap) {
-                    bdrv_release_dirty_bitmap(di->bitmap);
-                }
-            }
         }
 
         proxmox_backup_disconnect(backup_state.pbs);
@@ -324,8 +291,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
 
     qemu_mutex_lock(&backup_state.backup_mutex);
 
-    di->completed = true;
-
     if (ret < 0) {
         Error *local_err = NULL;
         error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
@@ -338,20 +303,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
 
     block_on_coroutine_fn(pvebackup_complete_stream, di);
 
-    // remove self from job queue
+    // remove self from job list
     backup_state.di_list = g_list_remove(backup_state.di_list, di);
 
-    if (di->bitmap && ret < 0) {
-        // on error or cancel we cannot ensure synchronization of dirty
-        // bitmaps with backup server, so remove all and do full backup next
-        bdrv_release_dirty_bitmap(di->bitmap);
-    }
-
     g_free(di);
 
-    qemu_mutex_unlock(&backup_state.backup_mutex);
+    /* call cleanup if we're the last job */
+    if (!g_list_first(backup_state.di_list)) {
+        block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+    }
 
-    pvebackup_run_next_job();
+    qemu_mutex_unlock(&backup_state.backup_mutex);
 }
 
 static void pvebackup_cancel(void)
@@ -373,32 +335,28 @@ static void pvebackup_cancel(void)
         proxmox_backup_abort(backup_state.pbs, "backup canceled");
     }
 
-    qemu_mutex_unlock(&backup_state.backup_mutex);
-
-    for(;;) {
-
-        BlockJob *next_job = NULL;
+    /* it's enough to cancel one job in the transaction, the rest will follow
+     * automatically */
+    GList *bdi = g_list_first(backup_state.di_list);
+    BlockJob *cancel_job = bdi && bdi->data ?
+        ((PVEBackupDevInfo *)bdi->data)->job :
+        NULL;
 
-        qemu_mutex_lock(&backup_state.backup_mutex);
-
-        GList *l = backup_state.di_list;
-        while (l) {
-            PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-            l = g_list_next(l);
-
-            BlockJob *job = lookup_active_block_job(di);
-            if (job != NULL) {
-                next_job = job;
-                break;
-            }
+    /* ref the job before releasing the mutex, just to be safe */
+    if (cancel_job) {
+        WITH_JOB_LOCK_GUARD() {
+            job_ref_locked(&cancel_job->job);
         }
+    }
 
-        qemu_mutex_unlock(&backup_state.backup_mutex);
+    /* job_cancel_sync may enter the job, so we need to release the
+     * backup_mutex to avoid deadlock */
+    qemu_mutex_unlock(&backup_state.backup_mutex);
 
-        if (next_job) {
-            job_cancel_sync(&next_job->job, true);
-        } else {
-            break;
+    if (cancel_job) {
+        WITH_JOB_LOCK_GUARD() {
+            job_cancel_sync_locked(&cancel_job->job, true);
+            job_unref_locked(&cancel_job->job);
         }
     }
 }
@@ -458,49 +416,19 @@ static int coroutine_fn pvebackup_co_add_config(
     goto out;
 }
 
-bool job_should_pause_locked(Job *job);
-
-static void pvebackup_run_next_job(void)
-{
-    assert(!qemu_in_coroutine());
-
-    qemu_mutex_lock(&backup_state.backup_mutex);
-
-    GList *l = backup_state.di_list;
-    while (l) {
-        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-        l = g_list_next(l);
-
-        BlockJob *job = lookup_active_block_job(di);
-
-        if (job) {
-            qemu_mutex_unlock(&backup_state.backup_mutex);
-
-            WITH_JOB_LOCK_GUARD() {
-                if (job_should_pause_locked(&job->job)) {
-                    bool error_or_canceled = pvebackup_error_or_canceled();
-                    if (error_or_canceled) {
-                        job_cancel_sync_locked(&job->job, true);
-                    } else {
-                        job_resume_locked(&job->job);
-                    }
-                }
-            }
-            return;
-        }
-    }
-
-    block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-
-    qemu_mutex_unlock(&backup_state.backup_mutex);
-}
-
 static bool create_backup_jobs(void) {
 
     assert(!qemu_in_coroutine());
 
     Error *local_err = NULL;
 
+    /* create job transaction to synchronize bitmap commit and cancel all
+     * jobs in case one errors */
+    if (backup_state.txn) {
+        job_txn_unref(backup_state.txn);
+    }
+    backup_state.txn = job_txn_new_seq();
+
     BackupPerf perf = { .max_workers = 16 };
 
     /* create and start all jobs (paused state) */
@@ -523,7 +451,7 @@ static bool create_backup_jobs(void) {
         BlockJob *job = backup_job_create(
             NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
             bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
-            JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
+            JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
 
         aio_context_release(aio_context);
 
@@ -535,7 +463,8 @@ static bool create_backup_jobs(void) {
             pvebackup_propagate_error(create_job_err);
             break;
         }
-        job_start(&job->job);
+
+        di->job = job;
 
         bdrv_unref(di->target);
         di->target = NULL;
@@ -553,6 +482,12 @@ static bool create_backup_jobs(void) {
                 bdrv_unref(di->target);
                 di->target = NULL;
             }
+
+            if (di->job) {
+                WITH_JOB_LOCK_GUARD() {
+                    job_unref_locked(&di->job->job);
+                }
+            }
         }
     }
 
@@ -943,10 +878,6 @@ err:
         PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
         l = g_list_next(l);
 
-        if (di->bitmap) {
-            bdrv_release_dirty_bitmap(di->bitmap);
-        }
-
         if (di->target) {
             bdrv_unref(di->target);
         }
@@ -1035,9 +966,15 @@ UuidInfo *qmp_backup(
     block_on_coroutine_fn(pvebackup_co_prepare, &task);
 
     if (*errp == NULL) {
-        create_backup_jobs();
+        bool errors = create_backup_jobs();
         qemu_mutex_unlock(&backup_state.backup_mutex);
-        pvebackup_run_next_job();
+
+        if (!errors) {
+            /* start the first job in the transaction
+             * note: this might directly enter the job, so we need to do this
+             * after unlocking the backup_mutex */
+            job_txn_start_seq(backup_state.txn);
+        }
     } else {
         qemu_mutex_unlock(&backup_state.backup_mutex);
     }
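
As additional context for the job_txn_start_seq() call above: starting a
job takes the job mutex internally, so the sequential-transaction code
(added by an earlier patch in this series) has to drop the lock before
calling job_start() for the next job in the transaction. A minimal
illustrative sketch of that step, using only the public QEMU 7.1+ job
API; the helper name is not from the patches:

#include "qemu/osdep.h"
#include "qemu/job.h"

/* Start the next job of a transaction while the caller holds the job mutex:
 * job_start() acquires the mutex itself, so temporarily drop it (sketch). */
static void start_next_job_sketch(Job *next)
{
    job_unlock();
    job_start(next); /* may directly enter the job coroutine */
    job_lock();
}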