d03e1b3ce3
User-facing breaking change:
The slirp submodule for user networking got removed. To continue building it, the
--enable-slirp option would need to be added to the build and/or the appropriate
library installed. Since PVE does not explicitly support it (keeping it would
additionally require installing the libslirp0 package on all installations), and
there is *very* little mention of it on the community forum when searching for
"slirp" or "netdev user", the plan is to only enable it again if there is real
demand for it.

Notable changes:

* The big change for this release is the rework of job locking, using a job mutex
  and introducing _locked() variants of job API functions, moving away from
  call-side AioContext locking. See (in the qemu submodule) commit 6f592e5aca
  ("job.c: enable job lock/unlock and remove Aiocontext locks") and previous
  commits for context.

  Changes required for the backup patches:
  * Use WITH_JOB_LOCK_GUARD() and call the _locked() variant of job API functions
    where appropriate (many are only available as a _locked() variant); see the
    locking sketch below.
  * Remove acquiring/releasing AioContext around functions taking the job mutex
    lock internally.

  The patch introducing sequential transaction support for jobs needs to
  temporarily unlock the job mutex to call job_start() when starting the next job
  in the transaction.

* The zeroinit block driver now marks its child as primary. The documentation in
  include/block/block-common.h states:
  > Filter node has exactly one FILTERED|PRIMARY child, and may have
  > other children which must not have these bits
  Without this, an assert will trigger when copying to a zeroinit target with
  qemu-img convert, because bdrv_child_cb_attach() expects any non-PRIMARY child
  to be not FILTERED:
  > qemu-img convert -n -p -f raw -O raw input.raw zeroinit:output.raw
  > qemu-img: ../block.c:1476: bdrv_child_cb_attach: Assertion
  > `!(child->role & BDRV_CHILD_FILTERED)' failed.

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
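For context, a minimal sketch of the new locking pattern referred to above. This is illustrative only and not code from the patches: the helper name cancel_and_drop_job is made up, while WITH_JOB_LOCK_GUARD(), job_ref_locked(), job_cancel_sync_locked() and job_unref_locked() are the actual job APIs the patches switch to.

#include "qemu/osdep.h"
#include "qemu/job.h"
#include "block/blockjob.h"

/* Illustrative helper (not part of the patches): cancel a block job and drop
 * the reference while holding the job mutex. Before the rework, a caller
 * would typically have acquired the job's AioContext and used the unlocked
 * job_ref()/job_cancel_sync()/job_unref() variants instead. */
static void cancel_and_drop_job(BlockJob *bjob)
{
    WITH_JOB_LOCK_GUARD() {
        job_ref_locked(&bjob->job);
        job_cancel_sync_locked(&bjob->job, true);
        job_unref_locked(&bjob->job);
    }
}

The second adjustment listed above follows from the same rework: functions that now take the job mutex internally must no longer be wrapped in aio_context_acquire()/aio_context_release() by the caller.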
500 lines
16 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Mon, 28 Sep 2020 13:40:51 +0200
Subject: [PATCH] PVE-Backup: Don't block on finishing and cleanup
create_backup_jobs

proxmox_backup_co_finish is already async, but previously we would wait
for the coroutine using block_on_coroutine_fn(). Avoid this by
scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
as a real coroutine when calling from pvebackup_complete_cb. This is ok,
since complete_stream uses the backup_mutex internally to synchronize,
and other streams can happily continue writing in the meantime anyway.

To accommodate, backup_mutex is converted to a CoMutex. This means
converting every user to a coroutine. This is not just useful here, but
will come in handy once this series[0] is merged, and QMP calls can be
yield-able coroutines too. Then we can also finally get rid of
block_on_coroutine_fn.

Cases of aio_context_acquire/release from within what is now a coroutine
are changed to aio_co_reschedule_self, which works since a running
coroutine always holds the aio lock for the context it is running in.

job_cancel_sync is called from a BH since it can't be run from a
coroutine (uses AIO_WAIT_WHILE internally).

Same thing for create_backup_jobs, which is converted to a BH too.

To communicate the finishing state, a new property is introduced to
query-backup: 'finishing'. A new state is explicitly not used, since
that would break compatibility with older qemu-server versions.

Also fix create_backup_jobs:

No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.

To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.

Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.

[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: add new force parameter to job_cancel_sync calls]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
pve-backup.c | 212 +++++++++++++++++++++++++++----------------
qapi/block-core.json | 5 +-
2 files changed, 138 insertions(+), 79 deletions(-)

diff --git a/pve-backup.c b/pve-backup.c
index b5fb844434..88268bb586 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";

static struct PVEBackupState {
struct {
- // Everithing accessed from qmp_backup_query command is protected using lock
+ // Everything accessed from qmp_backup_query command is protected using
+ // this lock. Do NOT hold this lock for long times, as it is sometimes
+ // acquired from coroutines, and thus any wait time may block the guest.
QemuMutex lock;
Error *error;
time_t start_time;
@@ -47,20 +49,22 @@ static struct PVEBackupState {
size_t reused;
size_t zero_bytes;
GList *bitmap_list;
+ bool finishing;
+ bool starting;
} stat;
int64_t speed;
VmaWriter *vmaw;
ProxmoxBackupHandle *pbs;
GList *di_list;
JobTxn *txn;
- QemuMutex backup_mutex;
+ CoMutex backup_mutex;
CoMutex dump_callback_mutex;
} backup_state;

static void pvebackup_init(void)
{
qemu_mutex_init(&backup_state.stat.lock);
- qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.backup_mutex);
qemu_co_mutex_init(&backup_state.dump_callback_mutex);
}

@@ -72,6 +76,7 @@ typedef struct PVEBackupDevInfo {
size_t size;
uint64_t block_size;
uint8_t dev_id;
+ int completed_ret; // INT_MAX if not completed
char targetfile[PATH_MAX];
BdrvDirtyBitmap *bitmap;
BlockDriverState *target;
@@ -227,12 +232,12 @@ pvebackup_co_dump_vma_cb(
}

// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_co_cleanup(void *unused)
+static void coroutine_fn pvebackup_co_cleanup(void)
{
assert(qemu_in_coroutine());

qemu_mutex_lock(&backup_state.stat.lock);
- backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = true;
qemu_mutex_unlock(&backup_state.stat.lock);

if (backup_state.vmaw) {
@@ -261,35 +266,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)

g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.end_time = time(NULL);
+ backup_state.stat.finishing = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
}

-// assumes the caller holds backup_mutex
-static void coroutine_fn pvebackup_complete_stream(void *opaque)
+static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
{
PVEBackupDevInfo *di = opaque;
+ int ret = di->completed_ret;

- bool error_or_canceled = pvebackup_error_or_canceled();
-
- if (backup_state.vmaw) {
- vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ qemu_mutex_lock(&backup_state.stat.lock);
+ bool starting = backup_state.stat.starting;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+ if (starting) {
+ /* in 'starting' state, no tasks have been run yet, meaning we can (and
+ * must) skip all cleanup, as we don't know what has and hasn't been
+ * initialized yet. */
+ return;
}

- if (backup_state.pbs && !error_or_canceled) {
- Error *local_err = NULL;
- proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
- if (local_err != NULL) {
- pvebackup_propagate_error(local_err);
- }
- }
-}
-
-static void pvebackup_complete_cb(void *opaque, int ret)
-{
- assert(!qemu_in_coroutine());
-
- PVEBackupDevInfo *di = opaque;
-
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);

if (ret < 0) {
Error *local_err = NULL;
@@ -301,7 +300,19 @@ static void pvebackup_complete_cb(void *opaque, int ret)

assert(di->target == NULL);

- block_on_coroutine_fn(pvebackup_complete_stream, di);
+ bool error_or_canceled = pvebackup_error_or_canceled();
+
+ if (backup_state.vmaw) {
+ vma_writer_close_stream(backup_state.vmaw, di->dev_id);
+ }
+
+ if (backup_state.pbs && !error_or_canceled) {
+ Error *local_err = NULL;
+ proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
+ if (local_err != NULL) {
+ pvebackup_propagate_error(local_err);
+ }
+ }

// remove self from job list
backup_state.di_list = g_list_remove(backup_state.di_list, di);
@@ -310,21 +321,46 @@ static void pvebackup_complete_cb(void *opaque, int ret)

/* call cleanup if we're the last job */
if (!g_list_first(backup_state.di_list)) {
- block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+ pvebackup_co_cleanup();
}

- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}

-static void pvebackup_cancel(void)
+static void pvebackup_complete_cb(void *opaque, int ret)
{
- assert(!qemu_in_coroutine());
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+
+ /*
+ * Schedule stream cleanup in async coroutine. close_image and finish might
+ * take a while, so we can't block on them here. This way it also doesn't
+ * matter if we're already running in a coroutine or not.
+ * Note: di is a pointer to an entry in the global backup_state struct, so
+ * it stays valid.
+ */
+ Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
+ aio_co_enter(qemu_get_aio_context(), co);
+}
+
+/*
+ * job_cancel(_sync) does not like to be called from coroutines, so defer to
+ * main loop processing via a bottom half.
+ */
+static void job_cancel_bh(void *opaque) {
+ CoCtxData *data = (CoCtxData*)opaque;
+ Job *job = (Job*)data->data;
+ job_cancel_sync(job, true);
+ aio_co_enter(data->ctx, data->co);
+}

+static void coroutine_fn pvebackup_co_cancel(void *opaque)
+{
Error *cancel_err = NULL;
error_setg(&cancel_err, "backup canceled");
pvebackup_propagate_error(cancel_err);

- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);

if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -342,28 +378,22 @@ static void pvebackup_cancel(void)
((PVEBackupDevInfo *)bdi->data)->job :
NULL;

- /* ref the job before releasing the mutex, just to be safe */
if (cancel_job) {
- WITH_JOB_LOCK_GUARD() {
- job_ref_locked(&cancel_job->job);
- }
+ CoCtxData data = {
+ .ctx = qemu_get_current_aio_context(),
+ .co = qemu_coroutine_self(),
+ .data = &cancel_job->job,
+ };
+ aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
+ qemu_coroutine_yield();
}

- /* job_cancel_sync may enter the job, so we need to release the
- * backup_mutex to avoid deadlock */
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (cancel_job) {
- WITH_JOB_LOCK_GUARD() {
- job_cancel_sync_locked(&cancel_job->job, true);
- job_unref_locked(&cancel_job->job);
- }
- }
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}

void qmp_backup_cancel(Error **errp)
{
- pvebackup_cancel();
+ block_on_coroutine_fn(pvebackup_co_cancel, NULL);
}

// assumes the caller holds backup_mutex
@@ -416,10 +446,18 @@ static int coroutine_fn pvebackup_co_add_config(
goto out;
}

-static bool create_backup_jobs(void) {
+/*
+ * backup_job_create can *not* be run from a coroutine (and requires an
+ * acquired AioContext), so this can't either.
+ * The caller is responsible that backup_mutex is held nonetheless.
+ */
+static void create_backup_jobs_bh(void *opaque) {

assert(!qemu_in_coroutine());

+ CoCtxData *data = (CoCtxData*)opaque;
+ Error **errp = (Error**)data->data;
+
Error *local_err = NULL;

/* create job transaction to synchronize bitmap commit and cancel all
@@ -455,24 +493,19 @@ static bool create_backup_jobs(void) {

aio_context_release(aio_context);

- if (!job || local_err != NULL) {
- Error *create_job_err = NULL;
- error_setg(&create_job_err, "backup_job_create failed: %s",
- local_err ? error_get_pretty(local_err) : "null");
+ di->job = job;

- pvebackup_propagate_error(create_job_err);
+ if (!job || local_err) {
+ error_setg(errp, "backup_job_create failed: %s",
+ local_err ? error_get_pretty(local_err) : "null");
break;
}

- di->job = job;
-
bdrv_unref(di->target);
di->target = NULL;
}

- bool errors = pvebackup_error_or_canceled();
-
- if (errors) {
+ if (*errp) {
l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -485,13 +518,15 @@ static bool create_backup_jobs(void) {

if (di->job) {
WITH_JOB_LOCK_GUARD() {
+ job_cancel_sync_locked(&di->job->job, true);
job_unref_locked(&di->job->job);
}
}
}
}

- return errors;
+ /* return */
+ aio_co_enter(data->ctx, data->co);
}

typedef struct QmpBackupTask {
@@ -528,11 +563,12 @@ typedef struct QmpBackupTask {
UuidInfo *result;
} QmpBackupTask;

-// assumes the caller holds backup_mutex
static void coroutine_fn pvebackup_co_prepare(void *opaque)
{
assert(qemu_in_coroutine());

+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
QmpBackupTask *task = opaque;

task->result = NULL; // just to be sure
@@ -553,8 +589,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
const char *firewall_name = "qemu-server.fw";

if (backup_state.di_list) {
- error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+ error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}

@@ -621,6 +658,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
}
di->size = size;
total += size;
+
+ di->completed_ret = INT_MAX;
}

uuid_generate(uuid);
@@ -852,6 +891,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
backup_state.stat.dirty = total - backup_state.stat.reused;
backup_state.stat.transferred = 0;
backup_state.stat.zero_bytes = 0;
+ backup_state.stat.finishing = false;
+ backup_state.stat.starting = true;

qemu_mutex_unlock(&backup_state.stat.lock);

@@ -866,6 +907,33 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
uuid_info->UUID = uuid_str;

task->result = uuid_info;
+
+ /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+ * backup_mutex locked. This is fine, a CoMutex can be held across yield
+ * points, and we'll release it as soon as the BH reschedules us.
+ */
+ CoCtxData waker = {
+ .co = qemu_coroutine_self(),
+ .ctx = qemu_get_current_aio_context(),
+ .data = &local_err,
+ };
+ aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+ qemu_coroutine_yield();
+
+ if (local_err) {
+ error_propagate(task->errp, local_err);
+ goto err;
+ }
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ qemu_mutex_lock(&backup_state.stat.lock);
+ backup_state.stat.starting = false;
+ qemu_mutex_unlock(&backup_state.stat.lock);
+
+ /* start the first job in the transaction */
+ job_txn_start_seq(backup_state.txn);
+
return;

err_mutex:
@@ -888,6 +956,7 @@ err:
g_free(di);
}
g_list_free(di_list);
+ backup_state.di_list = NULL;

if (devs) {
g_strfreev(devs);
@@ -908,6 +977,8 @@ err:
}

task->result = NULL;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}

@@ -961,24 +1032,8 @@ UuidInfo *qmp_backup(
.errp = errp,
};

- qemu_mutex_lock(&backup_state.backup_mutex);
-
block_on_coroutine_fn(pvebackup_co_prepare, &task);

- if (*errp == NULL) {
- bool errors = create_backup_jobs();
- qemu_mutex_unlock(&backup_state.backup_mutex);
-
- if (!errors) {
- /* start the first job in the transaction
- * note: this might directly enter the job, so we need to do this
- * after unlocking the backup_mutex */
- job_txn_start_seq(backup_state.txn);
- }
- } else {
- qemu_mutex_unlock(&backup_state.backup_mutex);
- }
-
return task.result;
}

@@ -1030,6 +1085,7 @@ BackupStatus *qmp_query_backup(Error **errp)
info->transferred = backup_state.stat.transferred;
info->has_reused = true;
info->reused = backup_state.stat.reused;
+ info->finishing = backup_state.stat.finishing;

qemu_mutex_unlock(&backup_state.stat.lock);

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7fde927621..bf559c6d52 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -770,12 +770,15 @@
#
# @uuid: uuid for this backup job
#
+# @finishing: if status='active' and finishing=true, then the backup process is
+# waiting for the target to finish.
+#
##
{ 'struct': 'BackupStatus',
'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
'*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
'*start-time': 'int', '*end-time': 'int',
- '*backup-file': 'str', '*uuid': 'str' } }
+ '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }

##
# @BackupFormat: