Several fixes for backup abort and error reporting

Also add my Signed-off-by to some patches where it was missing.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>

parent 60a607ad0f, commit 72ae34ecce
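The common thread of these changes is a small context struct that lets code hop between coroutine and main-loop/bottom-half context. Roughly, assembled from the hunks below rather than quoted verbatim:

    /* CoCtxData carries a coroutine plus the AioContext it must be woken in;
     * data is an optional payload (a Job* or Error** in the patches below). */
    typedef struct CoCtxData {
        Coroutine *co;
        AioContext *ctx;
        void *data;
    } CoCtxData;

    /* Wake-up callback: safe to call from another thread or a bottom half;
     * it schedules the coroutine back onto its own AioContext. */
    static void proxmox_backup_schedule_wake(void *data) {
        CoCtxData *waker = (CoCtxData *)data;
        aio_co_schedule(waker->ctx, waker->co);
    }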
@@ -14,13 +14,13 @@ Subject: [PATCH] PVE-Backup: proxmox backup patches for qemu
  include/block/block_int.h |   2 +-
  include/monitor/hmp.h     |   3 +
  monitor/hmp-cmds.c        |  44 ++
- proxmox-backup-client.c   | 182 +++++++
- proxmox-backup-client.h   |  52 ++
+ proxmox-backup-client.c   | 176 ++++++
+ proxmox-backup-client.h   |  59 ++
  pve-backup.c              | 955 +++++++++++++++++++++++++++++++++
  qapi/block-core.json      | 109 ++++
  qapi/common.json          |  13 +
  qapi/misc.json            |  13 -
-16 files changed, 1439 insertions(+), 15 deletions(-)
+16 files changed, 1440 insertions(+), 15 deletions(-)
 create mode 100644 proxmox-backup-client.c
 create mode 100644 proxmox-backup-client.h
 create mode 100644 pve-backup.c
@@ -278,10 +278,10 @@ index 280bb447a6..0e2d166552 100644
      switch (addr->type) {
 diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
 new file mode 100644
-index 0000000000..b7bc7f2574
+index 0000000000..a8f6653a81
 --- /dev/null
 +++ b/proxmox-backup-client.c
-@@ -0,0 +1,182 @@
+@@ -0,0 +1,176 @@
 +#include "proxmox-backup-client.h"
 +#include "qemu/main-loop.h"
 +#include "block/aio-wait.h"
@@ -296,12 +296,6 @@ index 0000000000..b7bc7f2574
 +    bool finished;
 +} BlockOnCoroutineWrapper;
 +
-+// Waker implementaion to syncronice with proxmox backup rust code
-+typedef struct ProxmoxBackupWaker {
-+    Coroutine *co;
-+    AioContext *ctx;
-+} ProxmoxBackupWaker;
-+
 +static void coroutine_fn block_on_coroutine_wrapper(void *opaque)
 +{
 +    BlockOnCoroutineWrapper *wrapper = opaque;
@@ -328,7 +322,7 @@ index 0000000000..b7bc7f2574
 +
 +// This is called from another thread, so we use aio_co_schedule()
 +static void proxmox_backup_schedule_wake(void *data) {
-+    ProxmoxBackupWaker *waker = (ProxmoxBackupWaker *)data;
++    CoCtxData *waker = (CoCtxData *)data;
 +    aio_co_schedule(waker->ctx, waker->co);
 +}
 +
@@ -337,7 +331,7 @@ index 0000000000..b7bc7f2574
 +{
 +    Coroutine *co = qemu_coroutine_self();
 +    AioContext *ctx = qemu_get_current_aio_context();
-+    ProxmoxBackupWaker waker = { .co = co, .ctx = ctx };
++    CoCtxData waker = { .co = co, .ctx = ctx };
 +    char *pbs_err = NULL;
 +    int pbs_res = -1;
 +
@@ -360,7 +354,7 @@ index 0000000000..b7bc7f2574
 +{
 +    Coroutine *co = qemu_coroutine_self();
 +    AioContext *ctx = qemu_get_current_aio_context();
-+    ProxmoxBackupWaker waker = { .co = co, .ctx = ctx };
++    CoCtxData waker = { .co = co, .ctx = ctx };
 +    char *pbs_err = NULL;
 +    int pbs_res = -1;
 +
@@ -383,7 +377,7 @@ index 0000000000..b7bc7f2574
 +{
 +    Coroutine *co = qemu_coroutine_self();
 +    AioContext *ctx = qemu_get_current_aio_context();
-+    ProxmoxBackupWaker waker = { .co = co, .ctx = ctx };
++    CoCtxData waker = { .co = co, .ctx = ctx };
 +    char *pbs_err = NULL;
 +    int pbs_res = -1;
 +
@@ -404,7 +398,7 @@ index 0000000000..b7bc7f2574
 +{
 +    Coroutine *co = qemu_coroutine_self();
 +    AioContext *ctx = qemu_get_current_aio_context();
-+    ProxmoxBackupWaker waker = { .co = co, .ctx = ctx };
++    CoCtxData waker = { .co = co, .ctx = ctx };
 +    char *pbs_err = NULL;
 +    int pbs_res = -1;
 +
@@ -426,7 +420,7 @@ index 0000000000..b7bc7f2574
 +{
 +    Coroutine *co = qemu_coroutine_self();
 +    AioContext *ctx = qemu_get_current_aio_context();
-+    ProxmoxBackupWaker waker = { .co = co, .ctx = ctx };
++    CoCtxData waker = { .co = co, .ctx = ctx };
 +    char *pbs_err = NULL;
 +    int pbs_res = -1;
 +
@@ -451,7 +445,7 @@ index 0000000000..b7bc7f2574
 +{
 +    Coroutine *co = qemu_coroutine_self();
 +    AioContext *ctx = qemu_get_current_aio_context();
-+    ProxmoxBackupWaker waker = { .co = co, .ctx = ctx };
++    CoCtxData waker = { .co = co, .ctx = ctx };
 +    char *pbs_err = NULL;
 +    int pbs_res = -1;
 +
@@ -466,10 +460,10 @@ index 0000000000..b7bc7f2574
 +}
 diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
 new file mode 100644
-index 0000000000..b311bf8de8
+index 0000000000..1dda8b7d8f
 --- /dev/null
 +++ b/proxmox-backup-client.h
-@@ -0,0 +1,52 @@
+@@ -0,0 +1,59 @@
 +#ifndef PROXMOX_BACKUP_CLIENT_H
 +#define PROXMOX_BACKUP_CLIENT_H
 +
@@ -477,6 +471,13 @@ index 0000000000..b311bf8de8
 +#include "qemu/coroutine.h"
 +#include "proxmox-backup-qemu.h"
 +
++typedef struct CoCtxData {
++    Coroutine *co;
++    AioContext *ctx;
++    void *data;
++} CoCtxData;
++
++// FIXME: Remove once coroutines are supported for QMP
 +void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg);
 +
 +int coroutine_fn
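As the FIXME notes, QMP handlers cannot yet run as coroutines, so the pattern the series uses is to bundle arguments into a task struct and drive the coroutine to completion with block_on_coroutine_fn. A minimal sketch of that shape (my_qmp_handler/my_co_task are hypothetical names; qmp_backup further down follows the same pattern):

    typedef struct MyQmpTask {
        Error **errp;
        UuidInfo *result;
    } MyQmpTask;

    static void coroutine_fn my_co_task(void *opaque)
    {
        MyQmpTask *task = opaque;
        /* coroutine-only work: may yield, take CoMutexes, etc. */
        task->result = NULL;
    }

    UuidInfo *my_qmp_handler(Error **errp)
    {
        MyQmpTask task = { .errp = errp, .result = NULL };
        block_on_coroutine_fn(my_co_task, &task);  /* blocks until done */
        return task.result;
    }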
@@ -99,10 +99,10 @@ index 0e2d166552..3ff014d32a 100644
 
      qapi_free_BackupStatus(info);
 diff --git a/proxmox-backup-client.c b/proxmox-backup-client.c
-index b7bc7f2574..0e9c584701 100644
+index a8f6653a81..4ce7bc0b5e 100644
 --- a/proxmox-backup-client.c
 +++ b/proxmox-backup-client.c
-@@ -95,6 +95,7 @@ proxmox_backup_co_register_image(
+@@ -89,6 +89,7 @@ proxmox_backup_co_register_image(
      ProxmoxBackupHandle *pbs,
      const char *device_name,
      uint64_t size,
@@ -110,7 +110,7 @@ index b7bc7f2574..0e9c584701 100644
      Error **errp)
  {
      Coroutine *co = qemu_coroutine_self();
-@@ -104,7 +105,7 @@ proxmox_backup_co_register_image(
+@@ -98,7 +99,7 @@ proxmox_backup_co_register_image(
      int pbs_res = -1;
 
      proxmox_backup_register_image_async(
@@ -120,10 +120,10 @@ index b7bc7f2574..0e9c584701 100644
      if (pbs_res < 0) {
          if (errp) error_setg(errp, "backup register image failed: %s", pbs_err ? pbs_err : "unknown error");
 diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
-index b311bf8de8..20fd6b1719 100644
+index 1dda8b7d8f..8cbf645b2c 100644
 --- a/proxmox-backup-client.h
 +++ b/proxmox-backup-client.h
-@@ -25,6 +25,7 @@ proxmox_backup_co_register_image(
+@@ -32,6 +32,7 @@ proxmox_backup_co_register_image(
      ProxmoxBackupHandle *pbs,
      const char *device_name,
      uint64_t size,
@@ -5,6 +5,8 @@ Subject: [PATCH] PVE: redirect stderr to journal when daemonized
 
 QEMU uses the logging for error messages usually, so LOG_ERR is most
 fitting.
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 ---
  Makefile.objs | 1 +
  os-posix.c    | 7 +++++--
@@ -3,6 +3,7 @@ From: Stefan Reiter <s.reiter@proxmox.com>
 Date: Thu, 20 Aug 2020 14:31:59 +0200
 Subject: [PATCH] PVE: Add sequential job transaction support
 
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 ---
  include/qemu/job.h | 12 ++++++++++++
  job.c              | 24 ++++++++++++++++++++++++
@@ -9,6 +9,8 @@ since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.
 
 To keep the rate-limiting and IO impact from before, we use a sequential
 transaction, so drives will still be backed up one after the other.
+
+Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 ---
  pve-backup.c | 167 +++++++++++++++------------------------
  1 file changed, 49 insertions(+), 118 deletions(-)
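For context, the sequential transaction mentioned above works by starting only the first job; each successful completion then kicks off the next. A rough sketch of the intended flow (job_txn_start_seq appears verbatim in patch 0053 below; job_txn_new_seq is assumed from patch 0050's job.h additions, which this commit does not touch):

    #include "qemu/job.h"

    static void start_sequential_backup(JobTxn *txn)
    {
        /* ... all backup jobs were created attached to txn ... */

        /* kick off only the first job; completion of each job
         * starts the next, so drives are written one after the
         * other instead of in parallel */
        job_txn_start_seq(txn);
    }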
@@ -20,38 +20,25 @@ Cases of aio_context_acquire/release from within what is now a coroutine
 are changed to aio_co_reschedule_self, which works since a running
 coroutine always holds the aio lock for the context it is running in.
 
-job_cancel_sync is changed to regular job_cancel, since job_cancel_sync
-uses AIO_WAIT_WHILE internally, which is forbidden from Coroutines.
+job_cancel_sync is called from a BH since it can't be run from a
+coroutine (uses AIO_WAIT_WHILE internally).
 
-create_backup_jobs cannot be run from a coroutine, so it is run
-directly. This does however mean that it runs unlocked, as it cannot
-acquire a CoMutex (see it's comment for rational on why that's ok).
+Same thing for create_backup_jobs, which is converted to a BH too.
 
 To communicate the finishing state, a new property is introduced to
 query-backup: 'finishing'. A new state is explicitly not used, since
 that would break compatibility with older qemu-server versions.
 
+[0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
+
 Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 ---
- proxmox-backup-client.h |   1 +
- pve-backup.c            | 124 ++++++++++++++++++++++------------
- qapi/block-core.json    |   5 +-
- 3 files changed, 74 insertions(+), 56 deletions(-)
+ pve-backup.c         | 148 ++++++++++++++++++++++++++-----------
+ qapi/block-core.json |   5 +-
+ 2 files changed, 95 insertions(+), 58 deletions(-)
 
-diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
-index 20fd6b1719..a4781c5851 100644
---- a/proxmox-backup-client.h
-+++ b/proxmox-backup-client.h
-@@ -5,6 +5,7 @@
- #include "qemu/coroutine.h"
- #include "proxmox-backup-qemu.h"
- 
-+// FIXME: Remove once coroutines are supported for QMP
- void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg);
- 
- int coroutine_fn
 diff --git a/pve-backup.c b/pve-backup.c
-index 9562e9c98d..53cf23ed5a 100644
+index 9562e9c98d..0466145bec 100644
 --- a/pve-backup.c
 +++ b/pve-backup.c
 @@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
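The two conversions described above land in the hunks below; consolidated, the defer-to-BH pattern looks roughly like this (a sketch assembled from those hunks, not a verbatim excerpt):

    /* Runs in the main loop, where job_cancel_sync's internal
     * AIO_WAIT_WHILE is legal. */
    static void job_cancel_bh(void *opaque) {
        CoCtxData *data = (CoCtxData*)opaque;
        Job *job = (Job*)data->data;
        AioContext *job_ctx = job->aio_context;
        aio_context_acquire(job_ctx);
        job_cancel_sync(job);
        aio_context_release(job_ctx);
        aio_co_enter(data->ctx, data->co);   /* resume the waiting coroutine */
    }

    /* ...and from the coroutine side: */
    CoCtxData data = {
        .ctx = qemu_get_current_aio_context(),
        .co = qemu_coroutine_self(),
        .data = job,
    };
    aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
    qemu_coroutine_yield();  /* woken by aio_co_enter() in the BH */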
@@ -172,7 +159,7 @@ index 9562e9c98d..53cf23ed5a 100644
 
      // remove self from job list
      backup_state.di_list = g_list_remove(backup_state.di_list, di);
-@@ -310,21 +310,36 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+@@ -310,21 +310,49 @@ static void pvebackup_complete_cb(void *opaque, int ret)
 
      /* call cleanup if we're the last job */
      if (!g_list_first(backup_state.di_list)) {
@@ -187,19 +174,33 @@ index 9562e9c98d..53cf23ed5a 100644
 -static void pvebackup_cancel(void)
 +static void pvebackup_complete_cb(void *opaque, int ret)
  {
-     assert(!qemu_in_coroutine());
- 
+-    assert(!qemu_in_coroutine());
 +    PVEBackupDevInfo *di = opaque;
 +    di->completed_ret = ret;
 +
 +    /*
 +     * Schedule stream cleanup in async coroutine. close_image and finish might
-+     * take a while, so we can't block on them here.
++     * take a while, so we can't block on them here. This way it also doesn't
++     * matter if we're already running in a coroutine or not.
 +     * Note: di is a pointer to an entry in the global backup_state struct, so
 +     * it stays valid.
 +     */
 +    Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
-+    aio_co_schedule(qemu_get_aio_context(), co);
++    aio_co_enter(qemu_get_aio_context(), co);
 +}
++
++/*
++ * job_cancel(_sync) does not like to be called from coroutines, so defer to
++ * main loop processing via a bottom half.
++ */
++static void job_cancel_bh(void *opaque) {
++    CoCtxData *data = (CoCtxData*)opaque;
++    Job *job = (Job*)data->data;
++    AioContext *job_ctx = job->aio_context;
++    aio_context_acquire(job_ctx);
++    job_cancel_sync(job);
++    aio_context_release(job_ctx);
++    aio_co_enter(data->ctx, data->co);
++}
 +
 +static void coroutine_fn pvebackup_co_cancel(void *opaque)
@@ -213,14 +214,20 @@ index 9562e9c98d..53cf23ed5a 100644
 
      if (backup_state.vmaw) {
          /* make sure vma writer does not block anymore */
-@@ -342,27 +357,16 @@ static void pvebackup_cancel(void)
+@@ -342,27 +370,22 @@ static void pvebackup_cancel(void)
          ((PVEBackupDevInfo *)bdi->data)->job :
          NULL;
  
 -    /* ref the job before releasing the mutex, just to be safe */
      if (cancel_job) {
 -        job_ref(&cancel_job->job);
-+        job_cancel(&cancel_job->job, false);
++        CoCtxData data = {
++            .ctx = qemu_get_current_aio_context(),
++            .co = qemu_coroutine_self(),
++            .data = &cancel_job->job,
++        };
++        aio_bh_schedule_oneshot(data.ctx, job_cancel_bh, &data);
++        qemu_coroutine_yield();
      }
  
 -    /* job_cancel_sync may enter the job, so we need to release the
@@ -244,7 +251,7 @@ index 9562e9c98d..53cf23ed5a 100644
  }
  
  // assumes the caller holds backup_mutex
-@@ -415,6 +419,14 @@ static int coroutine_fn pvebackup_co_add_config(
+@@ -415,6 +438,14 @@ static int coroutine_fn pvebackup_co_add_config(
      goto out;
  }
  
@@ -259,7 +266,7 @@ index 9562e9c98d..53cf23ed5a 100644
  static bool create_backup_jobs(void) {
  
      assert(!qemu_in_coroutine());
-@@ -523,11 +535,12 @@ typedef struct QmpBackupTask {
+@@ -523,11 +554,12 @@ typedef struct QmpBackupTask {
      UuidInfo *result;
  } QmpBackupTask;
  
@@ -273,7 +280,18 @@ index 9562e9c98d..53cf23ed5a 100644
      QmpBackupTask *task = opaque;
  
      task->result = NULL; // just to be sure
-@@ -616,6 +629,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
+@@ -548,8 +580,9 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
+     const char *firewall_name = "qemu-server.fw";
+ 
+     if (backup_state.di_list) {
+-        error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
++        error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+                   "previous backup not finished");
++        qemu_co_mutex_unlock(&backup_state.backup_mutex);
+         return;
+     }
+ 
+@@ -616,6 +649,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
      }
      di->size = size;
      total += size;
@@ -282,7 +300,7 @@ index 9562e9c98d..53cf23ed5a 100644
      }
  
      uuid_generate(uuid);
-@@ -847,6 +862,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
+@@ -847,6 +882,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
      backup_state.stat.dirty = total - backup_state.stat.reused;
      backup_state.stat.transferred = 0;
      backup_state.stat.zero_bytes = 0;
@@ -290,7 +308,7 @@ index 9562e9c98d..53cf23ed5a 100644
  
      qemu_mutex_unlock(&backup_state.stat.lock);
  
-@@ -861,6 +877,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
+@@ -861,6 +897,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
      uuid_info->UUID = uuid_str;
  
      task->result = uuid_info;
@@ -299,7 +317,7 @@ index 9562e9c98d..53cf23ed5a 100644
      return;
  
  err_mutex:
-@@ -903,6 +921,8 @@ err:
+@@ -903,6 +941,8 @@ err:
      }
  
      task->result = NULL;
@@ -308,7 +326,7 @@ index 9562e9c98d..53cf23ed5a 100644
      return;
  }
  
-@@ -956,22 +976,15 @@ UuidInfo *qmp_backup(
+@@ -956,22 +996,15 @@ UuidInfo *qmp_backup(
          .errp = errp,
      };
  
@@ -332,7 +350,7 @@ index 9562e9c98d..53cf23ed5a 100644
  }
  
      return task.result;
-@@ -1025,6 +1038,7 @@ BackupStatus *qmp_query_backup(Error **errp)
+@@ -1025,6 +1058,7 @@ BackupStatus *qmp_query_backup(Error **errp)
      info->transferred = backup_state.stat.transferred;
      info->has_reused = true;
      info->reused = backup_state.stat.reused;
debian/patches/pve/0053-PVE-fix-and-clean-up-error-handling-for-create_backu.patch (new file, 187 lines)
@@ -0,0 +1,187 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 22 Oct 2020 17:01:07 +0200
Subject: [PATCH] PVE: fix and clean up error handling for create_backup_jobs

No more weird bool returns, just the standard "errp" format used
everywhere else too. With this, if backup_job_create fails, the error
message is actually returned over QMP and can be shown to the user.

To facilitate correct cleanup on such an error, we call
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
This additionally allows us to actually hold the backup_mutex during
operation.

Also add a job_cancel_sync before job_unref, since a job must be in
STATUS_NULL to be deleted by unref, which could trigger an assert
before.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 pve-backup.c | 79 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 25 deletions(-)

diff --git a/pve-backup.c b/pve-backup.c
index 0466145bec..1a2647e7a5 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -50,6 +50,7 @@ static struct PVEBackupState {
         size_t zero_bytes;
         GList *bitmap_list;
         bool finishing;
+        bool starting;
     } stat;
     int64_t speed;
     VmaWriter *vmaw;
@@ -277,6 +278,16 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
     PVEBackupDevInfo *di = opaque;
     int ret = di->completed_ret;
 
+    qemu_mutex_lock(&backup_state.stat.lock);
+    bool starting = backup_state.stat.starting;
+    qemu_mutex_unlock(&backup_state.stat.lock);
+    if (starting) {
+        /* in 'starting' state, no tasks have been run yet, meaning we can (and
+         * must) skip all cleanup, as we don't know what has and hasn't been
+         * initialized yet. */
+        return;
+    }
+
     qemu_co_mutex_lock(&backup_state.backup_mutex);
 
     if (ret < 0) {
@@ -441,15 +452,15 @@ static int coroutine_fn pvebackup_co_add_config(
 /*
  * backup_job_create can *not* be run from a coroutine (and requires an
  * acquired AioContext), so this can't either.
- * This does imply that this function cannot run with backup_mutex acquired.
- * That is ok because it is only ever called between setting up the backup_state
- * struct and starting the jobs, and from within a QMP call. This means that no
- * other QMP call can interrupt, and no background job is running yet.
+ * The caller is responsible that backup_mutex is held nonetheless.
  */
-static bool create_backup_jobs(void) {
+static void create_backup_jobs_bh(void *opaque) {
 
     assert(!qemu_in_coroutine());
 
+    CoCtxData *data = (CoCtxData*)opaque;
+    Error **errp = (Error**)data->data;
+
     Error *local_err = NULL;
 
     /* create job transaction to synchronize bitmap commit and cancel all
@@ -483,24 +494,19 @@ static bool create_backup_jobs(void) {
 
         aio_context_release(aio_context);
 
-        if (!job || local_err != NULL) {
-            Error *create_job_err = NULL;
-            error_setg(&create_job_err, "backup_job_create failed: %s",
-                       local_err ? error_get_pretty(local_err) : "null");
+        di->job = job;
 
-            pvebackup_propagate_error(create_job_err);
+        if (!job || local_err) {
+            error_setg(errp, "backup_job_create failed: %s",
+                       local_err ? error_get_pretty(local_err) : "null");
             break;
         }
 
-        di->job = job;
-
         bdrv_unref(di->target);
         di->target = NULL;
     }
 
-    bool errors = pvebackup_error_or_canceled();
-
-    if (errors) {
+    if (*errp) {
         l = backup_state.di_list;
         while (l) {
             PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -512,12 +518,17 @@ static bool create_backup_jobs(void) {
             }
 
             if (di->job) {
+                AioContext *ctx = di->job->job.aio_context;
+                aio_context_acquire(ctx);
+                job_cancel_sync(&di->job->job);
                 job_unref(&di->job->job);
+                aio_context_release(ctx);
             }
         }
     }
 
-    return errors;
+    /* return */
+    aio_co_enter(data->ctx, data->co);
 }
 
 typedef struct QmpBackupTask {
@@ -883,6 +894,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
     backup_state.stat.transferred = 0;
     backup_state.stat.zero_bytes = 0;
     backup_state.stat.finishing = false;
+    backup_state.stat.starting = true;
 
     qemu_mutex_unlock(&backup_state.stat.lock);
 
@@ -898,7 +910,32 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
 
     task->result = uuid_info;
 
+    /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
+     * backup_mutex locked. This is fine, a CoMutex can be held across yield
+     * points, and we'll release it as soon as the BH reschedules us.
+     */
+    CoCtxData waker = {
+        .co = qemu_coroutine_self(),
+        .ctx = qemu_get_current_aio_context(),
+        .data = &local_err,
+    };
+    aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
+    qemu_coroutine_yield();
+
+    if (local_err) {
+        error_propagate(task->errp, local_err);
+        goto err;
+    }
+
     qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+    qemu_mutex_lock(&backup_state.stat.lock);
+    backup_state.stat.starting = false;
+    qemu_mutex_unlock(&backup_state.stat.lock);
+
+    /* start the first job in the transaction */
+    job_txn_start_seq(backup_state.txn);
+
     return;
 
 err_mutex:
@@ -921,6 +958,7 @@ err:
         g_free(di);
     }
     g_list_free(di_list);
+    backup_state.di_list = NULL;
 
     if (devs) {
         g_strfreev(devs);
@@ -998,15 +1036,6 @@ UuidInfo *qmp_backup(
 
     block_on_coroutine_fn(pvebackup_co_prepare, &task);
 
-    if (*errp == NULL) {
-        bool errors = create_backup_jobs();
-
-        if (!errors) {
-            // start the first job in the transaction
-            job_txn_start_seq(backup_state.txn);
-        }
-    }
-
     return task.result;
 }
 
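Taken together, patch 0053 turns job creation into the same BH-plus-yield shape seen earlier, with an Error ** as the payload so failures propagate back to QMP. The round trip, reduced to its skeleton (assembled from the file above, not a verbatim excerpt):

    Error *local_err = NULL;
    CoCtxData waker = {
        .co = qemu_coroutine_self(),
        .ctx = qemu_get_current_aio_context(),
        .data = &local_err,
    };
    aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
    qemu_coroutine_yield();              /* the BH re-enters us when done */

    if (local_err) {
        error_propagate(task->errp, local_err);  /* reaches the QMP client */
        goto err;
    }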
debian/patches/series (1 line changed)
@@ -53,3 +53,4 @@ pve/0049-PVE-redirect-stderr-to-journal-when-daemonized.patch
 pve/0050-PVE-Add-sequential-job-transaction-support.patch
 pve/0051-PVE-Backup-Use-a-transaction-to-synchronize-job-stat.patch
 pve/0052-PVE-Backup-Use-more-coroutines-and-don-t-block-on-fi.patch
+pve/0053-PVE-fix-and-clean-up-error-handling-for-create_backu.patch