pve-qemu-qoup/debian/patches/pve/0033-qmp_backup-use-a-CoMutex-to-protect-access-to-backup.patch
Thomas Lamprecht cbb547903c run backup related code inside co-routines and improve locking
Patches-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2019-10-23 09:14:51 +02:00

275 lines
9.1 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Tue, 22 Oct 2019 12:48:18 +0200
Subject: [PATCH] qmp_backup: use a CoMutex to protect access to backup_state
And use job_cancel instead of job_cancel_sync (which hangs sometimes)
Also avoid calling pvebackup_co_cleanup if not necessary. If there are
running jobs, this is called from the block job completion callbacks
anyways.
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
---
blockdev.c | 74 ++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 52 insertions(+), 22 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index 85031de942..7e9241cf42 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3195,7 +3195,7 @@ static struct PVEBackupState {
size_t total;
size_t transferred;
size_t zero_bytes;
- QemuMutex backup_mutex;
+ CoMutex backup_mutex;
bool backup_mutex_initialized;
} backup_state;
@@ -3220,7 +3220,10 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
const unsigned char *buf = pbuf;
PVEBackupDevInfo *di = opaque;
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
if (backup_state.cancel) {
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return size; // return success
}
@@ -3231,6 +3234,7 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
"got unaligned write inside backup dump "
"callback (sector %ld)", start);
}
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return -1; // not aligned to cluster size
}
@@ -3272,6 +3276,8 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
backup_state.transferred += size;
}
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
// Note: always return success, because we want that writes succeed anyways.
return size;
@@ -3281,10 +3287,9 @@ static void coroutine_fn pvebackup_co_cleanup(void)
{
assert(qemu_in_coroutine());
- qemu_mutex_lock(&backup_state.backup_mutex);
- // Avoid race between block jobs and backup-cancel command:
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
if (!backup_state.vmaw) {
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -3299,7 +3304,7 @@ static void coroutine_fn pvebackup_co_cleanup(void)
g_list_free(backup_state.di_list);
backup_state.di_list = NULL;
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
typedef struct PVEBackupCompeteCallbackData {
@@ -3313,6 +3318,8 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
PVEBackupCompeteCallbackData *cb_data = opaque;
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
PVEBackupDevInfo *di = cb_data->di;
int ret = cb_data->result;
@@ -3331,12 +3338,14 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
}
// remove self from job queue
- qemu_mutex_lock(&backup_state.backup_mutex);
backup_state.di_list = g_list_remove(backup_state.di_list, di);
g_free(di);
- qemu_mutex_unlock(&backup_state.backup_mutex);
- if (!backup_state.cancel) {
+ bool cancel = backup_state.cancel;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+ if (!cancel) {
pvebackup_co_run_next_job();
}
}
@@ -3356,11 +3365,13 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
{
assert(qemu_in_coroutine());
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
backup_state.cancel = true;
- qemu_mutex_lock(&backup_state.backup_mutex);
+
// Avoid race between block jobs and backup-cancel command:
if (!backup_state.vmaw) {
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -3373,6 +3384,7 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
vma_writer_set_error(backup_state.vmaw, "backup cancelled");
}
+ bool running_jobs = 0;
GList *l = backup_state.di_list;
while (l) {
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
@@ -3383,6 +3395,7 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
AioContext *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
if (!di->completed) {
+ running_jobs += 1;
job_cancel(&job->job, false);
}
aio_context_release(aio_context);
@@ -3391,7 +3404,8 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
}
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- pvebackup_co_cleanup();
+
+ if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler
}
void qmp_backup_cancel(Error **errp)
@@ -3445,7 +3459,7 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
{
assert(qemu_in_coroutine());
- qemu_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
GList *l = backup_state.di_list;
while (l) {
@@ -3454,11 +3468,13 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
if (!di->completed && di->bs && di->bs->job) {
BlockJob *job = di->bs->job;
AioContext *aio_context = blk_get_aio_context(job->blk);
+ bool cancel_job = backup_state.error || backup_state.cancel;
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
aio_context_acquire(aio_context);
- qemu_mutex_unlock(&backup_state.backup_mutex);
if (job_should_pause(&job->job)) {
- if (backup_state.error || backup_state.cancel) {
- job_cancel_sync(&job->job);
+ if (cancel_job) {
+ job_cancel(&job->job, false);
} else {
job_resume(&job->job);
}
@@ -3467,7 +3483,7 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
return;
}
}
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
// no more jobs, run the cleanup
pvebackup_co_cleanup();
@@ -3510,11 +3526,14 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
BlockJob *job;
if (!backup_state.backup_mutex_initialized) {
- qemu_mutex_init(&backup_state.backup_mutex);
+ qemu_co_mutex_init(&backup_state.backup_mutex);
backup_state.backup_mutex_initialized = true;
}
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
if (backup_state.di_list) {
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
"previous backup not finished");
return;
@@ -3686,7 +3705,6 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
uuid_copy(backup_state.uuid, uuid);
uuid_unparse_lower(uuid, backup_state.uuid_str);
- qemu_mutex_lock(&backup_state.backup_mutex);
backup_state.di_list = di_list;
backup_state.total = total;
@@ -3704,20 +3722,21 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
1, NULL, &local_err);
if (!job || local_err != NULL) {
error_setg(&backup_state.error, "backup_job_create failed");
- pvebackup_co_cancel(NULL);
- } else {
- job_start(&job->job);
+ break;
}
+ job_start(&job->job);
if (di->target) {
bdrv_unref(di->target);
di->target = NULL;
}
}
- qemu_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
if (!backup_state.error) {
pvebackup_co_run_next_job(); // run one job
+ } else {
+ pvebackup_co_cancel(NULL);
}
uuid_info = g_malloc0(sizeof(*uuid_info));
@@ -3758,6 +3777,8 @@ err:
rmdir(backup_dir);
}
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
task->result = NULL;
return;
}
@@ -3785,6 +3806,7 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
};
block_on_coroutine_fn(pvebackup_co_start, &task);
+
return task.result;
}
@@ -3802,9 +3824,15 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
BackupStatus *info = g_malloc0(sizeof(*info));
+ if (!backup_state.backup_mutex_initialized)
+ return;
+
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
if (!backup_state.start_time) {
/* not started, return {} */
task->result = info;
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
return;
}
@@ -3842,6 +3870,8 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
info->transferred = backup_state.transferred;
task->result = info;
+
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
}
BackupStatus *qmp_query_backup(Error **errp)