275 lines
9.1 KiB
Diff
275 lines
9.1 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Dietmar Maurer <dietmar@proxmox.com>
|
||
|
Date: Tue, 22 Oct 2019 12:48:18 +0200
|
||
|
Subject: [PATCH] qmp_backup: use a CoMutex to protect access to backup_state
|
||
|
|
||
|
And use job_cancel instead of job_cancel_sync (which hangs sometimes)
|
||
|
Also avoid calling pvebackup_co_cleanup if not necessary. If there are
|
||
|
running jobs, this is called from the block job completion callbacks
|
||
|
anyways.
|
||
|
|
||
|
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
|
||
|
---
|
||
|
blockdev.c | 74 ++++++++++++++++++++++++++++++++++++++----------------
|
||
|
1 file changed, 52 insertions(+), 22 deletions(-)
|
||
|
|
||
|
diff --git a/blockdev.c b/blockdev.c
|
||
|
index 85031de942..7e9241cf42 100644
|
||
|
--- a/blockdev.c
|
||
|
+++ b/blockdev.c
|
||
|
@@ -3195,7 +3195,7 @@ static struct PVEBackupState {
|
||
|
size_t total;
|
||
|
size_t transferred;
|
||
|
size_t zero_bytes;
|
||
|
- QemuMutex backup_mutex;
|
||
|
+ CoMutex backup_mutex;
|
||
|
bool backup_mutex_initialized;
|
||
|
} backup_state;
|
||
|
|
||
|
@@ -3220,7 +3220,10 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
|
||
|
const unsigned char *buf = pbuf;
|
||
|
PVEBackupDevInfo *di = opaque;
|
||
|
|
||
|
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
if (backup_state.cancel) {
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
return size; // return success
|
||
|
}
|
||
|
|
||
|
@@ -3231,6 +3234,7 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
|
||
|
"got unaligned write inside backup dump "
|
||
|
"callback (sector %ld)", start);
|
||
|
}
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
return -1; // not aligned to cluster size
|
||
|
}
|
||
|
|
||
|
@@ -3272,6 +3276,8 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
|
||
|
backup_state.transferred += size;
|
||
|
}
|
||
|
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
// Note: always return success, because we want that writes succeed anyways.
|
||
|
|
||
|
return size;
|
||
|
@@ -3281,10 +3287,9 @@ static void coroutine_fn pvebackup_co_cleanup(void)
|
||
|
{
|
||
|
assert(qemu_in_coroutine());
|
||
|
|
||
|
- qemu_mutex_lock(&backup_state.backup_mutex);
|
||
|
- // Avoid race between block jobs and backup-cancel command:
|
||
|
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
|
||
|
if (!backup_state.vmaw) {
|
||
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
@@ -3299,7 +3304,7 @@ static void coroutine_fn pvebackup_co_cleanup(void)
|
||
|
|
||
|
g_list_free(backup_state.di_list);
|
||
|
backup_state.di_list = NULL;
|
||
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
}
|
||
|
|
||
|
typedef struct PVEBackupCompeteCallbackData {
|
||
|
@@ -3313,6 +3318,8 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
|
||
|
|
||
|
PVEBackupCompeteCallbackData *cb_data = opaque;
|
||
|
|
||
|
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
PVEBackupDevInfo *di = cb_data->di;
|
||
|
int ret = cb_data->result;
|
||
|
|
||
|
@@ -3331,12 +3338,14 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
|
||
|
}
|
||
|
|
||
|
// remove self from job queue
|
||
|
- qemu_mutex_lock(&backup_state.backup_mutex);
|
||
|
backup_state.di_list = g_list_remove(backup_state.di_list, di);
|
||
|
g_free(di);
|
||
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
||
|
|
||
|
- if (!backup_state.cancel) {
|
||
|
+ bool cancel = backup_state.cancel;
|
||
|
+
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
+ if (!cancel) {
|
||
|
pvebackup_co_run_next_job();
|
||
|
}
|
||
|
}
|
||
|
@@ -3356,11 +3365,13 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
|
||
|
{
|
||
|
assert(qemu_in_coroutine());
|
||
|
|
||
|
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
backup_state.cancel = true;
|
||
|
- qemu_mutex_lock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
// Avoid race between block jobs and backup-cancel command:
|
||
|
if (!backup_state.vmaw) {
|
||
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
@@ -3373,6 +3384,7 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
|
||
|
vma_writer_set_error(backup_state.vmaw, "backup cancelled");
|
||
|
}
|
||
|
|
||
|
+ bool running_jobs = 0;
|
||
|
GList *l = backup_state.di_list;
|
||
|
while (l) {
|
||
|
PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
|
||
|
@@ -3383,6 +3395,7 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
|
||
|
AioContext *aio_context = blk_get_aio_context(job->blk);
|
||
|
aio_context_acquire(aio_context);
|
||
|
if (!di->completed) {
|
||
|
+ running_jobs += 1;
|
||
|
job_cancel(&job->job, false);
|
||
|
}
|
||
|
aio_context_release(aio_context);
|
||
|
@@ -3391,7 +3404,8 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
|
||
|
}
|
||
|
|
||
|
qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
- pvebackup_co_cleanup();
|
||
|
+
|
||
|
+ if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler
|
||
|
}
|
||
|
|
||
|
void qmp_backup_cancel(Error **errp)
|
||
|
@@ -3445,7 +3459,7 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
|
||
|
{
|
||
|
assert(qemu_in_coroutine());
|
||
|
|
||
|
- qemu_mutex_lock(&backup_state.backup_mutex);
|
||
|
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
|
||
|
|
||
|
GList *l = backup_state.di_list;
|
||
|
while (l) {
|
||
|
@@ -3454,11 +3468,13 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
|
||
|
if (!di->completed && di->bs && di->bs->job) {
|
||
|
BlockJob *job = di->bs->job;
|
||
|
AioContext *aio_context = blk_get_aio_context(job->blk);
|
||
|
+ bool cancel_job = backup_state.error || backup_state.cancel;
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
aio_context_acquire(aio_context);
|
||
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
||
|
if (job_should_pause(&job->job)) {
|
||
|
- if (backup_state.error || backup_state.cancel) {
|
||
|
- job_cancel_sync(&job->job);
|
||
|
+ if (cancel_job) {
|
||
|
+ job_cancel(&job->job, false);
|
||
|
} else {
|
||
|
job_resume(&job->job);
|
||
|
}
|
||
|
@@ -3467,7 +3483,7 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
|
||
|
// no more jobs, run the cleanup
|
||
|
pvebackup_co_cleanup();
|
||
|
@@ -3510,11 +3526,14 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
|
||
|
BlockJob *job;
|
||
|
|
||
|
if (!backup_state.backup_mutex_initialized) {
|
||
|
- qemu_mutex_init(&backup_state.backup_mutex);
|
||
|
+ qemu_co_mutex_init(&backup_state.backup_mutex);
|
||
|
backup_state.backup_mutex_initialized = true;
|
||
|
}
|
||
|
|
||
|
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
if (backup_state.di_list) {
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
|
||
|
"previous backup not finished");
|
||
|
return;
|
||
|
@@ -3686,7 +3705,6 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
|
||
|
uuid_copy(backup_state.uuid, uuid);
|
||
|
uuid_unparse_lower(uuid, backup_state.uuid_str);
|
||
|
|
||
|
- qemu_mutex_lock(&backup_state.backup_mutex);
|
||
|
backup_state.di_list = di_list;
|
||
|
|
||
|
backup_state.total = total;
|
||
|
@@ -3704,20 +3722,21 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
|
||
|
1, NULL, &local_err);
|
||
|
if (!job || local_err != NULL) {
|
||
|
error_setg(&backup_state.error, "backup_job_create failed");
|
||
|
- pvebackup_co_cancel(NULL);
|
||
|
- } else {
|
||
|
- job_start(&job->job);
|
||
|
+ break;
|
||
|
}
|
||
|
+ job_start(&job->job);
|
||
|
if (di->target) {
|
||
|
bdrv_unref(di->target);
|
||
|
di->target = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- qemu_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
|
||
|
if (!backup_state.error) {
|
||
|
pvebackup_co_run_next_job(); // run one job
|
||
|
+ } else {
|
||
|
+ pvebackup_co_cancel(NULL);
|
||
|
}
|
||
|
|
||
|
uuid_info = g_malloc0(sizeof(*uuid_info));
|
||
|
@@ -3758,6 +3777,8 @@ err:
|
||
|
rmdir(backup_dir);
|
||
|
}
|
||
|
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
task->result = NULL;
|
||
|
return;
|
||
|
}
|
||
|
@@ -3785,6 +3806,7 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format,
|
||
|
};
|
||
|
|
||
|
block_on_coroutine_fn(pvebackup_co_start, &task);
|
||
|
+
|
||
|
return task.result;
|
||
|
}
|
||
|
|
||
|
@@ -3802,9 +3824,15 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
|
||
|
|
||
|
BackupStatus *info = g_malloc0(sizeof(*info));
|
||
|
|
||
|
+ if (!backup_state.backup_mutex_initialized)
|
||
|
+ return;
|
||
|
+
|
||
|
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
|
||
|
+
|
||
|
if (!backup_state.start_time) {
|
||
|
/* not started, return {} */
|
||
|
task->result = info;
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
@@ -3842,6 +3870,8 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
|
||
|
info->transferred = backup_state.transferred;
|
||
|
|
||
|
task->result = info;
|
||
|
+
|
||
|
+ qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||
|
}
|
||
|
|
||
|
BackupStatus *qmp_query_backup(Error **errp)
|