From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Tue, 22 Oct 2019 12:48:18 +0200 Subject: [PATCH] qmp_backup: use a CoMutex to protect access to backup_state And use job_cancel instead of job_cancel_sync (which hangs sometimes) Also avoid calling pvebackup_co_cleanup if not necessary. If there are running jobs, this is called from the block job completion callbacks anyways. Signed-off-by: Dietmar Maurer --- blockdev.c | 74 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/blockdev.c b/blockdev.c index 85031de942..7e9241cf42 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3195,7 +3195,7 @@ static struct PVEBackupState { size_t total; size_t transferred; size_t zero_bytes; - QemuMutex backup_mutex; + CoMutex backup_mutex; bool backup_mutex_initialized; } backup_state; @@ -3220,7 +3220,10 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target, const unsigned char *buf = pbuf; PVEBackupDevInfo *di = opaque; + qemu_co_mutex_lock(&backup_state.backup_mutex); + if (backup_state.cancel) { + qemu_co_mutex_unlock(&backup_state.backup_mutex); return size; // return success } @@ -3231,6 +3234,7 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target, "got unaligned write inside backup dump " "callback (sector %ld)", start); } + qemu_co_mutex_unlock(&backup_state.backup_mutex); return -1; // not aligned to cluster size } @@ -3272,6 +3276,8 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target, backup_state.transferred += size; } + qemu_co_mutex_unlock(&backup_state.backup_mutex); + // Note: always return success, because we want that writes succeed anyways. return size; @@ -3281,10 +3287,9 @@ static void coroutine_fn pvebackup_co_cleanup(void) { assert(qemu_in_coroutine()); - qemu_mutex_lock(&backup_state.backup_mutex); - // Avoid race between block jobs and backup-cancel command: + qemu_co_mutex_lock(&backup_state.backup_mutex); if (!backup_state.vmaw) { - qemu_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.backup_mutex); return; } @@ -3299,7 +3304,7 @@ static void coroutine_fn pvebackup_co_cleanup(void) g_list_free(backup_state.di_list); backup_state.di_list = NULL; - qemu_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.backup_mutex); } typedef struct PVEBackupCompeteCallbackData { @@ -3313,6 +3318,8 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque) PVEBackupCompeteCallbackData *cb_data = opaque; + qemu_co_mutex_lock(&backup_state.backup_mutex); + PVEBackupDevInfo *di = cb_data->di; int ret = cb_data->result; @@ -3331,12 +3338,14 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque) } // remove self from job queue - qemu_mutex_lock(&backup_state.backup_mutex); backup_state.di_list = g_list_remove(backup_state.di_list, di); g_free(di); - qemu_mutex_unlock(&backup_state.backup_mutex); - if (!backup_state.cancel) { + bool cancel = backup_state.cancel; + + qemu_co_mutex_unlock(&backup_state.backup_mutex); + + if (!cancel) { pvebackup_co_run_next_job(); } } @@ -3356,11 +3365,13 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque) { assert(qemu_in_coroutine()); + qemu_co_mutex_lock(&backup_state.backup_mutex); + backup_state.cancel = true; - qemu_mutex_lock(&backup_state.backup_mutex); + // Avoid race between block jobs and backup-cancel command: if (!backup_state.vmaw) { - qemu_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.backup_mutex); return; } @@ -3373,6 +3384,7 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque) vma_writer_set_error(backup_state.vmaw, "backup cancelled"); } + bool running_jobs = 0; GList *l = backup_state.di_list; while (l) { PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data; @@ -3383,6 +3395,7 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque) AioContext *aio_context = blk_get_aio_context(job->blk); aio_context_acquire(aio_context); if (!di->completed) { + running_jobs += 1; job_cancel(&job->job, false); } aio_context_release(aio_context); @@ -3391,7 +3404,8 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque) } qemu_co_mutex_unlock(&backup_state.backup_mutex); - pvebackup_co_cleanup(); + + if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler } void qmp_backup_cancel(Error **errp) @@ -3445,7 +3459,7 @@ static void coroutine_fn pvebackup_co_run_next_job(void) { assert(qemu_in_coroutine()); - qemu_mutex_lock(&backup_state.backup_mutex); + qemu_co_mutex_lock(&backup_state.backup_mutex); GList *l = backup_state.di_list; while (l) { @@ -3454,11 +3468,13 @@ static void coroutine_fn pvebackup_co_run_next_job(void) if (!di->completed && di->bs && di->bs->job) { BlockJob *job = di->bs->job; AioContext *aio_context = blk_get_aio_context(job->blk); + bool cancel_job = backup_state.error || backup_state.cancel; + qemu_co_mutex_unlock(&backup_state.backup_mutex); + aio_context_acquire(aio_context); - qemu_mutex_unlock(&backup_state.backup_mutex); if (job_should_pause(&job->job)) { - if (backup_state.error || backup_state.cancel) { - job_cancel_sync(&job->job); + if (cancel_job) { + job_cancel(&job->job, false); } else { job_resume(&job->job); } @@ -3467,7 +3483,7 @@ static void coroutine_fn pvebackup_co_run_next_job(void) return; } } - qemu_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.backup_mutex); // no more jobs, run the cleanup pvebackup_co_cleanup(); @@ -3510,11 +3526,14 @@ static void coroutine_fn pvebackup_co_start(void *opaque) BlockJob *job; if (!backup_state.backup_mutex_initialized) { - qemu_mutex_init(&backup_state.backup_mutex); + qemu_co_mutex_init(&backup_state.backup_mutex); backup_state.backup_mutex_initialized = true; } + qemu_co_mutex_lock(&backup_state.backup_mutex); + if (backup_state.di_list) { + qemu_co_mutex_unlock(&backup_state.backup_mutex); error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "previous backup not finished"); return; @@ -3686,7 +3705,6 @@ static void coroutine_fn pvebackup_co_start(void *opaque) uuid_copy(backup_state.uuid, uuid); uuid_unparse_lower(uuid, backup_state.uuid_str); - qemu_mutex_lock(&backup_state.backup_mutex); backup_state.di_list = di_list; backup_state.total = total; @@ -3704,20 +3722,21 @@ static void coroutine_fn pvebackup_co_start(void *opaque) 1, NULL, &local_err); if (!job || local_err != NULL) { error_setg(&backup_state.error, "backup_job_create failed"); - pvebackup_co_cancel(NULL); - } else { - job_start(&job->job); + break; } + job_start(&job->job); if (di->target) { bdrv_unref(di->target); di->target = NULL; } } - qemu_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.backup_mutex); if (!backup_state.error) { pvebackup_co_run_next_job(); // run one job + } else { + pvebackup_co_cancel(NULL); } uuid_info = g_malloc0(sizeof(*uuid_info)); @@ -3758,6 +3777,8 @@ err: rmdir(backup_dir); } + qemu_co_mutex_unlock(&backup_state.backup_mutex); + task->result = NULL; return; } @@ -3785,6 +3806,7 @@ UuidInfo *qmp_backup(const char *backup_file, bool has_format, }; block_on_coroutine_fn(pvebackup_co_start, &task); + return task.result; } @@ -3802,9 +3824,15 @@ static void coroutine_fn pvebackup_co_query(void *opaque) BackupStatus *info = g_malloc0(sizeof(*info)); + if (!backup_state.backup_mutex_initialized) + return; + + qemu_co_mutex_lock(&backup_state.backup_mutex); + if (!backup_state.start_time) { /* not started, return {} */ task->result = info; + qemu_co_mutex_unlock(&backup_state.backup_mutex); return; } @@ -3842,6 +3870,8 @@ static void coroutine_fn pvebackup_co_query(void *opaque) info->transferred = backup_state.transferred; task->result = info; + + qemu_co_mutex_unlock(&backup_state.backup_mutex); } BackupStatus *qmp_query_backup(Error **errp)