From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Fri, 17 Apr 2020 08:57:47 +0200 Subject: [PATCH] PVE Backup: avoid use QemuRecMutex inside coroutines --- pve-backup.c | 59 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/pve-backup.c b/pve-backup.c index 169f0c68d0..dddf430399 100644 --- a/pve-backup.c +++ b/pve-backup.c @@ -11,6 +11,23 @@ /* PVE backup state and related function */ +/* + * Note: A resume from a qemu_coroutine_yield can happen in a different thread, + * so you may not use normal mutexes within coroutines: + * + * ---bad-example--- + * qemu_rec_mutex_lock(lock) + * ... + * qemu_coroutine_yield() // wait for something + * // we are now inside a different thread + * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!! + * ---end-bad-example-- + * + * ==> Always use CoMutext inside coroutines. + * ==> Never acquire/release AioContext withing coroutines (because that use QemuRecMutex) + * + */ + static struct PVEBackupState { struct { // Everithing accessed from qmp_backup_query command is protected using lock @@ -30,12 +47,14 @@ static struct PVEBackupState { ProxmoxBackupHandle *pbs; GList *di_list; QemuRecMutex backup_mutex; + CoMutex dump_callback_mutex; } backup_state; static void pvebackup_init(void) { qemu_rec_mutex_init(&backup_state.stat.lock); qemu_rec_mutex_init(&backup_state.backup_mutex); + qemu_co_mutex_init(&backup_state.dump_callback_mutex); } // initialize PVEBackupState at startup @@ -114,16 +133,16 @@ pvebackup_co_dump_pbs_cb( Error *local_err = NULL; int pbs_res = -1; - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_co_mutex_lock(&backup_state.dump_callback_mutex); // avoid deadlock if job is cancelled if (pvebackup_error_or_canceled()) { - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex); return -1; } pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err); - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex); if (pbs_res < 0) { pvebackup_propagate_error(local_err); @@ -149,7 +168,6 @@ pvebackup_co_dump_vma_cb( const unsigned char *buf = pbuf; PVEBackupDevInfo *di = opaque; - int ret = -1; assert(backup_state.vmaw); @@ -167,16 +185,16 @@ pvebackup_co_dump_vma_cb( } while (remaining > 0) { - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_co_mutex_lock(&backup_state.dump_callback_mutex); // avoid deadlock if job is cancelled if (pvebackup_error_or_canceled()) { - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex); return -1; } size_t zero_bytes = 0; ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes); - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_co_mutex_unlock(&backup_state.dump_callback_mutex); ++cluster_num; if (buf) { @@ -203,12 +221,11 @@ pvebackup_co_dump_vma_cb( return size; } +// assumes the caller holds backup_mutex static void coroutine_fn pvebackup_co_cleanup(void *unused) { assert(qemu_in_coroutine()); - qemu_rec_mutex_lock(&backup_state.backup_mutex); - qemu_rec_mutex_lock(&backup_state.stat.lock); backup_state.stat.end_time = time(NULL); qemu_rec_mutex_unlock(&backup_state.stat.lock); @@ -239,9 +256,9 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused) g_list_free(backup_state.di_list); backup_state.di_list = NULL; - qemu_rec_mutex_unlock(&backup_state.backup_mutex); } +// assumes the caller holds backup_mutex static void coroutine_fn pvebackup_complete_stream(void *opaque) { PVEBackupDevInfo *di = opaque; @@ -295,6 +312,8 @@ static void pvebackup_complete_cb(void *opaque, int ret) static void pvebackup_cancel(void) { + assert(!qemu_in_coroutine()); + Error *cancel_err = NULL; error_setg(&cancel_err, "backup canceled"); pvebackup_propagate_error(cancel_err); @@ -348,6 +367,7 @@ void qmp_backup_cancel(Error **errp) pvebackup_cancel(); } +// assumes the caller holds backup_mutex static int coroutine_fn pvebackup_co_add_config( const char *file, const char *name, @@ -431,9 +451,9 @@ static void pvebackup_run_next_job(void) } } - qemu_rec_mutex_unlock(&backup_state.backup_mutex); - block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup + + qemu_rec_mutex_unlock(&backup_state.backup_mutex); } static bool create_backup_jobs(void) { @@ -520,6 +540,7 @@ typedef struct QmpBackupTask { UuidInfo *result; } QmpBackupTask; +// assumes the caller holds backup_mutex static void coroutine_fn pvebackup_co_prepare(void *opaque) { assert(qemu_in_coroutine()); @@ -543,11 +564,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) const char *config_name = "qemu-server.conf"; const char *firewall_name = "qemu-server.fw"; - qemu_rec_mutex_lock(&backup_state.backup_mutex); - if (backup_state.di_list) { - qemu_rec_mutex_unlock(&backup_state.backup_mutex); - error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, + error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "previous backup not finished"); return; } @@ -792,8 +810,6 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) backup_state.di_list = di_list; - qemu_rec_mutex_unlock(&backup_state.backup_mutex); - uuid_info = g_malloc0(sizeof(*uuid_info)); uuid_info->UUID = uuid_str; @@ -836,8 +852,6 @@ err: rmdir(backup_dir); } - qemu_rec_mutex_unlock(&backup_state.backup_mutex); - task->result = NULL; return; } @@ -881,13 +895,16 @@ UuidInfo *qmp_backup( .errp = errp, }; + qemu_rec_mutex_lock(&backup_state.backup_mutex); + block_on_coroutine_fn(pvebackup_co_prepare, &task); if (*errp == NULL) { - qemu_rec_mutex_lock(&backup_state.backup_mutex); create_backup_jobs(); qemu_rec_mutex_unlock(&backup_state.backup_mutex); pvebackup_run_next_job(); + } else { + qemu_rec_mutex_unlock(&backup_state.backup_mutex); } return task.result;