From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Thu, 16 Apr 2020 11:40:16 +0200 Subject: [PATCH] PVE: Use non-recursive locks Release the lock on qemu_coroutine_yield, so coroutines don't deadlock. Signed-off-by: Stefan Reiter --- pve-backup.c | 82 +++++++++++++++++++++++++++------------------------- vma-writer.c | 28 ++++++++++++++---- vma.h | 1 + 3 files changed, 66 insertions(+), 45 deletions(-) diff --git a/pve-backup.c b/pve-backup.c index 169f0c68d0..46a3d6f995 100644 --- a/pve-backup.c +++ b/pve-backup.c @@ -14,7 +14,7 @@ static struct PVEBackupState { struct { // Everithing accessed from qmp_backup_query command is protected using lock - QemuRecMutex lock; + QemuMutex lock; Error *error; time_t start_time; time_t end_time; @@ -29,13 +29,13 @@ static struct PVEBackupState { VmaWriter *vmaw; ProxmoxBackupHandle *pbs; GList *di_list; - QemuRecMutex backup_mutex; + QemuMutex backup_mutex; } backup_state; static void pvebackup_init(void) { - qemu_rec_mutex_init(&backup_state.stat.lock); - qemu_rec_mutex_init(&backup_state.backup_mutex); + qemu_mutex_init(&backup_state.stat.lock); + qemu_mutex_init(&backup_state.backup_mutex); } // initialize PVEBackupState at startup @@ -72,26 +72,26 @@ lookup_active_block_job(PVEBackupDevInfo *di) static void pvebackup_propagate_error(Error *err) { - qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock); error_propagate(&backup_state.stat.error, err); - qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock); } static bool pvebackup_error_or_canceled(void) { - qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock); bool error_or_canceled = !!backup_state.stat.error; - qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock); return error_or_canceled; } static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes) { - 
qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock); backup_state.stat.zero_bytes += zero_bytes; backup_state.stat.transferred += transferred; - qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock); } // This may get called from multiple coroutines in multiple io-threads @@ -114,16 +114,16 @@ pvebackup_co_dump_pbs_cb( Error *local_err = NULL; int pbs_res = -1; - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); // avoid deadlock if job is cancelled if (pvebackup_error_or_canceled()) { - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); return -1; } pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err); - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); if (pbs_res < 0) { pvebackup_propagate_error(local_err); @@ -167,16 +167,16 @@ pvebackup_co_dump_vma_cb( } while (remaining > 0) { - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); // avoid deadlock if job is cancelled if (pvebackup_error_or_canceled()) { - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); return -1; } size_t zero_bytes = 0; ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes); - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); ++cluster_num; if (buf) { @@ -207,11 +207,11 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused) { assert(qemu_in_coroutine()); - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); - qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock); backup_state.stat.end_time = time(NULL); - qemu_rec_mutex_unlock(&backup_state.stat.lock); + 
qemu_mutex_unlock(&backup_state.stat.lock); if (backup_state.vmaw) { Error *local_err = NULL; @@ -239,7 +239,7 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused) g_list_free(backup_state.di_list); backup_state.di_list = NULL; - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); } static void coroutine_fn pvebackup_complete_stream(void *opaque) @@ -267,7 +267,7 @@ static void pvebackup_complete_cb(void *opaque, int ret) PVEBackupDevInfo *di = opaque; - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); di->completed = true; @@ -288,7 +288,7 @@ static void pvebackup_complete_cb(void *opaque, int ret) g_free(di); - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); pvebackup_run_next_job(); } @@ -299,7 +299,7 @@ static void pvebackup_cancel(void) error_setg(&cancel_err, "backup canceled"); pvebackup_propagate_error(cancel_err); - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); if (backup_state.vmaw) { /* make sure vma writer does not block anymore */ @@ -310,13 +310,13 @@ static void pvebackup_cancel(void) proxmox_backup_abort(backup_state.pbs, "backup canceled"); } - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); for(;;) { BlockJob *next_job = NULL; - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); GList *l = backup_state.di_list; while (l) { @@ -330,7 +330,7 @@ static void pvebackup_cancel(void) } } - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); if (next_job) { AioContext *aio_context = next_job->job.aio_context; @@ -403,7 +403,7 @@ static void pvebackup_run_next_job(void) { assert(!qemu_in_coroutine()); - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); GList *l = 
backup_state.di_list; while (l) { @@ -413,7 +413,7 @@ static void pvebackup_run_next_job(void) BlockJob *job = lookup_active_block_job(di); if (job) { - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); AioContext *aio_context = job->job.aio_context; aio_context_acquire(aio_context); @@ -431,7 +431,7 @@ static void pvebackup_run_next_job(void) } } - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup } @@ -543,10 +543,10 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) const char *config_name = "qemu-server.conf"; const char *firewall_name = "qemu-server.fw"; - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); if (backup_state.di_list) { - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); error_set(task->errp, ERROR_CLASS_GENERIC_ERROR, "previous backup not finished"); return; @@ -689,6 +689,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) goto err; } + vma_writer_set_external_lock(vmaw, &backup_state.backup_mutex); + /* register all devices for vma writer */ l = di_list; while (l) { @@ -760,7 +762,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) } /* initialize global backup_state now */ - qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock); if (backup_state.stat.error) { error_free(backup_state.stat.error); @@ -783,7 +785,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) backup_state.stat.transferred = 0; backup_state.stat.zero_bytes = 0; - qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock); backup_state.speed = (task->has_speed && task->speed > 0) ? 
task->speed : 0; @@ -792,7 +794,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) backup_state.di_list = di_list; - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); uuid_info = g_malloc0(sizeof(*uuid_info)); uuid_info->UUID = uuid_str; @@ -836,7 +838,7 @@ err: rmdir(backup_dir); } - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); task->result = NULL; return; @@ -884,9 +886,9 @@ UuidInfo *qmp_backup( block_on_coroutine_fn(pvebackup_co_prepare, &task); if (*errp == NULL) { - qemu_rec_mutex_lock(&backup_state.backup_mutex); + qemu_mutex_lock(&backup_state.backup_mutex); create_backup_jobs(); - qemu_rec_mutex_unlock(&backup_state.backup_mutex); + qemu_mutex_unlock(&backup_state.backup_mutex); pvebackup_run_next_job(); } @@ -897,11 +899,11 @@ BackupStatus *qmp_query_backup(Error **errp) { BackupStatus *info = g_malloc0(sizeof(*info)); - qemu_rec_mutex_lock(&backup_state.stat.lock); + qemu_mutex_lock(&backup_state.stat.lock); if (!backup_state.stat.start_time) { /* not started, return {} */ - qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock); return info; } @@ -938,7 +940,7 @@ BackupStatus *qmp_query_backup(Error **errp) info->has_transferred = true; info->transferred = backup_state.stat.transferred; - qemu_rec_mutex_unlock(&backup_state.stat.lock); + qemu_mutex_unlock(&backup_state.stat.lock); return info; } diff --git a/vma-writer.c b/vma-writer.c index fe86b18a60..f3fa8e3b4c 100644 --- a/vma-writer.c +++ b/vma-writer.c @@ -68,8 +68,14 @@ struct VmaWriter { uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */ uint32_t config_count; + + QemuMutex *external_lock; }; +void vma_writer_set_external_lock(VmaWriter *vmaw, QemuMutex *lock) { + vmaw->external_lock = lock; +} + void vma_writer_set_error(VmaWriter *vmaw, 
const char *fmt, ...) { va_list ap; @@ -199,13 +205,20 @@ int vma_writer_register_stream(VmaWriter *vmaw, const char *devname, return n; } -static void coroutine_fn yield_until_fd_writable(int fd) +static void coroutine_fn yield_until_fd_writable(int fd, QemuMutex *external_lock) { assert(qemu_in_coroutine()); AioContext *ctx = qemu_get_current_aio_context(); aio_set_fd_handler(ctx, fd, false, NULL, (IOHandler *)qemu_coroutine_enter, NULL, qemu_coroutine_self()); + if (external_lock) { + /* still protected from re-entering via flush_lock */ + qemu_mutex_unlock(external_lock); + } qemu_coroutine_yield(); + if (external_lock) { + qemu_mutex_lock(external_lock); + } aio_set_fd_handler(ctx, fd, false, NULL, NULL, NULL, NULL); } @@ -227,11 +240,16 @@ vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes) while (done < bytes) { if (vmaw->status < 0) { - DPRINTF("vma_queue_write detected canceled backup\n"); + DPRINTF("vma_queue_write detected canceled backup\n"); + done = -1; + break; + } + yield_until_fd_writable(vmaw->fd, vmaw->external_lock); + if (vmaw->closed || vmaw->status < 0) { + DPRINTF("vma_queue_write backup canceled after waiting for fd\n"); done = -1; break; } - yield_until_fd_writable(vmaw->fd); ret = write(vmaw->fd, buf + done, bytes - done); if (ret > 0) { done += ret; @@ -501,7 +519,7 @@ vma_writer_flush_output(VmaWriter *vmaw) int ret = vma_writer_flush(vmaw); qemu_co_mutex_unlock(&vmaw->flush_lock); if (ret < 0) { - vma_writer_set_error(vmaw, "vma_writer_flush_header failed"); + vma_writer_set_error(vmaw, "vma_writer_flush_output failed"); } return ret; } @@ -570,7 +588,7 @@ static int vma_writer_get_buffer(VmaWriter *vmaw) while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) { ret = vma_writer_flush(vmaw); if (ret < 0) { - vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed"); + vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed"); break; } } diff --git a/vma.h b/vma.h index c895c97f6d..ec6f09e83e 100644 ---
a/vma.h +++ b/vma.h @@ -115,6 +115,7 @@ typedef struct VmaDeviceInfo { } VmaDeviceInfo; VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp); +void vma_writer_set_external_lock(VmaWriter *vmaw, QemuMutex *lock); int vma_writer_close(VmaWriter *vmaw, Error **errp); void vma_writer_error_propagate(VmaWriter *vmaw, Error **errp); void vma_writer_destroy(VmaWriter *vmaw);