Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> bump build-dependency on libproxmox-backup-qemu0-dev with version query support Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
		
			
				
	
	
		
			188 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			188 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 | 
						|
From: Stefan Reiter <s.reiter@proxmox.com>
 | 
						|
Date: Thu, 22 Oct 2020 17:01:07 +0200
 | 
						|
Subject: [PATCH] PVE: fix and clean up error handling for create_backup_jobs
 | 
						|
 | 
						|
No more weird bool returns, just the standard "errp" format used
 | 
						|
everywhere else too. With this, if backup_job_create fails, the error
 | 
						|
message is actually returned over QMP and can be shown to the user.
 | 
						|
 | 
						|
To facilitate correct cleanup on such an error, we call
 | 
						|
create_backup_jobs as a bottom half directly from pvebackup_co_prepare.
 | 
						|
This additionally allows us to actually hold the backup_mutex during
 | 
						|
operation.
 | 
						|
 | 
						|
Also add a job_cancel_sync before job_unref, since a job must be in
 | 
						|
STATUS_NULL to be deleted by unref, which could trigger an assert
 | 
						|
before.
 | 
						|
 | 
						|
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
 | 
						|
---
 | 
						|
 pve-backup.c | 79 +++++++++++++++++++++++++++++++++++-----------------
 | 
						|
 1 file changed, 54 insertions(+), 25 deletions(-)
 | 
						|
 | 
						|
diff --git a/pve-backup.c b/pve-backup.c
 | 
						|
index 4402c0cb17..c7cde0fb0e 100644
 | 
						|
--- a/pve-backup.c
 | 
						|
+++ b/pve-backup.c
 | 
						|
@@ -50,6 +50,7 @@ static struct PVEBackupState {
 | 
						|
         size_t zero_bytes;
 | 
						|
         GList *bitmap_list;
 | 
						|
         bool finishing;
 | 
						|
+        bool starting;
 | 
						|
     } stat;
 | 
						|
     int64_t speed;
 | 
						|
     VmaWriter *vmaw;
 | 
						|
@@ -277,6 +278,16 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
 | 
						|
     PVEBackupDevInfo *di = opaque;
 | 
						|
     int ret = di->completed_ret;
 | 
						|
 
 | 
						|
+    qemu_mutex_lock(&backup_state.stat.lock);
 | 
						|
+    bool starting = backup_state.stat.starting;
 | 
						|
+    qemu_mutex_unlock(&backup_state.stat.lock);
 | 
						|
+    if (starting) {
 | 
						|
+        /* in 'starting' state, no tasks have been run yet, meaning we can (and
 | 
						|
+         * must) skip all cleanup, as we don't know what has and hasn't been
 | 
						|
+         * initialized yet. */
 | 
						|
+        return;
 | 
						|
+    }
 | 
						|
+
 | 
						|
     qemu_co_mutex_lock(&backup_state.backup_mutex);
 | 
						|
 
 | 
						|
     if (ret < 0) {
 | 
						|
@@ -441,15 +452,15 @@ static int coroutine_fn pvebackup_co_add_config(
 | 
						|
 /*
 | 
						|
  * backup_job_create can *not* be run from a coroutine (and requires an
 | 
						|
  * acquired AioContext), so this can't either.
 | 
						|
- * This does imply that this function cannot run with backup_mutex acquired.
 | 
						|
- * That is ok because it is only ever called between setting up the backup_state
 | 
						|
- * struct and starting the jobs, and from within a QMP call. This means that no
 | 
						|
- * other QMP call can interrupt, and no background job is running yet.
 | 
						|
+ * The caller is responsible that backup_mutex is held nonetheless.
 | 
						|
  */
 | 
						|
-static bool create_backup_jobs(void) {
 | 
						|
+static void create_backup_jobs_bh(void *opaque) {
 | 
						|
 
 | 
						|
     assert(!qemu_in_coroutine());
 | 
						|
 
 | 
						|
+    CoCtxData *data = (CoCtxData*)opaque;
 | 
						|
+    Error **errp = (Error**)data->data;
 | 
						|
+
 | 
						|
     Error *local_err = NULL;
 | 
						|
 
 | 
						|
     /* create job transaction to synchronize bitmap commit and cancel all
 | 
						|
@@ -483,24 +494,19 @@ static bool create_backup_jobs(void) {
 | 
						|
 
 | 
						|
         aio_context_release(aio_context);
 | 
						|
 
 | 
						|
-        if (!job || local_err != NULL) {
 | 
						|
-            Error *create_job_err = NULL;
 | 
						|
-            error_setg(&create_job_err, "backup_job_create failed: %s",
 | 
						|
-                       local_err ? error_get_pretty(local_err) : "null");
 | 
						|
+        di->job = job;
 | 
						|
 
 | 
						|
-            pvebackup_propagate_error(create_job_err);
 | 
						|
+        if (!job || local_err) {
 | 
						|
+            error_setg(errp, "backup_job_create failed: %s",
 | 
						|
+                       local_err ? error_get_pretty(local_err) : "null");
 | 
						|
             break;
 | 
						|
         }
 | 
						|
 
 | 
						|
-        di->job = job;
 | 
						|
-
 | 
						|
         bdrv_unref(di->target);
 | 
						|
         di->target = NULL;
 | 
						|
     }
 | 
						|
 
 | 
						|
-    bool errors = pvebackup_error_or_canceled();
 | 
						|
-
 | 
						|
-    if (errors) {
 | 
						|
+    if (*errp) {
 | 
						|
         l = backup_state.di_list;
 | 
						|
         while (l) {
 | 
						|
             PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
 | 
						|
@@ -512,12 +518,17 @@ static bool create_backup_jobs(void) {
 | 
						|
             }
 | 
						|
 
 | 
						|
             if (di->job) {
 | 
						|
+                AioContext *ctx = di->job->job.aio_context;
 | 
						|
+                aio_context_acquire(ctx);
 | 
						|
+                job_cancel_sync(&di->job->job);
 | 
						|
                 job_unref(&di->job->job);
 | 
						|
+                aio_context_release(ctx);
 | 
						|
             }
 | 
						|
         }
 | 
						|
     }
 | 
						|
 
 | 
						|
-    return errors;
 | 
						|
+    /* return */
 | 
						|
+    aio_co_enter(data->ctx, data->co);
 | 
						|
 }
 | 
						|
 
 | 
						|
 typedef struct QmpBackupTask {
 | 
						|
@@ -883,6 +894,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
 | 
						|
     backup_state.stat.transferred = 0;
 | 
						|
     backup_state.stat.zero_bytes = 0;
 | 
						|
     backup_state.stat.finishing = false;
 | 
						|
+    backup_state.stat.starting = true;
 | 
						|
 
 | 
						|
     qemu_mutex_unlock(&backup_state.stat.lock);
 | 
						|
 
 | 
						|
@@ -898,7 +910,32 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
 | 
						|
 
 | 
						|
     task->result = uuid_info;
 | 
						|
 
 | 
						|
+    /* Run create_backup_jobs_bh outside of coroutine (in BH) but keep
 | 
						|
+    * backup_mutex locked. This is fine, a CoMutex can be held across yield
 | 
						|
+    * points, and we'll release it as soon as the BH reschedules us.
 | 
						|
+    */
 | 
						|
+    CoCtxData waker = {
 | 
						|
+        .co = qemu_coroutine_self(),
 | 
						|
+        .ctx = qemu_get_current_aio_context(),
 | 
						|
+        .data = &local_err,
 | 
						|
+    };
 | 
						|
+    aio_bh_schedule_oneshot(waker.ctx, create_backup_jobs_bh, &waker);
 | 
						|
+    qemu_coroutine_yield();
 | 
						|
+
 | 
						|
+    if (local_err) {
 | 
						|
+        error_propagate(task->errp, local_err);
 | 
						|
+        goto err;
 | 
						|
+    }
 | 
						|
+
 | 
						|
     qemu_co_mutex_unlock(&backup_state.backup_mutex);
 | 
						|
+
 | 
						|
+    qemu_mutex_lock(&backup_state.stat.lock);
 | 
						|
+    backup_state.stat.starting = false;
 | 
						|
+    qemu_mutex_unlock(&backup_state.stat.lock);
 | 
						|
+
 | 
						|
+    /* start the first job in the transaction */
 | 
						|
+    job_txn_start_seq(backup_state.txn);
 | 
						|
+
 | 
						|
     return;
 | 
						|
 
 | 
						|
 err_mutex:
 | 
						|
@@ -921,6 +958,7 @@ err:
 | 
						|
         g_free(di);
 | 
						|
     }
 | 
						|
     g_list_free(di_list);
 | 
						|
+    backup_state.di_list = NULL;
 | 
						|
 
 | 
						|
     if (devs) {
 | 
						|
         g_strfreev(devs);
 | 
						|
@@ -998,15 +1036,6 @@ UuidInfo *qmp_backup(
 | 
						|
 
 | 
						|
     block_on_coroutine_fn(pvebackup_co_prepare, &task);
 | 
						|
 
 | 
						|
-    if (*errp == NULL) {
 | 
						|
-        bool errors = create_backup_jobs();
 | 
						|
-
 | 
						|
-        if (!errors) {
 | 
						|
-            // start the first job in the transaction
 | 
						|
-            job_txn_start_seq(backup_state.txn);
 | 
						|
-        }
 | 
						|
-    }
 | 
						|
-
 | 
						|
     return task.result;
 | 
						|
 }
 | 
						|
 
 |