118 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
		
		
			
		
	
	
			118 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
|   | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||
|  | From: Fiona Ebner <f.ebner@proxmox.com> | ||
|  | Date: Mon, 29 Apr 2024 14:43:58 +0200 | ||
|  | Subject: [PATCH] PVE backup: improve error when copy-before-write fails for | ||
|  |  fleecing | ||
|  | 
 | ||
|  | With fleecing, failure for copy-before-write does not fail the guest | ||
|  | write, but only sets the snapshot error that is associated to the | ||
|  | copy-before-write filter, making further requests to the snapshot | ||
|  | access fail with EACCES, which then also fails the job. But that error | ||
|  | code is not the root cause of why the backup failed, so bubble up the | ||
|  | original snapshot error instead. | ||
|  | 
 | ||
|  | Reported-by: Friedrich Weber <f.weber@proxmox.com> | ||
|  | Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> | ||
|  | Tested-by: Friedrich Weber <f.weber@proxmox.com> | ||
|  | ---
 | ||
|  |  block/copy-before-write.c | 18 ++++++++++++------ | ||
|  |  block/copy-before-write.h |  1 + | ||
|  |  pve-backup.c              |  9 +++++++++ | ||
|  |  3 files changed, 22 insertions(+), 6 deletions(-) | ||
|  | 
 | ||
|  | diff --git a/block/copy-before-write.c b/block/copy-before-write.c
 | ||
|  | index bba58326d7..50cc4c7aae 100644
 | ||
|  | --- a/block/copy-before-write.c
 | ||
|  | +++ b/block/copy-before-write.c
 | ||
|  | @@ -27,6 +27,7 @@
 | ||
|  |  #include "qapi/qmp/qjson.h" | ||
|  |   | ||
|  |  #include "sysemu/block-backend.h" | ||
|  | +#include "qemu/atomic.h"
 | ||
|  |  #include "qemu/cutils.h" | ||
|  |  #include "qapi/error.h" | ||
|  |  #include "block/block_int.h" | ||
|  | @@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
 | ||
|  |       * @snapshot_error is normally zero. But on first copy-before-write failure | ||
|  |       * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes | ||
|  |       * value of this error (<0). After that all in-flight and further | ||
|  | -     * snapshot-API requests will fail with that error.
 | ||
|  | +     * snapshot-API requests will fail with that error. To be accessed with
 | ||
|  | +     * atomics.
 | ||
|  |       */ | ||
|  |      int snapshot_error; | ||
|  |  } BDRVCopyBeforeWriteState; | ||
|  | @@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
 | ||
|  |          return 0; | ||
|  |      } | ||
|  |   | ||
|  | -    if (s->snapshot_error) {
 | ||
|  | +    if (qatomic_read(&s->snapshot_error)) {
 | ||
|  |          return 0; | ||
|  |      } | ||
|  |   | ||
|  | @@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
 | ||
|  |      WITH_QEMU_LOCK_GUARD(&s->lock) { | ||
|  |          if (ret < 0) { | ||
|  |              assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT); | ||
|  | -            if (!s->snapshot_error) {
 | ||
|  | -                s->snapshot_error = ret;
 | ||
|  | -            }
 | ||
|  | +            qatomic_cmpxchg(&s->snapshot_error, 0, ret);
 | ||
|  |          } else { | ||
|  |              bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off); | ||
|  |          } | ||
|  | @@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
 | ||
|  |   | ||
|  |      QEMU_LOCK_GUARD(&s->lock); | ||
|  |   | ||
|  | -    if (s->snapshot_error) {
 | ||
|  | +    if (qatomic_read(&s->snapshot_error)) {
 | ||
|  |          g_free(req); | ||
|  |          return NULL; | ||
|  |      } | ||
|  | @@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
 | ||
|  |      bdrv_unref(bs); | ||
|  |  } | ||
|  |   | ||
|  | +int bdrv_cbw_snapshot_error(BlockDriverState *bs)
 | ||
|  | +{
 | ||
|  | +    BDRVCopyBeforeWriteState *s = bs->opaque;
 | ||
|  | +    return qatomic_read(&s->snapshot_error);
 | ||
|  | +}
 | ||
|  | +
 | ||
|  |  static void cbw_init(void) | ||
|  |  { | ||
|  |      bdrv_register(&bdrv_cbw_filter); | ||
|  | diff --git a/block/copy-before-write.h b/block/copy-before-write.h
 | ||
|  | index dc6cafe7fa..a27d2d7d9f 100644
 | ||
|  | --- a/block/copy-before-write.h
 | ||
|  | +++ b/block/copy-before-write.h
 | ||
|  | @@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 | ||
|  |                                    BlockCopyState **bcs, | ||
|  |                                    Error **errp); | ||
|  |  void bdrv_cbw_drop(BlockDriverState *bs); | ||
|  | +int bdrv_cbw_snapshot_error(BlockDriverState *bs);
 | ||
|  |   | ||
|  |  #endif /* COPY_BEFORE_WRITE_H */ | ||
|  | diff --git a/pve-backup.c b/pve-backup.c
 | ||
|  | index 7cc1dd3724..07709aa350 100644
 | ||
|  | --- a/pve-backup.c
 | ||
|  | +++ b/pve-backup.c
 | ||
|  | @@ -379,6 +379,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
 | ||
|  |          di->fleecing.snapshot_access = NULL; | ||
|  |      } | ||
|  |      if (di->fleecing.cbw) { | ||
|  | +        /*
 | ||
|  | +         * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
 | ||
|  | +         * error, making further requests to the snapshot fail with EACCES, which then also fail the
 | ||
|  | +         * job. But that code is not the root cause and just confusing, so update it.
 | ||
|  | +         */
 | ||
|  | +        int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
 | ||
|  | +        if (di->completed_ret == -EACCES && snapshot_error) {
 | ||
|  | +            di->completed_ret = snapshot_error;
 | ||
|  | +        }
 | ||
|  |          bdrv_cbw_drop(di->fleecing.cbw); | ||
|  |          di->fleecing.cbw = NULL; | ||
|  |      } |