add alloc-track block driver patch
See added patches for more info, overview: 0044: slightly increase PBS performance by reducing allocations 0045: slightly increase block-stream performance for Ceph 0046: don't crash with block-stream on RBD 0047: add alloc-track driver for live restore Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
		
							parent
							
								
									e9b36665c7
								
							
						
					
					
						commit
						677d0d169f
					
				
							
								
								
									
										52
									
								
								debian/patches/pve/0044-PVE-block-pbs-fast-path-reads-without-allocation-if-.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								debian/patches/pve/0044-PVE-block-pbs-fast-path-reads-without-allocation-if-.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,52 @@ | |||||||
|  | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | Date: Wed, 9 Dec 2020 11:46:57 +0100 | ||||||
|  | Subject: [PATCH] PVE: block/pbs: fast-path reads without allocation if | ||||||
|  |  possible | ||||||
|  | 
 | ||||||
|  | ...and switch over to g_malloc/g_free while at it to align with other | ||||||
|  | QEMU code. | ||||||
|  | 
 | ||||||
|  | Tracing shows the fast-path is taken almost all the time, though not | ||||||
|  | 100% so the slow one is still necessary. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | ---
 | ||||||
|  |  block/pbs.c | 17 ++++++++++++++--- | ||||||
|  |  1 file changed, 14 insertions(+), 3 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/block/pbs.c b/block/pbs.c
 | ||||||
|  | index 1481a2bfd1..fbf0d8d845 100644
 | ||||||
|  | --- a/block/pbs.c
 | ||||||
|  | +++ b/block/pbs.c
 | ||||||
|  | @@ -200,7 +200,16 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
 | ||||||
|  |      BDRVPBSState *s = bs->opaque; | ||||||
|  |      int ret; | ||||||
|  |      char *pbs_error = NULL; | ||||||
|  | -    uint8_t *buf = malloc(bytes);
 | ||||||
|  | +    uint8_t *buf;
 | ||||||
|  | +    bool inline_buf = true;
 | ||||||
|  | +
 | ||||||
|  | +    /* for single-buffer IO vectors we can fast-path the write directly to it */
 | ||||||
|  | +    if (qiov->niov == 1 && qiov->iov->iov_len >= bytes) {
 | ||||||
|  | +        buf = qiov->iov->iov_base;
 | ||||||
|  | +    } else {
 | ||||||
|  | +        inline_buf = false;
 | ||||||
|  | +        buf = g_malloc(bytes);
 | ||||||
|  | +    }
 | ||||||
|  |   | ||||||
|  |      ReadCallbackData rcb = { | ||||||
|  |          .co = qemu_coroutine_self(), | ||||||
|  | @@ -218,8 +227,10 @@ static coroutine_fn int pbs_co_preadv(BlockDriverState *bs,
 | ||||||
|  |          return -EIO; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    qemu_iovec_from_buf(qiov, 0, buf, bytes);
 | ||||||
|  | -    free(buf);
 | ||||||
|  | +    if (!inline_buf) {
 | ||||||
|  | +        qemu_iovec_from_buf(qiov, 0, buf, bytes);
 | ||||||
|  | +        g_free(buf);
 | ||||||
|  | +    }
 | ||||||
|  |   | ||||||
|  |      return ret; | ||||||
|  |  } | ||||||
							
								
								
									
										23
									
								
								debian/patches/pve/0045-PVE-block-stream-increase-chunk-size.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								debian/patches/pve/0045-PVE-block-stream-increase-chunk-size.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | |||||||
|  | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | Date: Tue, 2 Mar 2021 16:34:28 +0100 | ||||||
|  | Subject: [PATCH] PVE: block/stream: increase chunk size | ||||||
|  | 
 | ||||||
|  | Ceph favors bigger chunks, so increase to 4M. | ||||||
|  | ---
 | ||||||
|  |  block/stream.c | 2 +- | ||||||
|  |  1 file changed, 1 insertion(+), 1 deletion(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/block/stream.c b/block/stream.c
 | ||||||
|  | index 236384f2f7..a5371420e3 100644
 | ||||||
|  | --- a/block/stream.c
 | ||||||
|  | +++ b/block/stream.c
 | ||||||
|  | @@ -26,7 +26,7 @@ enum {
 | ||||||
|  |       * large enough to process multiple clusters in a single call, so | ||||||
|  |       * that populating contiguous regions of the image is efficient. | ||||||
|  |       */ | ||||||
|  | -    STREAM_CHUNK = 512 * 1024, /* in bytes */
 | ||||||
|  | +    STREAM_CHUNK = 4 * 1024 * 1024, /* in bytes */
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  typedef struct StreamBlockJob { | ||||||
							
								
								
									
										42
									
								
								debian/patches/pve/0046-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								debian/patches/pve/0046-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | |||||||
|  | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | Date: Tue, 2 Mar 2021 16:11:54 +0100 | ||||||
|  | Subject: [PATCH] block/io: accept NULL qiov in bdrv_pad_request | ||||||
|  | 
 | ||||||
|  | Some operations, e.g. block-stream, perform reads while discarding the | ||||||
|  | results (only copy-on-read matters). In this case they will pass NULL as | ||||||
|  | the target QEMUIOVector, which will however trip bdrv_pad_request, since | ||||||
|  | it wants to extend its passed vector. | ||||||
|  | 
 | ||||||
|  | Simply check for NULL and do nothing, there's no reason to pad the | ||||||
|  | target if it will be discarded anyway. | ||||||
|  | ---
 | ||||||
|  |  block/io.c | 13 ++++++++----- | ||||||
|  |  1 file changed, 8 insertions(+), 5 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/block/io.c b/block/io.c
 | ||||||
|  | index ec5e152bb7..08dee005ec 100644
 | ||||||
|  | --- a/block/io.c
 | ||||||
|  | +++ b/block/io.c
 | ||||||
|  | @@ -1613,13 +1613,16 @@ static bool bdrv_pad_request(BlockDriverState *bs,
 | ||||||
|  |          return false; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | -    qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
 | ||||||
|  | -                             *qiov, *qiov_offset, *bytes,
 | ||||||
|  | -                             pad->buf + pad->buf_len - pad->tail, pad->tail);
 | ||||||
|  | +    if (*qiov) {
 | ||||||
|  | +        qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
 | ||||||
|  | +                                *qiov, *qiov_offset, *bytes,
 | ||||||
|  | +                                pad->buf + pad->buf_len - pad->tail, pad->tail);
 | ||||||
|  | +        *qiov = &pad->local_qiov;
 | ||||||
|  | +        *qiov_offset = 0;
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  |      *bytes += pad->head + pad->tail; | ||||||
|  |      *offset -= pad->head; | ||||||
|  | -    *qiov = &pad->local_qiov;
 | ||||||
|  | -    *qiov_offset = 0;
 | ||||||
|  |   | ||||||
|  |      return true; | ||||||
|  |  } | ||||||
							
								
								
									
										391
									
								
								debian/patches/pve/0047-block-add-alloc-track-driver.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										391
									
								
								debian/patches/pve/0047-block-add-alloc-track-driver.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,391 @@ | |||||||
|  | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | Date: Mon, 7 Dec 2020 15:21:03 +0100 | ||||||
|  | Subject: [PATCH] block: add alloc-track driver | ||||||
|  | 
 | ||||||
|  | Add a new filter node 'alloc-track', which seperates reads and writes to | ||||||
|  | different children, thus allowing to put a backing image behind any
 | ||||||
|  | blockdev (regardless of driver support). Since we can't detect any | ||||||
|  | pre-allocated blocks, we can only track new writes, hence the write | ||||||
|  | target ('file') for this node must always be empty. | ||||||
|  | 
 | ||||||
|  | Intended use case is for live restoring, i.e. add a backup image as a | ||||||
|  | block device into a VM, then put an alloc-track on the restore target | ||||||
|  | and set the backup as backing. With this, one can use a regular | ||||||
|  | 'block-stream' to restore the image, while the VM can already run in the | ||||||
|  | background. Copy-on-read will help make progress as the VM reads as | ||||||
|  | well. | ||||||
|  | 
 | ||||||
|  | This only worked if the target supports backing images, so up until now | ||||||
|  | only for qcow2, with alloc-track any driver for the target can be used. | ||||||
|  | 
 | ||||||
|  | If 'auto-remove' is set, alloc-track will automatically detach itself | ||||||
|  | once the backing image is removed. It will be replaced by 'file'. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | ---
 | ||||||
|  |  block/alloc-track.c | 342 ++++++++++++++++++++++++++++++++++++++++++++ | ||||||
|  |  block/meson.build   |   1 + | ||||||
|  |  2 files changed, 343 insertions(+) | ||||||
|  |  create mode 100644 block/alloc-track.c | ||||||
|  | 
 | ||||||
|  | diff --git a/block/alloc-track.c b/block/alloc-track.c
 | ||||||
|  | new file mode 100644 | ||||||
|  | index 0000000000..b579380279
 | ||||||
|  | --- /dev/null
 | ||||||
|  | +++ b/block/alloc-track.c
 | ||||||
|  | @@ -0,0 +1,342 @@
 | ||||||
|  | +/*
 | ||||||
|  | + * Node to allow backing images to be applied to any node. Assumes a blank
 | ||||||
|  | + * image to begin with, only new writes are tracked as allocated, thus this
 | ||||||
|  | + * must never be put on a node that already contains data.
 | ||||||
|  | + *
 | ||||||
|  | + * Copyright (c) 2020 Proxmox Server Solutions GmbH
 | ||||||
|  | + * Copyright (c) 2020 Stefan Reiter <s.reiter@proxmox.com>
 | ||||||
|  | + *
 | ||||||
|  | + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 | ||||||
|  | + * See the COPYING file in the top-level directory.
 | ||||||
|  | + */
 | ||||||
|  | +
 | ||||||
|  | +#include "qemu/osdep.h"
 | ||||||
|  | +#include "qapi/error.h"
 | ||||||
|  | +#include "block/block_int.h"
 | ||||||
|  | +#include "qapi/qmp/qdict.h"
 | ||||||
|  | +#include "qapi/qmp/qstring.h"
 | ||||||
|  | +#include "qemu/cutils.h"
 | ||||||
|  | +#include "qemu/option.h"
 | ||||||
|  | +#include "qemu/module.h"
 | ||||||
|  | +#include "sysemu/block-backend.h"
 | ||||||
|  | +
 | ||||||
|  | +#define TRACK_OPT_AUTO_REMOVE "auto-remove"
 | ||||||
|  | +
 | ||||||
|  | +typedef enum DropState {
 | ||||||
|  | +    DropNone,
 | ||||||
|  | +    DropRequested,
 | ||||||
|  | +    DropInProgress,
 | ||||||
|  | +} DropState;
 | ||||||
|  | +
 | ||||||
|  | +typedef struct {
 | ||||||
|  | +    BdrvDirtyBitmap *bitmap;
 | ||||||
|  | +    DropState drop_state;
 | ||||||
|  | +    bool auto_remove;
 | ||||||
|  | +} BDRVAllocTrackState;
 | ||||||
|  | +
 | ||||||
|  | +static QemuOptsList runtime_opts = {
 | ||||||
|  | +    .name = "alloc-track",
 | ||||||
|  | +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
 | ||||||
|  | +    .desc = {
 | ||||||
|  | +        {
 | ||||||
|  | +            .name = TRACK_OPT_AUTO_REMOVE,
 | ||||||
|  | +            .type = QEMU_OPT_BOOL,
 | ||||||
|  | +            .help = "automatically replace this node with 'file' when 'backing'"
 | ||||||
|  | +                    "is detached",
 | ||||||
|  | +        },
 | ||||||
|  | +        { /* end of list */ }
 | ||||||
|  | +    },
 | ||||||
|  | +};
 | ||||||
|  | +
 | ||||||
|  | +static void track_refresh_limits(BlockDriverState *bs, Error **errp)
 | ||||||
|  | +{
 | ||||||
|  | +    BlockDriverInfo bdi;
 | ||||||
|  | +
 | ||||||
|  | +    if (!bs->file) {
 | ||||||
|  | +        return;
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    /* always use alignment from underlying write device so RMW cycle for
 | ||||||
|  | +     * bdrv_pwritev reads data from our backing via track_co_preadv (no partial
 | ||||||
|  | +     * cluster allocation in 'file') */
 | ||||||
|  | +    bdrv_get_info(bs->file->bs, &bdi);
 | ||||||
|  | +    bs->bl.request_alignment = MAX(bs->file->bs->bl.request_alignment,
 | ||||||
|  | +                                   MAX(bdi.cluster_size, BDRV_SECTOR_SIZE));
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int track_open(BlockDriverState *bs, QDict *options, int flags,
 | ||||||
|  | +                      Error **errp)
 | ||||||
|  | +{
 | ||||||
|  | +    BDRVAllocTrackState *s = bs->opaque;
 | ||||||
|  | +    QemuOpts *opts;
 | ||||||
|  | +    Error *local_err = NULL;
 | ||||||
|  | +    int ret = 0;
 | ||||||
|  | +
 | ||||||
|  | +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
 | ||||||
|  | +    qemu_opts_absorb_qdict(opts, options, &local_err);
 | ||||||
|  | +    if (local_err) {
 | ||||||
|  | +        error_propagate(errp, local_err);
 | ||||||
|  | +        ret = -EINVAL;
 | ||||||
|  | +        goto fail;
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    s->auto_remove = qemu_opt_get_bool(opts, TRACK_OPT_AUTO_REMOVE, false);
 | ||||||
|  | +
 | ||||||
|  | +    /* open the target (write) node, backing will be attached by block layer */
 | ||||||
|  | +    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
 | ||||||
|  | +                               BDRV_CHILD_DATA | BDRV_CHILD_METADATA, false,
 | ||||||
|  | +                               &local_err);
 | ||||||
|  | +    if (local_err) {
 | ||||||
|  | +        ret = -EINVAL;
 | ||||||
|  | +        error_propagate(errp, local_err);
 | ||||||
|  | +        goto fail;
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    track_refresh_limits(bs, errp);
 | ||||||
|  | +    uint64_t gran = bs->bl.request_alignment;
 | ||||||
|  | +    s->bitmap = bdrv_create_dirty_bitmap(bs->file->bs, gran, NULL, &local_err);
 | ||||||
|  | +    if (local_err) {
 | ||||||
|  | +        ret = -EIO;
 | ||||||
|  | +        error_propagate(errp, local_err);
 | ||||||
|  | +        goto fail;
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    s->drop_state = DropNone;
 | ||||||
|  | +
 | ||||||
|  | +fail:
 | ||||||
|  | +    if (ret < 0) {
 | ||||||
|  | +        bdrv_unref_child(bs, bs->file);
 | ||||||
|  | +        if (s->bitmap) {
 | ||||||
|  | +            bdrv_release_dirty_bitmap(s->bitmap);
 | ||||||
|  | +        }
 | ||||||
|  | +    }
 | ||||||
|  | +    qemu_opts_del(opts);
 | ||||||
|  | +    return ret;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static void track_close(BlockDriverState *bs)
 | ||||||
|  | +{
 | ||||||
|  | +    BDRVAllocTrackState *s = bs->opaque;
 | ||||||
|  | +    if (s->bitmap) {
 | ||||||
|  | +        bdrv_release_dirty_bitmap(s->bitmap);
 | ||||||
|  | +    }
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int64_t track_getlength(BlockDriverState *bs)
 | ||||||
|  | +{
 | ||||||
|  | +    return bdrv_getlength(bs->file->bs);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int coroutine_fn track_co_preadv(BlockDriverState *bs,
 | ||||||
|  | +    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 | ||||||
|  | +{
 | ||||||
|  | +    BDRVAllocTrackState *s = bs->opaque;
 | ||||||
|  | +    QEMUIOVector local_qiov;
 | ||||||
|  | +    int ret;
 | ||||||
|  | +
 | ||||||
|  | +    /* 'cur_offset' is relative to 'offset', 'local_offset' to image start */
 | ||||||
|  | +    uint64_t cur_offset, local_offset;
 | ||||||
|  | +    int64_t local_bytes;
 | ||||||
|  | +    bool alloc;
 | ||||||
|  | +
 | ||||||
|  | +    /* a read request can span multiple granularity-sized chunks, and can thus
 | ||||||
|  | +     * contain blocks with different allocation status - we could just iterate
 | ||||||
|  | +     * granularity-wise, but for better performance use bdrv_dirty_bitmap_next_X
 | ||||||
|  | +     * to find the next flip and consider everything up to that in one go */
 | ||||||
|  | +    for (cur_offset = 0; cur_offset < bytes; cur_offset += local_bytes) {
 | ||||||
|  | +        local_offset = offset + cur_offset;
 | ||||||
|  | +        alloc = bdrv_dirty_bitmap_get(s->bitmap, local_offset);
 | ||||||
|  | +        if (alloc) {
 | ||||||
|  | +            local_bytes = bdrv_dirty_bitmap_next_zero(s->bitmap, local_offset,
 | ||||||
|  | +                                                      bytes - cur_offset);
 | ||||||
|  | +        } else {
 | ||||||
|  | +            local_bytes = bdrv_dirty_bitmap_next_dirty(s->bitmap, local_offset,
 | ||||||
|  | +                                                       bytes - cur_offset);
 | ||||||
|  | +        }
 | ||||||
|  | +
 | ||||||
|  | +        /* _bitmap_next_X return is -1 if no end found within limit, otherwise
 | ||||||
|  | +         * offset of next flip (to start of image) */
 | ||||||
|  | +        local_bytes = local_bytes < 0 ?
 | ||||||
|  | +            bytes - cur_offset :
 | ||||||
|  | +            local_bytes - local_offset;
 | ||||||
|  | +
 | ||||||
|  | +        qemu_iovec_init_slice(&local_qiov, qiov, cur_offset, local_bytes);
 | ||||||
|  | +
 | ||||||
|  | +        if (alloc) {
 | ||||||
|  | +            ret = bdrv_co_preadv(bs->file, local_offset, local_bytes,
 | ||||||
|  | +                                 &local_qiov, flags);
 | ||||||
|  | +        } else if (bs->backing) {
 | ||||||
|  | +            ret = bdrv_co_preadv(bs->backing, local_offset, local_bytes,
 | ||||||
|  | +                                 &local_qiov, flags);
 | ||||||
|  | +        } else {
 | ||||||
|  | +            ret = qemu_iovec_memset(&local_qiov, cur_offset, 0, local_bytes);
 | ||||||
|  | +        }
 | ||||||
|  | +
 | ||||||
|  | +        if (ret != 0) {
 | ||||||
|  | +            break;
 | ||||||
|  | +        }
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    return ret;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int coroutine_fn track_co_pwritev(BlockDriverState *bs,
 | ||||||
|  | +    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 | ||||||
|  | +{
 | ||||||
|  | +    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int coroutine_fn track_co_pwrite_zeroes(BlockDriverState *bs,
 | ||||||
|  | +    int64_t offset, int count, BdrvRequestFlags flags)
 | ||||||
|  | +{
 | ||||||
|  | +    return bdrv_pwrite_zeroes(bs->file, offset, count, flags);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int coroutine_fn track_co_pdiscard(BlockDriverState *bs,
 | ||||||
|  | +    int64_t offset, int count)
 | ||||||
|  | +{
 | ||||||
|  | +    return bdrv_co_pdiscard(bs->file, offset, count);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static coroutine_fn int track_co_flush(BlockDriverState *bs)
 | ||||||
|  | +{
 | ||||||
|  | +    return bdrv_co_flush(bs->file->bs);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int coroutine_fn track_co_block_status(BlockDriverState *bs,
 | ||||||
|  | +                                              bool want_zero,
 | ||||||
|  | +                                              int64_t offset,
 | ||||||
|  | +                                              int64_t bytes,
 | ||||||
|  | +                                              int64_t *pnum,
 | ||||||
|  | +                                              int64_t *map,
 | ||||||
|  | +                                              BlockDriverState **file)
 | ||||||
|  | +{
 | ||||||
|  | +    BDRVAllocTrackState *s = bs->opaque;
 | ||||||
|  | +
 | ||||||
|  | +    bool alloc = bdrv_dirty_bitmap_get(s->bitmap, offset);
 | ||||||
|  | +    int64_t next_flipped;
 | ||||||
|  | +    if (alloc) {
 | ||||||
|  | +        next_flipped = bdrv_dirty_bitmap_next_zero(s->bitmap, offset, bytes);
 | ||||||
|  | +    } else {
 | ||||||
|  | +        next_flipped = bdrv_dirty_bitmap_next_dirty(s->bitmap, offset, bytes);
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    /* in case not the entire region has the same state, we need to set pnum to
 | ||||||
|  | +     * indicate for how many bytes our result is valid */
 | ||||||
|  | +    *pnum = next_flipped == -1 ? bytes : next_flipped - offset;
 | ||||||
|  | +    *map = offset;
 | ||||||
|  | +
 | ||||||
|  | +    if (alloc) {
 | ||||||
|  | +        *file = bs->file->bs;
 | ||||||
|  | +        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 | ||||||
|  | +    } else if (bs->backing) {
 | ||||||
|  | +        *file = bs->backing->bs;
 | ||||||
|  | +    }
 | ||||||
|  | +    return 0;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static void track_child_perm(BlockDriverState *bs, BdrvChild *c,
 | ||||||
|  | +                             BdrvChildRole role, BlockReopenQueue *reopen_queue,
 | ||||||
|  | +                             uint64_t perm, uint64_t shared,
 | ||||||
|  | +                             uint64_t *nperm, uint64_t *nshared)
 | ||||||
|  | +{
 | ||||||
|  | +    BDRVAllocTrackState *s = bs->opaque;
 | ||||||
|  | +
 | ||||||
|  | +    *nshared = BLK_PERM_ALL;
 | ||||||
|  | +
 | ||||||
|  | +    /* in case we're currently dropping ourselves, claim to not use any
 | ||||||
|  | +     * permissions at all - which is fine, since from this point on we will
 | ||||||
|  | +     * never issue a read or write anymore */
 | ||||||
|  | +    if (s->drop_state == DropInProgress) {
 | ||||||
|  | +        *nperm = 0;
 | ||||||
|  | +        return;
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    if (role & BDRV_CHILD_DATA) {
 | ||||||
|  | +        *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
 | ||||||
|  | +    } else {
 | ||||||
|  | +        /* 'backing' is also a child of our BDS, but we don't expect it to be
 | ||||||
|  | +         * writeable, so we only forward 'consistent read' */
 | ||||||
|  | +        *nperm = perm & BLK_PERM_CONSISTENT_READ;
 | ||||||
|  | +    }
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static void track_drop(void *opaque)
 | ||||||
|  | +{
 | ||||||
|  | +    BlockDriverState *bs = (BlockDriverState*)opaque;
 | ||||||
|  | +    BlockDriverState *file = bs->file->bs;
 | ||||||
|  | +    BDRVAllocTrackState *s = bs->opaque;
 | ||||||
|  | +
 | ||||||
|  | +    assert(file);
 | ||||||
|  | +
 | ||||||
|  | +    /* we rely on the fact that we're not used anywhere else, so let's wait
 | ||||||
|  | +     * until we're only used once - in the drive connected to the guest (and one
 | ||||||
|  | +     * ref is held by bdrv_ref in track_change_backing_file) */
 | ||||||
|  | +    if (bs->refcnt > 2) {
 | ||||||
|  | +        aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, opaque);
 | ||||||
|  | +        return;
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    /* we do not need a bdrv_drained_end, since this is applied only to the node
 | ||||||
|  | +     * which gets removed by bdrv_replace_node */
 | ||||||
|  | +    bdrv_drained_begin(bs);
 | ||||||
|  | +
 | ||||||
|  | +    /* now that we're drained, we can safely set 'DropInProgress' */
 | ||||||
|  | +    s->drop_state = DropInProgress;
 | ||||||
|  | +    bdrv_child_refresh_perms(bs, bs->file, &error_abort);
 | ||||||
|  | +
 | ||||||
|  | +    bdrv_replace_node(bs, file, &error_abort);
 | ||||||
|  | +    bdrv_unref(bs);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static int track_change_backing_file(BlockDriverState *bs,
 | ||||||
|  | +                                     const char *backing_file,
 | ||||||
|  | +                                     const char *backing_fmt)
 | ||||||
|  | +{
 | ||||||
|  | +    BDRVAllocTrackState *s = bs->opaque;
 | ||||||
|  | +    if (s->auto_remove && s->drop_state == DropNone &&
 | ||||||
|  | +        backing_file == NULL && backing_fmt == NULL)
 | ||||||
|  | +    {
 | ||||||
|  | +        /* backing file has been disconnected, there's no longer any use for
 | ||||||
|  | +         * this node, so let's remove ourselves from the block graph - we need
 | ||||||
|  | +         * to schedule this for later however, since when this function is
 | ||||||
|  | +         * called, the blockjob modifying us is probably not done yet and has a
 | ||||||
|  | +         * blocker on 'bs' */
 | ||||||
|  | +        s->drop_state = DropRequested;
 | ||||||
|  | +        bdrv_ref(bs);
 | ||||||
|  | +        aio_bh_schedule_oneshot(qemu_get_aio_context(), track_drop, (void*)bs);
 | ||||||
|  | +    }
 | ||||||
|  | +
 | ||||||
|  | +    return 0;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static BlockDriver bdrv_alloc_track = {
 | ||||||
|  | +    .format_name                      = "alloc-track",
 | ||||||
|  | +    .instance_size                    = sizeof(BDRVAllocTrackState),
 | ||||||
|  | +
 | ||||||
|  | +    .bdrv_file_open                   = track_open,
 | ||||||
|  | +    .bdrv_close                       = track_close,
 | ||||||
|  | +    .bdrv_getlength                   = track_getlength,
 | ||||||
|  | +    .bdrv_child_perm                  = track_child_perm,
 | ||||||
|  | +    .bdrv_refresh_limits              = track_refresh_limits,
 | ||||||
|  | +
 | ||||||
|  | +    .bdrv_co_pwrite_zeroes            = track_co_pwrite_zeroes,
 | ||||||
|  | +    .bdrv_co_pwritev                  = track_co_pwritev,
 | ||||||
|  | +    .bdrv_co_preadv                   = track_co_preadv,
 | ||||||
|  | +    .bdrv_co_pdiscard                 = track_co_pdiscard,
 | ||||||
|  | +
 | ||||||
|  | +    .bdrv_co_flush                    = track_co_flush,
 | ||||||
|  | +    .bdrv_co_flush_to_disk            = track_co_flush,
 | ||||||
|  | +
 | ||||||
|  | +    .supports_backing                 = true,
 | ||||||
|  | +
 | ||||||
|  | +    .bdrv_co_block_status             = track_co_block_status,
 | ||||||
|  | +    .bdrv_change_backing_file         = track_change_backing_file,
 | ||||||
|  | +};
 | ||||||
|  | +
 | ||||||
|  | +static void bdrv_alloc_track_init(void)
 | ||||||
|  | +{
 | ||||||
|  | +    bdrv_register(&bdrv_alloc_track);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +block_init(bdrv_alloc_track_init);
 | ||||||
|  | diff --git a/block/meson.build b/block/meson.build
 | ||||||
|  | index a070060e53..e387990764 100644
 | ||||||
|  | --- a/block/meson.build
 | ||||||
|  | +++ b/block/meson.build
 | ||||||
|  | @@ -2,6 +2,7 @@ block_ss.add(genh)
 | ||||||
|  |  block_ss.add(files( | ||||||
|  |    'accounting.c', | ||||||
|  |    'aio_task.c', | ||||||
|  | +  'alloc-track.c',
 | ||||||
|  |    'amend.c', | ||||||
|  |    'backup.c', | ||||||
|  |    'backup-dump.c', | ||||||
							
								
								
									
										4
									
								
								debian/patches/series
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								debian/patches/series
									
									
									
									
										vendored
									
									
								
							| @ -56,3 +56,7 @@ pve/0040-migration-block-dirty-bitmap-migrate-other-bitmaps-e.patch | |||||||
| pve/0041-PVE-fall-back-to-open-iscsi-initiatorname.patch | pve/0041-PVE-fall-back-to-open-iscsi-initiatorname.patch | ||||||
| pve/0042-PVE-Use-coroutine-QMP-for-backup-cancel_backup.patch | pve/0042-PVE-Use-coroutine-QMP-for-backup-cancel_backup.patch | ||||||
| pve/0043-PBS-add-master-key-support.patch | pve/0043-PBS-add-master-key-support.patch | ||||||
|  | pve/0044-PVE-block-pbs-fast-path-reads-without-allocation-if-.patch | ||||||
|  | pve/0045-PVE-block-stream-increase-chunk-size.patch | ||||||
|  | pve/0046-block-io-accept-NULL-qiov-in-bdrv_pad_request.patch | ||||||
|  | pve/0047-block-add-alloc-track-driver.patch | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Stefan Reiter
						Stefan Reiter