diff --git a/debian/patches/0010-zvol-Remove-broken-blk-mq-optimization.patch b/debian/patches/0010-zvol-Remove-broken-blk-mq-optimization.patch new file mode 100644 index 0000000..178e68e --- /dev/null +++ b/debian/patches/0010-zvol-Remove-broken-blk-mq-optimization.patch @@ -0,0 +1,99 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Tony Hutter +Date: Mon, 23 Oct 2023 14:45:06 -0700 +Subject: [PATCH] zvol: Remove broken blk-mq optimization + +This fix removes a dubious optimization in zfs_uiomove_bvec_rq() +that saved the iterator contents of a rq_for_each_segment(). This +optimization allowed restoring the "saved state" from a previous +rq_for_each_segment() call on the same uio so that you wouldn't +need to iterate through each bvec on every zfs_uiomove_bvec_rq() call. +However, if the kernel is manipulating the requests/bios/bvecs under +the covers between zfs_uiomove_bvec_rq() calls, then it could result +in corruption from using the "saved state". This optimization +results in an unbootable system after installing an OS on a zvol +with blk-mq enabled. + +Reviewed-by: Brian Behlendorf +Signed-off-by: Tony Hutter +Closes #15351 +(cherry picked from commit 7c9b6fed16ed5034fd1cdfdaedfad93dc97b1557) +Signed-off-by: Stoiko Ivanov +--- + include/os/linux/spl/sys/uio.h | 8 -------- + module/os/linux/zfs/zfs_uio.c | 29 ----------------------------- + 2 files changed, 37 deletions(-) + +diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h +index cce097e16..a4b600004 100644 +--- a/include/os/linux/spl/sys/uio.h ++++ b/include/os/linux/spl/sys/uio.h +@@ -73,13 +73,6 @@ typedef struct zfs_uio { + size_t uio_skip; + + struct request *rq; +- +- /* +- * Used for saving rq_for_each_segment() state between calls +- * to zfs_uiomove_bvec_rq(). 
+- */ +- struct req_iterator iter; +- struct bio_vec bv; + } zfs_uio_t; + + +@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq) + } else { + uio->uio_bvec = NULL; + uio->uio_iovcnt = 0; +- memset(&uio->iter, 0, sizeof (uio->iter)); + } + + uio->uio_loffset = io_offset(bio, rq); +diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c +index 3efd4ab15..c2ed67c43 100644 +--- a/module/os/linux/zfs/zfs_uio.c ++++ b/module/os/linux/zfs/zfs_uio.c +@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) + this_seg_start = orig_loffset; + + rq_for_each_segment(bv, rq, iter) { +- if (uio->iter.bio) { +- /* +- * If uio->iter.bio is present, then we know we've saved +- * uio->iter from a previous call to this function, and +- * we can skip ahead in this rq_for_each_segment() loop +- * to where we last left off. That way, we don't need +- * to iterate over tons of segments we've already +- * processed - we can just restore the "saved state". +- */ +- iter = uio->iter; +- bv = uio->bv; +- this_seg_start = uio->uio_loffset; +- memset(&uio->iter, 0, sizeof (uio->iter)); +- continue; +- } +- + /* + * Lookup what the logical offset of the last byte of this + * segment is. +@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) + copied = 1; /* We copied some data */ + } + +- if (n == 0) { +- /* +- * All done copying. Save our 'iter' value to the uio. +- * This allows us to "save our state" and skip ahead in +- * the rq_for_each_segment() loop the next time we call +- * call zfs_uiomove_bvec_rq() on this uio (which we +- * will be doing for any remaining data in the uio). 
+- */ +- uio->iter = iter; /* make a copy of the struct data */ +- uio->bv = bv; +- return (0); +- } +- + this_seg_start = this_seg_end + 1; + } + diff --git a/debian/patches/0011-Revert-zvol-Temporally-disable-blk-mq.patch b/debian/patches/0011-Revert-zvol-Temporally-disable-blk-mq.patch new file mode 100644 index 0000000..2671cda --- /dev/null +++ b/debian/patches/0011-Revert-zvol-Temporally-disable-blk-mq.patch @@ -0,0 +1,123 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Tony Hutter +Date: Mon, 23 Oct 2023 14:39:59 -0700 +Subject: [PATCH] Revert "zvol: Temporally disable blk-mq" + +This reverts commit aefb6a2bd6c24597cde655e9ce69edd0a4c34357. + +aefb6a2bd temporarily disabled blk-mq until a fix for #15351 was found. + +Signed-off-by: Tony Hutter +Closes #15439 +(cherry picked from commit 05c4710e8958832afc2868102c9535a4f18115be) +Signed-off-by: Stoiko Ivanov +--- + man/man4/zfs.4 | 57 ++++++++++++++++++++++++++++ + module/os/linux/zfs/zvol_os.c | 12 ++++++ + tests/zfs-tests/include/tunables.cfg | 2 +- + 3 files changed, 70 insertions(+), 1 deletion(-) + +diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 +index 71a3e67ee..cfadd79d8 100644 +--- a/man/man4/zfs.4 ++++ b/man/man4/zfs.4 +@@ -2317,6 +2317,63 @@ If + .Sy zvol_threads + to the number of CPUs present or 32 (whichever is greater). + . ++.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint ++The number of threads per zvol to use for queuing IO requests. ++This parameter will only appear if your kernel supports ++.Li blk-mq ++and is only read and assigned to a zvol at zvol load time. ++If ++.Sy 0 ++(the default) then internally set ++.Sy zvol_blk_mq_threads ++to the number of CPUs present. ++. ++.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint ++Set to ++.Sy 1 ++to use the ++.Li blk-mq ++API for zvols. ++Set to ++.Sy 0 ++(the default) to use the legacy zvol APIs. ++This setting can give better or worse zvol performance depending on ++the workload. 
++This parameter will only appear if your kernel supports ++.Li blk-mq ++and is only read and assigned to a zvol at zvol load time. ++. ++.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint ++If ++.Sy zvol_use_blk_mq ++is enabled, then process this number of ++.Sy volblocksize Ns -sized blocks per zvol thread. ++This tunable can be used to favor better performance for zvol reads (lower ++values) or writes (higher values). ++If set to ++.Sy 0 , ++then the zvol layer will process the maximum number of blocks ++per thread that it can. ++This parameter will only appear if your kernel supports ++.Li blk-mq ++and is only applied at each zvol's load time. ++. ++.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint ++The queue_depth value for the zvol ++.Li blk-mq ++interface. ++This parameter will only appear if your kernel supports ++.Li blk-mq ++and is only applied at each zvol's load time. ++If ++.Sy 0 ++(the default) then use the kernel's default queue depth. ++Values are clamped to the kernel's ++.Dv BLKDEV_MIN_RQ ++and ++.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ ++limits. ++. 
+ .It Sy zvol_volmode Ns = Ns Sy 1 Pq uint + Defines zvol block devices behaviour when + .Sy volmode Ns = Ns Sy default : +diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c +index 76521c959..7a95b54bd 100644 +--- a/module/os/linux/zfs/zvol_os.c ++++ b/module/os/linux/zfs/zvol_os.c +@@ -1620,6 +1620,18 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end"); + module_param(zvol_volmode, uint, 0644); + MODULE_PARM_DESC(zvol_volmode, "Default volmode property value"); + ++#ifdef HAVE_BLK_MQ ++module_param(zvol_blk_mq_queue_depth, uint, 0644); ++MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth"); ++ ++module_param(zvol_use_blk_mq, uint, 0644); ++MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols"); ++ ++module_param(zvol_blk_mq_blocks_per_thread, uint, 0644); ++MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread, ++ "Process volblocksize blocks per thread"); ++#endif ++ + #ifndef HAVE_BLKDEV_GET_ERESTARTSYS + module_param(zvol_open_timeout_ms, uint, 0644); + MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries"); +diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg +index 8010a9451..80e7bcb3b 100644 +--- a/tests/zfs-tests/include/tunables.cfg ++++ b/tests/zfs-tests/include/tunables.cfg +@@ -89,7 +89,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip + VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev + VOL_MODE vol.mode zvol_volmode + VOL_RECURSIVE vol.recursive UNSUPPORTED +-VOL_USE_BLK_MQ UNSUPPORTED UNSUPPORTED ++VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq + XATTR_COMPAT xattr_compat zfs_xattr_compat + ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max + ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max diff --git a/debian/patches/series b/debian/patches/series index d162be1..710cbfb 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -7,3 +7,5 @@ 0007-Add-systemd-unit-for-importing-specific-pools.patch 
0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch 0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch +0010-zvol-Remove-broken-blk-mq-optimization.patch +0011-Revert-zvol-Temporally-disable-blk-mq.patch