From d8fa32a79d8b5f759ad30ed54eb75b2ab0a746f0 Mon Sep 17 00:00:00 2001
From: Rob Norris <robn@despairlabs.com>
Date: Wed, 31 Jul 2024 14:35:48 +1000
Subject: [PATCH] linux/zvol_os: tidy and document queue limit/config setup

It gets hairier again in Linux 6.11, so I want some actual theory of
operation laid out for next time.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <robn@despairlabs.com>
Sponsored-by: https://despairlabs.com/sponsor/
Closes #16400
---
 module/os/linux/zfs/zvol_os.c | 45 +++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index ee8bc515d..fb4a2d886 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
  */
 
 #include <sys/dataset_kstats.h>
@@ -1074,6 +1075,34 @@ static const struct block_device_operations zvol_ops = {
 #endif
 };
 
+/*
+ * Since 6.9, Linux has been removing queue limit setters in favour of an
+ * initial queue_limits struct applied when the device is opened. Since 6.11,
+ * queue_limits is being extended to allow more things to be applied when the
+ * device is opened, and the corresponding setters are being removed as well.
+ *
+ * For OpenZFS, this means that depending on kernel version, some options may
+ * be set up before the device is open, and some applied to an open device
+ * (queue) after the fact.
+ *
+ * We manage this complexity by having our own limits struct,
+ * zvol_queue_limits_t, in which we carry any queue config that we're
+ * interested in setting. This structure is the same on all kernels.
+ *
+ * These limits are then applied to the queue at device open time by the most
+ * appropriate method for the kernel.
+ *
+ * zvol_queue_limits_convert() is used on 6.9+ (where the two-arg form of
+ * blk_alloc_disk() exists). This converts our limits struct to a proper Linux
+ * struct queue_limits, and passes it in. Any fields added in later kernels are
+ * (obviously) not set up here.
+ *
+ * zvol_queue_limits_apply() is called on all kernel versions after the queue
+ * is created, and applies any remaining config. Before 6.9 that will be
+ * everything, via setter methods. From 6.9 on, it will be whatever couldn't be
+ * put into struct queue_limits. (This implies that zvol_queue_limits_apply()
+ * will always be a no-op on the latest kernel we support.)
+ */
 typedef struct zvol_queue_limits {
 	unsigned int	zql_max_hw_sectors;
 	unsigned short	zql_max_segments;
@@ -1176,11 +1205,13 @@ zvol_queue_limits_convert(zvol_queue_limits_t *limits,
 	    BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_IO_STAT;
 #endif
 }
-#else
+#endif
+
 static void
 zvol_queue_limits_apply(zvol_queue_limits_t *limits,
     struct request_queue *queue)
 {
+#ifndef HAVE_BLK_ALLOC_DISK_2ARG
 	blk_queue_max_hw_sectors(queue, limits->zql_max_hw_sectors);
 	blk_queue_max_segments(queue, limits->zql_max_segments);
 	blk_queue_max_segment_size(queue, limits->zql_max_segment_size);
@@ -1194,7 +1225,6 @@ zvol_queue_limits_apply(zvol_queue_limits_t *limits,
 	blk_queue_flag_set(QUEUE_FLAG_IO_STAT, queue);
 #endif
 }
-#endif
 
 static int
 zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
@@ -1232,7 +1262,6 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
 	}
 
 	zso->zvo_disk->queue = zso->zvo_queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
 #endif /* HAVE_BLK_ALLOC_DISK */
 #else
 	zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
@@ -1246,8 +1275,10 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
 	}
 
 	zso->zvo_disk->queue = zso->zvo_queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
 #endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
+
+	zvol_queue_limits_apply(limits, zso->zvo_queue);
+
 	return (0);
 
 }
@@ -1269,7 +1300,6 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits)
 		return (1);
 	}
 	zso->zvo_queue = zso->zvo_disk->queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
 	zso->zvo_disk->minors = ZVOL_MINORS;
 #elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
 	struct queue_limits qlimits;
@@ -1300,10 +1330,11 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits)
 
 	/* Our queue is now created, assign it to our disk */
 	zso->zvo_disk->queue = zso->zvo_queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
+#endif
 
+	zvol_queue_limits_apply(limits, zso->zvo_queue);
 #endif
-#endif
+
 	return (0);
 }