linux/zvol_os: tidy and document queue limit/config setup

It gets hairier again in Linux 6.11, so I want some actual theory of
operation laid out for next time.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <robn@despairlabs.com>
Sponsored-by: https://despairlabs.com/sponsor/
Closes #16400
This commit is contained in:
Rob Norris 2024-07-31 14:35:48 +10:00 committed by Brian Behlendorf
parent 9c56b8ec78
commit 767b37019f

View File

@ -20,6 +20,7 @@
*/ */
/* /*
* Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
* Copyright (c) 2024, Klara, Inc. * Copyright (c) 2024, Klara, Inc.
*/ */
@ -1089,6 +1090,34 @@ static const struct block_device_operations zvol_ops = {
#endif #endif
}; };
/*
* Since 6.9, Linux has been removing queue limit setters in favour of an
* initial queue_limits struct applied when the device is open. Since 6.11,
* queue_limits is being extended to allow more things to be applied when the
* device is open. Setters are also being removed for this.
*
* For OpenZFS, this means that depending on kernel version, some options may
* be set up before the device is open, and some applied to an open device
* (queue) after the fact.
*
* We manage this complexity by having our own limits struct,
* zvol_queue_limits_t, in which we carry any queue config that we're
* interested in setting. This structure is the same on all kernels.
*
* These limits are then applied to the queue at device open time by the most
* appropriate method for the kernel.
*
* zvol_queue_limits_convert() is used on 6.9+ (where the two-arg form of
* blk_alloc_disk() exists). This converts our limits struct to a proper Linux
* struct queue_limits, and passes it in. Any fields added in later kernels are
* (obviously) not set up here.
*
* zvol_queue_limits_apply() is called on all kernel versions after the queue
* is created, and applies any remaining config. Before 6.9 that will be
* everything, via setter methods. After 6.9 that will be whatever couldn't be
* put into struct queue_limits. (This implies that zvol_queue_limits_apply()
* will always be a no-op on the latest kernel we support).
*/
typedef struct zvol_queue_limits { typedef struct zvol_queue_limits {
unsigned int zql_max_hw_sectors; unsigned int zql_max_hw_sectors;
unsigned short zql_max_segments; unsigned short zql_max_segments;
@ -1175,17 +1204,18 @@ zvol_queue_limits_convert(zvol_queue_limits_t *limits,
qlimits->max_segment_size = limits->zql_max_segment_size; qlimits->max_segment_size = limits->zql_max_segment_size;
qlimits->io_opt = limits->zql_io_opt; qlimits->io_opt = limits->zql_io_opt;
} }
#else #endif
static void static void
zvol_queue_limits_apply(zvol_queue_limits_t *limits, zvol_queue_limits_apply(zvol_queue_limits_t *limits,
struct request_queue *queue) struct request_queue *queue)
{ {
#ifndef HAVE_BLK_ALLOC_DISK_2ARG
blk_queue_max_hw_sectors(queue, limits->zql_max_hw_sectors); blk_queue_max_hw_sectors(queue, limits->zql_max_hw_sectors);
blk_queue_max_segments(queue, limits->zql_max_segments); blk_queue_max_segments(queue, limits->zql_max_segments);
blk_queue_max_segment_size(queue, limits->zql_max_segment_size); blk_queue_max_segment_size(queue, limits->zql_max_segment_size);
blk_queue_io_opt(queue, limits->zql_io_opt); blk_queue_io_opt(queue, limits->zql_io_opt);
} }
#endif
static int static int
zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits) zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
@ -1223,7 +1253,6 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
} }
zso->zvo_disk->queue = zso->zvo_queue; zso->zvo_disk->queue = zso->zvo_queue;
zvol_queue_limits_apply(limits, zso->zvo_queue);
#endif /* HAVE_BLK_ALLOC_DISK */ #endif /* HAVE_BLK_ALLOC_DISK */
#else #else
zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE); zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
@ -1237,8 +1266,10 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
} }
zso->zvo_disk->queue = zso->zvo_queue; zso->zvo_disk->queue = zso->zvo_queue;
zvol_queue_limits_apply(limits, zso->zvo_queue);
#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ #endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
zvol_queue_limits_apply(limits, zso->zvo_queue);
return (0); return (0);
} }
@ -1260,7 +1291,6 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits)
return (1); return (1);
} }
zso->zvo_queue = zso->zvo_disk->queue; zso->zvo_queue = zso->zvo_disk->queue;
zvol_queue_limits_apply(limits, zso->zvo_queue);
zso->zvo_disk->minors = ZVOL_MINORS; zso->zvo_disk->minors = ZVOL_MINORS;
#elif defined(HAVE_BLK_ALLOC_DISK_2ARG) #elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
struct queue_limits qlimits; struct queue_limits qlimits;
@ -1291,10 +1321,11 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits)
/* Our queue is now created, assign it to our disk */ /* Our queue is now created, assign it to our disk */
zso->zvo_disk->queue = zso->zvo_queue; zso->zvo_disk->queue = zso->zvo_queue;
zvol_queue_limits_apply(limits, zso->zvo_queue); #endif
zvol_queue_limits_apply(limits, zso->zvo_queue);
#endif #endif
#endif
return (0); return (0);
} }