Improve ZVOL queue behavior.

The Linux block device queue subsystem exposes a number of configurable
settings described in Linux block/blk-settings.c. The defaults for these
settings are tuned for hard drives, and are not optimized for ZVOLs. Proper
configuration of these options would allow upper layers (I/O scheduler) to
take better decisions about write merging and ordering.

Detailed rationale:

 - max_hw_sectors is set to unlimited (UINT_MAX). zvol_write() is able to
   handle writes of any size, so there's no reason to impose a limit. Let the
   upper layer decide.

 - max_segments and max_segment_size are set to unlimited. zvol_write() will
   copy the requests' contents into a dbuf anyway, so the number and size of
   the segments are irrelevant. Let the upper layer decide.

 - physical_block_size and io_opt are set to the ZVOL's block size. This
   has the potential to somewhat alleviate issue #361 for ZVOLs, by warning
   the upper layers that writes smaller than the volume's block size will be
   slow.

 - The NONROT flag is set to indicate this isn't a rotational device.
   Although the backing zpool might be composed of rotational devices, the
   resulting ZVOL often doesn't exhibit the same behavior due to the COW
   mechanisms used by ZFS. Setting this flag will prevent upper layers from
   making useless decisions (such as reordering writes) based on incorrect
   assumptions about the behavior of the ZVOL.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Etienne Dechamps
2011-09-05 15:15:45 +02:00
committed by Brian Behlendorf
parent b18019d2d8
commit 34037afe24
64 changed files with 1159 additions and 0 deletions
+5
View File
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
+5
View File
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
+47
View File
@@ -200,6 +200,53 @@ __blk_rq_bytes(struct request *req)
#define blk_queue_stackable(q) ((q)->request_fn == NULL)
#endif
/*
* 2.6.34 API change,
* The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
*/
#ifndef HAVE_BLK_QUEUE_MAX_HW_SECTORS
#define blk_queue_max_hw_sectors __blk_queue_max_hw_sectors
static inline void
__blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
blk_queue_max_sectors(q, max_hw_sectors);
}
#endif
/*
* 2.6.34 API change,
* The blk_queue_max_segments() function consolidates
* blk_queue_max_hw_segments() and blk_queue_max_phys_segments().
*/
#ifndef HAVE_BLK_QUEUE_MAX_SEGMENTS
#define blk_queue_max_segments __blk_queue_max_segments
static inline void
__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
{
blk_queue_max_phys_segments(q, max_segments);
blk_queue_max_hw_segments(q, max_segments);
}
#endif
/*
* 2.6.30 API change,
* The blk_queue_physical_block_size() function was introduced to
* indicate the smallest I/O the device can write without incurring
* a read-modify-write penalty. For older kernels this is a no-op.
*/
#ifndef HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE
#define blk_queue_physical_block_size(q, x) ((void)(0))
#endif
/*
* 2.6.30 API change,
* The blk_queue_io_opt() function was added to indicate the optimal
* I/O size for the device. For older kernels this is a no-op.
*/
#ifndef HAVE_BLK_QUEUE_IO_OPT
#define blk_queue_io_opt(q, x) ((void)(0))
#endif
#ifndef HAVE_GET_DISK_RO
static inline int
get_disk_ro(struct gendisk *disk)
+5
View File
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
+5
View File
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
+5
View File
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
+5
View File
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \