Merge branch 'zvol'

Performance improvements for zvols. Signed-off-by: Richard Yao <ryao@gentoo.org> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3720
2025-10-22 16:05:00 +03:00 · 2015-09-04 13:02:48 -07:00 · 2015-09-04 13:02:48 -07:00 · e20cd6f7a8
commit e20cd6f7a8
parent dca8c34da4 d60328645d
21 changed files with 292 additions and 704 deletions
--- a/config/kernel-bio-rw-barrier.m4
+++ b/config/kernel-bio-rw-barrier.m4
@ -0,0 +1,25 @@
 dnl #
 dnl # Interface for issuing a barrier bio:
 dnl # 2.6.28-2.6.35: BIO_RW_BARRIER
 dnl # 2.6.36-3.x:    REQ_BARRIER
 dnl #
 dnl # Since REQ_BARRIER is a preprocessor definition, there is no need for an
 dnl # autotools check for it. Also, REQ_BARRIER existed in the request layer
 dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the
 dnl # request layer and bio layer flags, so it would be wrong to assume that
 dnl # the APIs are mutually exclusive contrary to the typical case.
 AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_BARRIER], [
 	AC_MSG_CHECKING([whether BIO_RW_BARRIER is defined])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/bio.h>
 	],[
 		int flags __attribute__ ((unused));
 		flags = BIO_RW_BARRIER;
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BIO_RW_BARRIER, 1, [BIO_RW_BARRIER is defined])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
--- a/config/kernel-bio-rw-discard.m4
+++ b/config/kernel-bio-rw-discard.m4
@ -0,0 +1,25 @@
 dnl #
 dnl # Interface for issuing a discard bio:
 dnl # 2.6.28-2.6.35: BIO_RW_DISCARD
 dnl # 2.6.36-3.x:    REQ_DISCARD
 dnl #
 dnl # Since REQ_DISCARD is a preprocessor definition, there is no need for an
 dnl # autotools check for it. Also, REQ_DISCARD existed in the request layer
 dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the
 dnl # request layer and bio layer flags, so it would be wrong to assume that
 dnl # the APIs are mutually exclusive contrary to the typical case.
 AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_DISCARD], [
 	AC_MSG_CHECKING([whether BIO_RW_DISCARD is defined])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/bio.h>
 	],[
 		int flags __attribute__ ((unused));
 		flags = BIO_RW_DISCARD;
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BIO_RW_DISCARD, 1, [BIO_RW_DISCARD is defined])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
--- a/config/kernel-blk-end-request.m4
+++ b/config/kernel-blk-end-request.m4
@ -1,40 +0,0 @@
 dnl #
 dnl # 2.6.31 API change
 dnl # In 2.6.29 kernels blk_end_request() was a GPL-only symbol, this was
 dnl # changed in 2.6.31 so it may be used by non-GPL modules.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_END_REQUEST], [
 	AC_MSG_CHECKING([whether blk_end_request() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request *req = NULL;
 		(void) blk_end_request(req, 0, 0);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_END_REQUEST, 1,
 		          [blk_end_request() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	AC_MSG_CHECKING([whether blk_end_request() is GPL-only])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/module.h>
 		#include <linux/blkdev.h>
 		MODULE_LICENSE("$ZFS_META_LICENSE");
 	],[
 		struct request *req = NULL;
 		(void) blk_end_request(req, 0, 0);
 	],[
 		AC_MSG_RESULT(no)
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_END_REQUEST_GPL_ONLY, 1,
 		          [blk_end_request() is GPL-only])
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-blk-fetch-request.m4
+++ b/config/kernel-blk-fetch-request.m4
@ -1,25 +0,0 @@
 dnl #
 dnl # 2.6.31 API change
 dnl # Request queue peek/retrieval interface cleanup, the blk_fetch_request()
 dnl # function replaces the elv_next_request() and blkdev_dequeue_request()
 dnl # functions.  The updated blk_fetch_request() function returns the
 dnl # next available request and removes it from the request queue.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_FETCH_REQUEST], [
 	AC_MSG_CHECKING([whether blk_fetch_request() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue *q = NULL;
 		(void) blk_fetch_request(q);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_FETCH_REQUEST, 1,
 		          [blk_fetch_request() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-blk-queue-discard.m4
+++ b/config/kernel-blk-queue-discard.m4
@ -1,22 +0,0 @@
 dnl #
 dnl # 2.6.32 API change
 dnl # Discard requests were moved to the normal I/O path.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [
 	AC_MSG_CHECKING([whether blk_queue_discard() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue *q = NULL;
 		(void) blk_queue_discard(q);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1,
 		          [blk_queue_discard() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-blk-queue-nonrot.m4
+++ b/config/kernel-blk-queue-nonrot.m4
@ -1,25 +0,0 @@
 dnl #
 dnl # 2.6.27 API change
 dnl # The blk_queue_nonrot() function and QUEUE_FLAG_NONROT flag were
 dnl # added so non-rotational devices could be identified.  These devices
 dnl # have no seek time which the higher level elevator uses to optimize
 dnl # how the I/O is issued to the device.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_NONROT], [
 	AC_MSG_CHECKING([whether blk_queue_nonrot() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue *q = NULL;
 		(void) blk_queue_nonrot(q);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_NONROT, 1,
 		          [blk_queue_nonrot() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-blk-requeue-request.m4
+++ b/config/kernel-blk-requeue-request.m4
@ -1,25 +0,0 @@
 dnl #
 dnl # 2.6.31 API change
 dnl # Request queue peek/retrieval interface cleanup, the
 dnl # elv_requeue_request() function has been replaced with the
 dnl # blk_requeue_request() function.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_REQUEUE_REQUEST], [
 	AC_MSG_CHECKING([whether blk_requeue_request() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue *q = NULL;
 		struct request *req = NULL;
 		blk_requeue_request(q, req);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_REQUEUE_REQUEST, 1,
 		          [blk_requeue_request() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-blk-rq-bytes.m4
+++ b/config/kernel-blk-rq-bytes.m4
@ -1,41 +0,0 @@
 dnl #
 dnl # 2.6.29 API change
 dnl # In the 2.6.29 kernel blk_rq_bytes() was available as a GPL-only symbol.
 dnl # So we need to check the symbol license as well.  As of 2.6.31 the
 dnl # blk_rq_bytes() helper was changed to a static inline which we can use.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_RQ_BYTES], [
 	AC_MSG_CHECKING([whether blk_rq_bytes() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request *req = NULL;
 		(void) blk_rq_bytes(req);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_RQ_BYTES, 1,
 		          [blk_rq_bytes() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	AC_MSG_CHECKING([whether blk_rq_bytes() is GPL-only])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/module.h>
 		#include <linux/blkdev.h>
 		MODULE_LICENSE("$ZFS_META_LICENSE");
 	],[
 		struct request *req = NULL;
 		(void) blk_rq_bytes(req);
 	],[
 		AC_MSG_RESULT(no)
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_RQ_BYTES_GPL_ONLY, 1,
 		          [blk_rq_bytes() is GPL-only])
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-blk-rq-pos.m4
+++ b/config/kernel-blk-rq-pos.m4
@ -1,21 +0,0 @@
 dnl #
 dnl # 2.6.31 API change
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_RQ_POS], [
 	AC_MSG_CHECKING([whether blk_rq_pos() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request *req = NULL;
 		(void) blk_rq_pos(req);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_RQ_POS, 1,
 		          [blk_rq_pos() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-blk-rq-sectors.m4
+++ b/config/kernel-blk-rq-sectors.m4
@ -1,21 +0,0 @@
 dnl #
 dnl # 2.6.31 API change
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLK_RQ_SECTORS], [
 	AC_MSG_CHECKING([whether blk_rq_sectors() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request *req = NULL;
 		(void) blk_rq_sectors(req);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_RQ_SECTORS, 1,
 		          [blk_rq_sectors() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-current_bio_tail.m4
+++ b/config/kernel-current_bio_tail.m4
@ -0,0 +1,33 @@
 dnl #
 dnl # 2.6.34 API change
 dnl # current->bio_tail and current->bio_list were struct bio pointers prior to
 dnl # Linux 2.6.34. They were refactored into a struct bio_list pointer called
 dnl # current->bio_list in Linux 2.6.34.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_CURRENT_BIO_TAIL], [
 	AC_MSG_CHECKING([whether current->bio_tail exists])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/sched.h>
 	],[
 		current->bio_tail = (struct bio **) NULL;
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_CURRENT_BIO_TAIL, 1,
 		    [current->bio_tail exists])
 	],[
 		AC_MSG_RESULT(no)
 		AC_MSG_CHECKING([whether current->bio_list exists])
 		ZFS_LINUX_TRY_COMPILE([
 			#include <linux/sched.h>
 		],[
 			current->bio_list = (struct bio_list *) NULL;
 		],[
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_CURRENT_BIO_LIST, 1,
 			    [current->bio_list exists])
 		],[
 			AC_MSG_ERROR(no - Please file a bug report at
 			    https://github.com/zfsonlinux/zfs/issues/new)
 		])
 	])
 ])
--- a/config/kernel-mk-request-fn.m4
+++ b/config/kernel-mk-request-fn.m4
@ -0,0 +1,43 @@
 dnl #
 dnl # Linux 3.2 API Change
 dnl # make_request_fn returns void instead of int.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
 	AC_MSG_CHECKING([whether make_request_fn() returns int])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 		int make_request(struct request_queue *q, struct bio *bio)
 		{
 			return (0);
 		}
 	],[
 		blk_queue_make_request(NULL, &make_request);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(MAKE_REQUEST_FN_RET, int,
 		    [make_request_fn() returns int])
 		AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, 1,
 		    [Noting that make_request_fn() returns int])
 	],[
 		AC_MSG_RESULT(no)
 		AC_MSG_CHECKING([whether make_request_fn() returns void])
 		ZFS_LINUX_TRY_COMPILE([
 			#include <linux/blkdev.h>
 			void make_request(struct request_queue *q, struct bio *bio)
 			{
 				return;
 			}
 		],[
 			blk_queue_make_request(NULL, &make_request);
 		],[
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(MAKE_REQUEST_FN_RET, void,
 			    [make_request_fn() returns void])
 		],[
 			AC_MSG_ERROR(no - Please file a bug report at
 			    https://github.com/zfsonlinux/zfs/issues/new)
 		])
 	])
 ])
--- a/config/kernel-rq-for-each_segment.m4
+++ b/config/kernel-rq-for-each_segment.m4
@ -1,47 +0,0 @@
 dnl #
 dnl # 2.6.x API change
 dnl #
 dnl # 3.14 API change
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_RQ_FOR_EACH_SEGMENT], [
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	AC_MSG_CHECKING([whether rq_for_each_segment() wants bio_vec *])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct bio_vec *bv;
 		struct req_iterator iter;
 		struct request *req = NULL;
 		rq_for_each_segment(bv, req, iter) { }
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT, 1,
 		          [rq_for_each_segment() is available])
 		AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT_BVP, 1,
 		          [rq_for_each_segment() wants bio_vec *])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	AC_MSG_CHECKING([whether rq_for_each_segment() wants bio_vec])
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct bio_vec bv;
 		struct req_iterator iter;
 		struct request *req = NULL;
 		rq_for_each_segment(bv, req, iter) { }
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT, 1,
 		          [rq_for_each_segment() is available])
 		AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT_BV, 1,
 		          [rq_for_each_segment() wants bio_vec])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel-rq-is_sync.m4
+++ b/config/kernel-rq-is_sync.m4
@ -1,21 +0,0 @@
 dnl #
 dnl # 2.6.x API change
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_RQ_IS_SYNC], [
 	AC_MSG_CHECKING([whether rq_is_sync() is available])
 	tmp_flags="$EXTRA_KCFLAGS"
 	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 	ZFS_LINUX_TRY_COMPILE([
 		#include <linux/blkdev.h>
 	],[
 		struct request *req = NULL;
 		(void) rq_is_sync(req);
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_RQ_IS_SYNC, 1,
 		          [rq_is_sync() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 	EXTRA_KCFLAGS="$tmp_flags"
 ])
--- a/config/kernel.m4
+++ b/config/kernel.m4
@ -7,6 +7,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 	ZFS_AC_TEST_MODULE
 	ZFS_AC_KERNEL_CONFIG
 	ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
 	ZFS_AC_KERNEL_CURRENT_BIO_TAIL
 	ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
 	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
 	ZFS_AC_KERNEL_TYPE_FMODE_T
@ -22,24 +23,16 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 	ZFS_AC_KERNEL_BIO_FAILFAST_DTD
 	ZFS_AC_KERNEL_REQ_FAILFAST_MASK
 	ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
 	ZFS_AC_KERNEL_BIO_RW_BARRIER
 	ZFS_AC_KERNEL_BIO_RW_DISCARD
 	ZFS_AC_KERNEL_BIO_RW_SYNC
 	ZFS_AC_KERNEL_BIO_RW_SYNCIO
 	ZFS_AC_KERNEL_REQ_SYNC
 	ZFS_AC_KERNEL_BLK_END_REQUEST
 	ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
 	ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
 	ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
 	ZFS_AC_KERNEL_BLK_QUEUE_NONROT
 	ZFS_AC_KERNEL_BLK_QUEUE_DISCARD
 	ZFS_AC_KERNEL_BLK_FETCH_REQUEST
 	ZFS_AC_KERNEL_BLK_REQUEUE_REQUEST
 	ZFS_AC_KERNEL_BLK_RQ_BYTES
 	ZFS_AC_KERNEL_BLK_RQ_POS
 	ZFS_AC_KERNEL_BLK_RQ_SECTORS
 	ZFS_AC_KERNEL_GET_DISK_RO
 	ZFS_AC_KERNEL_GET_GENDISK
 	ZFS_AC_KERNEL_RQ_IS_SYNC
 	ZFS_AC_KERNEL_RQ_FOR_EACH_SEGMENT
 	ZFS_AC_KERNEL_DISCARD_GRANULARITY
 	ZFS_AC_KERNEL_CONST_XATTR_HANDLER
 	ZFS_AC_KERNEL_XATTR_HANDLER_GET
@ -100,6 +93,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 	ZFS_AC_KERNEL_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
 	ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
 	ZFS_AC_KERNEL_MAKE_REQUEST_FN
 	AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
 		KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
--- a/include/linux/blkdev_compat.h
+++ b/include/linux/blkdev_compat.h
@ -36,102 +36,6 @@
 typedef unsigned __bitwise__ fmode_t;
 #endif /* HAVE_FMODE_T */
 #ifndef HAVE_BLK_FETCH_REQUEST
 static inline struct request *
 blk_fetch_request(struct request_queue *q)
 {
 	struct request *req;
 	req = elv_next_request(q);
 	if (req)
 		blkdev_dequeue_request(req);
 	return (req);
 }
 #endif /* HAVE_BLK_FETCH_REQUEST */
 #ifndef HAVE_BLK_REQUEUE_REQUEST
 static inline void
 blk_requeue_request(request_queue_t *q, struct request *req)
 {
 	elv_requeue_request(q, req);
 }
 #endif /* HAVE_BLK_REQUEUE_REQUEST */
 #ifndef HAVE_BLK_END_REQUEST
 static inline bool
 __blk_end_request(struct request *req, int error, unsigned int nr_bytes)
 {
 	LIST_HEAD(list);
 	/*
 	 * Request has already been dequeued but 2.6.18 version of
 	 * end_request() unconditionally dequeues the request so we
 	 * add it to a local list to prevent hitting the BUG_ON.
 	 */
 	list_add(&req->queuelist, &list);
 	/*
 	 * The old API required the driver to end each segment and not
 	 * the entire request.  In our case we always need to end the
 	 * entire request partial requests are not supported.
 	 */
 	req->hard_cur_sectors = nr_bytes >> 9;
 	end_request(req, ((error == 0) ? 1 : error));
 	return (0);
 }
 static inline bool
 blk_end_request(struct request *req, int error, unsigned int nr_bytes)
 {
 	struct request_queue *q = req->q;
 	bool rc;
 	spin_lock_irq(q->queue_lock);
 	rc = __blk_end_request(req, error, nr_bytes);
 	spin_unlock_irq(q->queue_lock);
 	return (rc);
 }
 #else
 #ifdef HAVE_BLK_END_REQUEST_GPL_ONLY
 /*
 * Define required to avoid conflicting 2.6.29 non-static prototype for a
 * GPL-only version of the helper.  As of 2.6.31 the helper is available
 * to non-GPL modules and is not explicitly exported GPL-only.
 */
 #define	__blk_end_request __blk_end_request_x
 #define	blk_end_request blk_end_request_x
 static inline bool
 __blk_end_request_x(struct request *req, int error, unsigned int nr_bytes)
 {
 	/*
 	 * The old API required the driver to end each segment and not
 	 * the entire request.  In our case we always need to end the
 	 * entire request partial requests are not supported.
 	 */
 	req->hard_cur_sectors = nr_bytes >> 9;
 	end_request(req, ((error == 0) ? 1 : error));
 	return (0);
 }
 static inline bool
 blk_end_request_x(struct request *req, int error, unsigned int nr_bytes)
 {
 	struct request_queue *q = req->q;
 	bool rc;
 	spin_lock_irq(q->queue_lock);
 	rc = __blk_end_request_x(req, error, nr_bytes);
 	spin_unlock_irq(q->queue_lock);
 	return (rc);
 }
 #endif /* HAVE_BLK_END_REQUEST_GPL_ONLY */
 #endif /* HAVE_BLK_END_REQUEST */
 /*
 * 2.6.36 API change,
 * The blk_queue_flush() interface has replaced blk_queue_ordered()
@ -148,37 +52,6 @@ __blk_queue_flush(struct request_queue *q, unsigned int flags)
 	q->flush_flags = flags & (REQ_FLUSH | REQ_FUA);
 }
 #endif /* HAVE_BLK_QUEUE_FLUSH && HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */
 #ifndef HAVE_BLK_RQ_POS
 static inline sector_t
 blk_rq_pos(struct request *req)
 {
 	return (req->sector);
 }
 #endif /* HAVE_BLK_RQ_POS */
 #ifndef HAVE_BLK_RQ_SECTORS
 static inline unsigned int
 blk_rq_sectors(struct request *req)
 {
 	return (req->nr_sectors);
 }
 #endif /* HAVE_BLK_RQ_SECTORS */
 #if !defined(HAVE_BLK_RQ_BYTES) || defined(HAVE_BLK_RQ_BYTES_GPL_ONLY)
 /*
 * Define required to avoid conflicting 2.6.29 non-static prototype for a
 * GPL-only version of the helper.  As of 2.6.31 the helper is available
 * to non-GPL modules in the form of a static inline in the header.
 */
 #define	blk_rq_bytes __blk_rq_bytes
 static inline unsigned int
 __blk_rq_bytes(struct request *req)
 {
 	return (blk_rq_sectors(req) << 9);
 }
 #endif /* !HAVE_BLK_RQ_BYTES || HAVE_BLK_RQ_BYTES_GPL_ONLY */
 /*
 * Most of the blk_* macros were removed in 2.6.36.  Ostensibly this was
 * done to improve readability and allow easier grepping.  However, from
@ -241,64 +114,20 @@ get_disk_ro(struct gendisk *disk)
 }
 #endif /* HAVE_GET_DISK_RO */
 #ifndef HAVE_RQ_IS_SYNC
 static inline bool
 rq_is_sync(struct request *req)
 {
 	return (req->flags & REQ_RW_SYNC);
 }
 #endif /* HAVE_RQ_IS_SYNC */
 #ifndef HAVE_RQ_FOR_EACH_SEGMENT
 struct req_iterator {
 	int i;
 	struct bio *bio;
 };
 #define	for_each_bio(_bio)              \
 	for (; _bio; _bio = _bio->bi_next)
 #define	__rq_for_each_bio(_bio, rq)     \
 	if ((rq->bio))                  \
 		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
 #define	rq_for_each_segment(bvl, _rq, _iter)                    \
 	__rq_for_each_bio(_iter.bio, _rq)                       \
 		bio_for_each_segment(bvl, _iter.bio, _iter.i)
 #define	HAVE_RQ_FOR_EACH_SEGMENT_BVP 1
 #endif /* HAVE_RQ_FOR_EACH_SEGMENT */
 /*
 * 3.14 API change
 * rq_for_each_segment changed from taking bio_vec * to taking bio_vec.
 * We provide rq_for_each_segment4 which takes both.
 * You should not modify the fields in @bv and @bvp.
 *
 * Note: the if-else is just to inject the assignment before the loop body.
 */
 #ifdef HAVE_RQ_FOR_EACH_SEGMENT_BVP
 #define	rq_for_each_segment4(bv, bvp, rq, iter)	\
 	rq_for_each_segment(bvp, rq, iter)	\
 		if ((bv = *bvp), 0)		\
 			;			\
 		else
 #else
 #define	rq_for_each_segment4(bv, bvp, rq, iter)	\
 	rq_for_each_segment(bv, rq, iter)	\
 		if ((bvp = &bv), 0)		\
 			;			\
 		else
 #endif
 #ifdef HAVE_BIO_BVEC_ITER
 #define	BIO_BI_SECTOR(bio)	(bio)->bi_iter.bi_sector
 #define	BIO_BI_SIZE(bio)	(bio)->bi_iter.bi_size
 #define	BIO_BI_IDX(bio)		(bio)->bi_iter.bi_idx
 #define	bio_for_each_segment4(bv, bvp, b, i)	\
 	bio_for_each_segment((bv), (b), (i))
 typedef struct bvec_iter bvec_iterator_t;
 #else
 #define	BIO_BI_SECTOR(bio)	(bio)->bi_sector
 #define	BIO_BI_SIZE(bio)	(bio)->bi_size
 #define	BIO_BI_IDX(bio)		(bio)->bi_idx
 #define	bio_for_each_segment4(bv, bvp, b, i)	\
 	bio_for_each_segment((bvp), (b), (i))
 typedef int bvec_iterator_t;
 #endif
 /*
@ -457,17 +286,30 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
 #define	VDEV_REQ_FUA			REQ_FUA
 #else
 #define	VDEV_WRITE_FLUSH_FUA		WRITE_BARRIER
 #ifdef HAVE_BIO_RW_BARRIER
 #define	VDEV_REQ_FLUSH			(1 << BIO_RW_BARRIER)
 #define	VDEV_REQ_FUA			(1 << BIO_RW_BARRIER)
 #else
 #define	VDEV_REQ_FLUSH			REQ_HARDBARRIER
-#define	VDEV_REQ_FUA			REQ_HARDBARRIER
+#define	VDEV_REQ_FUA			REQ_FUA
 #endif
 #endif
 /*
 * 2.6.32 API change
 * Use the normal I/O path for discards.
 */
-#ifdef REQ_DISCARD
+#ifdef QUEUE_FLAG_DISCARD
 #ifdef HAVE_BIO_RW_DISCARD
 #define	VDEV_REQ_DISCARD		(1 << BIO_RW_DISCARD)
 #else
 #define	VDEV_REQ_DISCARD		REQ_DISCARD
 #endif
 #else
 #error	"Allowing the build will cause discard requests to become writes "
 	"potentially triggering the DMU_MAX_ACCESS assertion. Please file a "
 	"an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
 #endif
 /*
 * 2.6.33 API change
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@ -710,8 +710,8 @@ void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
 	dmu_tx_t *tx);
 #ifdef _KERNEL
 #include <linux/blkdev_compat.h>
-int dmu_read_req(objset_t *os, uint64_t object, struct request *req);
+int dmu_read_bio(objset_t *os, uint64_t object, struct bio *bio);
-int dmu_write_req(objset_t *os, uint64_t object, struct request *req,
+int dmu_write_bio(objset_t *os, uint64_t object, struct bio *bio,
 	dmu_tx_t *tx);
 int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
 int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size);
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@ -1591,17 +1591,6 @@ Max number of blocks to discard at once
 Default value: \fB16,384\fR.
 .RE
 .sp
 .ne 2
 .na
 \fBzvol_threads\fR (uint)
 .ad
 .RS 12n
 Max number of threads to handle zvol I/O requests
 .sp
 Default value: \fB32\fR.
 .RE
 .SH ZFS I/O SCHEDULER
 ZFS issues I/O operations to leaf vdevs to satisfy and complete I/Os.
 The I/O scheduler determines when and in what order those operations are
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@ -1049,15 +1049,16 @@ xuio_stat_wbuf_nocopy()
 * return value is the number of bytes successfully copied to arg_buf.
 */
 static int
-dmu_req_copy(void *arg_buf, int size, struct request *req, size_t req_offset)
+dmu_bio_copy(void *arg_buf, int size, struct bio *bio, size_t bio_offset)
 {
-	struct bio_vec bv, *bvp;
+	struct bio_vec bv, *bvp = &bv;
-	struct req_iterator iter;
+	bvec_iterator_t iter;
 	char *bv_buf;
 	int tocpy, bv_len, bv_offset;
 	int offset = 0;
-	rq_for_each_segment4(bv, bvp, req, iter) {
+	bio_for_each_segment4(bv, bvp, bio, iter) {
 		/*
 		 * Fully consumed the passed arg_buf. We use goto here because
 		 * rq_for_each_segment is a double loop
@ -1066,23 +1067,23 @@ dmu_req_copy(void *arg_buf, int size, struct request *req, size_t req_offset)
 		if (size == offset)
 			goto out;
-		/* Skip already copied bv */
+		/* Skip already copied bvp */
-		if (req_offset >=  bv.bv_len) {
+		if (bio_offset >= bvp->bv_len) {
-			req_offset -= bv.bv_len;
+			bio_offset -= bvp->bv_len;
 			continue;
 		}
-		bv_len = bv.bv_len - req_offset;
+		bv_len = bvp->bv_len - bio_offset;
-		bv_offset = bv.bv_offset + req_offset;
+		bv_offset = bvp->bv_offset + bio_offset;
-		req_offset = 0;
+		bio_offset = 0;
 		tocpy = MIN(bv_len, size - offset);
 		ASSERT3S(tocpy, >=, 0);
-		bv_buf = page_address(bv.bv_page) + bv_offset;
+		bv_buf = page_address(bvp->bv_page) + bv_offset;
 		ASSERT3P(bv_buf, !=, NULL);
-		if (rq_data_dir(req) == WRITE)
+		if (bio_data_dir(bio) == WRITE)
 			memcpy(arg_buf + offset, bv_buf, tocpy);
 		else
 			memcpy(bv_buf, arg_buf + offset, tocpy);
@ -1094,13 +1095,13 @@ out:
 }
 int
-dmu_read_req(objset_t *os, uint64_t object, struct request *req)
+dmu_read_bio(objset_t *os, uint64_t object, struct bio *bio)
 {
-	uint64_t size = blk_rq_bytes(req);
+	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
-	uint64_t offset = blk_rq_pos(req) << 9;
+	uint64_t size = BIO_BI_SIZE(bio);
 	dmu_buf_t **dbp;
 	int numbufs, i, err;
-	size_t req_offset;
+	size_t bio_offset;
 	/*
 	 * NB: we could do this block-at-a-time, but it's nice
@ -1111,7 +1112,7 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
 	if (err)
 		return (err);
-	req_offset = 0;
+	bio_offset = 0;
 	for (i = 0; i < numbufs; i++) {
 		uint64_t tocpy;
 		int64_t bufoff;
@ -1125,8 +1126,8 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
 		if (tocpy == 0)
 			break;
-		didcpy = dmu_req_copy(db->db_data + bufoff, tocpy, req,
+		didcpy = dmu_bio_copy(db->db_data + bufoff, tocpy, bio,
-		    req_offset);
+		    bio_offset);
 		if (didcpy < tocpy)
 			err = EIO;
@ -1136,7 +1137,7 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
 		size -= tocpy;
 		offset += didcpy;
-		req_offset += didcpy;
+		bio_offset += didcpy;
 		err = 0;
 	}
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
@ -1145,13 +1146,13 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
 }
 int
-dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
+dmu_write_bio(objset_t *os, uint64_t object, struct bio *bio, dmu_tx_t *tx)
 {
-	uint64_t size = blk_rq_bytes(req);
+	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
-	uint64_t offset = blk_rq_pos(req) << 9;
+	uint64_t size = BIO_BI_SIZE(bio);
 	dmu_buf_t **dbp;
 	int numbufs, i, err;
-	size_t req_offset;
+	size_t bio_offset;
 	if (size == 0)
 		return (0);
@ -1161,7 +1162,7 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
 	if (err)
 		return (err);
-	req_offset = 0;
+	bio_offset = 0;
 	for (i = 0; i < numbufs; i++) {
 		uint64_t tocpy;
 		int64_t bufoff;
@ -1182,8 +1183,8 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
 		else
 			dmu_buf_will_dirty(db, tx);
-		didcpy = dmu_req_copy(db->db_data + bufoff, tocpy, req,
+		didcpy = dmu_bio_copy(db->db_data + bufoff, tocpy, bio,
-		    req_offset);
+		    bio_offset);
 		if (tocpy == db->db_size)
 			dmu_buf_fill_done(db, tx);
@ -1196,7 +1197,7 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
 		size -= tocpy;
 		offset += didcpy;
-		req_offset += didcpy;
+		bio_offset += didcpy;
 		err = 0;
 	}
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@ -496,6 +496,22 @@ bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
 	return (bio_size);
 }
 /*
 * Wrapper around submit_bio() that temporarily hides the calling task's
 * pending-bio pointer while the bio is submitted.
 *
 * The task's current->bio_tail (when HAVE_CURRENT_BIO_TAIL is defined) or
 * current->bio_list (otherwise) is saved, set to NULL for the duration of
 * the submit_bio(rw, bio) call, and then restored to its saved value.
 *
 * NOTE(review): presumably the pointer is cleared so the block layer
 * submits this bio immediately instead of deferring it onto the caller's
 * pending list (e.g. when called from within another make_request
 * context) -- confirm against the kernel's generic_make_request().
 *
 *	rw  - request flags passed through unchanged to submit_bio()
 *	bio - the bio to submit
 */
 static inline void
 vdev_submit_bio(int rw, struct bio *bio)
 {
 #ifdef HAVE_CURRENT_BIO_TAIL
 	/* Kernels where the task tracks pending bios via bio_tail. */
 	struct bio **bio_tail = current->bio_tail;
 	current->bio_tail = NULL;
 	submit_bio(rw, bio);
 	current->bio_tail = bio_tail;
 #else
 	/* Kernels where the task tracks pending bios via bio_list. */
 	struct bio_list *bio_list = current->bio_list;
 	current->bio_list = NULL;
 	submit_bio(rw, bio);
 	current->bio_list = bio_list;
 #endif
 }
 static int
 __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
    size_t kbuf_size, uint64_t kbuf_offset, int flags)
@ -571,7 +587,7 @@ retry:
 		bio_offset += BIO_BI_SIZE(dr->dr_bio[i]);
 	}
-	/* Extra reference to protect dio_request during submit_bio */
+	/* Extra reference to protect dio_request during vdev_submit_bio */
 	vdev_disk_dio_get(dr);
 	if (zio)
 		zio->io_delay = jiffies_64;
@ -579,7 +595,7 @@ retry:
 	/* Submit all bio's associated with this dio */
 	for (i = 0; i < dr->dr_bio_count; i++)
 		if (dr->dr_bio[i])
-			submit_bio(dr->dr_rw, dr->dr_bio[i]);
+			vdev_submit_bio(dr->dr_rw, dr->dr_bio[i]);
 	/*
 	 * On synchronous blocking requests we wait for all bio the completion
@ -645,7 +661,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
 	bio->bi_private = zio;
 	bio->bi_bdev = bdev;
 	zio->io_delay = jiffies_64;
-	submit_bio(VDEV_WRITE_FLUSH_FUA, bio);
+	vdev_submit_bio(VDEV_WRITE_FLUSH_FUA, bio);
 	invalidate_bdev(bdev);
 	return (0);
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@ -50,10 +50,8 @@
 unsigned int zvol_inhibit_dev = 0;
 unsigned int zvol_major = ZVOL_MAJOR;
 unsigned int zvol_threads = 32;
 unsigned long zvol_max_discard_blocks = 16384;
 static taskq_t *zvol_taskq;
 static kmutex_t zvol_state_lock;
 static list_t zvol_state_list;
 static char *zvol_tag = "zvol_tag";
@ -590,34 +588,24 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
 	}
 }
-/*
+static int
- * Common write path running under the zvol taskq context.  This function
+zvol_write(struct bio *bio)
 * is responsible for copying the request structure data in to the DMU and
 * signaling the request queue with the result of the copy.
 */
 static void
 zvol_write(void *arg)
 {
-	struct request *req = (struct request *)arg;
+	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
-	struct request_queue *q = req->q;
+	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
-	zvol_state_t *zv = q->queuedata;
+	uint64_t size = BIO_BI_SIZE(bio);
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 	uint64_t offset = blk_rq_pos(req) << 9;
 	uint64_t size = blk_rq_bytes(req);
 	int error = 0;
 	dmu_tx_t *tx;
 	rl_t *rl;
-	if (req->cmd_flags & VDEV_REQ_FLUSH)
+	if (bio->bi_rw & VDEV_REQ_FLUSH)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 	/*
 	 * Some requests are just for flush and nothing else.
 	 */
-	if (size == 0) {
+	if (size == 0)
 		error = 0;
 		goto out;
 	}
 	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
@ -632,96 +620,82 @@ zvol_write(void *arg)
 		goto out;
 	}
-	error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx);
+	error = dmu_write_bio(zv->zv_objset, ZVOL_OBJ, bio, tx);
 	if (error == 0)
 		zvol_log_write(zv, tx, offset, size,
-		    req->cmd_flags & VDEV_REQ_FUA);
+		    !!(bio->bi_rw & VDEV_REQ_FUA));
 	dmu_tx_commit(tx);
 	zfs_range_unlock(rl);
-	if ((req->cmd_flags & VDEV_REQ_FUA) ||
+	if ((bio->bi_rw & VDEV_REQ_FUA) ||
 	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 out:
-	blk_end_request(req, -error, size);
+	return (error);
 	spl_fstrans_unmark(cookie);
 }
-#ifdef HAVE_BLK_QUEUE_DISCARD
+static int
-static void
+zvol_discard(struct bio *bio)
 zvol_discard(void *arg)
 {
-	struct request *req = (struct request *)arg;
+	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
-	struct request_queue *q = req->q;
+	uint64_t start = BIO_BI_SECTOR(bio) << 9;
-	zvol_state_t *zv = q->queuedata;
+	uint64_t size = BIO_BI_SIZE(bio);
-	fstrans_cookie_t cookie = spl_fstrans_mark();
+	uint64_t end = start + size;
 	uint64_t start = blk_rq_pos(req) << 9;
 	uint64_t end = start + blk_rq_bytes(req);
 	int error;
 	rl_t *rl;
-	if (end > zv->zv_volsize) {
+	if (end > zv->zv_volsize)
-		error = EIO;
+		return (SET_ERROR(EIO));
 		goto out;
 	}
 	/*
-	 * Align the request to volume block boundaries. If we don't,
+	 * Align the request to volume block boundaries when REQ_SECURE is
-	 * then this will force dnode_free_range() to zero out the
+	 * available, but not requested. If we don't, then this will force
-	 * unaligned parts, which is slow (read-modify-write) and
+	 * dnode_free_range() to zero out the unaligned parts, which is slow
-	 * useless since we are not freeing any space by doing so.
+	 * (read-modify-write) and useless since we are not freeing any space
 	 * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through
 	 * 2.6.35) will not receive this optimization.
 	 */
 #ifdef REQ_SECURE
 	if (!(bio->bi_rw & REQ_SECURE)) {
 		start = P2ROUNDUP(start, zv->zv_volblocksize);
 		end = P2ALIGN(end, zv->zv_volblocksize);
 	if (start >= end) {
 		error = 0;
 		goto out;
 	}
 #endif
-	rl = zfs_range_lock(&zv->zv_znode, start, end - start, RL_WRITER);
+	if (start >= end)
 		return (0);
-	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, end-start);
+	rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER);
 	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, size);
 	/*
 	 * TODO: maybe we should add the operation to the log.
 	 */
 	zfs_range_unlock(rl);
 out:
 	blk_end_request(req, -error, blk_rq_bytes(req));
 	spl_fstrans_unmark(cookie);
 }
 #endif /* HAVE_BLK_QUEUE_DISCARD */
-/*
+	return (error);
- * Common read path running under the zvol taskq context.  This function
+}
- * is responsible for copying the requested data out of the DMU and in to
+
- * a linux request structure.  It then must signal the request queue with
+static int
- * an error code describing the result of the copy.
+zvol_read(struct bio *bio)
 */
 static void
 zvol_read(void *arg)
 {
-	struct request *req = (struct request *)arg;
+	zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
-	struct request_queue *q = req->q;
+	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
-	zvol_state_t *zv = q->queuedata;
+	uint64_t len = BIO_BI_SIZE(bio);
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 	uint64_t offset = blk_rq_pos(req) << 9;
 	uint64_t size = blk_rq_bytes(req);
 	int error;
 	rl_t *rl;
-	if (size == 0) {
+	if (len == 0)
-		error = 0;
+		return (0);
 		goto out;
 	}
 	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
-	error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req);
+	rl = zfs_range_lock(&zv->zv_znode, offset, len, RL_READER);
 	error = dmu_read_bio(zv->zv_objset, ZVOL_OBJ, bio);
 	zfs_range_unlock(rl);
@ -729,91 +703,50 @@ zvol_read(void *arg)
 	if (error == ECKSUM)
 		error = SET_ERROR(EIO);
-out:
+	return (error);
 	blk_end_request(req, -error, size);
 	spl_fstrans_unmark(cookie);
 }
-/*
+static MAKE_REQUEST_FN_RET
- * Request will be added back to the request queue and retried if
+zvol_request(struct request_queue *q, struct bio *bio)
 * it cannot be immediately dispatched to the taskq for handling
 */
 static inline void
 zvol_dispatch(task_func_t func, struct request *req)
 {
 	if (!taskq_dispatch(zvol_taskq, func, (void *)req, TQ_NOSLEEP))
 		blk_requeue_request(req->q, req);
 }
 /*
 * Common request path.  Rather than registering a custom make_request()
 * function we use the generic Linux version.  This is done because it allows
 * us to easily merge read requests which would otherwise be performed
 * synchronously by the DMU.  This is less critical in write case where the
 * DMU will perform the correct merging within a transaction group.  Using
 * the generic make_request() also lets us leverage the fact that the
 * elevator will ensure correct ordering in regards to barrier IOs.  On
 * the downside it means that in the write case we end up doing request
 * merging twice once in the elevator and once in the DMU.
 *
 * The request handler is called under a spin lock so all the real work
 * is handed off to be done in the context of the zvol taskq.  This function
 * simply performs basic request sanity checking and hands off the request.
 */
 static void
 zvol_request(struct request_queue *q)
 {
 	zvol_state_t *zv = q->queuedata;
-	struct request *req;
+	fstrans_cookie_t cookie = spl_fstrans_mark();
-	unsigned int size;
+	uint64_t offset = BIO_BI_SECTOR(bio);
 	unsigned int sectors = bio_sectors(bio);
 	int error = 0;
-	while ((req = blk_fetch_request(q)) != NULL) {
+	if (bio_has_data(bio) && offset + sectors >
 		size = blk_rq_bytes(req);
 		if (size != 0 && blk_rq_pos(req) + blk_rq_sectors(req) >
 	    get_capacity(zv->zv_disk)) {
 		printk(KERN_INFO
 		    "%s: bad access: block=%llu, count=%lu\n",
-			    req->rq_disk->disk_name,
+		    zv->zv_disk->disk_name,
-			    (long long unsigned)blk_rq_pos(req),
+		    (long long unsigned)offset,
-			    (long unsigned)blk_rq_sectors(req));
+		    (long unsigned)sectors);
-			__blk_end_request(req, -EIO, size);
+		error = SET_ERROR(EIO);
-			continue;
+		goto out;
 	}
-		if (!blk_fs_request(req)) {
+	if (bio_data_dir(bio) == WRITE) {
 			printk(KERN_INFO "%s: non-fs cmd\n",
 			    req->rq_disk->disk_name);
 			__blk_end_request(req, -EIO, size);
 			continue;
 		}
 		switch ((int)rq_data_dir(req)) {
 		case READ:
 			zvol_dispatch(zvol_read, req);
 			break;
 		case WRITE:
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
-				__blk_end_request(req, -EROFS, size);
+			error = SET_ERROR(EROFS);
-				break;
+			goto out;
 		}
-#ifdef HAVE_BLK_QUEUE_DISCARD
+		if (bio->bi_rw & VDEV_REQ_DISCARD) {
-			if (req->cmd_flags & VDEV_REQ_DISCARD) {
+			error = zvol_discard(bio);
-				zvol_dispatch(zvol_discard, req);
+			goto out;
 				break;
 		}
 #endif /* HAVE_BLK_QUEUE_DISCARD */
-			zvol_dispatch(zvol_write, req);
+		error = zvol_write(bio);
-			break;
+	} else
-		default:
+		error = zvol_read(bio);
-			printk(KERN_INFO "%s: unknown cmd: %d\n",
+
-			    req->rq_disk->disk_name, (int)rq_data_dir(req));
+out:
-			__blk_end_request(req, -EIO, size);
+	bio_endio(bio, -error);
-			break;
+	spl_fstrans_unmark(cookie);
-		}
+#ifdef HAVE_MAKE_REQUEST_FN_RET_INT
-	}
+	return (0);
 #endif
 }
 static void
@ -1259,25 +1192,17 @@ static zvol_state_t *
 zvol_alloc(dev_t dev, const char *name)
 {
 	zvol_state_t *zv;
 	int error = 0;
 	zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
 	spin_lock_init(&zv->zv_lock);
 	list_link_init(&zv->zv_next);
-	zv->zv_queue = blk_init_queue(zvol_request, &zv->zv_lock);
+	zv->zv_queue = blk_alloc_queue(GFP_ATOMIC);
 	if (zv->zv_queue == NULL)
 		goto out_kmem;
-#ifdef HAVE_ELEVATOR_CHANGE
+	blk_queue_make_request(zv->zv_queue, zvol_request);
 	error = elevator_change(zv->zv_queue, "noop");
 #endif /* HAVE_ELEVATOR_CHANGE */
 	if (error) {
 		printk("ZFS: Unable to set \"%s\" scheduler for zvol %s: %d\n",
 		    "noop", name, error);
 		goto out_queue;
 	}
 #ifdef HAVE_BLK_QUEUE_FLUSH
 	blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA);
@ -1418,13 +1343,11 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
 	blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
 	blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
 	blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
 #ifdef HAVE_BLK_QUEUE_DISCARD
 	blk_queue_max_discard_sectors(zv->zv_queue,
 	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
 	blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
-#endif
+#ifdef QUEUE_FLAG_NONROT
 #ifdef HAVE_BLK_QUEUE_NONROT
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);
 #endif
 #ifdef QUEUE_FLAG_ADD_RANDOM
@ -1651,7 +1574,6 @@ zvol_set_snapdev(const char *dsname, uint64_t snapdev) {
 int
 zvol_init(void)
 {
 	int threads = MIN(MAX(zvol_threads, 1), 1024);
 	int error;
 	list_create(&zvol_state_list, sizeof (zvol_state_t),
@ -1659,18 +1581,10 @@ zvol_init(void)
 	mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
 	zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
 	    threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 	if (zvol_taskq == NULL) {
 		printk(KERN_INFO "ZFS: taskq_create() failed\n");
 		error = -ENOMEM;
 		goto out1;
 	}
 	error = register_blkdev(zvol_major, ZVOL_DRIVER);
 	if (error) {
 		printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
-		goto out2;
+		goto out;
 	}
 	blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
@ -1678,9 +1592,7 @@ zvol_init(void)
 	return (0);
-out2:
+out:
 	taskq_destroy(zvol_taskq);
 out1:
 	mutex_destroy(&zvol_state_lock);
 	list_destroy(&zvol_state_list);
@ -1693,7 +1605,6 @@ zvol_fini(void)
 	zvol_remove_minors(NULL);
 	blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
 	unregister_blkdev(zvol_major, ZVOL_DRIVER);
 	taskq_destroy(zvol_taskq);
 	mutex_destroy(&zvol_state_lock);
 	list_destroy(&zvol_state_list);
 }
@ -1704,8 +1615,5 @@ MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
 module_param(zvol_major, uint, 0444);
 MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
 module_param(zvol_threads, uint, 0444);
 MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests");
 module_param(zvol_max_discard_blocks, ulong, 0444);
 MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");