diff --git a/config/kernel-blk-queue.m4 b/config/kernel-blk-queue.m4 index 6f42b9812..29b0a2829 100644 --- a/config/kernel-blk-queue.m4 +++ b/config/kernel-blk-queue.m4 @@ -359,6 +359,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ ]) ]) +dnl # +dnl # See if kernel supports block multi-queue and blk_status_t. +dnl # blk_status_t represents the new status codes introduced in the 4.13 +dnl # kernel patch: +dnl # +dnl # block: introduce new block status code type +dnl # +dnl # We do not currently support the "old" block multi-queue interfaces from +dnl # prior kernels. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [ + ZFS_LINUX_TEST_SRC([blk_mq], [ + #include <linux/blk-mq.h> + ], [ + struct blk_mq_tag_set tag_set __attribute__ ((unused)) = {0}; + (void) blk_mq_alloc_tag_set(&tag_set); + return BLK_STS_OK; + ], []) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [ + AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available]) + ZFS_LINUX_TEST_RESULT([blk_mq], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI @@ -370,6 +400,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_SRC_BLK_MQ ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ @@ -383,4 +414,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_BLK_MQ ]) diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index fd91560a3..7964937a0 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -34,6 +34,11 @@ #include #include #include /* for SECTOR_* */ +#include + +#ifdef HAVE_BLK_MQ +#include <linux/blk-mq.h> +#endif #ifndef HAVE_BLK_QUEUE_FLAG_SET static inline void @@ -608,4 +613,110 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id) } #endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ +/* + * All the io_*() helper functions below can operate on a bio, or a rq, but + * not both. The older submit_bio() codepath will pass a bio, and the + * newer blk-mq codepath will pass a rq.
+ */ +static inline int +io_data_dir(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) { + if (op_is_write(req_op(rq))) { + return (WRITE); + } else { + return (READ); + } + } +#else + ASSERT3P(rq, ==, NULL); +#endif + return (bio_data_dir(bio)); +} + +static inline int +io_is_flush(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (req_op(rq) == REQ_OP_FLUSH); +#else + ASSERT3P(rq, ==, NULL); +#endif + return (bio_is_flush(bio)); +} + +static inline int +io_is_discard(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (req_op(rq) == REQ_OP_DISCARD); +#else + ASSERT3P(rq, ==, NULL); +#endif + return (bio_is_discard(bio)); +} + +static inline int +io_is_secure_erase(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (req_op(rq) == REQ_OP_SECURE_ERASE); +#else + ASSERT3P(rq, ==, NULL); +#endif + return (bio_is_secure_erase(bio)); +} + +static inline int +io_is_fua(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (rq->cmd_flags & REQ_FUA); +#else + ASSERT3P(rq, ==, NULL); +#endif + return (bio_is_fua(bio)); +} + + +static inline uint64_t +io_offset(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (blk_rq_pos(rq) << 9); +#else + ASSERT3P(rq, ==, NULL); +#endif + return (BIO_BI_SECTOR(bio) << 9); +} + +static inline uint64_t +io_size(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (blk_rq_bytes(rq)); +#else + ASSERT3P(rq, ==, NULL); +#endif + return (BIO_BI_SIZE(bio)); +} + +static inline int +io_has_data(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (bio_has_data(rq->bio)); +#else + ASSERT3P(rq, ==, NULL); +#endif + return (bio_has_data(bio)); +} #endif /* _ZFS_BLKDEV_H */ diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index 439eec986..fe2b5c07a 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -69,9 +69,20 @@ typedef struct zfs_uio { uint16_t uio_fmode; uint16_t uio_extflg; ssize_t uio_resid; + size_t uio_skip; + + struct request *rq; + + /* + * Used for saving rq_for_each_segment() state between calls + * to zfs_uiomove_bvec_rq(). 
+ */ + struct req_iterator iter; + struct bio_vec bv; +} zfs_uio_t; + + #define zfs_uio_segflg(u) (u)->uio_segflg #define zfs_uio_offset(u) (u)->uio_loffset #define zfs_uio_resid(u) (u)->uio_resid @@ -116,17 +127,33 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov, } static inline void -zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio) +zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq) { - uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)]; - uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio); - uio->uio_loffset = BIO_BI_SECTOR(bio) << 9; + /* Either bio or rq will be set, but not both */ + ASSERT3P(uio, !=, bio); + + if (bio) { + uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio); + uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)]; + } else { + uio->uio_bvec = NULL; + uio->uio_iovcnt = 0; + memset(&uio->iter, 0, sizeof (uio->iter)); + } + + uio->uio_loffset = io_offset(bio, rq); uio->uio_segflg = UIO_BVEC; uio->uio_fault_disable = B_FALSE; uio->uio_fmode = 0; uio->uio_extflg = 0; - uio->uio_resid = BIO_BI_SIZE(bio); - uio->uio_skip = BIO_BI_SKIP(bio); + uio->uio_resid = io_size(bio, rq); + if (bio) { + uio->uio_skip = BIO_BI_SKIP(bio); + } else { + uio->uio_skip = 0; + } + + uio->rq = rq; } #if defined(HAVE_VFS_IOV_ITER) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index a086e1a5d..a7e5408e5 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -2248,9 +2248,74 @@ for each I/O submitter. When unset, requests are handled asynchronously by a thread pool. The number of requests which can be handled concurrently is controlled by .Sy zvol_threads . +.Sy zvol_request_sync +is ignored when running on a kernel that supports block multiqueue +.Pq Li blk-mq . . -.It Sy zvol_threads Ns = Ns Sy 32 Pq uint -Max number of threads which can handle zvol I/O requests concurrently. +.It Sy zvol_threads Ns = Ns Sy 0 Pq uint +The number of system-wide threads to use for processing zvol block IOs. +If +.Sy 0 +(the default) then internally set +.Sy zvol_threads +to the number of CPUs present or 32 (whichever is greater). +. +.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint +The number of threads per zvol to use for queuing IO requests. +This parameter will only appear if your kernel supports +.Li blk-mq +and is only read and assigned to a zvol at zvol load time. +If +.Sy 0 +(the default) then internally set +.Sy zvol_blk_mq_threads +to the number of CPUs present. +. +.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint +Set to +.Sy 1 +to use the +.Li blk-mq +API for zvols. +Set to +.Sy 0 +(the default) to use the legacy zvol APIs. +This setting can give better or worse zvol performance depending on +the workload. +This parameter will only appear if your kernel supports +.Li blk-mq +and is only read and assigned to a zvol at zvol load time. +. +.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint +If +.Sy zvol_use_blk_mq +is enabled, then process this number of +.Sy volblocksize Ns -sized blocks per zvol thread. +This tunable can be used to favor better performance for zvol reads (lower +values) or writes (higher values). +If set to +.Sy 0 , +then the zvol layer will process the maximum number of blocks +per thread that it can. +This parameter will only appear if your kernel supports +.Li blk-mq +and is only applied at each zvol's load time. +. +.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint +The queue_depth value for the zvol +.Li blk-mq +interface. +This parameter will only appear if your kernel supports +.Li blk-mq +and is only applied at each zvol's load time.
+If +.Sy 0 +(the default) then use the kernel's default queue depth. +Values are clamped to the kernel's +.Dv BLKDEV_MIN_RQ +and +.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ +limits. . .It Sy zvol_volmode Ns = Ns Sy 1 Pq uint Defines zvol block devices behaviour when diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index 4f31bcb59..abb6dbe67 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -126,7 +126,7 @@ zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) } static int -zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { const struct bio_vec *bv = uio->uio_bvec; size_t skip = uio->uio_skip; @@ -137,10 +137,13 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) cnt = MIN(bv->bv_len - skip, n); paddr = zfs_kmap_atomic(bv->bv_page); - if (rw == UIO_READ) + if (rw == UIO_READ) { + /* Copy from buffer 'p' to the bvec data */ memcpy(paddr + bv->bv_offset + skip, p, cnt); - else + } else { + /* Copy from bvec data to buffer 'p' */ memcpy(p, paddr + bv->bv_offset + skip, cnt); + } zfs_kunmap_atomic(paddr); skip += cnt; @@ -158,6 +161,141 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) return (0); } +#ifdef HAVE_BLK_MQ +static void +zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw, + struct bio_vec *bv) +{ + void *paddr; + + paddr = zfs_kmap_atomic(bv->bv_page); + if (rw == UIO_READ) { + /* Copy from buffer 'p' to the bvec data */ + memcpy(paddr + bv->bv_offset + skip, p, cnt); + } else { + /* Copy from bvec data to buffer 'p' */ + memcpy(p, paddr + bv->bv_offset + skip, cnt); + } + zfs_kunmap_atomic(paddr); +} + +/* + * Copy 'n' bytes of data between the buffer p[] and the data represented + * by the request in the uio. + */ +static int +zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +{ + struct request *rq = uio->rq; + struct bio_vec bv; + struct req_iterator iter; + size_t this_seg_start; /* logical offset */ + size_t this_seg_end; /* logical offset */ + size_t skip_in_seg; + size_t copy_from_seg; + size_t orig_loffset; + int copied = 0; + + /* + * Get the original logical offset of this entire request (because + * uio->uio_loffset will be modified over time). + */ + orig_loffset = io_offset(NULL, rq); + this_seg_start = orig_loffset; + + rq_for_each_segment(bv, rq, iter) { + if (uio->iter.bio) { + /* + * If uio->iter.bio is present, then we know we've saved + * uio->iter from a previous call to this function, and + * we can skip ahead in this rq_for_each_segment() loop + * to where we last left off. That way, we don't need + * to iterate over tons of segments we've already + * processed - we can just restore the "saved state". + */ + iter = uio->iter; + bv = uio->bv; + this_seg_start = uio->uio_loffset; + memset(&uio->iter, 0, sizeof (uio->iter)); + continue; + } + + /* + * Lookup what the logical offset of the last byte of this + * segment is. + */ + this_seg_end = this_seg_start + bv.bv_len - 1; + + /* + * We only need to operate on segments that have data we're + * copying. + */ + if (uio->uio_loffset >= this_seg_start && + uio->uio_loffset <= this_seg_end) { + /* + * Some, or all, of the data in this segment needs to be + * copied. + */ + + /* + * We may not be copying from the first byte in the + * segment. Figure out how many bytes to skip copying + * from the beginning of this segment.
+ */ + skip_in_seg = uio->uio_loffset - this_seg_start; + + /* + * Calculate the total number of bytes from this + * segment that we will be copying. + */ + copy_from_seg = MIN(bv.bv_len - skip_in_seg, n); + + /* Copy the bytes */ + zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv); + p = ((char *)p) + copy_from_seg; + + n -= copy_from_seg; + uio->uio_resid -= copy_from_seg; + uio->uio_loffset += copy_from_seg; + copied = 1; /* We copied some data */ + } + + if (n == 0) { + /* + * All done copying. Save our 'iter' value to the uio. + * This allows us to "save our state" and skip ahead in + * the rq_for_each_segment() loop the next time we + * call zfs_uiomove_bvec_rq() on this uio (which we + * will be doing for any remaining data in the uio). + */ + uio->iter = iter; /* make a copy of the struct data */ + uio->bv = bv; + return (0); + } + + this_seg_start = this_seg_end + 1; + } + + if (!copied) { + /* Didn't copy anything */ + uio->uio_resid = 0; + } + return (0); +} +#endif + +static int +zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +{ +#ifdef HAVE_BLK_MQ + if (uio->rq != NULL) + return (zfs_uiomove_bvec_rq(p, n, rw, uio)); +#else + ASSERT3P(uio->rq, ==, NULL); +#endif + return (zfs_uiomove_bvec_impl(p, n, rw, uio)); +} + #if defined(HAVE_VFS_IOV_ITER) static int zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, @@ -300,8 +438,14 @@ zfs_uioskip(zfs_uio_t *uio, size_t n) { if (n > uio->uio_resid) return; - - if (uio->uio_segflg == UIO_BVEC) { + /* + * When using a uio with a struct request, we simply + * use uio_loffset as a pointer to the next logical byte to + * copy in the request. We don't have to do any fancy + * accounting with uio_bvec/uio_iovcnt since we don't use + * them. + */ + if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) { uio->uio_skip += n; while (uio->uio_iovcnt && uio->uio_skip >= uio->uio_bvec->bv_len) { diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 39441700a..acbab55d0 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -41,20 +41,77 @@ #include #include +#ifdef HAVE_BLK_MQ +#include <linux/blk-mq.h> +#endif + +static void zvol_request_impl(zvol_state_t *zv, struct bio *bio, + struct request *rq, boolean_t force_sync); + static unsigned int zvol_major = ZVOL_MAJOR; static unsigned int zvol_request_sync = 0; static unsigned int zvol_prefetch_bytes = (128 * 1024); static unsigned long zvol_max_discard_blocks = 16384; -static unsigned int zvol_threads = 32; #ifndef HAVE_BLKDEV_GET_ERESTARTSYS static const unsigned int zvol_open_timeout_ms = 1000; #endif +static unsigned int zvol_threads = 0; #ifdef HAVE_BLK_MQ +static unsigned int zvol_blk_mq_threads = 0; +static unsigned int zvol_blk_mq_actual_threads; +static boolean_t zvol_use_blk_mq = B_FALSE; + +/* + * The maximum number of volblocksize blocks to process per thread. Typically, + * write heavy workloads perform better with higher values here, and read + * heavy workloads perform better with lower values, but that's not a hard + * and fast rule. It's basically a knob to tune between "less overhead with + * less parallelism" and "more overhead, but more parallelism". + * + * '8' was chosen as a reasonable, balanced, default based off of sequential + * read and write tests to a zvol in an NVMe pool (with 16 CPUs).
+ */ +static unsigned int zvol_blk_mq_blocks_per_thread = 8; +#endif + +#ifndef BLKDEV_DEFAULT_RQ +/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */ +#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ +#endif + +/* + * Finalize our BIO or request. + */ +#ifdef HAVE_BLK_MQ +#define END_IO(zv, bio, rq, error) do { \ + if (bio) { \ + BIO_END_IO(bio, error); \ + } else { \ + blk_mq_end_request(rq, errno_to_bi_status(error)); \ + } \ +} while (0) +#else +#define END_IO(zv, bio, rq, error) BIO_END_IO(bio, error) +#endif + +#ifdef HAVE_BLK_MQ +static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; +static unsigned int zvol_actual_blk_mq_queue_depth; +#endif + struct zvol_state_os { struct gendisk *zvo_disk; /* generic disk */ struct request_queue *zvo_queue; /* request queue */ dev_t zvo_dev; /* device id */ + +#ifdef HAVE_BLK_MQ + struct blk_mq_tag_set tag_set; +#endif + + /* Set from the global 'zvol_use_blk_mq' at zvol load */ + boolean_t use_blk_mq; }; taskq_t *zvol_taskq; @@ -63,8 +120,14 @@ static struct ida zvol_ida; typedef struct zv_request_stack { zvol_state_t *zv; struct bio *bio; + struct request *rq; } zv_request_t; +typedef struct zv_work { + struct request *rq; + struct work_struct work; +} zv_work_t; + typedef struct zv_request_task { zv_request_t zvr; taskq_ent_t ent; @@ -86,6 +149,62 @@ zv_request_task_free(zv_request_task_t *task) kmem_free(task, sizeof (*task)); } +#ifdef HAVE_BLK_MQ + +/* + * This is called when a new block multiqueue request comes in. A request + * contains one or more BIOs. + */ +static blk_status_t zvol_mq_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq = bd->rq; + zvol_state_t *zv = rq->q->queuedata; + + /* Tell the kernel that we are starting to process this request */ + blk_mq_start_request(rq); + + if (blk_rq_is_passthrough(rq)) { + /* Skip non filesystem request */ + blk_mq_end_request(rq, BLK_STS_IOERR); + return (BLK_STS_IOERR); + } + + zvol_request_impl(zv, NULL, rq, 0); + + /* Acknowledge to the kernel that we got this request */ + return (BLK_STS_OK); +} + +static struct blk_mq_ops zvol_blk_mq_queue_ops = { + .queue_rq = zvol_mq_queue_rq, +}; + +/* Initialize our blk-mq struct */ +static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv) +{ + struct zvol_state_os *zso = zv->zv_zso; + + memset(&zso->tag_set, 0, sizeof (zso->tag_set)); + + /* Initialize tag set. */ + zso->tag_set.ops = &zvol_blk_mq_queue_ops; + zso->tag_set.nr_hw_queues = zvol_blk_mq_actual_threads; + zso->tag_set.queue_depth = zvol_actual_blk_mq_queue_depth; + zso->tag_set.numa_node = NUMA_NO_NODE; + zso->tag_set.cmd_size = 0; + + /* + * We need BLK_MQ_F_BLOCKING here since we do blocking calls in + * zvol_request_impl() + */ + zso->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + zso->tag_set.driver_data = zv; + + return (blk_mq_alloc_tag_set(&zso->tag_set)); +} +#endif /* HAVE_BLK_MQ */ + /* * Given a path, return TRUE if path is a ZVOL. 
*/ @@ -107,38 +226,51 @@ static void zvol_write(zv_request_t *zvr) { struct bio *bio = zvr->bio; + struct request *rq = zvr->rq; int error = 0; zfs_uio_t uio; - - zfs_uio_bvec_init(&uio, bio); - zvol_state_t *zv = zvr->zv; + struct request_queue *q; + struct gendisk *disk; + unsigned long start_time = 0; + boolean_t acct = B_FALSE; + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); ASSERT3P(zv->zv_zilog, !=, NULL); + q = zv->zv_zso->zvo_queue; + disk = zv->zv_zso->zvo_disk; + /* bio marked as FLUSH need to flush before write */ - if (bio_is_flush(bio)) + if (io_is_flush(bio, rq)) zil_commit(zv->zv_zilog, ZVOL_OBJ); /* Some requests are just for flush and nothing else. */ - if (uio.uio_resid == 0) { + if (io_size(bio, rq) == 0) { rw_exit(&zv->zv_suspend_lock); - BIO_END_IO(bio, 0); + END_IO(zv, bio, rq, 0); return; } - struct request_queue *q = zv->zv_zso->zvo_queue; - struct gendisk *disk = zv->zv_zso->zvo_disk; - ssize_t start_resid = uio.uio_resid; - unsigned long start_time; + zfs_uio_bvec_init(&uio, bio, rq); - boolean_t acct = blk_queue_io_stat(q); - if (acct) - start_time = blk_generic_start_io_acct(q, disk, WRITE, bio); + ssize_t start_resid = uio.uio_resid; + + /* + * With use_blk_mq, accounting is done by blk_mq_start_request() + * and blk_mq_end_request(), so we can skip it here. + */ + if (bio) { + acct = blk_queue_io_stat(q); + if (acct) { + start_time = blk_generic_start_io_acct(q, disk, WRITE, + bio); + } + } boolean_t sync = - bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; + io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, uio.uio_loffset, uio.uio_resid, RL_WRITER); @@ -180,10 +312,11 @@ zvol_write(zv_request_t *zvr) rw_exit(&zv->zv_suspend_lock); - if (acct) + if (bio && acct) { blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); + } - BIO_END_IO(bio, -error); + END_IO(zv, bio, rq, -error); } static void @@ -198,27 +331,33 @@ static void zvol_discard(zv_request_t *zvr) { struct bio *bio = zvr->bio; + struct request *rq = zvr->rq; zvol_state_t *zv = zvr->zv; - uint64_t start = BIO_BI_SECTOR(bio) << 9; - uint64_t size = BIO_BI_SIZE(bio); + uint64_t start = io_offset(bio, rq); + uint64_t size = io_size(bio, rq); uint64_t end = start + size; boolean_t sync; int error = 0; dmu_tx_t *tx; + struct request_queue *q = zv->zv_zso->zvo_queue; + struct gendisk *disk = zv->zv_zso->zvo_disk; + unsigned long start_time = 0; + + boolean_t acct = blk_queue_io_stat(q); ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); ASSERT3P(zv->zv_zilog, !=, NULL); - struct request_queue *q = zv->zv_zso->zvo_queue; - struct gendisk *disk = zv->zv_zso->zvo_disk; - unsigned long start_time; + if (bio) { + acct = blk_queue_io_stat(q); + if (acct) { + start_time = blk_generic_start_io_acct(q, disk, WRITE, + bio); + } + } - boolean_t acct = blk_queue_io_stat(q); - if (acct) - start_time = blk_generic_start_io_acct(q, disk, WRITE, bio); - - sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; + sync = io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; if (end > zv->zv_volsize) { error = SET_ERROR(EIO); @@ -231,7 +370,7 @@ zvol_discard(zv_request_t *zvr) * the unaligned parts which is slow (read-modify-write) and useless * since we are not freeing any space by doing so. 
*/ - if (!bio_is_secure_erase(bio)) { + if (!io_is_secure_erase(bio, rq)) { start = P2ROUNDUP(start, zv->zv_volblocksize); end = P2ALIGN(end, zv->zv_volblocksize); size = end - start; @@ -262,10 +401,12 @@ zvol_discard(zv_request_t *zvr) unlock: rw_exit(&zv->zv_suspend_lock); - if (acct) - blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); + if (bio && acct) { + blk_generic_end_io_acct(q, disk, WRITE, bio, + start_time); + } - BIO_END_IO(bio, -error); + END_IO(zv, bio, rq, -error); } static void @@ -280,28 +421,41 @@ static void zvol_read(zv_request_t *zvr) { struct bio *bio = zvr->bio; + struct request *rq = zvr->rq; int error = 0; zfs_uio_t uio; - - zfs_uio_bvec_init(&uio, bio); - + boolean_t acct = B_FALSE; zvol_state_t *zv = zvr->zv; + struct request_queue *q; + struct gendisk *disk; + unsigned long start_time = 0; + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); - struct request_queue *q = zv->zv_zso->zvo_queue; - struct gendisk *disk = zv->zv_zso->zvo_disk; - ssize_t start_resid = uio.uio_resid; - unsigned long start_time; + zfs_uio_bvec_init(&uio, bio, rq); - boolean_t acct = blk_queue_io_stat(q); - if (acct) - start_time = blk_generic_start_io_acct(q, disk, READ, bio); + q = zv->zv_zso->zvo_queue; + disk = zv->zv_zso->zvo_disk; + + ssize_t start_resid = uio.uio_resid; + + /* + * When blk-mq is being used, accounting is done by + * blk_mq_start_request() and blk_mq_end_request(). + */ + if (bio) { + acct = blk_queue_io_stat(q); + if (acct) + start_time = blk_generic_start_io_acct(q, disk, READ, + bio); + } zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, uio.uio_loffset, uio.uio_resid, RL_READER); uint64_t volsize = zv->zv_volsize; + while (uio.uio_resid > 0 && uio.uio_loffset < volsize) { uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1); @@ -325,10 +479,11 @@ zvol_read(zv_request_t *zvr) rw_exit(&zv->zv_suspend_lock); - if (acct) + if (bio && acct) { blk_generic_end_io_acct(q, disk, READ, bio, start_time); + } - BIO_END_IO(bio, -error); + END_IO(zv, bio, rq, -error); } static void @@ -339,52 +494,49 @@ zvol_read_task(void *arg) zv_request_task_free(task); } -#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS -#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID + +/* + * Process a BIO or request + * + * Either 'bio' or 'rq' should be set depending on if we are processing a + * bio or a request (both should not be set). 
+ * + * force_sync: Set to 0 to defer processing to a background taskq + * Set to 1 to process data synchronously + */ static void -zvol_submit_bio(struct bio *bio) -#else -static blk_qc_t -zvol_submit_bio(struct bio *bio) -#endif -#else -static MAKE_REQUEST_FN_RET -zvol_request(struct request_queue *q, struct bio *bio) -#endif +zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, + boolean_t force_sync) { -#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS -#if defined(HAVE_BIO_BDEV_DISK) - struct request_queue *q = bio->bi_bdev->bd_disk->queue; -#else - struct request_queue *q = bio->bi_disk->queue; -#endif -#endif - zvol_state_t *zv = q->queuedata; fstrans_cookie_t cookie = spl_fstrans_mark(); - uint64_t offset = BIO_BI_SECTOR(bio) << 9; - uint64_t size = BIO_BI_SIZE(bio); - int rw = bio_data_dir(bio); + uint64_t offset = io_offset(bio, rq); + uint64_t size = io_size(bio, rq); + int rw = io_data_dir(bio, rq); - if (bio_has_data(bio) && offset + size > zv->zv_volsize) { - printk(KERN_INFO - "%s: bad access: offset=%llu, size=%lu\n", - zv->zv_zso->zvo_disk->disk_name, - (long long unsigned)offset, - (long unsigned)size); - - BIO_END_IO(bio, -SET_ERROR(EIO)); - goto out; - } + if (zvol_request_sync) + force_sync = 1; zv_request_t zvr = { .zv = zv, .bio = bio, + .rq = rq, }; + + if (io_has_data(bio, rq) && offset + size > zv->zv_volsize) { + printk(KERN_INFO "%s: bad access: offset=%llu, size=%lu\n", + zv->zv_zso->zvo_disk->disk_name, + (long long unsigned)offset, + (long unsigned)size); + + END_IO(zv, bio, rq, -SET_ERROR(EIO)); + goto out; + } + zv_request_task_t *task; if (rw == WRITE) { if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { - BIO_END_IO(bio, -SET_ERROR(EROFS)); + END_IO(zv, bio, rq, -SET_ERROR(EROFS)); goto out; } @@ -421,7 +573,7 @@ zvol_request(struct request_queue *q, struct bio *bio) * i/o may be a ZIL write (via zil_commit()), or a read of an * indirect block, or a read of a data block (if this is a * partial-block write). We will indicate that the i/o is - * complete by calling BIO_END_IO() from the taskq callback. + * complete by calling END_IO() from the taskq callback. * * This design allows the calling thread to continue and * initiate more concurrent operations by calling @@ -441,12 +593,12 @@ zvol_request(struct request_queue *q, struct bio *bio) * of one i/o at a time per zvol. However, an even better * design would be for zvol_request() to initiate the zio * directly, and then be notified by the zio_done callback, - * which would call BIO_END_IO(). Unfortunately, the DMU/ZIL + * which would call END_IO(). Unfortunately, the DMU/ZIL * interfaces lack this functionality (they block waiting for * the i/o to complete). */ - if (bio_is_discard(bio) || bio_is_secure_erase(bio)) { - if (zvol_request_sync) { + if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) { + if (force_sync) { zvol_discard(&zvr); } else { task = zv_request_task_create(zvr); @@ -454,7 +606,7 @@ zvol_request(struct request_queue *q, struct bio *bio) zvol_discard_task, task, 0, &task->ent); } } else { - if (zvol_request_sync) { + if (force_sync) { zvol_write(&zvr); } else { task = zv_request_task_create(zvr); @@ -469,14 +621,14 @@ zvol_request(struct request_queue *q, struct bio *bio) * data and require no additional handling. */ if (size == 0) { - BIO_END_IO(bio, 0); + END_IO(zv, bio, rq, 0); goto out; } rw_enter(&zv->zv_suspend_lock, RW_READER); /* See comment in WRITE case above. 
*/ - if (zvol_request_sync) { + if (force_sync) { zvol_read(&zvr); } else { task = zv_request_task_create(zvr); @@ -487,8 +639,33 @@ zvol_request(struct request_queue *q, struct bio *bio) out: spl_fstrans_unmark(cookie); -#if (defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \ - defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)) && \ +} + +#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS +#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID +static void +zvol_submit_bio(struct bio *bio) +#else +static blk_qc_t +zvol_submit_bio(struct bio *bio) +#endif +#else +static MAKE_REQUEST_FN_RET +zvol_request(struct request_queue *q, struct bio *bio) +#endif +{ +#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS +#if defined(HAVE_BIO_BDEV_DISK) + struct request_queue *q = bio->bi_bdev->bd_disk->queue; +#else + struct request_queue *q = bio->bi_disk->queue; +#endif +#endif + zvol_state_t *zv = q->queuedata; + + zvol_request_impl(zv, bio, NULL, 0); +#if defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \ + defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \ !defined(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID) return (BLK_QC_T_NONE); #endif @@ -805,6 +982,27 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) return (0); } +/* + * Why have two separate block_device_operations structs? + * + * Normally we'd just have one, and assign 'submit_bio' as needed. However, + * it's possible the user's kernel is built with CONSTIFY_PLUGIN, meaning we + * can't just change submit_bio dynamically at runtime. So just create two + * separate structs to get around this. + */ +static const struct block_device_operations zvol_ops_blk_mq = { + .open = zvol_open, + .release = zvol_release, + .ioctl = zvol_ioctl, + .compat_ioctl = zvol_compat_ioctl, + .check_events = zvol_check_events, +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK + .revalidate_disk = zvol_revalidate_disk, +#endif + .getgeo = zvol_getgeo, + .owner = THIS_MODULE, +}; + static const struct block_device_operations zvol_ops = { .open = zvol_open, .release = zvol_release, @@ -821,6 +1019,87 @@ static const struct block_device_operations zvol_ops = { #endif }; +static int +zvol_alloc_non_blk_mq(struct zvol_state_os *zso) +{ +#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) +#if defined(HAVE_BLK_ALLOC_DISK) + zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE); + if (zso->zvo_disk == NULL) + return (1); + + zso->zvo_disk->minors = ZVOL_MINORS; + zso->zvo_queue = zso->zvo_disk->queue; +#else + zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE); + if (zso->zvo_queue == NULL) + return (1); + + zso->zvo_disk = alloc_disk(ZVOL_MINORS); + if (zso->zvo_disk == NULL) { + blk_cleanup_queue(zso->zvo_queue); + return (1); + } + + zso->zvo_disk->queue = zso->zvo_queue; +#endif /* HAVE_BLK_ALLOC_DISK */ +#else + zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE); + if (zso->zvo_queue == NULL) + return (1); + + zso->zvo_disk = alloc_disk(ZVOL_MINORS); + if (zso->zvo_disk == NULL) { + blk_cleanup_queue(zso->zvo_queue); + return (1); + } + + zso->zvo_disk->queue = zso->zvo_queue; +#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ + return (0); + +} + +static int +zvol_alloc_blk_mq(zvol_state_t *zv) +{ +#ifdef HAVE_BLK_MQ + struct zvol_state_os *zso = zv->zv_zso; + + /* Allocate our blk-mq tag_set */ + if (zvol_blk_mq_alloc_tag_set(zv) != 0) + return (1); + +#if defined(HAVE_BLK_ALLOC_DISK) + zso->zvo_disk = blk_mq_alloc_disk(&zso->tag_set, zv); + if (zso->zvo_disk == NULL) { + blk_mq_free_tag_set(&zso->tag_set); + return (1); + } + zso->zvo_queue = 
zso->zvo_disk->queue; + zso->zvo_disk->minors = ZVOL_MINORS; +#else + zso->zvo_disk = alloc_disk(ZVOL_MINORS); + if (zso->zvo_disk == NULL) { + blk_cleanup_queue(zso->zvo_queue); + blk_mq_free_tag_set(&zso->tag_set); + return (1); + } + /* Allocate queue */ + zso->zvo_queue = blk_mq_init_queue(&zso->tag_set); + if (IS_ERR(zso->zvo_queue)) { + blk_mq_free_tag_set(&zso->tag_set); + return (1); + } + + /* Our queue is now created, assign it to our disk */ + zso->zvo_disk->queue = zso->zvo_queue; + +#endif +#endif + return (0); +} + /* * Allocate memory for a new zvol_state_t and setup the required * request queue and generic disk structures for the block device. @@ -831,6 +1110,7 @@ zvol_alloc(dev_t dev, const char *name) zvol_state_t *zv; struct zvol_state_os *zso; uint64_t volmode; + int ret; if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0) return (NULL); @@ -849,48 +1129,44 @@ zvol_alloc(dev_t dev, const char *name) list_link_init(&zv->zv_next); mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); -#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS -#ifdef HAVE_BLK_ALLOC_DISK - zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE); - if (zso->zvo_disk == NULL) - goto out_kmem; +#ifdef HAVE_BLK_MQ + zv->zv_zso->use_blk_mq = zvol_use_blk_mq; +#endif - zso->zvo_disk->minors = ZVOL_MINORS; - zso->zvo_queue = zso->zvo_disk->queue; -#else - zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE); - if (zso->zvo_queue == NULL) - goto out_kmem; - - zso->zvo_disk = alloc_disk(ZVOL_MINORS); - if (zso->zvo_disk == NULL) { - blk_cleanup_queue(zso->zvo_queue); - goto out_kmem; + /* + * The block layer has 3 interfaces for getting BIOs: + * + * 1. blk-mq request queues (new) + * 2. submit_bio() (oldest) + * 3. regular request queues (old). + * + * Each of those interfaces has two permutations: + * + * a) We have blk_alloc_disk()/blk_mq_alloc_disk(), which allocates + * both the disk and its queue (5.14 kernel or newer) + * + * b) We don't have blk_*alloc_disk(), and have to allocate the + * disk and the queue separately. (5.13 kernel or older) + */ + if (zv->zv_zso->use_blk_mq) { + ret = zvol_alloc_blk_mq(zv); + zso->zvo_disk->fops = &zvol_ops_blk_mq; + } else { + ret = zvol_alloc_non_blk_mq(zso); + zso->zvo_disk->fops = &zvol_ops; } - - zso->zvo_disk->queue = zso->zvo_queue; -#endif /* HAVE_BLK_ALLOC_DISK */ -#else - zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE); - if (zso->zvo_queue == NULL) + if (ret != 0) goto out_kmem; - zso->zvo_disk = alloc_disk(ZVOL_MINORS); - if (zso->zvo_disk == NULL) { - blk_cleanup_queue(zso->zvo_queue); - goto out_kmem; - } - - zso->zvo_disk->queue = zso->zvo_queue; -#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ - blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE); /* Limit read-ahead to a single page to prevent over-prefetching. */ blk_queue_set_read_ahead(zso->zvo_queue, 1); - /* Disable write merging in favor of the ZIO pipeline. */ - blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue); + if (!zv->zv_zso->use_blk_mq) { + /* Disable write merging in favor of the ZIO pipeline. 
*/ + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue); + } /* Enable /proc/diskstats */ blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue); @@ -918,7 +1194,6 @@ zvol_alloc(dev_t dev, const char *name) } zso->zvo_disk->first_minor = (dev & MINORMASK); - zso->zvo_disk->fops = &zvol_ops; zso->zvo_disk->private_data = zv; snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d", ZVOL_DEV_NAME, (dev & MINORMASK)); @@ -963,6 +1238,11 @@ zvol_os_free(zvol_state_t *zv) put_disk(zv->zv_zso->zvo_disk); #endif +#ifdef HAVE_BLK_MQ + if (zv->zv_zso->use_blk_mq) + blk_mq_free_tag_set(&zv->zv_zso->tag_set); +#endif + ida_simple_remove(&zvol_ida, MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); @@ -1044,8 +1324,69 @@ zvol_os_create_minor(const char *name) blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue, (DMU_MAX_ACCESS / 4) >> 9); - blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX); - blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX); + + if (zv->zv_zso->use_blk_mq) { + /* + * IO requests can be really big (1MB). When an IO request + * comes in, it is passed off to zvol_read() or zvol_write() + * in a new thread, where it is chunked up into 'volblocksize' + * sized pieces and processed. So for example, if the request + * is a 1MB write and your volblocksize is 128k, one zvol_write + * thread will take that request and sequentially do eight 128k + * IOs. This is due to the fact that the thread needs to lock + * each volblocksize sized block. So you might be wondering: + * "instead of passing the whole 1MB request to one thread, + * why not pass eight individual 128k chunks to eight threads and + * process the whole write in parallel?" The short answer is + * that there's a sweet spot number of chunks that balances + * the greater parallelism with the added overhead of more + * threads. The sweet spot can be different depending on if you + * have a read or write heavy workload. Writes typically want + * high chunk counts while reads typically want lower ones. On + * a test pool with 6 NVMe drives in a 3x 2-disk mirror + * configuration, with volblocksize=8k, the sweet spot for good + * sequential reads and writes was at 8 chunks. + */ + + /* + * Below we tell the kernel how big we want our requests + * to be. You would think that blk_queue_io_opt() would be + * used to do this since it is used to "set optimal request + * size for the queue", but that doesn't seem to do + * anything - the kernel still gives you huge requests + * with tons of little PAGE_SIZE segments contained within it. + * + * Knowing that the kernel will just give you PAGE_SIZE segments + * no matter what, you can say "ok, I want PAGE_SIZE byte + * segments, and I want 'N' of them per request", where N is + * the correct number of segments for the volblocksize and + * number of chunks you want. + */ +#ifdef HAVE_BLK_MQ + if (zvol_blk_mq_blocks_per_thread != 0) { + unsigned int chunks; + chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX); + + blk_queue_max_segment_size(zv->zv_zso->zvo_queue, + PAGE_SIZE); + blk_queue_max_segments(zv->zv_zso->zvo_queue, + (zv->zv_volblocksize * chunks) / PAGE_SIZE); + } else { + /* + * Special case: zvol_blk_mq_blocks_per_thread = 0 + * Max everything out.
+ */ + blk_queue_max_segments(zv->zv_zso->zvo_queue, + UINT16_MAX); + blk_queue_max_segment_size(zv->zv_zso->zvo_queue, + UINT_MAX); + } +#endif + } else { + blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX); + blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX); + } + blk_queue_physical_block_size(zv->zv_zso->zvo_queue, zv->zv_volblocksize); blk_queue_io_opt(zv->zv_zso->zvo_queue, zv->zv_volblocksize); @@ -1167,19 +1508,54 @@ int zvol_init(void) { int error; - int threads = MIN(MAX(zvol_threads, 1), 1024); + + /* + * zvol_threads is the module param the user passes in. + * + * zvol_actual_threads is what we use internally, since the user can + * pass zvol_threads = 0 to mean "use all the CPUs" (the default). + */ + static unsigned int zvol_actual_threads; + + if (zvol_threads == 0) { + /* + * See dde9380a1 for why 32 was chosen here. This should + * probably be refined to be some multiple of the number + * of CPUs. + */ + zvol_actual_threads = MAX(num_online_cpus(), 32); + } else { + zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024); + } error = register_blkdev(zvol_major, ZVOL_DRIVER); if (error) { printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error); return (error); } - zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri, - threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); + +#ifdef HAVE_BLK_MQ + if (zvol_blk_mq_queue_depth == 0) { + zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; + } else { + zvol_actual_blk_mq_queue_depth = + MAX(zvol_blk_mq_queue_depth, BLKDEV_MIN_RQ); + } + + if (zvol_blk_mq_threads == 0) { + zvol_blk_mq_actual_threads = num_online_cpus(); + } else { + zvol_blk_mq_actual_threads = MIN(MAX(zvol_blk_mq_threads, 1), + 1024); + } +#endif + zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri, + zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); if (zvol_taskq == NULL) { unregister_blkdev(zvol_major, ZVOL_DRIVER); return (-ENOMEM); } + zvol_init_impl(); ida_init(&zvol_ida); return (0); @@ -1202,7 +1578,8 @@ module_param(zvol_major, uint, 0444); MODULE_PARM_DESC(zvol_major, "Major number for zvol device"); module_param(zvol_threads, uint, 0444); -MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests"); +MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests.
Set " + "to 0 to use all active CPUs"); module_param(zvol_request_sync, uint, 0644); MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests"); @@ -1215,4 +1592,17 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end"); module_param(zvol_volmode, uint, 0644); MODULE_PARM_DESC(zvol_volmode, "Default volmode property value"); + +#ifdef HAVE_BLK_MQ +module_param(zvol_blk_mq_queue_depth, uint, 0644); +MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth"); + +module_param(zvol_use_blk_mq, uint, 0644); +MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols"); + +module_param(zvol_blk_mq_blocks_per_thread, uint, 0644); +MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread, + "Process volblocksize blocks per thread"); +#endif + /* END CSTYLED */ diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 243221598..89ee0d3cb 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -937,9 +937,13 @@ tags = ['functional', 'zvol', 'zvol_cli'] [tests/functional/zvol/zvol_misc] tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse', - 'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil'] + 'zvol_misc_snapdev', 'zvol_misc_trim', 'zvol_misc_volmode', 'zvol_misc_zil'] tags = ['functional', 'zvol', 'zvol_misc'] +[tests/functional/zvol/zvol_stress] +tests = ['zvol_stress'] +tags = ['functional', 'zvol', 'zvol_stress'] + [tests/functional/zvol/zvol_swap] tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos'] tags = ['functional', 'zvol', 'zvol_swap'] diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 3985da146..fa71f412b 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -184,3 +184,8 @@ tags = ['functional', 'user_namespace'] tests = ['groupspace_001_pos', 'groupspace_002_pos', 'groupspace_003_pos', 'userquota_013_pos', 'userspace_003_pos'] tags = ['functional', 'userquota'] + +[tests/functional/zvol/zvol_misc:Linux] tests = ['zvol_misc_fua'] +tags = ['functional', 'zvol', 'zvol_misc'] + diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 99430bc10..1ee786d13 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -120,10 +120,12 @@ export SYSTEM_FILES_FREEBSD='chflags showmount swapctl sysctl + trim uncompress' export SYSTEM_FILES_LINUX='attr blkid + blkdiscard blockdev chattr exportfs diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 51d4e225f..cb20318f4 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -2770,20 +2770,22 @@ function is_te_enabled svcs -H -o state labeld 2>/dev/null | grep -q "enabled" } +# Return the number of CPUs (cross-platform) +function get_num_cpus +{ + if is_linux ; then + grep -c '^processor' /proc/cpuinfo + elif is_freebsd; then + sysctl -n kern.smp.cpus + else + psrinfo | wc -l + fi +} + # Utility function to determine if a system has multiple cpus.
function is_mp { - case "$UNAME" in - Linux) - (($(grep -c '^processor' /proc/cpuinfo) > 1)) - ;; - FreeBSD) - sysctl -n kern.smp.cpus - ;; - *) - (($(psrinfo | wc -l) > 1)) - ;; - esac + [[ $(get_num_cpus) -gt 1 ]] } function get_cpu_freq @@ -3320,14 +3322,23 @@ function get_tunable_impl { typeset name="$1" typeset module="${2:-zfs}" + typeset check_only="$3" eval "typeset tunable=\$$name" case "$tunable" in UNSUPPORTED) - log_unsupported "Tunable '$name' is unsupported on $UNAME" + if [ -z "$check_only" ] ; then + log_unsupported "Tunable '$name' is unsupported on $UNAME" + else + return 1 + fi ;; "") - log_fail "Tunable '$name' must be added to tunables.cfg" + if [ -z "$check_only" ] ; then + log_fail "Tunable '$name' must be added to tunables.cfg" + else + return 1 + fi ;; *) ;; @@ -3347,6 +3358,14 @@ function get_tunable_impl esac } +# Does a tunable exist? +# +# $1: Tunable name +function tunable_exists +{ + get_tunable_impl $1 "zfs" 1 +} + # # Compute MD5 digest for given file or stdin if no file given. # Note: file path must not contain spaces diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index d3838cb7c..d6a2fe5db 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -87,6 +87,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev VOL_MODE vol.mode zvol_volmode VOL_RECURSIVE vol.recursive UNSUPPORTED +VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq XATTR_COMPAT xattr_compat zfs_xattr_compat ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index ffc087351..d759e5196 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1966,11 +1966,16 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/zvol/zvol_misc/zvol_misc_004_pos.ksh \ functional/zvol/zvol_misc/zvol_misc_005_neg.ksh \ functional/zvol/zvol_misc/zvol_misc_006_pos.ksh \ + functional/zvol/zvol_misc/zvol_misc_fua.ksh \ functional/zvol/zvol_misc/zvol_misc_hierarchy.ksh \ functional/zvol/zvol_misc/zvol_misc_rename_inuse.ksh \ functional/zvol/zvol_misc/zvol_misc_snapdev.ksh \ + functional/zvol/zvol_misc/zvol_misc_trim.ksh \ functional/zvol/zvol_misc/zvol_misc_volmode.ksh \ functional/zvol/zvol_misc/zvol_misc_zil.ksh \ + functional/zvol/zvol_stress/cleanup.ksh \ + functional/zvol/zvol_stress/setup.ksh \ + functional/zvol/zvol_stress/zvol_stress.ksh \ functional/zvol/zvol_swap/cleanup.ksh \ functional/zvol/zvol_swap/setup.ksh \ functional/zvol/zvol_swap/zvol_swap_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib b/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib index c0fd90f58..c04559fe3 100644 --- a/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib +++ b/tests/zfs-tests/tests/functional/zvol/zvol_common.shlib @@ -128,3 +128,14 @@ function is_zvol_dumpified zdb -dddd $volume 2 | grep -q "dumpsize" } + +# enable/disable blk-mq (if available) +# +# $1: 1 = enable, 0 = disable +function set_blk_mq +{ + # Not all kernels support blk-mq + if tunable_exists VOL_USE_BLK_MQ ; then + log_must set_tunable32 VOL_USE_BLK_MQ $1 + fi +} diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh new file mode 100755 index 000000000..e44107030 --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh @@ -0,0 +1,96 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zvol/zvol_common.shlib + +# +# DESCRIPTION: +# Verify that a zvol Force Unit Access (FUA) write works. +# +# STRATEGY: +# 1. dd write 5MB of data with "oflag=dsync,direct" to a zvol. Those flags +# together do a FUA write. +# 2. Verify the data is correct. +# 3. Repeat 1-2 for both the blk-mq and non-blk-mq cases. + +verify_runnable "global" + +if ! is_physical_device $DISKS; then + log_unsupported "This directory cannot be run on raw files." +fi + +if ! is_linux ; then + log_unsupported "Only Linux supports dd with oflag=dsync for FUA writes" +fi + +typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)" +typeset datafile2="$(mktemp zvol_misc_fua2.XXXXXX)" +typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL + +function cleanup +{ + rm "$datafile1" "$datafile2" +} + +function do_test { + # Wait for udev to create symlinks to our zvol + block_device_wait $zvolpath + + # Create a data file + log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5 + + # Write the data to our zvol using FUA + log_must dd if=$datafile1 of=$zvolpath oflag=dsync,direct bs=1M count=5 + + # Extract data from our zvol + log_must dd if=$zvolpath of="$datafile2" bs=1M count=5 + + # Compare the data we expect with what's on our zvol. diff will return + # non-zero if they differ. + log_must diff $datafile1 $datafile2 + + log_must rm $datafile1 $datafile2 +} + +log_assert "Verify that a ZFS volume can do Force Unit Access (FUA)" +log_onexit cleanup + +log_must zfs set compression=off $TESTPOOL/$TESTVOL + +log_note "Testing without blk-mq" + +set_blk_mq 0 +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +do_test + +set_blk_mq 1 +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +do_test + +log_pass "ZFS volume FUA works" diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh new file mode 100755 index 000000000..2e417a0e6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh @@ -0,0 +1,136 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/math.shlib +. $STF_SUITE/tests/functional/zvol/zvol_common.shlib + +# +# DESCRIPTION: +# Verify we can TRIM a zvol +# +# STRATEGY: +# 1. TRIM the entire zvol to remove data from older tests +# 2. Create a 5MB data file +# 3. Write the file to the zvol +# 4. Observe 5MB of used space on the zvol +# 5. TRIM the first 1MB and last 2MB of the 5MB block of data. +# 6. Observe 2MB of used space on the zvol +# 7. Verify the trimmed regions are zero'd on the zvol + +verify_runnable "global" + +if is_linux ; then + # We need '--force' here since the prior tests may leave a filesystem + # on the zvol, and blkdiscard will see that filesystem and print a + # warning unless you force it. + # + # Only blkdiscard >= v2.36 supports --force, so we need to + # check for it. + if blkdiscard --help | grep -q '\-\-force' ; then + trimcmd='blkdiscard --force' + else + trimcmd='blkdiscard' + fi +else + # By default, FreeBSD 'trim' always does a dry-run. '-f' makes + # it perform the actual operation. + trimcmd='trim -f' +fi + +if ! is_physical_device $DISKS; then + log_unsupported "This directory cannot be run on raw files." +fi + +typeset datafile1="$(mktemp zvol_misc_flags1.XXXXXX)" +typeset datafile2="$(mktemp zvol_misc_flags2.XXXXXX)" +typeset zvolpath=${ZVOL_DEVDIR}/$TESTPOOL/$TESTVOL + +function cleanup +{ + rm "$datafile1" "$datafile2" +} + +function do_test { + # Wait for udev to create symlinks to our zvol + block_device_wait $zvolpath + + # Create a data file + log_must dd if=/dev/urandom of="$datafile1" bs=1M count=5 + + # Write to zvol + log_must dd if=$datafile1 of=$zvolpath conv=fsync + + # Record how much space we've used (should be 5MB, with 128k + # of tolerance). + before="$(get_prop refer $TESTPOOL/$TESTVOL)" + log_must within_tolerance $before 5242880 131072 + + # We currently have 5MB of random data on the zvol. + # Trim the first 1MB and also trim 2MB at offset 3MB. + log_must $trimcmd -l $((1 * 1048576)) $zvolpath + log_must $trimcmd -o $((3 * 1048576)) -l $((2 * 1048576)) $zvolpath + sync_pool + + # After trimming 3MB, the zvol should have 2MB of data (with 128k of + # tolerance). + after="$(get_prop refer $TESTPOOL/$TESTVOL)" + log_must within_tolerance $after 2097152 131072 + + # Make the same holes in our test data + log_must dd if=/dev/zero of="$datafile1" bs=1M count=1 conv=notrunc + log_must dd if=/dev/zero of="$datafile1" bs=1M count=2 seek=3 conv=notrunc + + # Extract data from our zvol + log_must dd if=$zvolpath of="$datafile2" bs=1M count=5 + + # Compare the data we expect with what's on our zvol. diff will return + # non-zero if they differ. 
+ log_must diff $datafile1 $datafile2 + + log_must rm $datafile1 $datafile2 +} + +log_assert "Verify that a ZFS volume can be TRIMed" +log_onexit cleanup + +log_must zfs set compression=off $TESTPOOL/$TESTVOL + +# Remove old data from previous tests +log_must $trimcmd $zvolpath + + +set_blk_mq 1 +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +do_test + +set_blk_mq 0 +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +do_test + +log_pass "ZFS volumes can be trimmed" diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh new file mode 100755 index 000000000..b81a37263 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh new file mode 100755 index 000000000..9e70fc47b --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. 
$STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +default_setup "$DISKS" diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh new file mode 100755 index 000000000..c1aadcac3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh @@ -0,0 +1,169 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2022 by Lawrence Livermore National Security, LLC. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/reservation/reservation.shlib +. $STF_SUITE/tests/functional/zvol/zvol_common.shlib + +# +# DESCRIPTION: +# Stress test multithreaded transfers to multiple zvols. Also verify +# zvol errors show up in zpool status. +# +# STRATEGY: +# +# For both the normal submit_bio() codepath and the blk-mq codepath, do +# the following: +# +# 1. Create one zvol per CPU +# 2. In parallel, spawn an fio "write and verify" for each zvol +# 3. Inject write errors +# 4. Write to one of the zvols with dd and verify the errors +# + +verify_runnable "global" + +num_zvols=$(get_num_cpus) + +# If we were making one big zvol from all the pool space, it would +# be this big: +biggest_zvol_size_possible=$(largest_volsize_from_pool $TESTPOOL) + +# Crude calculation: take the biggest zvol size we could possibly +# create, knock 10% off it (for overhead) and divide by the number +# of ZVOLs we want to make. 
+# +# Round the value using a printf +typeset -f each_zvol_size=$(( floor($biggest_zvol_size_possible * 0.9 / \ + $num_zvols ))) + +typeset tmpdir="$(mktemp -d zvol_stress_fio_state.XXXXXX)" + +function create_zvols +{ + log_note "Creating $num_zvols zvols that are ${each_zvol_size}B each" + for i in $(seq $num_zvols) ; do + log_must zfs create -V $each_zvol_size $TESTPOOL/testvol$i + block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/testvol$i" + done +} + +function destroy_zvols +{ + for i in $(seq $num_zvols) ; do + log_must_busy zfs destroy $TESTPOOL/testvol$i + done +} + +function do_zvol_stress +{ + # Write 10% of each zvol, or 50MB, whichever is less + zvol_write_size=$((each_zvol_size / 10)) + if [ $zvol_write_size -gt $((50 * 1048576)) ] ; then + zvol_write_size=$((50 * 1048576)) + fi + zvol_write_size_mb=$(($zvol_write_size / 1048576)) + + if is_linux ; then + engine=libaio + else + engine=psync + fi + + # Spawn off one fio per zvol in parallel + pids="" + for i in $(seq $num_zvols) ; do + # Spawn one fio per zvol as its own process + fio --ioengine=$engine --name=zvol_stress$i --direct=0 \ + --filename="$ZVOL_DEVDIR/$TESTPOOL/testvol$i" --bs=1048576 \ + --iodepth=10 --readwrite=randwrite --size=${zvol_write_size} \ + --verify_async=2 --numjobs=1 --verify=sha1 \ + --verify_fatal=1 \ + --continue_on_error=none \ + --error_dump=1 \ + --exitall_on_error \ + --aux-path="$tmpdir" --do_verify=1 & + pids="$pids $!" + done + + # Wait for all the spawned fios to finish and look for errors + fail="" + i=0 + for pid in $pids ; do + log_note "$s waiting on $pid" + if ! wait $pid ; then + log_fail "fio error on $TESTPOOL/testvol$i" + fi + i=$(($i + 1)) + done +} + +function cleanup +{ + log_must zinject -c all + log_must zpool clear $TESTPOOL + destroy_zvols + set_blk_mq 0 + + # Remove all fio's leftover state files + if [ -n "$tmpdir" ] ; then + log_must rm -fd "$tmpdir"/*.state "$tmpdir" + fi +} + +log_onexit cleanup + +log_assert "Stress test zvols" + +set_blk_mq 0 +create_zvols +# Do some fio write/verifies in parallel +do_zvol_stress +destroy_zvols + +# Enable blk-mq (block multi-queue), and re-run the same test +set_blk_mq 1 +create_zvols +do_zvol_stress + +# Inject some errors, and verify we see some IO errors in zpool status +for DISK in $DISKS ; do + log_must zinject -d $DISK -f 10 -e io -T write $TESTPOOL +done +log_must dd if=/dev/zero of=$ZVOL_DEVDIR/$TESTPOOL/testvol1 bs=512 count=50 +log_must zinject -c all + +# We should see write errors +typeset -i write_errors=$(zpool status -p | awk ' + !NF { isvdev = 0 } + isvdev { errors += $4 } + /CKSUM$/ { isvdev = 1 } + END { print errors } +') + +if [ $write_errors -eq 0 ] ; then + log_fail "Expected to see some write errors" +else + log_note "Correctly saw $write_errors write errors" +fi +log_pass "Done with zvol_stress"
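Usage sketch (not part of the patch): the new knobs are exposed as ordinary module parameters, so on a kernel where HAVE_BLK_MQ is detected they can be exercised roughly as below. The pool name "tank" is a placeholder, the /sys path assumes the standard module-parameter layout implied by the module_param() declarations above, and the setting is only read when a zvol is loaded, which is why the tests export and re-import the pool after toggling it.

# Enable blk-mq for zvols, then reload the zvols by re-importing the pool
echo 1 > /sys/module/zfs/parameters/zvol_use_blk_mq
zpool export tank && zpool import tank

# Alternatively, set it at module load time together with the chunking knob
modprobe zfs zvol_use_blk_mq=1 zvol_blk_mq_blocks_per_thread=8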