mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 03:08:51 +03:00
zvol: Support blk-mq for better performance
Add support for the kernel's block multiqueue (blk-mq) interface in the zvol block driver. blk-mq creates multiple request queues on different CPUs rather than having a single request queue. This can improve zvol performance with multithreaded reads/writes. This implementation uses the blk-mq interfaces on 4.13 or newer kernels. Building against older kernels will fall back to the older BIO interfaces. Note that you must set the `zvol_use_blk_mq` module param to enable the blk-mq API. It is disabled by default. In addition, this commit lets the zvol blk-mq layer process whole `struct request` IOs at a time, rather than breaking them down into their individual BIOs. This reduces dbuf lock contention and overhead versus the legacy zvol submit_bio() codepath. sequential dd to one zvol, 8k volblocksize, no O_DIRECT: legacy submit_bio() 292MB/s write 453MB/s read this commit 453MB/s write 885MB/s read It also introduces a new `zvol_blk_mq_chunks_per_thread` module parameter. This parameter represents how many volblocksize'd chunks to process per each zvol thread. It can be used to tune your zvols for better read vs write performance (higher values favor write, lower favor read). Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz> Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #13148 Issue #12483
This commit is contained in:
@@ -126,7 +126,7 @@ zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
{
|
||||
const struct bio_vec *bv = uio->uio_bvec;
|
||||
size_t skip = uio->uio_skip;
|
||||
@@ -137,10 +137,13 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
cnt = MIN(bv->bv_len - skip, n);
|
||||
|
||||
paddr = zfs_kmap_atomic(bv->bv_page);
|
||||
if (rw == UIO_READ)
|
||||
if (rw == UIO_READ) {
|
||||
/* Copy from buffer 'p' to the bvec data */
|
||||
memcpy(paddr + bv->bv_offset + skip, p, cnt);
|
||||
else
|
||||
} else {
|
||||
/* Copy from bvec data to buffer 'p' */
|
||||
memcpy(p, paddr + bv->bv_offset + skip, cnt);
|
||||
}
|
||||
zfs_kunmap_atomic(paddr);
|
||||
|
||||
skip += cnt;
|
||||
@@ -158,6 +161,141 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
static void
|
||||
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
|
||||
struct bio_vec *bv)
|
||||
{
|
||||
void *paddr;
|
||||
|
||||
paddr = zfs_kmap_atomic(bv->bv_page);
|
||||
if (rw == UIO_READ) {
|
||||
/* Copy from buffer 'p' to the bvec data */
|
||||
memcpy(paddr + bv->bv_offset + skip, p, cnt);
|
||||
} else {
|
||||
/* Copy from bvec data to buffer 'p' */
|
||||
memcpy(p, paddr + bv->bv_offset + skip, cnt);
|
||||
}
|
||||
zfs_kunmap_atomic(paddr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy 'n' bytes of data between the buffer p[] and the data represented
|
||||
* by the request in the uio.
|
||||
*/
|
||||
static int
|
||||
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
{
|
||||
struct request *rq = uio->rq;
|
||||
struct bio_vec bv;
|
||||
struct req_iterator iter;
|
||||
size_t this_seg_start; /* logical offset */
|
||||
size_t this_seg_end; /* logical offset */
|
||||
size_t skip_in_seg;
|
||||
size_t copy_from_seg;
|
||||
size_t orig_loffset;
|
||||
int copied = 0;
|
||||
|
||||
/*
|
||||
* Get the original logical offset of this entire request (because
|
||||
* uio->uio_loffset will be modified over time).
|
||||
*/
|
||||
orig_loffset = io_offset(NULL, rq);
|
||||
this_seg_start = orig_loffset;
|
||||
|
||||
rq_for_each_segment(bv, rq, iter) {
|
||||
if (uio->iter.bio) {
|
||||
/*
|
||||
* If uio->iter.bio is present, then we know we've saved
|
||||
* uio->iter from a previous call to this function, and
|
||||
* we can skip ahead in this rq_for_each_segment() loop
|
||||
* to where we last left off. That way, we don't need
|
||||
* to iterate over tons of segments we've already
|
||||
* processed - we can just restore the "saved state".
|
||||
*/
|
||||
iter = uio->iter;
|
||||
bv = uio->bv;
|
||||
this_seg_start = uio->uio_loffset;
|
||||
memset(&uio->iter, 0, sizeof (uio->iter));
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup what the logical offset of the last byte of this
|
||||
* segment is.
|
||||
*/
|
||||
this_seg_end = this_seg_start + bv.bv_len - 1;
|
||||
|
||||
/*
|
||||
* We only need to operate on segments that have data we're
|
||||
* copying.
|
||||
*/
|
||||
if (uio->uio_loffset >= this_seg_start &&
|
||||
uio->uio_loffset <= this_seg_end) {
|
||||
/*
|
||||
* Some, or all, of the data in this segment needs to be
|
||||
* copied.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We may be not be copying from the first byte in the
|
||||
* segment. Figure out how many bytes to skip copying
|
||||
* from the beginning of this segment.
|
||||
*/
|
||||
skip_in_seg = uio->uio_loffset - this_seg_start;
|
||||
|
||||
/*
|
||||
* Calculate the total number of bytes from this
|
||||
* segment that we will be copying.
|
||||
*/
|
||||
copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
|
||||
|
||||
/* Copy the bytes */
|
||||
zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
|
||||
p = ((char *)p) + copy_from_seg;
|
||||
|
||||
n -= copy_from_seg;
|
||||
uio->uio_resid -= copy_from_seg;
|
||||
uio->uio_loffset += copy_from_seg;
|
||||
copied = 1; /* We copied some data */
|
||||
}
|
||||
|
||||
if (n == 0) {
|
||||
/*
|
||||
* All done copying. Save our 'iter' value to the uio.
|
||||
* This allows us to "save our state" and skip ahead in
|
||||
* the rq_for_each_segment() loop the next time we call
|
||||
* call zfs_uiomove_bvec_rq() on this uio (which we
|
||||
* will be doing for any remaining data in the uio).
|
||||
*/
|
||||
uio->iter = iter; /* make a copy of the struct data */
|
||||
uio->bv = bv;
|
||||
return (0);
|
||||
}
|
||||
|
||||
this_seg_start = this_seg_end + 1;
|
||||
}
|
||||
|
||||
if (!copied) {
|
||||
/* Didn't copy anything */
|
||||
uio->uio_resid = 0;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (uio->rq != NULL)
|
||||
return (zfs_uiomove_bvec_rq(p, n, rw, uio));
|
||||
#else
|
||||
ASSERT3P(uio->rq, ==, NULL);
|
||||
#endif
|
||||
return (zfs_uiomove_bvec_impl(p, n, rw, uio));
|
||||
}
|
||||
|
||||
#if defined(HAVE_VFS_IOV_ITER)
|
||||
static int
|
||||
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
|
||||
@@ -300,8 +438,14 @@ zfs_uioskip(zfs_uio_t *uio, size_t n)
|
||||
{
|
||||
if (n > uio->uio_resid)
|
||||
return;
|
||||
|
||||
if (uio->uio_segflg == UIO_BVEC) {
|
||||
/*
|
||||
* When using a uio with a struct request, we simply
|
||||
* use uio_loffset as a pointer to the next logical byte to
|
||||
* copy in the request. We don't have to do any fancy
|
||||
* accounting with uio_bvec/uio_iovcnt since we don't use
|
||||
* them.
|
||||
*/
|
||||
if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
|
||||
uio->uio_skip += n;
|
||||
while (uio->uio_iovcnt &&
|
||||
uio->uio_skip >= uio->uio_bvec->bv_len) {
|
||||
|
||||
Reference in New Issue
Block a user