From c600f0687fefac00d3885909dd8e269677a36201 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 17 Aug 2021 11:47:00 -0400 Subject: [PATCH] Avoid vq_lock drop in vdev_queue_aggregate() vq_lock is already too congested for two more operations per I/O. Instead of dropping and reacquiring it inside vdev_queue_aggregate() delegate the zio_vdev_io_bypass() and zio_execute() calls for parent I/Os to callers, that drop the lock any way to execute the new I/O. Reviewed-by: Brian Behlendorf Reviewed-by: Mark Maybee Reviewed-by: Brian Atkinson Signed-off-by: Alexander Motin Closes #12297 --- module/zfs/vdev_queue.c | 63 ++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 06d22f6df..cc5b15b8c 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -599,7 +599,6 @@ static zio_t * vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) { zio_t *first, *last, *aio, *dio, *mandatory, *nio; - zio_link_t *zl = NULL; uint64_t maxgap = 0; uint64_t size; uint64_t limit; @@ -797,19 +796,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) ASSERT3U(abd_get_size(aio->io_abd), ==, aio->io_size); /* - * We need to drop the vdev queue's lock during zio_execute() to - * avoid a deadlock that we could encounter due to lock order - * reversal between vq_lock and io_lock in zio_change_priority(). + * Callers must call zio_vdev_io_bypass() and zio_execute() for + * aggregated (parent) I/Os so that we could avoid dropping the + * queue's lock here to avoid a deadlock that we could encounter + * due to lock order reversal between vq_lock and io_lock in + * zio_change_priority(). */ - mutex_exit(&vq->vq_lock); - while ((dio = zio_walk_parents(aio, &zl)) != NULL) { - ASSERT3U(dio->io_type, ==, aio->io_type); - - zio_vdev_io_bypass(dio); - zio_execute(dio); - } - mutex_enter(&vq->vq_lock); - return (aio); } @@ -847,23 +839,24 @@ again: ASSERT3U(zio->io_priority, ==, p); aio = vdev_queue_aggregate(vq, zio); - if (aio != NULL) + if (aio != NULL) { zio = aio; - else + } else { vdev_queue_io_remove(vq, zio); - /* - * If the I/O is or was optional and therefore has no data, we need to - * simply discard it. We need to drop the vdev queue's lock to avoid a - * deadlock that we could encounter since this I/O will complete - * immediately. - */ - if (zio->io_flags & ZIO_FLAG_NODATA) { - mutex_exit(&vq->vq_lock); - zio_vdev_io_bypass(zio); - zio_execute(zio); - mutex_enter(&vq->vq_lock); - goto again; + /* + * If the I/O is or was optional and therefore has no data, we + * need to simply discard it. We need to drop the vdev queue's + * lock to avoid a deadlock that we could encounter since this + * I/O will complete immediately. + */ + if (zio->io_flags & ZIO_FLAG_NODATA) { + mutex_exit(&vq->vq_lock); + zio_vdev_io_bypass(zio); + zio_execute(zio); + mutex_enter(&vq->vq_lock); + goto again; + } } vdev_queue_pending_add(vq, zio); @@ -876,7 +869,8 @@ zio_t * vdev_queue_io(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; - zio_t *nio; + zio_t *dio, *nio; + zio_link_t *zl = NULL; if (zio->io_flags & ZIO_FLAG_DONT_QUEUE) return (zio); @@ -923,6 +917,11 @@ vdev_queue_io(zio_t *zio) return (NULL); if (nio->io_done == vdev_queue_agg_io_done) { + while ((dio = zio_walk_parents(nio, &zl)) != NULL) { + ASSERT3U(dio->io_type, ==, nio->io_type); + zio_vdev_io_bypass(dio); + zio_execute(dio); + } zio_nowait(nio); return (NULL); } @@ -934,7 +933,8 @@ void vdev_queue_io_done(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; - zio_t *nio; + zio_t *dio, *nio; + zio_link_t *zl = NULL; hrtime_t now = gethrtime(); vq->vq_io_complete_ts = now; @@ -946,6 +946,11 @@ vdev_queue_io_done(zio_t *zio) while ((nio = vdev_queue_io_to_issue(vq)) != NULL) { mutex_exit(&vq->vq_lock); if (nio->io_done == vdev_queue_agg_io_done) { + while ((dio = zio_walk_parents(nio, &zl)) != NULL) { + ASSERT3U(dio->io_type, ==, nio->io_type); + zio_vdev_io_bypass(dio); + zio_execute(dio); + } zio_nowait(nio); } else { zio_vdev_io_reissue(nio);