From ccfa35c6f9202854fee3c56e5f63d175d51d47e7 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 20 Nov 2020 13:14:45 -0800 Subject: [PATCH] Correct missing zil_claim() DTL updates Commit a1d477c2 accidentally disabled DTL updates for the zil_claim() case described at the end of vdev_stat_update() by unconditionally disabling all DTL updates when loading. This was done to avoid a deadlock on the vd_dtl_lock when loading the DTLs from disk. vdev_dtl_contains <--- Takes vd->vd_dtl_lock vdev_mirror_child_missing vdev_mirror_io_start zio_vdev_io_start __zio_execute arc_read dbuf_issue_final_prefetch dbuf_prefetch_impl dbuf_prefetch dmu_prefetch space_map_iterate space_map_load_length space_map_load vdev_dtl_load <--- Takes vd->vd_dtl_lock vdev_load spa_ld_load_vdev_metadata spa_tryimport The missing DTL updates can be restored by moving the space_map_load() call outside the vd_dtl_lock. A private range tree is populated by reading the space map and then merged in to the DTL_MISSING tree under the lock. Furthermore, the SPA_LOAD_NONE check in vdev_dtl_contains() leads to an additional problem. Any resilvering which occurs before SPA_LOAD_NONE is set will incorrectly determine that there's nothing to repair. This can result in full redundancy not being restored for some blocks. Reviewed-by: Matt Ahrens Signed-off-by: Brian Behlendorf Closes #11218 --- module/zfs/vdev.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index e41e79ab8..0d9428b40 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2575,15 +2575,12 @@ vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t t, uint64_t txg, uint64_t size) /* * While we are loading the pool, the DTLs have not been loaded yet. - * Ignore the DTLs and try all devices. This avoids a recursive - * mutex enter on the vdev_dtl_lock, and also makes us try hard - * when loading the pool (relying on the checksum to ensure that - * we get the right data -- note that we while loading, we are - * only reading the MOS, which is always checksummed). + * This isn't a problem but it can result in devices being tried + * which are known to not have the data. In which case, the import + * is relying on the checksum to ensure that we get the right data. + * Note that while importing we are only reading the MOS, which is + * always checksummed. */ - if (vd->vdev_spa->spa_load_state != SPA_LOAD_NONE) - return (B_FALSE); - mutex_enter(&vd->vdev_dtl_lock); if (!range_tree_is_empty(rt)) dirty = range_tree_contains(rt, txg, size); @@ -2884,6 +2881,7 @@ vdev_dtl_load(vdev_t *vd) { spa_t *spa = vd->vdev_spa; objset_t *mos = spa->spa_meta_objset; + range_tree_t *rt; int error = 0; if (vd->vdev_ops->vdev_op_leaf && vd->vdev_dtl_object != 0) { @@ -2895,10 +2893,17 @@ vdev_dtl_load(vdev_t *vd) return (error); ASSERT(vd->vdev_dtl_sm != NULL); - mutex_enter(&vd->vdev_dtl_lock); - error = space_map_load(vd->vdev_dtl_sm, - vd->vdev_dtl[DTL_MISSING], SM_ALLOC); - mutex_exit(&vd->vdev_dtl_lock); + rt = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0); + error = space_map_load(vd->vdev_dtl_sm, rt, SM_ALLOC); + if (error == 0) { + mutex_enter(&vd->vdev_dtl_lock); + range_tree_walk(rt, range_tree_add, + vd->vdev_dtl[DTL_MISSING]); + mutex_exit(&vd->vdev_dtl_lock); + } + + range_tree_vacate(rt, NULL, NULL); + range_tree_destroy(rt); return (error); } @@ -4353,8 +4358,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize) if (zio->io_vd == NULL && (zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) return; - if (spa->spa_load_state == SPA_LOAD_NONE && - type == ZIO_TYPE_WRITE && txg != 0 && + if (type == ZIO_TYPE_WRITE && txg != 0 && (!(flags & ZIO_FLAG_IO_REPAIR) || (flags & ZIO_FLAG_SCAN_THREAD) || spa->spa_claiming)) {