From 76fd64ac9f625f3166a68b4cf5f1f6fdddf20430 Mon Sep 17 00:00:00 2001 From: Andriy Tkachuk Date: Fri, 1 May 2026 20:15:27 +0100 Subject: [PATCH] Fix rare cksum errors after rebuild Currently, after rebuild (aka sequential resilver), checksum errors can sometimes be seen on the spare vdev or draid spare. On my laptop, it happens 2 to 4 times when running the redundancy_draid_spare1 test in a loop 100 times. It looks like there's a race in vdev_rebuild_thread() when the rebuild of space map ranges is finished and we re-enable allocations from the metaslab too soon: new allocations may happen from that metaslab before the txg with the rebuilt ranges is sync-ed, causing undesirable interference. Solution: wait for the txg to be sync-ed before enabling the metaslab. Reviewed-by: Brian Behlendorf Reviewed-by: Akash B Signed-off-by: Andriy Tkachuk Closes #18307 Closes #18319 Closes #18473 --- include/sys/vdev_rebuild.h | 1 + module/zfs/vdev_rebuild.c | 9 ++++++++- .../tests/functional/redundancy/redundancy.kshlib | 9 ++------- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/sys/vdev_rebuild.h b/include/sys/vdev_rebuild.h index b787b1d5d..8ecb30659 100644 --- a/include/sys/vdev_rebuild.h +++ b/include/sys/vdev_rebuild.h @@ -70,6 +70,7 @@ typedef struct vdev_rebuild { zfs_range_tree_t *vr_scan_tree; kmutex_t vr_io_lock; /* inflight IO lock */ kcondvar_t vr_io_cv; /* inflight IO cv */ + uint64_t vr_last_txg; /* last used txg */ /* In-core state and progress */ uint64_t vr_scan_offset[TXG_SIZE]; diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c index 36b3f9e66..384421206 100644 --- a/module/zfs/vdev_rebuild.c +++ b/module/zfs/vdev_rebuild.c @@ -593,6 +593,7 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size) dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND)); uint64_t txg = dmu_tx_get_txg(tx); + vr->vr_last_txg = txg; spa_config_enter(spa, 
SCL_STATE_ALL, vd, RW_READER); mutex_enter(&vd->vdev_rebuild_lock); @@ -908,8 +909,14 @@ vdev_rebuild_thread(void *arg) error = vdev_rebuild_ranges(vr); zfs_range_tree_vacate(vr->vr_scan_tree, NULL, NULL); - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + /* + * Allow rebuilt ranges to be sync-ed before enabling metaslab + * to avoid any interfering allocations. Otherwise, we might + * see checksum errors after scrub. + */ + txg_wait_synced(dp, vr->vr_last_txg); metaslab_enable(msp, B_FALSE, B_FALSE); + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); if (error != 0) break; diff --git a/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib b/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib index 702268aee..b9b69e47d 100644 --- a/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib +++ b/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib @@ -360,12 +360,7 @@ function recover_bad_missing_devs # expected state after a healing resilver of a healthy pool. # # 2. sequential - The pool is fully intact. There should never be a -# checksum error, but the occasional checksum error does occur in -# practice. Until the root cause is identified and resolved, tolerate -# a checksum error when scrubbing after a sequential resilver. -# -# https://github.com/openzfs/zfs/issues/18307 -# https://github.com/openzfs/zfs/issues/18319 +# checksum error. # # 3. damaged - The pool was intentionally silently damaged. Checksum # errors are expected to be reported as the damaged blocks are @@ -395,7 +390,7 @@ function verify_draid_pool log_fail "Unexpected repair IO found for $pool ($cksum)" fi elif [[ "$replace_mode" = "sequential" ]]; then - if [[ $cksum -gt 3 ]]; then + if [[ $cksum -gt 0 ]]; then log_must zpool status -v $pool log_fail "Unexpected CKSUM errors found for $pool ($cksum)" fi