mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 10:54:35 +03:00
Fix rare cksum errors after rebuild
Currently, after a rebuild (aka sequential resilver), checksum errors can sometimes be seen on the spare vdev or draid spare. On my laptop, this happens 2 to 4 times per 100 runs of the redundancy_draid_spare1 test in a loop. It looks like there is a race in vdev_rebuild_thread(): when the rebuild of the space map ranges is finished, we re-enable allocations from the metaslab too soon, so new allocations may happen from that metaslab before the txg with the rebuilt ranges is sync-ed, causing undesirable interference. Solution: wait for the txg to be sync-ed before enabling the metaslab. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Akash B <akash-b@hpe.com> Signed-off-by: Andriy Tkachuk <atkachuk@wasabi.com> Closes #18307 Closes #18319 Closes #18473
This commit is contained in:
committed by
Tony Hutter
parent
b0c1dcb531
commit
76fd64ac9f
@@ -593,6 +593,7 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
 dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
 VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND));
 uint64_t txg = dmu_tx_get_txg(tx);
+vr->vr_last_txg = txg;
 
 spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER);
 mutex_enter(&vd->vdev_rebuild_lock);
|
||||
@@ -908,8 +909,14 @@ vdev_rebuild_thread(void *arg)
 error = vdev_rebuild_ranges(vr);
 zfs_range_tree_vacate(vr->vr_scan_tree, NULL, NULL);
 
-spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+/*
+ * Allow rebuilt ranges to be sync-ed before enabling metaslab
+ * to avoid any interfering allocations. Otherwise, we might
+ * see checksum errors after scrub.
+ */
+txg_wait_synced(dp, vr->vr_last_txg);
 metaslab_enable(msp, B_FALSE, B_FALSE);
+spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 if (error != 0)
 	break;
|
||||
|
||||
Reference in New Issue
Block a user