From 0d77e738e65d2fdc37c9c345c022f14239ae67cb Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Fri, 4 Oct 2024 19:41:17 +0200 Subject: [PATCH] Defer resilver only when progress is above a threshold Restart a resilver from scratch, if the current one in progress is below a new tunable, zfs_resilver_defer_percent (defaulting to 10%). The original rationale for deferring additional resilvers, when there is already one in progress, was to help achieving data redundancy sooner for the data that gets scanned at the end of the resilver. But in case the admin wants to attach multiple disks to a single vdev, it wasn't immediately obvious the admin is supposed to run `zpool resilver` afterwards to reset the deferred resilvers and start a new one from scratch. Reviewed-by: Brian Behlendorf Signed-off-by: Pavel Snajdr Closes #15810 --- man/man4/zfs.4 | 7 +++ module/zfs/dsl_scan.c | 63 +++++++++++++------ tests/zfs-tests/include/tunables.cfg | 1 + .../replacement/resilver_restart_001.ksh | 11 +++- 4 files changed, 62 insertions(+), 20 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index cf6720317..e19573d5e 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -2012,6 +2012,13 @@ Ignore the feature, causing an operation that would start a resilver to immediately restart the one in progress. . +.It Sy zfs_resilver_defer_percent Ns = Ns Sy 10 Ns % Pq uint +If the ongoing resilver progress is below this threshold, a new resilver will +restart from scratch instead of being deferred after the current one finishes, +even if the +.Sy resilver_defer +feature is enabled. +. .It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3 s Pc Pq uint Resilvers are processed by the sync thread. While resilvering, it will spend at least this much time diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 87588455e..6cd0dbdea 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -212,6 +212,9 @@ static uint64_t zfs_max_async_dedup_frees = 100000; /* set to disable resilver deferring */ static int zfs_resilver_disable_defer = B_FALSE; +/* Don't defer a resilver if the one in progress only got this far: */ +static uint_t zfs_resilver_defer_percent = 10; + /* * We wait a few txgs after importing a pool to begin scanning so that * the import / mounting code isn't held up by scrub / resilver IO. @@ -4289,27 +4292,26 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; state_sync_type_t sync_type = SYNC_OPTIONAL; + int restart_early = 0; - if (spa->spa_resilver_deferred && - !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) - spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + if (spa->spa_resilver_deferred) { + uint64_t to_issue, issued; - /* - * Check for scn_restart_txg before checking spa_load_state, so - * that we can restart an old-style scan while the pool is being - * imported (see dsl_scan_init). We also restart scans if there - * is a deferred resilver and the user has manually disabled - * deferred resilvers via the tunable. - */ - if (dsl_scan_restarting(scn, tx) || - (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) { - pool_scan_func_t func = POOL_SCAN_SCRUB; - dsl_scan_done(scn, B_FALSE, tx); - if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) - func = POOL_SCAN_RESILVER; - zfs_dbgmsg("restarting scan func=%u on %s txg=%llu", - func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg); - dsl_scan_setup_sync(&func, tx); + if (!spa_feature_is_active(dp->dp_spa, + SPA_FEATURE_RESILVER_DEFER)) + spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + + /* + * See print_scan_scrub_resilver_status() issued/total_i + * @ cmd/zpool/zpool_main.c + */ + to_issue = + scn->scn_phys.scn_to_examine - scn->scn_phys.scn_skipped; + issued = + scn->scn_issued_before_pass + spa->spa_scan_pass_issued; + restart_early = + zfs_resilver_disable_defer || + (issued < (to_issue * zfs_resilver_defer_percent / 100)); } /* @@ -4318,6 +4320,26 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (spa_sync_pass(spa) > 1) return; + + /* + * Check for scn_restart_txg before checking spa_load_state, so + * that we can restart an old-style scan while the pool is being + * imported (see dsl_scan_init). We also restart scans if there + * is a deferred resilver and the user has manually disabled + * deferred resilvers via zfs_resilver_disable_defer, or if the + * current scan progress is below zfs_resilver_defer_percent. + */ + if (dsl_scan_restarting(scn, tx) || restart_early) { + pool_scan_func_t func = POOL_SCAN_SCRUB; + dsl_scan_done(scn, B_FALSE, tx); + if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) + func = POOL_SCAN_RESILVER; + zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d", + func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg, + restart_early); + dsl_scan_setup_sync(&func, tx); + } + /* * If the spa is shutting down, then stop scanning. This will * ensure that the scan does not dirty any new data during the @@ -5287,6 +5309,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, "Process all resilvers immediately"); +ZFS_MODULE_PARAM(zfs, zfs_, resilver_defer_percent, UINT, ZMOD_RW, + "Issued IO percent complete after which resilvers are deferred"); + ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW, "Error blocks to be scrubbed in one txg"); /* END CSTYLED */ diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index 9f436eb40..2024c44cc 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -73,6 +73,7 @@ REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms +RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent SCAN_LEGACY scan_legacy zfs_scan_legacy SCAN_SUSPEND_PROGRESS scan_suspend_progress zfs_scan_suspend_progress SCAN_VDEV_LIMIT scan_vdev_limit zfs_scan_vdev_limit diff --git a/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh b/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh index b498ba4af..629870adf 100755 --- a/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh +++ b/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh @@ -96,6 +96,8 @@ set -A RESTARTS -- '1' '2' '2' '2' set -A VDEVS -- '' '' '' '' set -A DEFER_RESTARTS -- '1' '1' '1' '2' set -A DEFER_VDEVS -- '-' '2' '2' '-' +set -A EARLY_RESTART_DEFER_RESTARTS -- '1' '2' '2' '2' +set -A EARLY_RESTART_DEFER_VDEVS -- '' '' '' '' VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" @@ -125,7 +127,7 @@ done wait # test without and with deferred resilve feature enabled -for test in "without" "with" +for test in "without" "with" "with_early_restart" do log_note "Testing $test deferred resilvers" @@ -135,6 +137,13 @@ do RESTARTS=( "${DEFER_RESTARTS[@]}" ) VDEVS=( "${DEFER_VDEVS[@]}" ) VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}" + log_must set_tunable32 RESILVER_DEFER_PERCENT 0 + elif [[ $test == "with_early_restart" ]] + then + RESTARTS=( "${EARLY_RESTART_DEFER_RESTARTS[@]}" ) + VDEVS=( "${EARLY_RESTART_DEFER_VDEVS[@]}" ) + VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" + log_must set_tunable32 RESILVER_DEFER_PERCENT 100 fi # clear the events