diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index cf6720317..e19573d5e 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -2012,6 +2012,13 @@ Ignore the feature, causing an operation that would start a resilver to immediately restart the one in progress. . +.It Sy zfs_resilver_defer_percent Ns = Ns Sy 10 Ns % Pq uint +If the ongoing resilver progress is below this threshold, a new resilver will +restart from scratch instead of being deferred after the current one finishes, +even if the +.Sy resilver_defer +feature is enabled. +. .It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3 s Pc Pq uint Resilvers are processed by the sync thread. While resilvering, it will spend at least this much time diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 87588455e..6cd0dbdea 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -212,6 +212,9 @@ static uint64_t zfs_max_async_dedup_frees = 100000; /* set to disable resilver deferring */ static int zfs_resilver_disable_defer = B_FALSE; +/* Don't defer a resilver if the one in progress only got this far: */ +static uint_t zfs_resilver_defer_percent = 10; + /* * We wait a few txgs after importing a pool to begin scanning so that * the import / mounting code isn't held up by scrub / resilver IO. @@ -4289,27 +4292,26 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dsl_scan_t *scn = dp->dp_scan; spa_t *spa = dp->dp_spa; state_sync_type_t sync_type = SYNC_OPTIONAL; + int restart_early = 0; - if (spa->spa_resilver_deferred && - !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) - spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + if (spa->spa_resilver_deferred) { + uint64_t to_issue, issued; - /* - * Check for scn_restart_txg before checking spa_load_state, so - * that we can restart an old-style scan while the pool is being - * imported (see dsl_scan_init). We also restart scans if there - * is a deferred resilver and the user has manually disabled - * deferred resilvers via the tunable. - */ - if (dsl_scan_restarting(scn, tx) || - (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) { - pool_scan_func_t func = POOL_SCAN_SCRUB; - dsl_scan_done(scn, B_FALSE, tx); - if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) - func = POOL_SCAN_RESILVER; - zfs_dbgmsg("restarting scan func=%u on %s txg=%llu", - func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg); - dsl_scan_setup_sync(&func, tx); + if (!spa_feature_is_active(dp->dp_spa, + SPA_FEATURE_RESILVER_DEFER)) + spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + + /* + * See print_scan_scrub_resilver_status() issued/total_i + * @ cmd/zpool/zpool_main.c + */ + to_issue = + scn->scn_phys.scn_to_examine - scn->scn_phys.scn_skipped; + issued = + scn->scn_issued_before_pass + spa->spa_scan_pass_issued; + restart_early = + zfs_resilver_disable_defer || + (issued < (to_issue * zfs_resilver_defer_percent / 100)); } /* @@ -4318,6 +4320,26 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (spa_sync_pass(spa) > 1) return; + + /* + * Check for scn_restart_txg before checking spa_load_state, so + * that we can restart an old-style scan while the pool is being + * imported (see dsl_scan_init). We also restart scans if there + * is a deferred resilver and the user has manually disabled + * deferred resilvers via zfs_resilver_disable_defer, or if the + * current scan progress is below zfs_resilver_defer_percent. + */ + if (dsl_scan_restarting(scn, tx) || restart_early) { + pool_scan_func_t func = POOL_SCAN_SCRUB; + dsl_scan_done(scn, B_FALSE, tx); + if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) + func = POOL_SCAN_RESILVER; + zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d", + func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg, + restart_early); + dsl_scan_setup_sync(&func, tx); + } + /* * If the spa is shutting down, then stop scanning. This will * ensure that the scan does not dirty any new data during the @@ -5287,6 +5309,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, "Process all resilvers immediately"); +ZFS_MODULE_PARAM(zfs, zfs_, resilver_defer_percent, UINT, ZMOD_RW, + "Issued IO percent complete after which resilvers are deferred"); + ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW, "Error blocks to be scrubbed in one txg"); /* END CSTYLED */ diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index 9f436eb40..2024c44cc 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -73,6 +73,7 @@ REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms +RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent SCAN_LEGACY scan_legacy zfs_scan_legacy SCAN_SUSPEND_PROGRESS scan_suspend_progress zfs_scan_suspend_progress SCAN_VDEV_LIMIT scan_vdev_limit zfs_scan_vdev_limit diff --git a/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh b/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh index b498ba4af..629870adf 100755 --- a/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh +++ b/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh @@ -96,6 +96,8 @@ set -A RESTARTS -- '1' '2' '2' '2' set -A VDEVS -- '' '' '' '' set -A DEFER_RESTARTS -- '1' '1' '1' '2' set -A DEFER_VDEVS -- '-' '2' '2' '-' +set -A EARLY_RESTART_DEFER_RESTARTS -- '1' '2' '2' '2' +set -A EARLY_RESTART_DEFER_VDEVS -- '' '' '' '' VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" @@ -125,7 +127,7 @@ done wait # test without and with deferred resilve feature enabled -for test in "without" "with" +for test in "without" "with" "with_early_restart" do log_note "Testing $test deferred resilvers" @@ -135,6 +137,13 @@ do RESTARTS=( "${DEFER_RESTARTS[@]}" ) VDEVS=( "${DEFER_VDEVS[@]}" ) VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}" + log_must set_tunable32 RESILVER_DEFER_PERCENT 0 + elif [[ $test == "with_early_restart" ]] + then + RESTARTS=( "${EARLY_RESTART_DEFER_RESTARTS[@]}" ) + VDEVS=( "${EARLY_RESTART_DEFER_VDEVS[@]}" ) + VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" + log_must set_tunable32 RESILVER_DEFER_PERCENT 100 fi # clear the events