From f72fd378c82822c2e8a5c32f52719fd36a3b75bd Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 9 Dec 2025 15:16:46 -0500 Subject: [PATCH] Defer async destroys on pool import We've observed a number of cases where pool import got stuck for many minutes due to a large async destroy trying to load the DDT or BRT from an HDD pool. While proper destroy throttling is a separate problem, let's give the import process a chance to complete before any of that starts. It may not be enough if there is a lot of ZIL to replay, but that is harder to cover, since those are in separate syscalls. Code investigation showed that we already have this mechanism used for scrub/resilver, so this patch converts SCAN_IMPORT_WAIT_TXGS into a tunable and applies it to async destroys also. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #18033 --- man/man4/zfs.4 | 10 ++++++++++ module/zfs/dsl_scan.c | 34 ++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index a7e105a95..6f2a23a45 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1767,6 +1767,16 @@ Blocks that go to the special vdevs are still written indirectly, as with .Sy logbias Ns = Ns Sy throughput . This parameter is ignored if an SLOG is present. . +.It Sy zfs_import_defer_txgs Ns = Ns Sy 5 Pq uint +Number of transaction groups to wait after pool import before starting +background work such as asynchronous block freeing +.Pq from snapshots, clones, and deduplication +and scrub or resilver operations. +This allows the pool import and filesystem mounting to complete more quickly +without interference from background activities. +The default value of 5 transaction groups typically provides sufficient time +for import and mount operations to complete on most systems. +. .It Sy zfs_initialize_value Ns = Ns Sy 16045690984833335022 Po 0xDEADBEEFDEADBEEE Pc Pq u64 Pattern written to vdev free space by .Xr zpool-initialize 8 .
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index fcd50c459..eb3f72bb0 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -217,16 +217,14 @@ static int zfs_resilver_disable_defer = B_FALSE; static uint_t zfs_resilver_defer_percent = 10; /* - * We wait a few txgs after importing a pool to begin scanning so that - * the import / mounting code isn't held up by scrub / resilver IO. - * Unfortunately, it is a bit difficult to determine exactly how long - * this will take since userspace will trigger fs mounts asynchronously - * and the kernel will create zvol minors asynchronously. As a result, - * the value provided here is a bit arbitrary, but represents a - * reasonable estimate of how many txgs it will take to finish fully - * importing a pool + * Number of TXGs to wait after importing before starting background + * work (async destroys, scan/scrub/resilver operations). This allows + * the import command and filesystem mounts to complete quickly without + * being delayed by background activities. The value is somewhat arbitrary + * since userspace triggers filesystem mounts asynchronously, but 5 TXGs + * provides a reasonable window for import completion in most cases. */ -#define SCAN_IMPORT_WAIT_TXGS 5 +static uint_t zfs_import_defer_txgs = 5; #define DSL_SCAN_IS_SCRUB_RESILVER(scn) \ ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ @@ -4394,6 +4392,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (spa_shutting_down(spa)) return; + /* + * Wait a few txgs after importing before doing background work + * (async destroys and scanning). This should help the import + * command to complete quickly. + */ + if (spa->spa_syncing_txg < spa->spa_first_txg + zfs_import_defer_txgs) + return; + /* * If the scan is inactive due to a stalled async destroy, try again. 
*/ @@ -4430,13 +4436,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (!dsl_scan_is_running(scn) || dsl_scan_is_paused_scrub(scn)) return; - /* - * Wait a few txgs after importing to begin scanning so that - * we can get the pool imported quickly. - */ - if (spa->spa_syncing_txg < spa->spa_first_txg + SCAN_IMPORT_WAIT_TXGS) - return; - /* * zfs_scan_suspend_progress can be set to disable scan progress. * We don't want to spin the txg_sync thread, so we add a delay @@ -5336,6 +5335,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_issue_strategy, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scan_legacy, INT, ZMOD_RW, "Scrub using legacy non-sequential method"); +ZFS_MODULE_PARAM(zfs, zfs_, import_defer_txgs, UINT, ZMOD_RW, + "Number of TXGs to defer background work after pool import"); + ZFS_MODULE_PARAM(zfs, zfs_, scan_checkpoint_intval, UINT, ZMOD_RW, "Scan progress on-disk checkpointing interval");