From 66ec7fb26950105da3519038dceeee0648604bbb Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 11 Jun 2025 14:50:49 -0400 Subject: [PATCH] Reduce zfs_dmu_offset_next_sync penalty Looking at txg_wait_synced(, 0) I've noticed that it always syncs 5 TXGs: 3 TXG_CONCURRENT_STATES + 2 TXG_DEFER_SIZE. But in the case of dmu_offset_next() we do not care about deferred frees. And even for the concurrent TXGs we might not need to sync all 3 if the dnode was not dirtied in the last few TXGs. This patch makes dmu_offset_next() sync one TXG at a time until the dnode is clean, but no more than TXG_CONCURRENT_STATES (3) times. My tests with random simultaneous writes and seeks over many files on an HDD pool show a 7-14% performance increase. Reviewed-by: Brian Behlendorf Reviewed-by: Rob Norris Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #17434 --- module/zfs/dmu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 9a2dfe9b1..9ef3241f9 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2530,7 +2530,8 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; - int restarted = 0, err; + uint64_t txg, maxtxg = 0; + int err; restart: err = dnode_hold(os, object, FTAG, &dn); @@ -2546,19 +2547,22 @@ restart: * must be synced to disk to accurately report holes. * * Provided a RL_READER rangelock spanning 0-UINT64_MAX is - * held by the caller only a single restart will be required. + * held by the caller only limited restarts will be required. * We tolerate callers which do not hold the rangelock by - * returning EBUSY and not reporting holes after one restart. + * returning EBUSY and not reporting holes after at most + * TXG_CONCURRENT_STATES (3) restarts.
*/ if (zfs_dmu_offset_next_sync) { rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); - if (restarted) + if (maxtxg == 0) { + txg = spa_last_synced_txg(dmu_objset_spa(os)); + maxtxg = txg + TXG_CONCURRENT_STATES; + } else if (txg >= maxtxg) return (SET_ERROR(EBUSY)); - txg_wait_synced(dmu_objset_pool(os), 0); - restarted = 1; + txg_wait_synced(dmu_objset_pool(os), ++txg); goto restart; }