From 21bbe7cb676b566fb70b5ddeb178d72220da8b2c Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 4 Feb 2026 13:12:32 -0500 Subject: [PATCH] Improve caching for dbuf prefetches To avoid read errors with a transaction open, dmu_tx_check_ioerr() is used to read everything required in advance. But there seems to be a chance for the buffer to be evicted from the dbuf cache in between, which results in immediate eviction from ARC, which may require an additional disk read later in a place where error handling is problematic. To partially work around this, introduce a new flag DMU_IS_PREFETCH, relayed to ARC as ARC_FLAG_PREFETCH | ARC_FLAG_PRESCIENT_PREFETCH, making ARC delay eviction by at least several seconds, or until the actual read inside the transaction, which will promote it to demand access. Reviewed-by: Brian Behlendorf Reviewed-by: Tony Hutter Signed-off-by: Alexander Motin Closes #18160 --- include/sys/dmu.h | 1 + module/zfs/dbuf.c | 4 +++- module/zfs/dmu_tx.c | 7 ++++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index aae99d71b..bb623e404 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -573,6 +573,7 @@ typedef enum dmu_flags { DMU_PARTIAL_FIRST = 1 << 7, /* First partial access. */ DMU_PARTIAL_MORE = 1 << 8, /* Following partial access. */ DMU_KEEP_CACHING = 1 << 9, /* Don't affect caching. */ + DMU_IS_PREFETCH = 1 << 10, /* This read is a prefetch. 
*/ } dmu_flags_t; /* diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index d2d61819c..e5fe400c4 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1637,6 +1637,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, dmu_flags_t flags, aflags |= ARC_FLAG_UNCACHED; else if (dbuf_is_l2cacheable(db, bp)) aflags |= ARC_FLAG_L2CACHE; + if (flags & DMU_IS_PREFETCH) + aflags |= ARC_FLAG_PREFETCH | ARC_FLAG_PRESCIENT_PREFETCH; dbuf_add_ref(db, NULL); @@ -1769,7 +1771,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, dmu_flags_t flags) mutex_enter(&db->db_mtx); if (!(flags & (DMU_UNCACHEDIO | DMU_KEEP_CACHING))) db->db_pending_evict = B_FALSE; - if (flags & DMU_PARTIAL_FIRST) + if (flags & (DMU_PARTIAL_FIRST | DMU_IS_PREFETCH)) db->db_partial_read = B_TRUE; else if (!(flags & (DMU_PARTIAL_MORE | DMU_KEEP_CACHING))) db->db_partial_read = B_FALSE; diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 40c0b3402..2150eb571 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -220,11 +220,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid) if (err != 0) return (err); /* - * PARTIAL_FIRST allows caching for uncacheable blocks. It will - * be cleared after dmu_buf_will_dirty() call dbuf_read() again. + * DMU_IS_PREFETCH keeps the buffer temporarily in DBUF cache and ARC + * to avoid immediate eviction after the check. It will be promoted + * to demand access when dmu_buf_will_dirty() read it again. */ err = dbuf_read(db, zio, DB_RF_CANFAIL | DMU_READ_NO_PREFETCH | - (level == 0 ? (DMU_UNCACHEDIO | DMU_PARTIAL_FIRST) : 0)); + (level == 0 ? (DMU_KEEP_CACHING | DMU_IS_PREFETCH) : 0)); dbuf_rele(db, FTAG); return (err); }