From e3acd0a728a83f90659c3204cf75fb9904ef4e4f Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Sun, 26 Oct 2025 19:41:48 -0400 Subject: [PATCH] Fix caching of DDT log and BRT Both DDT log and BRT counters we read on pool import and then only append or overwrite in full blocks. We don't need them in DMU or ARC caches. Fortunately we have DMU_UNCACHEDIO for this now. Even more we don't need BRT in non-evictable metadata DMU caches, since it will likely never fit there, while block the cache from its original users. Since DMU_OT_IS_METADATA_CACHED() has no way to differentiate the new metadata types, mark BRT with storage type of DMU_OT_DDT_ZAP. As side effect it will also put it on dedup device, but that should actually be right. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #17875 --- include/sys/dmu.h | 2 +- module/zfs/brt.c | 6 ++++-- module/zfs/dbuf.c | 5 ++++- module/zfs/ddt_log.c | 7 ++++--- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index a33e16a06..aae99d71b 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -126,7 +126,7 @@ typedef enum dmu_object_byteswap { (ot) < DMU_OT_NUMTYPES) #define DMU_OT_IS_METADATA_CACHED(ot) (((ot) & DMU_OT_NEWTYPE) ? \ - B_TRUE : dmu_ot[(ot)].ot_dbuf_metadata_cache) + ((ot) & DMU_OT_METADATA) != 0 : dmu_ot[(ot)].ot_dbuf_metadata_cache) /* * MDB doesn't have dmu_ot; it defines these macros itself. diff --git a/module/zfs/brt.c b/module/zfs/brt.c index eb74ce96a..9e389c872 100644 --- a/module/zfs/brt.c +++ b/module/zfs/brt.c @@ -454,6 +454,7 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx) VERIFY(mos_entries != 0); VERIFY0(dnode_hold(spa->spa_meta_objset, mos_entries, brtvd, &brtvd->bv_mos_entries_dnode)); + dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP); rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER); brtvd->bv_mos_entries = mos_entries; rw_exit(&brtvd->bv_mos_entries_lock); @@ -580,13 +581,14 @@ brt_vdev_load(spa_t *spa, brt_vdev_t *brtvd) */ error = dmu_read(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0, MIN(brtvd->bv_size, bvphys->bvp_size) * sizeof (uint16_t), - brtvd->bv_entcount, DMU_READ_NO_PREFETCH); + brtvd->bv_entcount, DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO); if (error != 0) return (error); ASSERT(bvphys->bvp_mos_entries != 0); VERIFY0(dnode_hold(spa->spa_meta_objset, bvphys->bvp_mos_entries, brtvd, &brtvd->bv_mos_entries_dnode)); + dnode_set_storage_type(brtvd->bv_mos_entries_dnode, DMU_OT_DDT_ZAP); rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER); brtvd->bv_mos_entries = bvphys->bvp_mos_entries; rw_exit(&brtvd->bv_mos_entries_lock); @@ -809,7 +811,7 @@ brt_vdev_sync(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx) uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0, nblocks * BRT_BLOCKSIZE, brtvd->bv_entcount, tx, - DMU_READ_NO_PREFETCH); + DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO); memset(brtvd->bv_bitmap, 0, BT_SIZEOFMAP(nblocks)); brtvd->bv_entcount_dirty = FALSE; } diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index fccc4c5b5..72c597609 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -446,7 +446,10 @@ static boolean_t dbuf_include_in_metadata_cache(dmu_buf_impl_t *db) { DB_DNODE_ENTER(db); - dmu_object_type_t type = DB_DNODE(db)->dn_type; + dnode_t *dn = DB_DNODE(db); + dmu_object_type_t type = dn->dn_storage_type; + if (type == DMU_OT_NONE) + type = dn->dn_type; DB_DNODE_EXIT(db); /* Check if this dbuf is one of the types we care about */ diff --git a/module/zfs/ddt_log.c b/module/zfs/ddt_log.c index c7a2426f3..3d42c5136 100644 --- a/module/zfs/ddt_log.c +++ b/module/zfs/ddt_log.c @@ -222,7 +222,7 @@ ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu) VERIFY0(dmu_buf_hold_array_by_dnode(dlu->dlu_dn, offset, length, B_FALSE, FTAG, &dlu->dlu_ndbp, &dlu->dlu_dbp, - DMU_READ_NO_PREFETCH)); + DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO)); dlu->dlu_tx = tx; dlu->dlu_block = dlu->dlu_offset = 0; @@ -298,7 +298,8 @@ ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, ddt_log_update_t *dlu) * we will fill it, and zero it out. */ if (dlu->dlu_offset == 0) { - dmu_buf_will_fill(db, dlu->dlu_tx, B_FALSE); + dmu_buf_will_fill_flags(db, dlu->dlu_tx, B_FALSE, + DMU_UNCACHEDIO); memset(db->db_data, 0, db->db_size); } @@ -597,7 +598,7 @@ ddt_log_load_one(ddt_t *ddt, uint_t n) for (uint64_t offset = 0; offset < hdr.dlh_length; offset += dn->dn_datablksz) { err = dmu_buf_hold_by_dnode(dn, offset, FTAG, &db, - DMU_READ_PREFETCH); + DMU_READ_PREFETCH | DMU_UNCACHEDIO); if (err != 0) { dnode_rele(dn, FTAG); ddt_log_empty(ddt, ddl);