From 34906f8bbee337ee5aa9b79c141517bff0a4e0ab Mon Sep 17 00:00:00 2001 From: Rob N Date: Sat, 25 May 2024 11:55:47 +1000 Subject: [PATCH] zap: reuse zap_leaf_t on dbuf reuse after shrink If a shrink or truncate had recently freed a portion of the ZAP, the dbuf could still be sitting on the dbuf cache waiting for eviction. If it is then allocated for a new leaf before it can be evicted, the zap_leaf_t is still attached as userdata, tripping the VERIFY. Instead, just check for the userdata, and if we find it, reuse it. Sponsored-by: Klara, Inc. Sponsored-by: iXsystems, Inc. Reviewed-by: Alexander Motin Signed-off-by: Rob Norris Closes #16157. Closes #16204 --- module/zfs/zap.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 81dab80da..03b76ea1b 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -425,20 +425,36 @@ zap_leaf_evict_sync(void *dbu) static zap_leaf_t * zap_create_leaf(zap_t *zap, dmu_tx_t *tx) { - zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); - ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL); - rw_enter(&l->l_rwlock, RW_WRITER); - l->l_blkid = zap_allocate_blocks(zap, 1); - l->l_dbuf = NULL; + uint64_t blkid = zap_allocate_blocks(zap, 1); + dmu_buf_t *db = NULL; VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode, - l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, + blkid << FZAP_BLOCK_SHIFT(zap), NULL, &db, DMU_READ_NO_PREFETCH)); - dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf); - VERIFY3P(NULL, ==, dmu_buf_set_user(l->l_dbuf, &l->l_dbu)); + + /* + * Create the leaf structure and stash it on the dbuf. If zap was + * recently shrunk or truncated, the dbuf might have been sitting in the + * cache waiting to be evicted, and so still have the old leaf attached + * to it. If so, just reuse it. 
+ */ + zap_leaf_t *l = dmu_buf_get_user(db); + if (l == NULL) { + l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); + l->l_blkid = blkid; + l->l_dbuf = db; + rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL); + dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, + &l->l_dbuf); + dmu_buf_set_user(l->l_dbuf, &l->l_dbu); + } else { + ASSERT3U(l->l_blkid, ==, blkid); + ASSERT3P(l->l_dbuf, ==, db); + } + + rw_enter(&l->l_rwlock, RW_WRITER); dmu_buf_will_dirty(l->l_dbuf, tx); zap_leaf_init(l, zap->zap_normflags != 0);