From f70c85086bbd67a195d4ad540aa8b2f252c0aae0 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 30 Jul 2025 12:42:47 -0400 Subject: [PATCH] BRT: Fix ZAP entry endianness During original block cloning implementation a mistake was made, making BRT ZAP entries an array of 8 1-byte entries instead of 1 entry of 8 bytes. This makes the pools non-endian-safe. This commit introduces a new read-compatible pool feature "com.truenas:block_cloning_endian", fixing the endianness issue for new pools while maintaining compatibility with existing ones. The feature is automatically activated when creating the first BRT ZAP (ensuring we don't activate it on pools that already have BRT entries in the old format). When active, BRT entries are stored as single 8-byte values. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Closes #17572 --- include/zfeature_common.h | 1 + lib/libzfs/libzfs.abi | 11 +-- man/man7/zpool-features.7 | 11 +++ module/zcommon/zfeature_common.c | 6 ++ module/zfs/brt.c | 70 +++++++++++++++---- .../cli_root/zpool_get/zpool_get.cfg | 1 + 6 files changed, 81 insertions(+), 19 deletions(-) diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 53e1ecae3..4877df4b1 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -88,6 +88,7 @@ typedef enum spa_feature { SPA_FEATURE_LONGNAME, SPA_FEATURE_LARGE_MICROZAP, SPA_FEATURE_DYNAMIC_GANG_HEADER, + SPA_FEATURE_BLOCK_CLONING_ENDIAN, SPA_FEATURES } spa_feature_t; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 0c3e8106c..bd2ab6468 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -638,7 +638,7 @@ - + @@ -6397,7 +6397,8 @@ - + + @@ -9604,8 +9605,8 @@ - - + + @@ -9683,7 +9684,7 @@ - + diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index 7ec271164..66aa100b7 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -401,6 +401,17 @@ This feature becomes .Sy active when first block is cloned. When the last cloned block is freed, it goes back to the enabled state. +.feature com.truenas block_cloning_endian yes +This feature corrects ZAP entry endianness issues in the Block Reference +Table (BRT) used by block cloning. +During the original block cloning implementation, BRT ZAP entries were +mistakenly stored as arrays of 8 single-byte entries instead of single +8-byte entries, making pools non-endian-safe. +.Pp +This feature is activated when the first BRT ZAP is created (that way +ensuring compatibility with existing pools). +When active, new BRT entries are stored in the correct endian-safe format. +The feature becomes inactive when all BRT ZAPs are destroyed. .feature com.delphix bookmarks yes extensible_dataset This feature enables use of the .Nm zfs Cm bookmark diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 8ac1c7cab..0b37530b0 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -732,6 +732,12 @@ zpool_feature_init(void) ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + zfeature_register(SPA_FEATURE_BLOCK_CLONING_ENDIAN, + "com.truenas:block_cloning_endian", "block_cloning_endian", + "Fixes BRT ZAP endianness on new pools.", + ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL, + sfeatures); + zfeature_register(SPA_FEATURE_AVZ_V2, "com.klarasystems:vdev_zaps_v2", "vdev_zaps_v2", "Support for root vdev ZAP.", diff --git a/module/zfs/brt.c b/module/zfs/brt.c index 27d9ed7ea..40664354a 100644 --- a/module/zfs/brt.c +++ b/module/zfs/brt.c @@ -478,6 +478,18 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx) sizeof (uint64_t), 1, &brtvd->bv_mos_brtvdev, tx)); BRT_DEBUG("Pool directory object created, object=%s", name); + /* + * Activate the endian-fixed feature if this is the first BRT ZAP + * (i.e., BLOCK_CLONING is not yet active) and the feature is enabled. + */ + if (spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN) && + !spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) { + spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx); + } else if (spa_feature_is_active(spa, + SPA_FEATURE_BLOCK_CLONING_ENDIAN)) { + spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx); + } + spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING, tx); } @@ -658,6 +670,8 @@ brt_vdev_destroy(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx) rw_exit(&brtvd->bv_lock); spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING, tx); + if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN)) + spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx); } static void @@ -855,16 +869,29 @@ brt_entry_fill(const blkptr_t *bp, brt_entry_t *bre, uint64_t *vdevidp) *vdevidp = DVA_GET_VDEV(&bp->blk_dva[0]); } +static boolean_t +brt_has_endian_fixed(spa_t *spa) +{ + return (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN)); +} + static int -brt_entry_lookup(brt_vdev_t *brtvd, brt_entry_t *bre) +brt_entry_lookup(spa_t *spa, brt_vdev_t *brtvd, brt_entry_t *bre) { uint64_t off = BRE_OFFSET(bre); if (brtvd->bv_mos_entries == 0) return (SET_ERROR(ENOENT)); - return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode, - &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count), &bre->bre_count)); + if (brt_has_endian_fixed(spa)) { + return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode, + &off, BRT_KEY_WORDS, sizeof (bre->bre_count), 1, + &bre->bre_count)); + } else { + return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode, + &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count), + &bre->bre_count)); + } } /* @@ -1056,7 +1083,7 @@ brt_entry_decref(spa_t *spa, const blkptr_t *bp) } rw_exit(&brtvd->bv_lock); - error = brt_entry_lookup(brtvd, &bre_search); + error = brt_entry_lookup(spa, brtvd, &bre_search); /* bre_search now contains correct bre_count */ if (error == ENOENT) { BRTSTAT_BUMP(brt_decref_no_entry); @@ -1118,7 +1145,7 @@ brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp) bre = avl_find(&brtvd->bv_tree, &bre_search, NULL); if (bre == NULL) { rw_exit(&brtvd->bv_lock); - error = brt_entry_lookup(brtvd, &bre_search); + error = brt_entry_lookup(spa, brtvd, &bre_search); if (error == ENOENT) { refcnt = 0; } else { @@ -1270,10 +1297,18 @@ brt_pending_apply_vdev(spa_t *spa, brt_vdev_t *brtvd, uint64_t txg) uint64_t off = BRE_OFFSET(bre); if (brtvd->bv_mos_entries != 0 && brt_vdev_lookup(spa, brtvd, off)) { - int error = zap_lookup_uint64_by_dnode( - brtvd->bv_mos_entries_dnode, &off, - BRT_KEY_WORDS, 1, sizeof (bre->bre_count), - &bre->bre_count); + int error; + if (brt_has_endian_fixed(spa)) { + error = zap_lookup_uint64_by_dnode( + brtvd->bv_mos_entries_dnode, &off, + BRT_KEY_WORDS, sizeof (bre->bre_count), 1, + &bre->bre_count); + } else { + error = zap_lookup_uint64_by_dnode( + brtvd->bv_mos_entries_dnode, &off, + BRT_KEY_WORDS, 1, sizeof (bre->bre_count), + &bre->bre_count); + } if (error == 0) { BRTSTAT_BUMP(brt_addref_entry_on_disk); } else { @@ -1326,7 +1361,7 @@ brt_pending_apply(spa_t *spa, uint64_t txg) } static void -brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx) +brt_sync_entry(spa_t *spa, dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx) { uint64_t off = BRE_OFFSET(bre); @@ -1337,9 +1372,15 @@ brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx) BRT_KEY_WORDS, tx); VERIFY(error == 0 || error == ENOENT); } else { - VERIFY0(zap_update_uint64_by_dnode(dn, &off, - BRT_KEY_WORDS, 1, sizeof (bre->bre_count), - &bre->bre_count, tx)); + if (brt_has_endian_fixed(spa)) { + VERIFY0(zap_update_uint64_by_dnode(dn, &off, + BRT_KEY_WORDS, sizeof (bre->bre_count), 1, + &bre->bre_count, tx)); + } else { + VERIFY0(zap_update_uint64_by_dnode(dn, &off, + BRT_KEY_WORDS, 1, sizeof (bre->bre_count), + &bre->bre_count, tx)); + } } } @@ -1368,7 +1409,8 @@ brt_sync_table(spa_t *spa, dmu_tx_t *tx) void *c = NULL; while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) { - brt_sync_entry(brtvd->bv_mos_entries_dnode, bre, tx); + brt_sync_entry(spa, brtvd->bv_mos_entries_dnode, bre, + tx); kmem_cache_free(brt_entry_cache, bre); } diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 6de086976..3389dcf72 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -115,5 +115,6 @@ if is_linux || is_freebsd; then "feature@fast_dedup" "feature@longname" "feature@large_microzap" + "feature@block_cloning_endian" ) fi