BRT: Fix ZAP entry endianness

During the original block cloning implementation a mistake was made:
BRT ZAP entries were stored as an array of eight 1-byte values instead
of a single 8-byte value. This makes the pools non-endian-safe.

This commit introduces a new read-only compatible pool feature,
"com.truenas:block_cloning_endian", which fixes the endianness issue
for new pools while maintaining compatibility with existing ones.

The feature is automatically activated when creating the first BRT
ZAP (ensuring we don't activate it on pools that already have BRT
entries in the old format).  When active, BRT entries are stored
as single 8-byte values.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <alexander.motin@TrueNAS.com>
Closes #17572
This commit is contained in:
Alexander Motin 2025-07-30 12:42:47 -04:00 committed by GitHub
parent 10a78e2647
commit f70c85086b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 81 additions and 19 deletions

View File

@ -88,6 +88,7 @@ typedef enum spa_feature {
SPA_FEATURE_LONGNAME, SPA_FEATURE_LONGNAME,
SPA_FEATURE_LARGE_MICROZAP, SPA_FEATURE_LARGE_MICROZAP,
SPA_FEATURE_DYNAMIC_GANG_HEADER, SPA_FEATURE_DYNAMIC_GANG_HEADER,
SPA_FEATURE_BLOCK_CLONING_ENDIAN,
SPA_FEATURES SPA_FEATURES
} spa_feature_t; } spa_feature_t;

View File

@ -638,7 +638,7 @@
<elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2520' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='spa_feature_table' size='2576' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='528' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_deleg_perm_tab' size='528' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -6397,7 +6397,8 @@
<enumerator name='SPA_FEATURE_LONGNAME' value='42'/> <enumerator name='SPA_FEATURE_LONGNAME' value='42'/>
<enumerator name='SPA_FEATURE_LARGE_MICROZAP' value='43'/> <enumerator name='SPA_FEATURE_LARGE_MICROZAP' value='43'/>
<enumerator name='SPA_FEATURE_DYNAMIC_GANG_HEADER' value='44'/> <enumerator name='SPA_FEATURE_DYNAMIC_GANG_HEADER' value='44'/>
<enumerator name='SPA_FEATURES' value='45'/> <enumerator name='SPA_FEATURE_BLOCK_CLONING_ENDIAN' value='45'/>
<enumerator name='SPA_FEATURES' value='46'/>
</enum-decl> </enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/> <typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/> <qualified-type-def type-id='80f4b756' const='yes' id='b99c00c9'/>
@ -9604,8 +9605,8 @@
</function-decl> </function-decl>
</abi-instr> </abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'> <abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='20160' id='b948da70'> <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='20608' id='b9408bab'>
<subrange length='45' type-id='7359adad' id='cb8ddca0'/> <subrange length='46' type-id='7359adad' id='8b86bc1b'/>
</array-type-def> </array-type-def>
<enum-decl name='zfeature_flags' id='6db816a4'> <enum-decl name='zfeature_flags' id='6db816a4'>
<underlying-type type-id='9cac1fee'/> <underlying-type type-id='9cac1fee'/>
@ -9683,7 +9684,7 @@
<pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/> <pointer-type-def type-id='611586a1' size-in-bits='64' id='2e243169'/>
<qualified-type-def type-id='eaa32e2f' const='yes' id='83be723c'/> <qualified-type-def type-id='eaa32e2f' const='yes' id='83be723c'/>
<pointer-type-def type-id='83be723c' size-in-bits='64' id='7acd98a2'/> <pointer-type-def type-id='83be723c' size-in-bits='64' id='7acd98a2'/>
<var-decl name='spa_feature_table' type-id='b948da70' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/> <var-decl name='spa_feature_table' type-id='b9408bab' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
<var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/> <var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/>
<function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'> <function-decl name='opendir' visibility='default' binding='global' size-in-bits='64'>
<parameter type-id='80f4b756'/> <parameter type-id='80f4b756'/>

View File

@ -401,6 +401,17 @@ This feature becomes
.Sy active .Sy active
when first block is cloned. when first block is cloned.
When the last cloned block is freed, it goes back to the enabled state. When the last cloned block is freed, it goes back to the enabled state.
.feature com.truenas block_cloning_endian yes
This feature corrects ZAP entry endianness issues in the Block Reference
Table (BRT) used by block cloning.
During the original block cloning implementation, BRT ZAP entries were
mistakenly stored as arrays of 8 single-byte entries instead of single
8-byte entries, making pools non-endian-safe.
.Pp
This feature is activated when the first BRT ZAP is created, so it is never
activated on pools that already contain BRT entries in the old format.
When active, new BRT entries are stored in the correct endian-safe format.
The feature becomes inactive when all BRT ZAPs are destroyed.
.feature com.delphix bookmarks yes extensible_dataset .feature com.delphix bookmarks yes extensible_dataset
This feature enables use of the This feature enables use of the
.Nm zfs Cm bookmark .Nm zfs Cm bookmark

View File

@ -732,6 +732,12 @@ zpool_feature_init(void)
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL, ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures); sfeatures);
zfeature_register(SPA_FEATURE_BLOCK_CLONING_ENDIAN,
"com.truenas:block_cloning_endian", "block_cloning_endian",
"Fixes BRT ZAP endianness on new pools.",
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);
zfeature_register(SPA_FEATURE_AVZ_V2, zfeature_register(SPA_FEATURE_AVZ_V2,
"com.klarasystems:vdev_zaps_v2", "vdev_zaps_v2", "com.klarasystems:vdev_zaps_v2", "vdev_zaps_v2",
"Support for root vdev ZAP.", "Support for root vdev ZAP.",

View File

@ -478,6 +478,18 @@ brt_vdev_create(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
sizeof (uint64_t), 1, &brtvd->bv_mos_brtvdev, tx)); sizeof (uint64_t), 1, &brtvd->bv_mos_brtvdev, tx));
BRT_DEBUG("Pool directory object created, object=%s", name); BRT_DEBUG("Pool directory object created, object=%s", name);
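/*
* Activate the endian-fixed feature if this is the first BRT ZAP
* (i.e., BLOCK_CLONING is not yet active) and the feature is enabled.
* If the feature is already active, take one more reference for this
* BRT ZAP; brt_vdev_destroy() drops one when a BRT ZAP is destroyed.
*/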
if (spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN) &&
!spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
} else if (spa_feature_is_active(spa,
SPA_FEATURE_BLOCK_CLONING_ENDIAN)) {
spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
}
spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING, tx); spa_feature_incr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
} }
@ -658,6 +670,8 @@ brt_vdev_destroy(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx)
rw_exit(&brtvd->bv_lock); rw_exit(&brtvd->bv_lock);
spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING, tx); spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING, tx);
if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN))
spa_feature_decr(spa, SPA_FEATURE_BLOCK_CLONING_ENDIAN, tx);
} }
static void static void
@ -855,16 +869,29 @@ brt_entry_fill(const blkptr_t *bp, brt_entry_t *bre, uint64_t *vdevidp)
*vdevidp = DVA_GET_VDEV(&bp->blk_dva[0]); *vdevidp = DVA_GET_VDEV(&bp->blk_dva[0]);
} }
/*
 * Report whether SPA_FEATURE_BLOCK_CLONING_ENDIAN is active on this pool.
 *
 * Callers use the answer to choose the BRT ZAP entry layout: when the
 * feature is active the reference count is accessed as one 8-byte integer,
 * otherwise as eight 1-byte integers (the original layout).
 */
static boolean_t
brt_has_endian_fixed(spa_t *spa)
{
	boolean_t endian_fixed;

	endian_fixed = spa_feature_is_active(spa,
	    SPA_FEATURE_BLOCK_CLONING_ENDIAN);
	return (endian_fixed);
}
static int static int
brt_entry_lookup(brt_vdev_t *brtvd, brt_entry_t *bre) brt_entry_lookup(spa_t *spa, brt_vdev_t *brtvd, brt_entry_t *bre)
{ {
uint64_t off = BRE_OFFSET(bre); uint64_t off = BRE_OFFSET(bre);
if (brtvd->bv_mos_entries == 0) if (brtvd->bv_mos_entries == 0)
return (SET_ERROR(ENOENT)); return (SET_ERROR(ENOENT));
return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode, if (brt_has_endian_fixed(spa)) {
&off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count), &bre->bre_count)); return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
&off, BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
&bre->bre_count));
} else {
return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode,
&off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
&bre->bre_count));
}
} }
/* /*
@ -1056,7 +1083,7 @@ brt_entry_decref(spa_t *spa, const blkptr_t *bp)
} }
rw_exit(&brtvd->bv_lock); rw_exit(&brtvd->bv_lock);
error = brt_entry_lookup(brtvd, &bre_search); error = brt_entry_lookup(spa, brtvd, &bre_search);
/* bre_search now contains correct bre_count */ /* bre_search now contains correct bre_count */
if (error == ENOENT) { if (error == ENOENT) {
BRTSTAT_BUMP(brt_decref_no_entry); BRTSTAT_BUMP(brt_decref_no_entry);
@ -1118,7 +1145,7 @@ brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
bre = avl_find(&brtvd->bv_tree, &bre_search, NULL); bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
if (bre == NULL) { if (bre == NULL) {
rw_exit(&brtvd->bv_lock); rw_exit(&brtvd->bv_lock);
error = brt_entry_lookup(brtvd, &bre_search); error = brt_entry_lookup(spa, brtvd, &bre_search);
if (error == ENOENT) { if (error == ENOENT) {
refcnt = 0; refcnt = 0;
} else { } else {
@ -1270,10 +1297,18 @@ brt_pending_apply_vdev(spa_t *spa, brt_vdev_t *brtvd, uint64_t txg)
uint64_t off = BRE_OFFSET(bre); uint64_t off = BRE_OFFSET(bre);
if (brtvd->bv_mos_entries != 0 && if (brtvd->bv_mos_entries != 0 &&
brt_vdev_lookup(spa, brtvd, off)) { brt_vdev_lookup(spa, brtvd, off)) {
int error = zap_lookup_uint64_by_dnode( int error;
brtvd->bv_mos_entries_dnode, &off, if (brt_has_endian_fixed(spa)) {
BRT_KEY_WORDS, 1, sizeof (bre->bre_count), error = zap_lookup_uint64_by_dnode(
&bre->bre_count); brtvd->bv_mos_entries_dnode, &off,
BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
&bre->bre_count);
} else {
error = zap_lookup_uint64_by_dnode(
brtvd->bv_mos_entries_dnode, &off,
BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
&bre->bre_count);
}
if (error == 0) { if (error == 0) {
BRTSTAT_BUMP(brt_addref_entry_on_disk); BRTSTAT_BUMP(brt_addref_entry_on_disk);
} else { } else {
@ -1326,7 +1361,7 @@ brt_pending_apply(spa_t *spa, uint64_t txg)
} }
static void static void
brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx) brt_sync_entry(spa_t *spa, dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
{ {
uint64_t off = BRE_OFFSET(bre); uint64_t off = BRE_OFFSET(bre);
@ -1337,9 +1372,15 @@ brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
BRT_KEY_WORDS, tx); BRT_KEY_WORDS, tx);
VERIFY(error == 0 || error == ENOENT); VERIFY(error == 0 || error == ENOENT);
} else { } else {
VERIFY0(zap_update_uint64_by_dnode(dn, &off, if (brt_has_endian_fixed(spa)) {
BRT_KEY_WORDS, 1, sizeof (bre->bre_count), VERIFY0(zap_update_uint64_by_dnode(dn, &off,
&bre->bre_count, tx)); BRT_KEY_WORDS, sizeof (bre->bre_count), 1,
&bre->bre_count, tx));
} else {
VERIFY0(zap_update_uint64_by_dnode(dn, &off,
BRT_KEY_WORDS, 1, sizeof (bre->bre_count),
&bre->bre_count, tx));
}
} }
} }
@ -1368,7 +1409,8 @@ brt_sync_table(spa_t *spa, dmu_tx_t *tx)
void *c = NULL; void *c = NULL;
while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) { while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) {
brt_sync_entry(brtvd->bv_mos_entries_dnode, bre, tx); brt_sync_entry(spa, brtvd->bv_mos_entries_dnode, bre,
tx);
kmem_cache_free(brt_entry_cache, bre); kmem_cache_free(brt_entry_cache, bre);
} }

View File

@ -115,5 +115,6 @@ if is_linux || is_freebsd; then
"feature@fast_dedup" "feature@fast_dedup"
"feature@longname" "feature@longname"
"feature@large_microzap" "feature@large_microzap"
"feature@block_cloning_endian"
) )
fi fi