Implement dynamic gang header sizes

ZFS gang block headers are currently fixed at 512 bytes. This is
increasingly wasteful in the era of larger disk sector sizes. This PR
allows any size allocation to work as a gang header. It also contains
supporting changes to ZDB to make gang headers easier to work with.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Rob Norris <rob.norris@klarasystems.com>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Closes #17004
This commit is contained in:
Paul Dagnelie
2025-01-23 16:26:09 -08:00
committed by Brian Behlendorf
parent e845be28e7
commit a981cb69e4
18 changed files with 387 additions and 72 deletions
+2 -1
View File
@@ -739,7 +739,8 @@ tags = ['functional', 'features', 'large_dnode']
[tests/functional/gang_blocks]
tests = ['gang_blocks_001_pos', 'gang_blocks_redundant',
'gang_blocks_ddt_copies']
'gang_blocks_ddt_copies', 'gang_blocks_dyn_header_pos',
'gang_blocks_dyn_header_neg', 'gang_blocks_dyn_multi']
tags = ['functional', 'gang_blocks']
[tests/functional/grow]
+3
View File
@@ -1579,6 +1579,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/gang_blocks/gang_blocks_001_pos.ksh \
functional/gang_blocks/gang_blocks_ddt_copies.ksh \
functional/gang_blocks/gang_blocks_redundant.ksh \
functional/gang_blocks/gang_blocks_dyn_header_neg.ksh \
functional/gang_blocks/gang_blocks_dyn_header_pos.ksh \
functional/gang_blocks/gang_blocks_dyn_multi.ksh \
functional/gang_blocks/setup.ksh \
functional/grow/grow_pool_001_pos.ksh \
functional/grow/grow_replicas_001_pos.ksh \
@@ -50,7 +50,7 @@ function cleanup
function check_features
{
for state in $(zpool get all $TESTPOOL | \
for state in $(zpool get all $TESTPOOL | grep -v "dynamic_gang_header" | \
awk '$2 ~ /feature@/ { print $3 }'); do
if [[ "$state" != "enabled" && "$state" != "active" ]]; then
log_fail "some features are not enabled on new pool"
@@ -58,6 +58,9 @@ function check_features
return 1;
fi
else
if [[ "feature@dynamic_gang_header" == "${2}" ]]; then
continue
fi
# Failure other features must be enabled or active.
if [[ "${3}" != "enabled" && "${3}" != "active" ]]; then
return 2;
@@ -91,6 +91,7 @@ typeset -a properties=(
"feature@device_rebuild"
"feature@draid"
"feature@redaction_list_spill"
"feature@dynamic_gang_header"
)
if is_linux || is_freebsd; then
@@ -0,0 +1,53 @@
#!/bin/ksh
# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2025 by Klara Inc.
#
#
# Description:
# Verify that we don't use larger gang headers on ashift=9 pools
#
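# Strategy:
# 1. Create an ashift=9 pool with dynamic gang headers enabled.
# 2. Set metaslab_force_ganging to force multi-level ganging.
# 3. Verify that a large file has multi-level ganging.
# 4. Verify that the dynamic_gang_header feature is still only enabled,
#    not active.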
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/gang_blocks/gang_blocks.kshlib

log_assert "Verify that we don't use large gang headers on small-ashift pools."

log_onexit cleanup
preamble

# NOTE(review): $DISKS is left unquoted on purpose; it presumably holds a
# space-separated list of disks that must split into separate arguments.
log_must zpool create -f -o ashift=9 \
    -o feature@dynamic_gang_header=enabled $TESTPOOL $DISKS
log_must zfs create -o recordsize=1M $TESTPOOL/$TESTFS
mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)

# Force ganging for the write below. Fail right here if a tunable cannot be
# set; otherwise the test would only fail later with a misleading
# "didn't use a deep gang tree" message.
log_must set_tunable64 METASLAB_FORCE_GANGING 200000
log_must set_tunable32 METASLAB_FORCE_GANGING_PCT 100

# Write one 1M record and sync it out before inspecting its block pointer.
path="${mountpoint}/file"
log_must dd if=/dev/urandom of="$path" bs=1M count=1
log_must zpool sync $TESTPOOL

# Count the lines of the top-level gang header dump that mention "gang";
# at least one is required for the file to count as multi-level ganged.
first_block=$(get_first_block_dva $TESTPOOL/$TESTFS file)
leaves=$(read_gang_header $TESTPOOL $first_block 200)
gangs=$(echo "$leaves" | grep -c gang)
[[ "$gangs" -gt 0 ]] || log_fail "We didn't use a deep gang tree when needed"
log_must verify_pool $TESTPOOL

# The feature must still be merely "enabled" after the ganged write.
status=$(get_pool_prop feature@dynamic_gang_header $TESTPOOL)
[[ "$status" == "enabled" ]] || log_fail "Dynamic gang headers active on an ashift-9 pool"

log_pass "We don't use large gang headers on small-ashift pools."
@@ -0,0 +1,73 @@
#!/bin/ksh
# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2025 by Klara Inc.
#
#
# Description:
# Verify that we use larger gang headers on ashift=12 pools
#
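# Strategy:
# 1. Create an ashift=12 pool with dynamic gang headers enabled.
# 2. Set metaslab_force_ganging to force ganging.
# 3. Verify that the first large file written has at most 3 non-hole
#    entries in its gang header, and that the feature is active afterwards.
# 4. Verify that a second large file has more than 3 non-hole entries in
#    its gang header.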
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/gang_blocks/gang_blocks.kshlib

# The assert/pass messages previously repeated the negative test's wording;
# this test checks the opposite case (large headers on an ashift=12 pool).
log_assert "Verify that we use large gang headers on large-ashift pools."

log_onexit cleanup
preamble

# NOTE(review): $DISKS is left unquoted on purpose; it presumably holds a
# space-separated list of disks that must split into separate arguments.
log_must zpool create -f -o ashift=12 \
    -o feature@dynamic_gang_header=enabled $TESTPOOL $DISKS
log_must zfs create -o recordsize=1M $TESTPOOL/$TESTFS
mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)

# Force ganging for the writes below. Fail right here if a tunable cannot
# be set, instead of failing later with a misleading gang-header message.
log_must set_tunable64 METASLAB_FORCE_GANGING 200000
log_must set_tunable32 METASLAB_FORCE_GANGING_PCT 100

# Before any ganged write the feature must be enabled but not yet active.
status=$(get_pool_prop feature@dynamic_gang_header $TESTPOOL)
[[ "$status" == "enabled" ]] || log_fail "Dynamic gang headers not enabled"

#
# First file: expected to have at most 3 non-hole entries in its gang
# header, i.e. the larger header is not used yet.
#
path="${mountpoint}/file"
log_must dd if=/dev/urandom of="$path" bs=1M count=1
log_must zpool sync $TESTPOOL

first_block=$(get_first_block_dva $TESTPOOL/$TESTFS file)
leaves=$(read_gang_header $TESTPOOL $first_block 1000 | grep -v HOLE)
# Take the first field of the first entry and keep only the text between
# '<' and '>'.
first_dva=$(echo "$leaves" | head -n 1 | awk '{print $1}' | \
    sed -e 's/.*<//' -e 's/>.*//')
check_not_gang_dva $first_dva

num_leaves=$(echo "$leaves" | wc -l)
if [[ "$num_leaves" -gt 3 ]]; then
	log_fail "used a larger gang header too soon: \"$leaves\""
fi
log_must verify_pool $TESTPOOL

# After the first ganged write the feature is expected to be active.
status=$(get_pool_prop feature@dynamic_gang_header $TESTPOOL)
[[ "$status" == "active" ]] || log_fail "Dynamic gang headers not active"

#
# Second file: now more than 3 non-hole entries are expected in the gang
# header.
#
path="${mountpoint}/file2"
log_must dd if=/dev/urandom of="$path" bs=1M count=1
log_must zpool sync $TESTPOOL

first_block=$(get_first_block_dva $TESTPOOL/$TESTFS file2)
leaves=$(read_gang_header $TESTPOOL $first_block 1000 | grep -v HOLE)
first_dva=$(echo "$leaves" | head -n 1 | awk '{print $1}' | \
    sed -e 's/.*<//' -e 's/>.*//')
check_not_gang_dva $first_dva

num_leaves=$(echo "$leaves" | wc -l)
if [[ "$num_leaves" -le 3 ]]; then
	log_fail "didn't use a larger gang header: \"$leaves\""
fi
log_must verify_pool $TESTPOOL

status=$(get_pool_prop feature@dynamic_gang_header $TESTPOOL)
[[ "$status" == "active" ]] || log_fail "Dynamic gang headers not active"

log_pass "We use large gang headers on large-ashift pools."
@@ -0,0 +1,54 @@
#!/bin/ksh
# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2025 by Klara Inc.
#
#
# Description:
# Verify that multi-level ganging still works with dynamic headers
#
# Strategy:
# 1. Create a pool with dynamic gang headers and ashift=12.
# 2. Set metaslab_force_ganging to force multi-level ganging.
# 3. Verify that a large file has multi-level ganging
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/gang_blocks/gang_blocks.kshlib

log_assert "Verify that we can still multi-level gang with large headers."

log_onexit cleanup
preamble

# NOTE(review): $DISKS is left unquoted on purpose; it presumably holds a
# space-separated list of disks that must split into separate arguments.
log_must zpool create -f -o ashift=12 \
    -o feature@dynamic_gang_header=enabled $TESTPOOL $DISKS
log_must zfs create -o recordsize=16M $TESTPOOL/$TESTFS
mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)

# Force ganging for the write below. Fail right here if a tunable cannot be
# set; otherwise the test would only fail later with a misleading
# "didn't use a deep gang tree" message.
log_must set_tunable64 METASLAB_FORCE_GANGING 50000
log_must set_tunable32 METASLAB_FORCE_GANGING_PCT 100

# Write one 16M record and sync it out before inspecting its block pointer.
path="${mountpoint}/file"
log_must dd if=/dev/urandom of="$path" bs=16M count=1
log_must zpool sync $TESTPOOL

# Count the lines of the top-level gang header dump that mention "gang";
# at least one is required for the file to count as multi-level ganged.
first_block=$(get_first_block_dva $TESTPOOL/$TESTFS file)
leaves=$(read_gang_header $TESTPOOL $first_block 200)
gangs=$(echo "$leaves" | grep -c gang)
[[ "$gangs" -gt 0 ]] || log_fail "We didn't use a deep gang tree when needed"
log_must verify_pool $TESTPOOL

# On this ashift=12 pool the ganged write is expected to activate the feature.
status=$(get_pool_prop feature@dynamic_gang_header $TESTPOOL)
[[ "$status" == "active" ]] || log_fail "Dynamic gang headers not active"

log_pass "We can still multi-level gang with large headers."