Make ganging redundancy respect redundant_metadata property (#17073)

The redundant_metadata setting in ZFS allows users to trade resilience
for performance and space savings. This applies to all data and metadata
blocks in ZFS, with one exception: gang blocks. Gang blocks currently
just take the copies property of the IO being ganged and, if it's 1,
set it to 2. This means that we always make at least two copies of a
gang header, which is good for resilience. However, if the users care
more about performance than resilience, their gang blocks will be even
more of a penalty than usual.

We add logic to calculate the number of gang header copies directly,
and store it as a separate IO property. This is stored in the IO
properties and not calculated when we decide to gang because by that
point we may not have easy access to the relevant information about what
kind of block is being stored. We also check the redundant_metadata
property when doing so, and use that to decide whether to store an extra
copy of the gang headers, compared to the underlying blocks.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.

Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Co-authored-by: Paul Dagnelie <paul.dagnelie@klarasystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
This commit is contained in:
Paul Dagnelie
2025-03-19 15:58:29 -07:00
committed by GitHub
parent 94b9cbbe1e
commit 9250403ba6
15 changed files with 327 additions and 20 deletions
+4
View File
@@ -724,6 +724,10 @@ tests = ['large_dnode_001_pos', 'large_dnode_003_pos', 'large_dnode_004_neg',
'large_dnode_005_pos', 'large_dnode_007_neg', 'large_dnode_009_pos']
tags = ['functional', 'features', 'large_dnode']
[tests/functional/gang_blocks]
tests = ['gang_blocks_redundant']
tags = ['functional', 'gang_blocks']
[tests/functional/grow]
pre =
post =
+1
View File
@@ -64,6 +64,7 @@ MAX_DATASET_NESTING max_dataset_nesting zfs_max_dataset_nesting
MAX_MISSING_TVDS max_missing_tvds zfs_max_missing_tvds
METASLAB_DEBUG_LOAD metaslab.debug_load metaslab_debug_load
METASLAB_FORCE_GANGING metaslab.force_ganging metaslab_force_ganging
METASLAB_FORCE_GANGING_PCT metaslab.force_ganging_pct metaslab_force_ganging_pct
MULTIHOST_FAIL_INTERVALS multihost.fail_intervals zfs_multihost_fail_intervals
MULTIHOST_HISTORY multihost.history zfs_multihost_history
MULTIHOST_IMPORT_INTERVALS multihost.import_intervals zfs_multihost_import_intervals
+4
View File
@@ -275,6 +275,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
functional/events/events.cfg \
functional/events/events_common.kshlib \
functional/fault/fault.cfg \
functional/gang_blocks/gang_blocks.kshlib \
functional/grow/grow.cfg \
functional/history/history.cfg \
functional/history/history_common.kshlib \
@@ -1558,6 +1559,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/features/large_dnode/large_dnode_008_pos.ksh \
functional/features/large_dnode/large_dnode_009_pos.ksh \
functional/features/large_dnode/setup.ksh \
functional/gang_blocks/cleanup.ksh \
functional/gang_blocks/gang_blocks_redundant.ksh \
functional/gang_blocks/setup.ksh \
functional/grow/grow_pool_001_pos.ksh \
functional/grow/grow_replicas_001_pos.ksh \
functional/history/cleanup.ksh \
+31
View File
@@ -0,0 +1,31 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025 by Klara Inc.
#
. $STF_SUITE/include/libtest.shlib
# Restore both forced-ganging tunables, then run the suite's default cleanup.
for gang_tunable in METASLAB_FORCE_GANGING METASLAB_FORCE_GANGING_PCT; do
	restore_tunable "$gang_tunable"
done
default_cleanup
@@ -0,0 +1,120 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025 By Klara Inc.
#
. $STF_SUITE/include/libtest.shlib
#
# Dump the zdb (-dddddd) information for one object of a filesystem
#
# $1 filesystem
# $2 object number
#
# Any further arguments are ignored.
#
function get_object_info
{
	typeset fs="$1"
	typeset obj="$2"

	zdb -dddddd "$fs" "$obj"
}
#
# Print the zdb object-info lines of a file that match a filter
#
# $1 filesystem
# $2 path to file, relative to the filesystem's mountpoint
# $3 block filter (grep pattern, e.g. L0)
#
function get_blocks_filter
{
	typeset fs="$1"
	typeset path="$2"
	typeset filter="$3"
	typeset full_path obj

	full_path="$(get_prop mountpoint "$fs")/$path"
	# The file's inode number is what gets passed to zdb as the
	# object number.
	obj="$(ls -i "$full_path" | awk '{print $1}')"

	# Lines containing "Dataset" are dropped even if they match the
	# filter (presumably zdb's dataset header line -- confirm).
	get_object_info "$fs" "$obj" | grep -- "$filter" | grep -v Dataset
}
#
# Print the first line of a file's zdb output that matches "L0"
#
# $1 filesystem
# $2 path to file, relative to the filesystem's mountpoint
#
function get_first_block
{
	get_blocks_filter "$1" "$2" L0 | head -n 1
}
#
# Print the 0th DVA of the first L0 block of a file
#
# $1 filesystem
# $2 path to file, relative to the filesystem's mountpoint
#
function get_first_block_dva
{
	# The DVA is the first space-delimited token after the "L0 " tag.
	get_first_block "$1" "$2" | sed 's/.*L0 \([^ ]*\).*/\1/'
}
#
# Count the DVAs on each zdb compressed blkptr line read from stdin
#
# Prints one count per input line.
#
function get_num_dvas
{
	# Keep only the text between the "L<digit> " level tag and the
	# " <hex>L" size field; what is left is the list of DVAs.
	typeset dva_span='s/.*L[0-9] \(.*\) [a-f0-9]*L.*/\1/'

	sed -e "$dva_span" |
	    awk '{ print NF }'
}
#
# Check whether a DVA string carries the gang marker
#
# $1 dva
#
# Returns 0 if the string ends in "G", 1 otherwise (including when it
# is empty).
#
function check_gang_dva
{
	# Pattern match in the shell instead of piping through
	# "echo -n | tail -c 1": no forks, no word splitting of $1, and no
	# dependence on how echo treats option-like arguments.
	[[ "$1" == *G ]]
}
#
# Call log_fail unless the given DVA is a gang DVA
#
# $1 dva
#
function check_is_gang_dva
{
	# Quote $1 so the string that is tested is exactly the string
	# that is reported in the failure message.
	check_gang_dva "$1" || log_fail "Not a gang DVA: \"$1\""
}
#
# Call log_fail if the given DVA is a gang DVA
#
# $1 dva
#
# Returns 0 when the DVA is not a gang DVA.
#
function check_not_gang_dva
{
	# "check_gang_dva && log_fail" would leave a non-zero status behind
	# in the passing (non-gang) case; return success explicitly so the
	# function can be used under log_must or in a condition.
	check_gang_dva "$1" || return 0
	log_fail "Gang DVA: \"$1\""
}
#
# Get the gang header contents of the given dva in the given pool
#
# $1 pool
# $2 dva
# $3 size (in hexadecimal)
#
function read_gang_header
{
	typeset pool="$1"
	typeset dva="$2"
	typeset size="$3"

	check_is_gang_dva "$dva"

	# Replace the DVA's last ':'-separated field with the requested
	# size and append the "g" flag for zdb -R.  stderr is merged into
	# the output and zdb's "Found vdev:" lines are filtered out.
	zdb -R "$pool" "${dva%:*}:$size:g" 2>&1 | grep -v "Found vdev:"
}
#
# Save the forced-ganging tunables via save_tunable
#
function preamble
{
	typeset tunable

	for tunable in METASLAB_FORCE_GANGING METASLAB_FORCE_GANGING_PCT; do
		save_tunable "$tunable"
	done
}
#
# Destroy the test pool and restore the forced-ganging tunables
#
function cleanup
{
	typeset tunable

	destroy_pool $TESTPOOL
	for tunable in METASLAB_FORCE_GANGING METASLAB_FORCE_GANGING_PCT; do
		restore_tunable "$tunable"
	done
}
@@ -0,0 +1,88 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2025 by Klara Inc.
#
#
# Description:
# Verify that the redundant_metadata setting is respected by gang headers
#
# Strategy:
# 1. Create a filesystem with redundant_metadata={all,most,some,none}
# 2. Verify that gang blocks at different levels have the right amount of redundancy
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/gang_blocks/gang_blocks.kshlib
# Record the assertion this test case checks (see Description above).
log_assert "Verify that gang blocks at different levels have the right amount of redundancy."
#
# Destroy the per-setting test filesystems, then run the shared cleanup
#
function cleanup2
{
	# Keep the loop variable local: the main loop of this test also
	# iterates with a global named "red".
	typeset red

	for red in all most some none; do
		zfs destroy "$TESTPOOL/$TESTFS-$red"
	done
	cleanup
}
#
# Call log_fail unless a DVA count has the expected value
#
# $1 what was counted (used in the failure message, e.g. "L0")
# $2 redundant_metadata setting under test
# $3 actual number of DVAs
# $4 expected number of DVAs
#
function check_dva_count
{
	typeset what=$1
	typeset setting=$2
	typeset actual=$3
	typeset expected=$4

	[[ "$actual" -eq "$expected" ]] || \
	    log_fail "wrong number of DVAs for $what in $setting: $actual"
}

preamble
log_onexit cleanup2

log_must zpool create -f -o ashift=9 $TESTPOOL $DISKS
# NOTE(review): presumably makes every allocation larger than 1500 bytes
# gang -- confirm against the metaslab_force_ganging{,_pct} documentation.
set_tunable64 METASLAB_FORCE_GANGING 1500
set_tunable32 METASLAB_FORCE_GANGING_PCT 100

for red in all most some none; do
	gangfs="$TESTPOOL/$TESTFS-$red"

	log_must zfs create -o redundant_metadata=$red -o recordsize=512 \
	    "$gangfs"
	# Only the "all" filesystem gets a recordsize above the 1500 set
	# above; it is also the only one expected to have two L0 DVAs.
	if [[ "$red" == "all" ]]; then
		log_must zfs set recordsize=8k "$gangfs"
	fi
	mountpoint=$(get_prop mountpoint "$gangfs")
	path="${mountpoint}/file"

	log_must dd if=/dev/urandom of="$path" bs=1M count=1
	log_must zpool sync $TESTPOOL

	# First L0 block of the file: 2 DVAs for "all", otherwise 1.
	num_l0_dvas=$(get_first_block "$gangfs" file | get_num_dvas)
	if [[ "$red" == "all" ]]; then
		check_dva_count L0 "$red" "$num_l0_dvas" 2
	else
		check_dva_count L0 "$red" "$num_l0_dvas" 1
	fi

	# First L1 block of the file: 2 DVAs for "all" and "most",
	# otherwise 1.
	num_l1_dvas=$(get_blocks_filter "$gangfs" file L1 | head -n 1 |
	    get_num_dvas)
	if [[ "$red" == "all" || "$red" == "most" ]]; then
		check_dva_count L1 "$red" "$num_l1_dvas" 2
	else
		check_dva_count L1 "$red" "$num_l1_dvas" 1
	fi

	# Create 80 small files before inspecting object 0 (presumably so
	# that object 0's blocks are rewritten with ganging forced --
	# confirm).
	for i in $(seq 1 80); do
		dd if=/dev/urandom of="$mountpoint/f$i" bs=512 count=1 \
		    2>/dev/null || log_fail "dd failed"
	done
	log_must zpool sync $TESTPOOL

	# NOTE(review): get_object_info takes only a filesystem and an
	# object number, so the "L0" that used to be passed here was
	# ignored. This selects the first line of object 0's dump that
	# contains "G", at any level -- confirm whether an L0-only filter
	# was intended.
	obj_0_gangs=$(get_object_info "$gangfs" 0 | grep G)
	num_obj_0_dvas=$(echo "$obj_0_gangs" | head -n 1 | get_num_dvas)
	if [[ "$red" != "none" ]]; then
		check_dva_count "obj 0" "$red" "$num_obj_0_dvas" 2
	else
		check_dva_count "obj 0" "$red" "$num_obj_0_dvas" 1
	fi
	log_note "Level $red passed"
done

log_pass "Gang blocks at different levels have the right amount of redundancy."
+30
View File
@@ -0,0 +1,30 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025 by Klara Inc.
#
. $STF_SUITE/include/libtest.shlib
# Set the size tunable to 2^24 + 1 (16777217) and the percentage tunable
# to 0.
# NOTE(review): presumably this leaves ganging un-forced before the tests
# in this group run -- confirm against the tunables' defaults.
set_tunable64 METASLAB_FORCE_GANGING $((16 * 1024 * 1024 + 1))
set_tunable32 METASLAB_FORCE_GANGING_PCT 0