mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Make ganging redundancy respect redundant_metadata property (#17073)
The redundant_metadata setting in ZFS allows users to trade resilience for performance and space savings. This applies to all data and metadata blocks in ZFS, with one exception: gang blocks. Gang blocks currently just take the copies property of the IO being ganged and, if it's 1, set it to 2. This means that we always make at least two copies of a gang header, which is good for resilience. However, if users care more about performance than resilience, their gang blocks will be even more of a penalty than usual. We add logic to calculate the number of gang header copies directly, and store it as a separate IO property. This is stored in the IO properties and not calculated when we decide to gang because by that point we may not have easy access to the relevant information about what kind of block is being stored. We also check the redundant_metadata property when doing so, and use that to decide whether to store an extra copy of the gang headers, compared to the underlying blocks. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Co-authored-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov>
This commit is contained in:
committed by
Alexander Motin
parent
90790955a6
commit
a46ce73ca8
@@ -725,6 +725,10 @@ tests = ['large_dnode_001_pos', 'large_dnode_003_pos', 'large_dnode_004_neg',
|
||||
'large_dnode_005_pos', 'large_dnode_007_neg', 'large_dnode_009_pos']
|
||||
tags = ['functional', 'features', 'large_dnode']
|
||||
|
||||
[tests/functional/gang_blocks]
|
||||
tests = ['gang_blocks_redundant']
|
||||
tags = ['functional', 'gang_blocks']
|
||||
|
||||
[tests/functional/grow]
|
||||
pre =
|
||||
post =
|
||||
|
||||
@@ -64,6 +64,7 @@ MAX_DATASET_NESTING max_dataset_nesting zfs_max_dataset_nesting
|
||||
MAX_MISSING_TVDS max_missing_tvds zfs_max_missing_tvds
|
||||
METASLAB_DEBUG_LOAD metaslab.debug_load metaslab_debug_load
|
||||
METASLAB_FORCE_GANGING metaslab.force_ganging metaslab_force_ganging
|
||||
METASLAB_FORCE_GANGING_PCT metaslab.force_ganging_pct metaslab_force_ganging_pct
|
||||
MULTIHOST_FAIL_INTERVALS multihost.fail_intervals zfs_multihost_fail_intervals
|
||||
MULTIHOST_HISTORY multihost.history zfs_multihost_history
|
||||
MULTIHOST_IMPORT_INTERVALS multihost.import_intervals zfs_multihost_import_intervals
|
||||
|
||||
@@ -276,6 +276,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
|
||||
functional/events/events.cfg \
|
||||
functional/events/events_common.kshlib \
|
||||
functional/fault/fault.cfg \
|
||||
functional/gang_blocks/gang_blocks.kshlib \
|
||||
functional/grow/grow.cfg \
|
||||
functional/history/history.cfg \
|
||||
functional/history/history_common.kshlib \
|
||||
@@ -1562,6 +1563,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/features/large_dnode/large_dnode_008_pos.ksh \
|
||||
functional/features/large_dnode/large_dnode_009_pos.ksh \
|
||||
functional/features/large_dnode/setup.ksh \
|
||||
functional/gang_blocks/cleanup.ksh \
|
||||
functional/gang_blocks/gang_blocks_redundant.ksh \
|
||||
functional/gang_blocks/setup.ksh \
|
||||
functional/grow/grow_pool_001_pos.ksh \
|
||||
functional/grow/grow_replicas_001_pos.ksh \
|
||||
functional/history/cleanup.ksh \
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2025 by Klara Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
# Put both forced-ganging tunables back, then run the suite's default cleanup.
for tunable in METASLAB_FORCE_GANGING METASLAB_FORCE_GANGING_PCT; do
	restore_tunable $tunable
done
default_cleanup
|
||||
@@ -0,0 +1,120 @@
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2025 By Klara Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
# Dump the zdb description of a single object.
#
# $1 filesystem
# $2 object number
#
# Runs "zdb -dddddd" on the object and writes whatever zdb prints to stdout;
# the exit status is that of zdb. Only the first two arguments are used, any
# further arguments are ignored.
#
function get_object_info
{
	typeset fs=$1
	typeset obj=$2

	# Quoted so a dataset name containing whitespace stays one argument.
	zdb -dddddd "$fs" "$obj"
}
|
||||
|
||||
#
# Print the block pointer lines of a file that match a filter.
#
# $1 filesystem
# $2 path to file, relative to the filesystem's mountpoint
# $3 block filter (a grep pattern, e.g. "L0" or "L1")
#
# The file's object number is taken from "ls -i"; the object is then dumped
# with get_object_info, and the lines matching $3 are printed, excluding any
# line that contains "Dataset". Returns 1 without running zdb if the object
# number cannot be determined.
#
function get_blocks_filter
{
	typeset fs=$1
	typeset path=$2
	typeset filter=$3
	typeset mntpt full_path obj

	# Declared separately from the assignments so a failing command
	# substitution is not masked by typeset's own exit status.
	mntpt=$(get_prop mountpoint "$fs")
	full_path="$mntpt/$path"
	obj=$(ls -i "$full_path" | awk '{print $1}')

	# With no object number zdb would be asked about the whole dataset
	# and the filtered result would be meaningless.
	if [[ -z "$obj" ]]; then
		printf 'get_blocks_filter: no object number for "%s"\n' \
		    "$full_path" >&2
		return 1
	fi

	get_object_info "$fs" "$obj" | grep -- "$filter" | grep -v Dataset
}
|
||||
|
||||
#
# Print the first L0 block pointer line of a file.
#
# $1 filesystem
# $2 path to file
#
function get_first_block
{
	# Arguments are forwarded quoted so they are not re-split.
	get_blocks_filter "$1" "$2" L0 | head -n 1
}
|
||||
|
||||
#
# Print the first DVA of the first L0 block of a file.
#
# $1 filesystem
# $2 path to file
#
function get_first_block_dva
{
	# Keep only the word that directly follows "L0 " on the line.
	get_first_block "$1" "$2" | sed 's/.*L0 \([^ ]*\).*/\1/'
}
|
||||
|
||||
#
# Count the DVAs in a zdb compressed blkptr line.
#
# Takes the line(s) on stdin and prints one count per recognised line. The
# DVAs are the space-separated words between the "L<digit> " level marker
# and the "<hex>L" size field. A line that does not have that shape produces
# no output, rather than the word count of the whole line.
#
function get_num_dvas
{
	sed -n 's/.*L[0-9] \(.*\) [a-f0-9]*L.*/\1/p' | awk '{print NF}'
}
|
||||
|
||||
#
# Test whether a DVA string carries the gang marker.
#
# $1 dva
#
# Returns 0 if the string ends in "G", 1 otherwise (including when it is
# empty).
#
function check_gang_dva
{
	# A pattern match needs no fork and does not depend on "echo -n".
	[[ "$1" == *G ]]
}
|
||||
|
||||
#
# Assert that a DVA is a gang DVA.
#
# $1 dva
#
# Calls log_fail with the offending DVA if check_gang_dva rejects it.
#
function check_is_gang_dva
{
	check_gang_dva "$1" || log_fail "Not a gang DVA: \"$1\""
}
|
||||
|
||||
#
# Assert that a DVA is not a gang DVA.
#
# $1 dva
#
# Calls log_fail with the offending DVA if check_gang_dva accepts it, and
# returns 0 otherwise.
#
function check_not_gang_dva
{
	# An explicit "if" is used because "check_gang_dva ... && log_fail"
	# would hand the failed check's non-zero status back to the caller in
	# the very case this assertion is meant to pass.
	if check_gang_dva "$1"; then
		log_fail "Gang DVA: \"$1\""
	fi
}
|
||||
|
||||
#
# Get the gang header contents of the given dva in the given pool
#
# $1 pool
# $2 dva
# $3 size (in hexadecimal)
#
# The DVA must be a gang DVA (checked with check_is_gang_dva). Its last
# ":"-separated field is replaced by $3 and the ":g" flag is appended before
# the result is handed to "zdb -R". zdb's stdout and stderr are merged, and
# lines containing "Found vdev:" are dropped from the output.
#
function read_gang_header
{
	typeset pool=$1
	typeset dva=$2
	typeset size=$3

	check_is_gang_dva "$dva"

	zdb -R "$pool" "${dva%:*}:$size:g" 2>&1 | grep -v "Found vdev:"
}
|
||||
|
||||
#
# Record the current values of the two forced-ganging tunables so that
# cleanup can restore them.
#
function preamble
{
	typeset tunable

	for tunable in METASLAB_FORCE_GANGING METASLAB_FORCE_GANGING_PCT; do
		save_tunable $tunable
	done
}
|
||||
|
||||
#
# Destroy the test pool and restore the two forced-ganging tunables.
#
function cleanup
{
	typeset tunable

	destroy_pool $TESTPOOL
	for tunable in METASLAB_FORCE_GANGING METASLAB_FORCE_GANGING_PCT; do
		restore_tunable $tunable
	done
}
|
||||
@@ -0,0 +1,88 @@
|
||||
#!/bin/ksh
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2025 by Klara Inc.
|
||||
#
|
||||
|
||||
#
|
||||
# Description:
|
||||
# Verify that the redundant_metadata setting is respected by gang headers
|
||||
#
|
||||
# Strategy:
|
||||
# 1. Create a filesystem with redundant_metadata={all,most,some,none}
|
||||
# 2. Verify that gang blocks at different levels have the right amount of redundancy
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/gang_blocks/gang_blocks.kshlib
|
||||
|
||||
log_assert "Verify that gang blocks at different levels have the right amount of redundancy."
|
||||
|
||||
#
# Exit handler: destroy the per-level datasets, then run the shared cleanup.
#
# The loop variable is local so that it cannot overwrite the "red" used by
# the main loop of the test.
#
function cleanup2
{
	typeset red

	for red in all most some none; do
		zfs destroy "$TESTPOOL/$TESTFS-$red"
	done
	cleanup
}
|
||||
|
||||
# Save the ganging tunables first; cleanup2 (registered below) restores them.
preamble
log_onexit cleanup2

# $DISKS is left unquoted on purpose: presumably a whitespace-separated list
# of devices that must be split into separate arguments.
log_must zpool create -f -o ashift=9 $TESTPOOL $DISKS

# Force ganging for the rest of the test. Per zfs(4) this should make every
# eligible block above the 1500 byte threshold a gang block -- confirm.
set_tunable64 METASLAB_FORCE_GANGING 1500
set_tunable32 METASLAB_FORCE_GANGING_PCT 100

for red in all most some none; do
	fs="$TESTPOOL/$TESTFS-$red"

	log_must zfs create -o redundant_metadata=$red -o recordsize=512 "$fs"
	if [[ "$red" == "all" ]]; then
		log_must zfs set recordsize=8k "$fs"
	fi
	mountpoint=$(get_prop mountpoint "$fs")

	path="${mountpoint}/file"
	log_must dd if=/dev/urandom of="$path" bs=1M count=1
	log_must zpool sync "$TESTPOOL"

	# First L0 block of the file: two DVAs are expected only for "all".
	num_l0_dvas=$(get_first_block "$fs" file | get_num_dvas)
	expected=1
	if [[ "$red" == "all" ]]; then
		expected=2
	fi
	[[ "$num_l0_dvas" -eq "$expected" ]] || \
	    log_fail "wrong number of DVAs for L0 in $red: $num_l0_dvas"

	# First L1 block of the file: two DVAs for "all" and "most".
	num_l1_dvas=$(get_blocks_filter "$fs" file L1 | head -n 1 | get_num_dvas)
	expected=1
	if [[ "$red" == "all" || "$red" == "most" ]]; then
		expected=2
	fi
	[[ "$num_l1_dvas" -eq "$expected" ]] || \
	    log_fail "wrong number of DVAs for L1 in $red: $num_l1_dvas"

	# Create 80 more small files before object 0 is inspected.
	for i in $(seq 1 80); do
		dd if=/dev/urandom of="$mountpoint/f$i" bs=512 count=1 \
		    2>/dev/null || log_fail "dd failed"
	done
	log_must zpool sync "$TESTPOOL"

	# NOTE(review): get_object_info only consumes two arguments, so the
	# trailing "L0" is not applied as a filter; the first line of the
	# dump that contains "G" is the one whose DVAs get counted.
	obj_0_gangs=$(get_object_info "$fs" 0 L0 | grep G)
	num_obj_0_dvas=$(echo "$obj_0_gangs" | head -n 1 | get_num_dvas)

	# Object 0: two DVAs for every level except "none".
	expected=2
	if [[ "$red" == "none" ]]; then
		expected=1
	fi
	[[ "$num_obj_0_dvas" -eq "$expected" ]] || \
	    log_fail "wrong number of DVAs for obj 0 in $red: $num_obj_0_dvas"

	log_note "Level $red passed"
done

log_pass "Gang blocks at different levels have the right amount of redundancy."
|
||||
+30
@@ -0,0 +1,30 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2025 by Klara Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
# Set the two forced-ganging tunables for this test group.
# 16777217 is 2^24 + 1, i.e. one byte more than 16 MiB.
# NOTE(review): 16777217 appears to match the documented default of
# metaslab_force_ganging, and a percentage of 0 presumably switches forced
# ganging off until a test raises it -- confirm against zfs(4).
set_tunable64 METASLAB_FORCE_GANGING 16777217
|
||||
set_tunable32 METASLAB_FORCE_GANGING_PCT 0
|
||||
Reference in New Issue
Block a user