mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Detect a slow raidz child during reads
A single slow-responding disk can affect the overall read performance of a raidz group. When a raidz child disk is determined to be a persistent slow outlier, have it sit out during reads for a period of time. The raidz group can use parity to reconstruct the data that was skipped. Each time a slow disk is placed into a sit out period, its `vdev_stat.vs_slow_ios` count is incremented and a zevent of class `ereport.fs.zfs.delay` is posted. The length of the sit out period can be changed using the `vdev_read_sit_out_secs` module parameter. Setting it to zero disables slow outlier detection. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Contributions-by: Don Brady <don.brady@klarasystems.com> Contributions-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #17227
This commit is contained in:
committed by
Brian Behlendorf
parent
0df85ec27c
commit
df55ba7c49
@@ -940,10 +940,11 @@ tags = ['functional', 'rename_dirs']
|
||||
|
||||
[tests/functional/replacement]
|
||||
tests = ['attach_import', 'attach_multiple', 'attach_rebuild',
|
||||
'attach_resilver', 'detach', 'rebuild_disabled_feature',
|
||||
'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild',
|
||||
'replace_resilver', 'resilver_restart_001', 'resilver_restart_002',
|
||||
'scrub_cancel']
|
||||
'attach_resilver', 'attach_resilver_sit_out', 'detach',
|
||||
'rebuild_disabled_feature', 'rebuild_multiple', 'rebuild_raidz',
|
||||
'replace_import', 'replace_rebuild', 'replace_resilver',
|
||||
'replace_resilver_sit_out', 'resilver_restart_001',
|
||||
'resilver_restart_002', 'scrub_cancel']
|
||||
tags = ['functional', 'replacement']
|
||||
|
||||
[tests/functional/reservation]
|
||||
|
||||
@@ -109,7 +109,8 @@ tags = ['functional', 'direct']
|
||||
[tests/functional/events:Linux]
|
||||
tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
|
||||
'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
|
||||
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
|
||||
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple',
|
||||
'slow_vdev_sit_out', 'slow_vdev_sit_out_neg', 'slow_vdev_degraded_sit_out']
|
||||
tags = ['functional', 'events']
|
||||
|
||||
[tests/functional/fallocate:Linux]
|
||||
|
||||
@@ -1112,6 +1112,16 @@ function get_pool_prop # property pool
|
||||
zpool get -Hpo value "$prop" "$pool" || log_fail "zpool get $prop $pool"
|
||||
}
|
||||
|
||||
#
# Print the value of a vdev property in parsable form, or fail the test.
#
# $1 property name
# $2 pool name
# $3 vdev name
#
function get_vdev_prop # property pool vdev
{
	typeset propname="$1"
	typeset poolname="$2"
	typeset vdevname="$3"

	# -H: no header, -p: parsable values, -o value: value column only
	if ! zpool get -Hpo value "$propname" "$poolname" "$vdevname"; then
		log_fail "zpool get $propname $poolname $vdevname"
	fi
}
|
||||
|
||||
# Return 0 if a pool exists; $? otherwise
|
||||
#
|
||||
# $1 - pool name
|
||||
@@ -1970,6 +1980,28 @@ function wait_vdev_state # pool disk state timeout
|
||||
return 1
|
||||
}
|
||||
|
||||
#
# Wait for vdev 'sit_out' property to be cleared.
#
# $1 pool name (defaults to $TESTPOOL)
# $2 vdev name
# $3 timeout: maximum number of polls, one second apart (defaults to 300)
#
# Returns 0 as soon as the property reads "off", 1 if it never does
# within the timeout.
#
function wait_sit_out #pool vdev timeout
{
	typeset pool=${1:-$TESTPOOL}
	typeset vdev="$2"
	typeset timeout=${3:-300}
	# Keep the loop counter local so a caller's 'timer' is not clobbered.
	typeset -i timer

	for (( timer = 0; timer < timeout; timer++ )); do
		if [ "$(get_vdev_prop sit_out "$pool" "$vdev")" = "off" ]; then
			return 0
		fi
		sleep 1;
	done

	return 1
}
|
||||
|
||||
#
|
||||
# Check the output of 'zpool status -v <pool>',
|
||||
# and to see if the content of <token> contain the <keyword> specified.
|
||||
|
||||
@@ -72,6 +72,9 @@ MULTIHOST_INTERVAL multihost.interval zfs_multihost_interval
|
||||
OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_estimate_recordsize
|
||||
PREFETCH_DISABLE prefetch.disable zfs_prefetch_disable
|
||||
RAIDZ_EXPAND_MAX_REFLOW_BYTES vdev.expand_max_reflow_bytes raidz_expand_max_reflow_bytes
|
||||
READ_SIT_OUT_SECS vdev.read_sit_out_secs vdev_read_sit_out_secs
|
||||
SIT_OUT_CHECK_INTERVAL vdev.raidz_outlier_check_interval_ms vdev_raidz_outlier_check_interval_ms
|
||||
SIT_OUT_INSENSITIVITY vdev.raidz_outlier_insensitivity vdev_raidz_outlier_insensitivity
|
||||
REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled
|
||||
REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress
|
||||
REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment
|
||||
|
||||
@@ -1525,6 +1525,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/events/events_001_pos.ksh \
|
||||
functional/events/events_002_pos.ksh \
|
||||
functional/events/setup.ksh \
|
||||
functional/events/slow_vdev_degraded_sit_out.ksh \
|
||||
functional/events/slow_vdev_sit_out.ksh \
|
||||
functional/events/slow_vdev_sit_out_neg.ksh \
|
||||
functional/events/zed_cksum_config.ksh \
|
||||
functional/events/zed_cksum_reported.ksh \
|
||||
functional/events/zed_diagnose_multiple.ksh \
|
||||
@@ -1937,6 +1940,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/replacement/attach_multiple.ksh \
|
||||
functional/replacement/attach_rebuild.ksh \
|
||||
functional/replacement/attach_resilver.ksh \
|
||||
functional/replacement/attach_resilver_sit_out.ksh \
|
||||
functional/replacement/cleanup.ksh \
|
||||
functional/replacement/detach.ksh \
|
||||
functional/replacement/rebuild_disabled_feature.ksh \
|
||||
@@ -1945,6 +1949,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/replacement/replace_import.ksh \
|
||||
functional/replacement/replace_rebuild.ksh \
|
||||
functional/replacement/replace_resilver.ksh \
|
||||
functional/replacement/replace_resilver_sit_out.ksh \
|
||||
functional/replacement/resilver_restart_001.ksh \
|
||||
functional/replacement/resilver_restart_002.ksh \
|
||||
functional/replacement/scrub_cancel.ksh \
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
|
||||
# Copyright (c) 2025 by Klara, Inc.
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that vdevs 'sit out' when they are slow
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create various raidz/draid pools
|
||||
# 2. Degrade/fault one of the disks.
|
||||
# 3. Inject delays into one of the disks
|
||||
# 4. Verify disk is set to 'sit out' for awhile.
|
||||
# 5. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
# Test teardown: put the saved tunables back, clear injected faults and
# pending events, destroy the test pool and delete its file vdevs.
#
function cleanup
{
	typeset tunable

	for tunable in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $tunable
	done

	log_must zinject -c all
	log_must zpool events -c
	destroy_pool $TESTPOOL2

	# File vdevs are named after this shell's pid ($$).
	log_must rm -f $TEST_BASE_DIR/vdev.$$.*
}
|
||||
|
||||
log_assert "Verify sit_out works"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
# shorten sit out period for testing
|
||||
save_tunable READ_SIT_OUT_SECS
|
||||
set_tunable32 READ_SIT_OUT_SECS 5
|
||||
|
||||
save_tunable SIT_OUT_CHECK_INTERVAL
|
||||
set_tunable64 SIT_OUT_CHECK_INTERVAL 20
|
||||
|
||||
log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9}
|
||||
|
||||
for raidtype in raidz2 raidz3 draid2 draid3 ; do
|
||||
log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9}
|
||||
log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0"
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400
|
||||
log_must zpool export $TESTPOOL2
|
||||
log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2
|
||||
|
||||
BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9
|
||||
SLOW_VDEV=$TEST_BASE_DIR/vdev.$$.8
|
||||
|
||||
# Initial state should not be sitting out
|
||||
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off" ]]
|
||||
|
||||
# Delay our reads 200ms to trigger sit out
|
||||
log_must zinject -d $SLOW_VDEV -D200:1 -T read $TESTPOOL2
|
||||
type=$((RANDOM % 2))
|
||||
[[ "$type" -eq "0" ]] && action="degrade" || action="fault"
|
||||
log_must zinject -d $BAD_VDEV -A $action -T read $TESTPOOL2
|
||||
|
||||
# Do some reads and wait for us to sit out
|
||||
for i in {0..99} ; do
|
||||
dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
|
||||
dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
|
||||
|
||||
sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)
|
||||
if [[ "$sit_out" == "on" ]] ; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "on"
|
||||
|
||||
# Clear fault injection
|
||||
log_must zinject -c all
|
||||
|
||||
# Wait for us to exit our sit out period
|
||||
log_must wait_sit_out $TESTPOOL2 $SLOW_VDEV 10
|
||||
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off"
|
||||
destroy_pool $TESTPOOL2
|
||||
log_must zpool labelclear -f $BAD_VDEV
|
||||
done
|
||||
|
||||
log_pass "sit_out works correctly"
|
||||
@@ -0,0 +1,102 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that vdevs 'sit out' when they are slow
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create various raidz/draid pools
|
||||
# 2. Inject delays into one of the disks
|
||||
# 3. Verify disk is set to 'sit out' for awhile.
|
||||
# 4. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
# Exit handler for this test: restores both sit-out tunables, removes all
# zinject handlers, clears the event log, then tears down the pool and the
# backing vdev files.
#
function cleanup
{
	typeset saved

	for saved in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $saved
	done

	log_must zinject -c all
	log_must zpool events -c

	destroy_pool $TESTPOOL2
	log_must rm -f $TEST_BASE_DIR/vdev.$$.*
}
|
||||
|
||||
log_assert "Verify sit_out works"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
# shorten sit out period for testing
|
||||
save_tunable READ_SIT_OUT_SECS
|
||||
set_tunable32 READ_SIT_OUT_SECS 5
|
||||
|
||||
save_tunable SIT_OUT_CHECK_INTERVAL
|
||||
set_tunable64 SIT_OUT_CHECK_INTERVAL 20
|
||||
|
||||
log_must truncate -s200M $TEST_BASE_DIR/vdev.$$.{0..9}
|
||||
|
||||
for raidtype in raidz raidz2 raidz3 draid1 draid2 draid3 ; do
|
||||
log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9}
|
||||
log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0"
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=600
|
||||
log_must zpool export $TESTPOOL2
|
||||
log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2
|
||||
|
||||
BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9
|
||||
|
||||
# Initial state should not be sitting out
|
||||
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off" ]]
|
||||
|
||||
# Delay our reads 200ms to trigger sit out
|
||||
log_must zinject -d $BAD_VDEV -D200:1 -T read $TESTPOOL2
|
||||
|
||||
# Do some reads and wait for us to sit out
|
||||
for i in {0..99} ; do
|
||||
dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
|
||||
dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null &
|
||||
dd if=/$TESTPOOL2/bigfile skip=$((i + 200)) bs=2M count=1 of=/dev/null
|
||||
|
||||
sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)
|
||||
if [[ "$sit_out" == "on" ]] ; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "on"
|
||||
|
||||
# Clear fault injection
|
||||
log_must zinject -c all
|
||||
|
||||
# Wait for us to exit our sit out period
|
||||
log_must wait_sit_out $TESTPOOL2 $BAD_VDEV 10
|
||||
|
||||
# Verify sit_out was cleared during wait_sit_out
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off"
|
||||
|
||||
destroy_pool $TESTPOOL2
|
||||
done
|
||||
|
||||
log_pass "sit_out works correctly"
|
||||
@@ -0,0 +1,116 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
|
||||
# Copyright (c) 2025 by Klara, Inc.
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that we don't sit out too many vdevs
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create draid2 pool
|
||||
# 2. Inject delays into three of the disks
|
||||
# 3. Do reads to trigger sit-outs
|
||||
# 4. Verify exactly 2 disks sit out
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
# Test teardown: restore the tunable this test changed, clear injected
# faults and pending events, destroy the test pool and delete its file
# vdevs.
#
function cleanup
{
	# This test only saves and modifies SIT_OUT_CHECK_INTERVAL, so that
	# is the only tunable restored here.
	restore_tunable SIT_OUT_CHECK_INTERVAL
	log_must zinject -c all
	log_must zpool events -c
	destroy_pool $TESTPOOL2
	log_must rm -f $TEST_BASE_DIR/vdev.$$.*
}
|
||||
|
||||
log_assert "Verify that we don't sit out too many vdevs"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
save_tunable SIT_OUT_CHECK_INTERVAL
|
||||
set_tunable64 SIT_OUT_CHECK_INTERVAL 20
|
||||
|
||||
log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9}
|
||||
|
||||
log_must zpool create $TESTPOOL2 draid2 $TEST_BASE_DIR/vdev.$$.{0..9}
|
||||
log_must zpool set autosit=on $TESTPOOL2 draid2-0
|
||||
log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400
|
||||
log_must zpool export $TESTPOOL2
|
||||
log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2
|
||||
|
||||
BAD_VDEV1=$TEST_BASE_DIR/vdev.$$.7
|
||||
BAD_VDEV2=$TEST_BASE_DIR/vdev.$$.8
|
||||
BAD_VDEV3=$TEST_BASE_DIR/vdev.$$.9
|
||||
|
||||
# Initial state should not be sitting out
|
||||
log_must eval [[ "$(get_vdev_prop autosit $TESTPOOL2 draid2-0)" == "on" ]]
|
||||
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "off" ]]
|
||||
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "off" ]]
|
||||
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off" ]]
|
||||
|
||||
# Delay our reads 200ms to trigger sit out
|
||||
log_must zinject -d $BAD_VDEV1 -D200:1 -T read $TESTPOOL2
|
||||
|
||||
# Do some reads and wait for us to sit out
|
||||
for i in {0..99} ; do
|
||||
dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
|
||||
dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
|
||||
|
||||
sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)
|
||||
if [[ "$sit_out" == "on" ]] ; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "on"
|
||||
|
||||
log_must zinject -d $BAD_VDEV2 -D200:1 -T read $TESTPOOL2
|
||||
# Do some reads and wait for us to sit out
|
||||
for i in {0..99} ; do
|
||||
dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
|
||||
dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
|
||||
|
||||
sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)
|
||||
if [[ "$sit_out" == "on" ]] ; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "on"
|
||||
|
||||
log_must zinject -d $BAD_VDEV3 -D200:1 -T read $TESTPOOL2
|
||||
# Do some reads and wait for us to sit out
|
||||
for i in {0..99} ; do
|
||||
dd if=/$TESTPOOL2/bigfile skip=$i bs=2M count=1 of=/dev/null &
|
||||
dd if=/$TESTPOOL2/bigfile skip=$((i + 100)) bs=2M count=1 of=/dev/null
|
||||
|
||||
sit_out=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)
|
||||
if [[ "$sit_out" == "on" ]] ; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off"
|
||||
|
||||
|
||||
log_pass "sit_out works correctly"
|
||||
@@ -0,0 +1,189 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2025, Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Attaching disks while a disk is sitting out reads should pass
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create raidz pools
|
||||
# 2. Make one disk slower and trigger a read sit out for that disk
|
||||
# 3. Start some random I/O
|
||||
# 4. Attach a disk to the pool.
|
||||
# 5. Verify the integrity of the file system and the resilvering.
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
save_tunable READ_SIT_OUT_SECS
|
||||
set_tunable32 READ_SIT_OUT_SECS 120
|
||||
save_tunable SIT_OUT_CHECK_INTERVAL
|
||||
set_tunable64 SIT_OUT_CHECK_INTERVAL 20
|
||||
|
||||
#
# Test teardown: restore the sit-out tunables, clear injected faults and
# events, stop any background writers still recorded in $child_pids,
# destroy the pool if it exists and empty $TESTDIR.
#
function cleanup
{
	typeset tunable

	for tunable in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $tunable
	done
	log_must zinject -c all
	log_must zpool events -c

	# An empty $child_pids simply yields zero iterations.
	for wait_pid in $child_pids; do
		kill $wait_pid
	done

	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1

	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
}
|
||||
|
||||
log_assert "Attaching a disk during I/O with a sit out completes."
|
||||
|
||||
options=""
|
||||
options_display="default options"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
|
||||
|
||||
[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
|
||||
|
||||
[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
|
||||
|
||||
[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
|
||||
|
||||
[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
|
||||
|
||||
options="$options -r "
|
||||
|
||||
[[ -n "$options" ]] && options_display=$options
|
||||
|
||||
child_pids=""
|
||||
|
||||
#
# Start background file_trunc writers, attach a disk while the slow drive
# is still injected, then export/import the pool and verify it.
#
# $1 vdev to attach to
# $2 disk to attach
#
# Appends the writer pids to the global $child_pids while they run and
# resets it once they have been killed.
#
function attach_test
{
	# Number of background file_trunc writers to start before attaching.
	typeset -i iters=2
	typeset vdev=$1
	typeset disk=$2

	typeset i=0
	while [[ $i -lt $iters ]]; do
		log_note "Invoking file_trunc with: $options_display on $TESTFILE.$i"
		file_trunc $options $TESTDIR/$TESTFILE.$i &
		typeset pid=$!

		sleep 1

		child_pids="$child_pids $pid"
		((i = i + 1))
	done

	# attach disk with a slow drive still present
	SECONDS=0
	log_must zpool attach -w $TESTPOOL1 $vdev $disk
	log_note took $SECONDS seconds to attach disk

	# The writers are only needed while the attach is in progress.
	for wait_pid in $child_pids
	do
		kill $wait_pid
	done
	child_pids=""

	log_must zinject -c all
	log_must zpool export $TESTPOOL1
	log_must zpool import -d $TESTDIR $TESTPOOL1
	log_must zfs umount $TESTPOOL1/$TESTFS1
	log_must zdb -cdui $TESTPOOL1/$TESTFS1
	log_must zfs mount $TESTPOOL1/$TESTFS1
	verify_pool $TESTPOOL1
}
|
||||
|
||||
DEVSIZE="150M"
|
||||
specials_list=""
|
||||
i=0
|
||||
# Create ten sparse file vdevs and collect their paths in $specials_list.
while [[ $i != 10 ]]; do
	# Fail the test right away if a backing file cannot be created.
	log_must truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i
	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"

	((i = i + 1))
done
|
||||
|
||||
slow_disk=$TESTDIR/$TESTFILE1.3
|
||||
log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE
|
||||
|
||||
# Test file size in MB
|
||||
count=200
|
||||
|
||||
for type in "raidz1" "raidz2" "raidz3" ; do
|
||||
create_pool $TESTPOOL1 $type $specials_list
|
||||
log_must zpool set autosit=on $TESTPOOL1 "${type}-0"
|
||||
log_must zfs create -o primarycache=none -o recordsize=512K \
|
||||
$TESTPOOL1/$TESTFS1
|
||||
log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
|
||||
|
||||
log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count
|
||||
|
||||
# Make one disk 100ms slower to trigger a sit out
|
||||
log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1
|
||||
|
||||
# Do some reads and wait for sit out on slow disk
|
||||
SECONDS=0
|
||||
typeset -i size=0
|
||||
for i in $(seq 1 $count) ; do
|
||||
dd if=/$TESTDIR1/bigfile skip=$i bs=1M count=1 of=/dev/null
|
||||
size=$i
|
||||
|
||||
sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)
|
||||
if [[ "$sit_out" == "on" ]] ; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on"
|
||||
log_note took $SECONDS seconds to reach sit out reading ${size}M
|
||||
log_must zpool status -s $TESTPOOL1
|
||||
|
||||
typeset top=$(zpool status -j | jq -r ".pools.$TESTPOOL1.vdevs[].vdevs[].name")
|
||||
attach_test $top $TESTDIR/$REPLACEFILE
|
||||
|
||||
log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\""
|
||||
|
||||
destroy_pool $TESTPOOL1
|
||||
log_must rm -rf /$TESTPOOL1
|
||||
done
|
||||
|
||||
log_pass
|
||||
@@ -0,0 +1,199 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2025, Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Replacing disks while a disk is sitting out reads should pass
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create raidz and draid pools
|
||||
# 2. Make one disk slower and trigger a read sit out for that disk
|
||||
# 3. Start some random I/O
|
||||
# 4. Replace a disk in the pool with another disk.
|
||||
# 5. Verify the integrity of the file system and the resilvering.
|
||||
#
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
save_tunable READ_SIT_OUT_SECS
|
||||
set_tunable32 READ_SIT_OUT_SECS 120
|
||||
save_tunable SIT_OUT_CHECK_INTERVAL
|
||||
set_tunable64 SIT_OUT_CHECK_INTERVAL 20
|
||||
|
||||
#
# Exit handler: restores both sit-out tunables, removes zinject handlers
# and events, kills any background writers listed in $child_pids, destroys
# $TESTPOOL1 when present and clears out $TESTDIR.
#
function cleanup
{
	typeset saved

	for saved in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $saved
	done
	log_must zinject -c all
	log_must zpool events -c

	# Nothing is iterated when $child_pids is empty.
	for wait_pid in $child_pids; do
		kill $wait_pid
	done

	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1

	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
}
|
||||
|
||||
log_assert "Replacing a disk during I/O with a sit out completes."
|
||||
|
||||
options=""
|
||||
options_display="default options"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "
|
||||
|
||||
[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "
|
||||
|
||||
[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "
|
||||
|
||||
[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "
|
||||
|
||||
[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "
|
||||
|
||||
options="$options -r "
|
||||
|
||||
[[ -n "$options" ]] && options_display=$options
|
||||
|
||||
child_pids=""
|
||||
|
||||
#
# Run background file_trunc writers, replace a disk while the slow drive
# is still injected, then export/import the pool and verify it.
#
# $1 disk to be replaced
# $2 replacement disk
# $3 replacement type; "seq" adds -s to the zpool replace flags, any
#    other value uses plain -w
#
# Appends the writer pids to the global $child_pids while they run and
# resets it once they have been killed.
#
function replace_test
{
	typeset old_disk=$1
	typeset new_disk=$2
	typeset repl_type=$3
	typeset -i writers=2
	typeset -i n
	typeset pid
	typeset repl_flag

	for (( n = 0; n < writers; n++ )); do
		log_note "Invoking file_trunc with: $options_display on $TESTFILE.$n"
		file_trunc $options $TESTDIR/$TESTFILE.$n &
		pid=$!

		sleep 1

		child_pids="$child_pids $pid"
	done

	case "$repl_type" in
	seq)	repl_flag="-ws" ;;
	*)	repl_flag="-w" ;;
	esac

	# replace disk with a slow drive still present
	SECONDS=0
	log_must zpool replace $repl_flag $TESTPOOL1 $old_disk $new_disk
	log_note took $SECONDS seconds to replace disk

	# The writers are only needed while the replace is in progress.
	for wait_pid in $child_pids; do
		kill $wait_pid
	done
	child_pids=""

	log_must zinject -c all
	log_must zpool export $TESTPOOL1
	log_must zpool import -d $TESTDIR $TESTPOOL1
	log_must zfs umount $TESTPOOL1/$TESTFS1
	log_must zdb -cdui $TESTPOOL1/$TESTFS1
	log_must zfs mount $TESTPOOL1/$TESTFS1
	verify_pool $TESTPOOL1
}
|
||||
|
||||
DEVSIZE="150M"
|
||||
specials_list=""
|
||||
i=0
|
||||
while [[ $i != 10 ]]; do
|
||||
log_must truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i
|
||||
specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
|
||||
|
||||
((i = i + 1))
|
||||
done
|
||||
|
||||
slow_disk=$TESTDIR/$TESTFILE1.3
|
||||
log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE
|
||||
|
||||
# Test file size in MB
|
||||
count=400
|
||||
|
||||
for type in "raidz2" "raidz3" "draid2"; do
|
||||
create_pool $TESTPOOL1 $type $specials_list
|
||||
log_must zpool set autosit=on $TESTPOOL1 "${type}-0"
|
||||
log_must zfs create -o primarycache=none -o recordsize=512K \
|
||||
$TESTPOOL1/$TESTFS1
|
||||
log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1
|
||||
|
||||
log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count
|
||||
|
||||
# Make one disk 100ms slower to trigger a sit out
|
||||
log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1
|
||||
|
||||
# Do some reads and wait for sit out on slow disk
|
||||
SECONDS=0
|
||||
typeset -i size=0
|
||||
for i in $(seq 1 $count) ; do
|
||||
dd if=/$TESTDIR1/bigfile skip=$i bs=1M count=1 of=/dev/null
|
||||
size=$i
|
||||
|
||||
sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)
|
||||
if [[ "$sit_out" == "on" ]] ; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on"
|
||||
log_note took $SECONDS seconds to reach sit out reading ${size}M
|
||||
log_must zpool status -s $TESTPOOL1
|
||||
|
||||
typeset repl_type="replace"
|
||||
if [[ "$type" == "draid2" && $((RANDOM % 2)) -eq 0 ]]; then
|
||||
repl_type="seq"
|
||||
fi
|
||||
replace_test $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE $repl_type
|
||||
|
||||
log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\""
|
||||
|
||||
destroy_pool $TESTPOOL1
|
||||
log_must rm -rf /$TESTPOOL1
|
||||
done
|
||||
|
||||
log_pass
|
||||
Reference in New Issue
Block a user