mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 03:08:51 +03:00
Detect a slow raidz child during reads
A single slow-responding disk can affect the overall read performance of a raidz group. When a raidz child disk is determined to be a persistent slow outlier, have it sit out during reads for a period of time. The raidz group can use parity to reconstruct the data that was skipped. Each time a slow disk is placed into a sit out period, its `vdev_stat.vs_slow_ios` count is incremented and a zevent of class `ereport.fs.zfs.delay` is posted. The length of the sit out period can be changed using the `raid_read_sit_out_secs` module parameter. Setting it to zero disables slow outlier detection. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <paul.dagnelie@klarasystems.com> Contributions-by: Don Brady <don.brady@klarasystems.com> Contributions-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #17227
This commit is contained in:
committed by
Brian Behlendorf
parent
0df85ec27c
commit
df55ba7c49
@@ -0,0 +1,106 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
|
||||
# Copyright (c) 2025 by Klara, Inc.
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that vdevs 'sit out' when they are slow
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create various raidz/draid pools
|
||||
# 2. Degrade/fault one of the disks.
|
||||
# 3. Inject delays into one of the disks
|
||||
# 4. Verify disk is set to 'sit out' for a while.
|
||||
# 5. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
# Exit handler: undo the tunable overrides, clear injections and events,
# then remove the test pool and its file-backed vdevs.
#
function cleanup
{
	typeset tunable

	# Put back the tunables this script saved before overriding them
	for tunable in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $tunable
	done

	# Drop every zinject handler and any queued pool events
	log_must zinject -c all
	log_must zpool events -c

	# The pool goes first, then the files that backed it
	destroy_pool $TESTPOOL2
	log_must rm -f $TEST_BASE_DIR/vdev.$$.*
}
|
||||
|
||||
log_assert "Verify sit_out works"
log_onexit cleanup

# Use a short sit out period so the test can watch it expire
save_tunable READ_SIT_OUT_SECS
set_tunable32 READ_SIT_OUT_SECS 5

save_tunable SIT_OUT_CHECK_INTERVAL
set_tunable64 SIT_OUT_CHECK_INTERVAL 20

# Ten sparse files back every pool layout exercised below
log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9}

# The same two children act as the failed and the slow disk in each layout
BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9
SLOW_VDEV=$TEST_BASE_DIR/vdev.$$.8

for raidtype in raidz2 raidz3 draid2 draid3 ; do
	log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9}
	log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0"
	log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400
	# NOTE(review): the export/import presumably drops cached data so the
	# reads below reach the vdevs - confirm
	log_must zpool export $TESTPOOL2
	log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2

	# Before any delay is injected the slow child must not be sitting out
	log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off" ]]

	# Slow down reads on one child by 200ms
	log_must zinject -d $SLOW_VDEV -D200:1 -T read $TESTPOOL2

	# Pick at random whether the other child gets degraded or faulted
	if (( RANDOM % 2 == 0 )); then
		action="degrade"
	else
		action="fault"
	fi
	log_must zinject -d $BAD_VDEV -A $action -T read $TESTPOOL2

	# Read two 2M blocks per round (one in the background) and stop as
	# soon as the slow child reports that it is sitting out
	typeset -i blk=0
	while (( blk < 100 )); do
		dd if=/$TESTPOOL2/bigfile skip=$blk bs=2M count=1 of=/dev/null &
		dd if=/$TESTPOOL2/bigfile skip=$((blk + 100)) bs=2M count=1 of=/dev/null

		state=$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)
		[[ "$state" == "on" ]] && break
		(( blk += 1 ))
	done

	log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "on"

	# Remove the injected delay and fault
	log_must zinject -c all

	# Give the sit out period time to end
	log_must wait_sit_out $TESTPOOL2 $SLOW_VDEV 10

	log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $SLOW_VDEV)" == "off"
	destroy_pool $TESTPOOL2
	log_must zpool labelclear -f $BAD_VDEV
done

log_pass "sit_out works correctly"
|
||||
@@ -0,0 +1,102 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that vdevs 'sit out' when they are slow
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create various raidz/draid pools
|
||||
# 2. Inject delays into one of the disks
|
||||
# 3. Verify disk is set to 'sit out' for a while.
|
||||
# 4. Wait for READ_SIT_OUT_SECS and verify sit out state is lifted.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
# Exit handler: undo the tunable overrides, clear injections and events,
# then remove the test pool and its file-backed vdevs.
#
function cleanup
{
	typeset tunable

	# Put back the tunables this script saved before overriding them
	for tunable in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $tunable
	done

	# Drop every zinject handler and any queued pool events
	log_must zinject -c all
	log_must zpool events -c

	# The pool goes first, then the files that backed it
	destroy_pool $TESTPOOL2
	log_must rm -f $TEST_BASE_DIR/vdev.$$.*
}
|
||||
|
||||
log_assert "Verify sit_out works"
log_onexit cleanup

# Use a short sit out period so the test can watch it expire
save_tunable READ_SIT_OUT_SECS
set_tunable32 READ_SIT_OUT_SECS 5

save_tunable SIT_OUT_CHECK_INTERVAL
set_tunable64 SIT_OUT_CHECK_INTERVAL 20

# Ten sparse files back every pool layout exercised below
log_must truncate -s200M $TEST_BASE_DIR/vdev.$$.{0..9}

# The last child is the one that gets slowed down in every layout
BAD_VDEV=$TEST_BASE_DIR/vdev.$$.9

for raidtype in raidz raidz2 raidz3 draid1 draid2 draid3 ; do
	log_must zpool create $TESTPOOL2 $raidtype $TEST_BASE_DIR/vdev.$$.{0..9}
	log_must zpool set autosit=on $TESTPOOL2 "${raidtype}-0"
	log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=600
	# NOTE(review): the export/import presumably drops cached data so the
	# reads below reach the vdevs - confirm
	log_must zpool export $TESTPOOL2
	log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2

	# Before any delay is injected the child must not be sitting out
	log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off" ]]

	# Slow down reads on that child by 200ms
	log_must zinject -d $BAD_VDEV -D200:1 -T read $TESTPOOL2

	# Read three 2M blocks per round (two in the background) and stop as
	# soon as the slow child reports that it is sitting out
	typeset -i blk=0
	while (( blk < 100 )); do
		dd if=/$TESTPOOL2/bigfile skip=$blk bs=2M count=1 of=/dev/null &
		dd if=/$TESTPOOL2/bigfile skip=$((blk + 100)) bs=2M count=1 of=/dev/null &
		dd if=/$TESTPOOL2/bigfile skip=$((blk + 200)) bs=2M count=1 of=/dev/null

		state=$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)
		[[ "$state" == "on" ]] && break
		(( blk += 1 ))
	done

	log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "on"

	# Remove the injected delay
	log_must zinject -c all

	# Give the sit out period time to end
	log_must wait_sit_out $TESTPOOL2 $BAD_VDEV 10

	# The property must read "off" again once the wait has returned
	log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV)" == "off"

	destroy_pool $TESTPOOL2
done

log_pass "sit_out works correctly"
|
||||
@@ -0,0 +1,116 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
|
||||
# Copyright (c) 2025 by Klara, Inc.
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that we don't sit out too many vdevs
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create draid2 pool
|
||||
# 2. Inject delays into three of the disks
|
||||
# 3. Do reads to trigger sit-outs
|
||||
# 4. Verify exactly 2 disks sit out
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
#
# Exit handler: restore tunables, clear injections and events, then
# remove the test pool and its file-backed vdevs.
#
function cleanup
{
	typeset tunable

	# NOTE(review): this script only saves SIT_OUT_CHECK_INTERVAL; no
	# save_tunable for READ_SIT_OUT_SECS is visible here, so that restore
	# may have nothing to restore - confirm it is intentional.
	for tunable in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $tunable
	done

	# Drop every zinject handler and any queued pool events
	log_must zinject -c all
	log_must zpool events -c

	# The pool goes first, then the files that backed it
	destroy_pool $TESTPOOL2
	log_must rm -f $TEST_BASE_DIR/vdev.$$.*
}
|
||||
|
||||
#
# Issue reads against $TESTPOOL2 until the given child vdev reports
# sit_out=on, giving up after 100 rounds.  Each round performs one
# background and one foreground 2M read of bigfile and then polls the
# vdev's sit_out property.  The caller asserts the final state.
#
# $1 vdev - path of the child vdev to poll
#
function read_until_sit_out
{
	typeset vdev=$1
	typeset -i blk
	typeset state

	for blk in {0..99} ; do
		dd if=/$TESTPOOL2/bigfile skip=$blk bs=2M count=1 of=/dev/null &
		dd if=/$TESTPOOL2/bigfile skip=$((blk + 100)) bs=2M count=1 of=/dev/null

		state=$(get_vdev_prop sit_out $TESTPOOL2 $vdev)
		if [[ "$state" == "on" ]] ; then
			break
		fi
	done
}

log_assert "Verify sit_out works"

log_onexit cleanup

save_tunable SIT_OUT_CHECK_INTERVAL
set_tunable64 SIT_OUT_CHECK_INTERVAL 20

log_must truncate -s 150M $TEST_BASE_DIR/vdev.$$.{0..9}

log_must zpool create $TESTPOOL2 draid2 $TEST_BASE_DIR/vdev.$$.{0..9}
log_must zpool set autosit=on $TESTPOOL2 draid2-0
log_must dd if=/dev/urandom of=/$TESTPOOL2/bigfile bs=1M count=400
log_must zpool export $TESTPOOL2
log_must zpool import -d $TEST_BASE_DIR $TESTPOOL2

BAD_VDEV1=$TEST_BASE_DIR/vdev.$$.7
BAD_VDEV2=$TEST_BASE_DIR/vdev.$$.8
BAD_VDEV3=$TEST_BASE_DIR/vdev.$$.9

# autosit must be enabled and none of the three children sitting out yet
log_must eval [[ "$(get_vdev_prop autosit $TESTPOOL2 draid2-0)" == "on" ]]
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "off" ]]
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "off" ]]
log_must eval [[ "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off" ]]

# Delay reads on the first disk by 200ms; it is expected to sit out
log_must zinject -d $BAD_VDEV1 -D200:1 -T read $TESTPOOL2
read_until_sit_out $BAD_VDEV1
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV1)" == "on"

# Same for the second disk; it is expected to sit out as well
log_must zinject -d $BAD_VDEV2 -D200:1 -T read $TESTPOOL2
read_until_sit_out $BAD_VDEV2
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV2)" == "on"

# A third slow disk must NOT sit out: the helper runs all of its rounds
# and the property has to stay "off"
log_must zinject -d $BAD_VDEV3 -D200:1 -T read $TESTPOOL2
read_until_sit_out $BAD_VDEV3
log_must test "$(get_vdev_prop sit_out $TESTPOOL2 $BAD_VDEV3)" == "off"

log_pass "sit_out works correctly"
|
||||
@@ -0,0 +1,189 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2025, Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Attaching disks while a disk is sitting out reads should pass
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create raidz pools
|
||||
# 2. Make one disk slower and trigger a read sit out for that disk
|
||||
# 3. Start some random I/O
|
||||
# 4. Attach a disk to the pool.
|
||||
# 5. Verify the integrity of the file system and the resilvering.
|
||||
|
||||
# NOTE(review): presumably limits this test to the global zone - confirm
verify_runnable "global"

# Record the current values of both tunables before overriding them;
# cleanup() restores them on exit.
save_tunable READ_SIT_OUT_SECS
save_tunable SIT_OUT_CHECK_INTERVAL

# Sit out period of 120 seconds for this test
set_tunable32 READ_SIT_OUT_SECS 120
# NOTE(review): the unit of the check interval is not visible here
set_tunable64 SIT_OUT_CHECK_INTERVAL 20
|
||||
|
||||
#
# Exit handler: restore tunables, clear injections and events, stop any
# background writers still listed in $child_pids, destroy the test pool
# and empty $TESTDIR.
#
function cleanup
{
	typeset tunable

	for tunable in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $tunable
	done

	log_must zinject -c all
	log_must zpool events -c

	# An empty $child_pids simply yields zero iterations
	for wait_pid in $child_pids; do
		kill $wait_pid
	done

	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1

	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
}
|
||||
|
||||
# This script attaches a disk (see attach_test), so the assertion says so.
log_assert "Attaching a disk during I/O with a sit out completes."

# Command line handed to file_trunc, plus a printable form for log_note
options=""
options_display="default options"

log_onexit cleanup

# Each HOLES_* value that is set contributes one file_trunc option
[[ -n "$HOLES_FILESIZE" ]] && options=" $options -f $HOLES_FILESIZE "

[[ -n "$HOLES_BLKSIZE" ]] && options="$options -b $HOLES_BLKSIZE "

[[ -n "$HOLES_COUNT" ]] && options="$options -c $HOLES_COUNT "

[[ -n "$HOLES_SEED" ]] && options="$options -s $HOLES_SEED "

[[ -n "$HOLES_FILEOFFSET" ]] && options="$options -o $HOLES_FILEOFFSET "

# -r is always passed
options="$options -r "

# $options always contains at least "-r" here, so this always replaces
# the "default options" placeholder
[[ -n "$options" ]] && options_display=$options

# PIDs of background file_trunc writers; attach_test appends to this list
# and both attach_test and cleanup kill what is on it
child_pids=""
|
||||
|
||||
#
# Start background file_trunc writers, attach $disk to $vdev in
# $TESTPOOL1 while the slow disk is still present, then stop the writers,
# clear the injections and verify the pool.
#
# $1 vdev - existing vdev to attach to
# $2 disk - device to attach
#
# Reads the globals options, options_display, TESTDIR, TESTFILE,
# TESTPOOL1 and TESTFS1; appends to and finally empties child_pids.
#
function attach_test
{
	typeset vdev=$1
	typeset disk=$2
	# Number of background writers.  This was previously read without
	# ever being set, so the loop below never ran; 2 matches replace_test
	# in the sibling replace test.
	typeset -i iters=2

	typeset i=0
	while [[ $i -lt $iters ]]; do
		log_note "Invoking file_trunc with: $options_display on $TESTFILE.$i"
		file_trunc $options $TESTDIR/$TESTFILE.$i &
		typeset pid=$!

		sleep 1

		child_pids="$child_pids $pid"
		((i = i + 1))
	done

	# attach disk with a slow drive still present
	SECONDS=0
	log_must zpool attach -w $TESTPOOL1 $vdev $disk
	log_note took $SECONDS seconds to attach disk

	# Stop the writers and forget them so cleanup does not kill them again
	for wait_pid in $child_pids
	do
		kill $wait_pid
	done
	child_pids=""

	log_must zinject -c all
	log_must zpool export $TESTPOOL1
	log_must zpool import -d $TESTDIR $TESTPOOL1
	# zdb runs against the dataset while it is unmounted
	log_must zfs umount $TESTPOOL1/$TESTFS1
	log_must zdb -cdui $TESTPOOL1/$TESTFS1
	log_must zfs mount $TESTPOOL1/$TESTFS1
	verify_pool $TESTPOOL1
}
|
||||
|
||||
DEVSIZE="150M"

# Create ten sparse backing files and collect their paths for create_pool.
# truncate is wrapped in log_must so a failure to create a backing file
# stops the test here rather than surfacing later.
specials_list=""
i=0
while [[ $i != 10 ]]; do
	log_must truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i
	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"

	((i = i + 1))
done

# Child that gets the injected read delay, and the file attached later
slow_disk=$TESTDIR/$TESTFILE1.3
log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE

# Test file size in MB
count=200

for type in "raidz1" "raidz2" "raidz3" ; do
	create_pool $TESTPOOL1 $type $specials_list
	log_must zpool set autosit=on $TESTPOOL1 "${type}-0"
	# NOTE(review): primarycache=none presumably keeps reads from being
	# served out of cache so they reach the vdevs - confirm
	log_must zfs create -o primarycache=none -o recordsize=512K \
	    $TESTPOOL1/$TESTFS1
	log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1

	log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count

	# Make one disk 100ms slower to trigger a sit out
	log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1

	# Read 1M at a time until the slow disk sits out; size records how
	# far the loop got, for the log_note below
	SECONDS=0
	typeset -i size=0
	for i in $(seq 1 $count) ; do
		dd if=/$TESTDIR1/bigfile skip=$i bs=1M count=1 of=/dev/null
		size=$i

		sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)
		if [[ "$sit_out" == "on" ]] ; then
			break
		fi
	done

	log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on"
	log_note took $SECONDS seconds to reach sit out reading ${size}M
	log_must zpool status -s $TESTPOOL1

	# Take the name of the vdev to attach to from the JSON status output
	typeset top=$(zpool status -j | jq -r ".pools.$TESTPOOL1.vdevs[].vdevs[].name")
	attach_test $top $TESTDIR/$REPLACEFILE

	# The attached file must now be listed as part of the pool
	log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\""

	destroy_pool $TESTPOOL1
	log_must rm -rf /$TESTPOOL1
done

log_pass
|
||||
@@ -0,0 +1,199 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2025, Klara, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/replacement/replacement.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Replacing disks while a disk is sitting out reads should pass
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create raidz and draid pools
|
||||
# 2. Make one disk slower and trigger a read sit out for that disk
|
||||
# 3. Start some random I/O
|
||||
# 4. Replace a disk in the pool with another disk.
|
||||
# 5. Verify the integrity of the file system and the resilvering.
|
||||
#
|
||||
|
||||
# NOTE(review): presumably limits this test to the global zone - confirm
verify_runnable "global"

# Record the current values of both tunables before overriding them;
# cleanup() restores them on exit.
save_tunable READ_SIT_OUT_SECS
save_tunable SIT_OUT_CHECK_INTERVAL

# Sit out period of 120 seconds for this test
set_tunable32 READ_SIT_OUT_SECS 120
# NOTE(review): the unit of the check interval is not visible here
set_tunable64 SIT_OUT_CHECK_INTERVAL 20
|
||||
|
||||
#
# Exit handler: restore tunables, clear injections and events, stop any
# background writers still listed in $child_pids, destroy the test pool
# and empty $TESTDIR.
#
function cleanup
{
	typeset tunable

	for tunable in READ_SIT_OUT_SECS SIT_OUT_CHECK_INTERVAL; do
		restore_tunable $tunable
	done

	log_must zinject -c all
	log_must zpool events -c

	# An empty $child_pids simply yields zero iterations
	for wait_pid in $child_pids; do
		kill $wait_pid
	done

	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1

	[[ -e $TESTDIR ]] && log_must rm -rf $TESTDIR/*
}
|
||||
|
||||
log_assert "Replacing a disk during I/O with a sit out completes."

# Command line handed to file_trunc, plus a printable form for log_note
options=""
options_display="default options"

log_onexit cleanup

# Each HOLES_* value that is set contributes one file_trunc option
if [[ -n "$HOLES_FILESIZE" ]]; then
	options=" $options -f $HOLES_FILESIZE "
fi

if [[ -n "$HOLES_BLKSIZE" ]]; then
	options="$options -b $HOLES_BLKSIZE "
fi

if [[ -n "$HOLES_COUNT" ]]; then
	options="$options -c $HOLES_COUNT "
fi

if [[ -n "$HOLES_SEED" ]]; then
	options="$options -s $HOLES_SEED "
fi

if [[ -n "$HOLES_FILEOFFSET" ]]; then
	options="$options -o $HOLES_FILEOFFSET "
fi

# -r is always passed
options="$options -r "

# Show the real option string in the log once there is one
if [[ -n "$options" ]]; then
	options_display=$options
fi

# PIDs of background file_trunc writers; replace_test appends to this
# list and both replace_test and cleanup kill what is on it
child_pids=""
|
||||
|
||||
#
# Replace one disk of $TESTPOOL1 with another while file_trunc writers
# run in the background, then stop the writers, clear the injections and
# verify the pool.
#
# $1 - device being replaced
# $2 - replacement device
# $3 - replacement type: "seq" adds -s to the zpool replace flags, any
#      other value uses -w alone
#
# Reads the globals options, options_display, TESTDIR, TESTFILE,
# TESTPOOL1 and TESTFS1; appends to and finally empties child_pids.
#
function replace_test
{
	typeset old_disk=$1
	typeset new_disk=$2
	typeset mode=$3
	typeset -i writers=2
	typeset -i n=0
	typeset writer_pid

	# Launch the writers one second apart and remember their PIDs
	while (( n < writers )); do
		log_note "Invoking file_trunc with: $options_display on $TESTFILE.$n"
		file_trunc $options $TESTDIR/$TESTFILE.$n &
		writer_pid=$!

		sleep 1

		child_pids="$child_pids $writer_pid"
		(( n += 1 ))
	done

	typeset flags
	case "$mode" in
	seq)	flags="-ws" ;;
	*)	flags="-w" ;;
	esac

	# The slow drive is still present while the replace runs
	SECONDS=0
	log_must zpool replace $flags $TESTPOOL1 $old_disk $new_disk
	log_note took $SECONDS seconds to replace disk

	# Stop the writers and forget them so cleanup does not kill them again
	for wait_pid in $child_pids; do
		kill $wait_pid
	done
	child_pids=""

	log_must zinject -c all
	log_must zpool export $TESTPOOL1
	log_must zpool import -d $TESTDIR $TESTPOOL1
	# zdb runs against the dataset while it is unmounted
	log_must zfs umount $TESTPOOL1/$TESTFS1
	log_must zdb -cdui $TESTPOOL1/$TESTFS1
	log_must zfs mount $TESTPOOL1/$TESTFS1
	verify_pool $TESTPOOL1
}
|
||||
|
||||
DEVSIZE="150M"

# Create ten sparse backing files and collect their paths for create_pool
specials_list=""
i=0
until [[ $i == 10 ]]; do
	log_must truncate -s $DEVSIZE $TESTDIR/$TESTFILE1.$i
	specials_list="$specials_list $TESTDIR/$TESTFILE1.$i"
	((i += 1))
done

# Child that gets the injected read delay, and the replacement file
slow_disk=$TESTDIR/$TESTFILE1.3
log_must truncate -s $DEVSIZE $TESTDIR/$REPLACEFILE

# Size of the test file, in MB
count=400

for type in "raidz2" "raidz3" "draid2"; do
	create_pool $TESTPOOL1 $type $specials_list
	log_must zpool set autosit=on $TESTPOOL1 "${type}-0"
	# NOTE(review): primarycache=none presumably keeps reads from being
	# served out of cache so they reach the vdevs - confirm
	log_must zfs create -o primarycache=none -o recordsize=512K \
	    $TESTPOOL1/$TESTFS1
	log_must zfs set mountpoint=$TESTDIR1 $TESTPOOL1/$TESTFS1

	log_must dd if=/dev/urandom of=/$TESTDIR1/bigfile bs=1M count=$count

	# Add 100ms to every read on one disk so that it becomes an outlier
	log_must zinject -d $slow_disk -D100:1 -T read $TESTPOOL1

	# Read 1M at a time until the slow disk sits out; size records how
	# far the loop got, for the log_note below
	SECONDS=0
	typeset -i size=0
	typeset -i blk=1
	while (( blk <= count )); do
		dd if=/$TESTDIR1/bigfile skip=$blk bs=1M count=1 of=/dev/null
		size=$blk

		sit_out=$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)
		[[ "$sit_out" == "on" ]] && break
		(( blk += 1 ))
	done
	log_must test "$(get_vdev_prop sit_out $TESTPOOL1 $slow_disk)" == "on"
	log_note took $SECONDS seconds to reach sit out reading ${size}M
	log_must zpool status -s $TESTPOOL1

	# Only draid2 pools may, at random, use the "seq" replacement type
	typeset repl_type="replace"
	if [[ "$type" == "draid2" ]] && (( RANDOM % 2 == 0 )); then
		repl_type="seq"
	fi
	replace_test $TESTDIR/$TESTFILE1.1 $TESTDIR/$REPLACEFILE $repl_type

	# The replacement file must now be listed as part of the pool
	log_must eval "zpool iostat -v $TESTPOOL1 | grep \"$REPLACEFILE\""

	destroy_pool $TESTPOOL1
	log_must rm -rf /$TESTPOOL1
done

log_pass
|
||||
Reference in New Issue
Block a user