Add slow disk diagnosis to ZED

Slow disk response times can be indicative of a failing drive. ZFS
currently tracks slow I/Os (slower than zio_slow_io_ms) and generates
events (ereport.fs.zfs.delay).  However, ZED takes no action on these
events, as it does for checksum or I/O errors.  This change adds slow
disk diagnosis to ZED, which is opt-in using new VDEV properties:
  VDEV_PROP_SLOW_IO_N
  VDEV_PROP_SLOW_IO_T

If multiple VDEVs in a pool are undergoing slow I/Os, then ZED skips
the zpool_vdev_degrade() call.

Sponsored-By: OpenDrives Inc.
Sponsored-By: Klara Inc.
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Rob Wing <rob.wing@klarasystems.com>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Closes #15469
This commit is contained in:
Don Brady
2024-02-08 10:19:52 -07:00
committed by Tony Hutter
parent db65272aef
commit c1c26a77ff
29 changed files with 655 additions and 71 deletions
+2 -1
View File
@@ -104,7 +104,8 @@ tags = ['functional', 'devices']
[tests/functional/events:Linux]
tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config']
'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
'zed_slow_io', 'zed_slow_io_many_vdevs']
tags = ['functional', 'events']
[tests/functional/fadvise:Linux]
+2
View File
@@ -1447,6 +1447,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/events/zed_fd_spill.ksh \
functional/events/zed_io_config.ksh \
functional/events/zed_rc_filter.ksh \
functional/events/zed_slow_io.ksh \
functional/events/zed_slow_io_many_vdevs.ksh \
functional/exec/cleanup.ksh \
functional/exec/exec_001_pos.ksh \
functional/exec/exec_002_neg.ksh \
@@ -70,4 +70,6 @@ typeset -a properties=(
checksum_t
io_n
io_t
slow_io_n
slow_io_t
)
@@ -26,8 +26,10 @@
. $STF_SUITE/include/libtest.shlib
zed_cleanup all-debug.sh all-syslog.sh all-dumpfds
zed_stop
zed_cleanup all-debug.sh all-syslog.sh all-dumpfds
zed_events_drain
default_cleanup
+205
View File
@@ -0,0 +1,205 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023, Klara Inc.
#
# DESCRIPTION:
# Verify that vdev properties, slow_io_n and slow_io_t, work with ZED.
#
# STRATEGY:
# 1. Create a pool with single vdev
# 2. Set slow_io_n/slow_io_t to non-default values
# 3. Inject slow io errors
# 4. Verify that ZED degrades vdev
#
. $STF_SUITE/include/libtest.shlib
# Paths and names used by this test.  $$ (this shell's PID) keeps the vdev
# backing file name unique per run.
TESTDIR="$TEST_BASE_DIR/zed_slow_io"
VDEV="$TEST_BASE_DIR/vdevfile.$$"
TESTPOOL="slow_io_pool"
# Prefix of the test files; do_setup appends a numeric suffix to it.
FILEPATH="$TESTDIR/slow_io.testfile"
# Tunable values captured before the test changes them; stop_slow_io and
# cleanup write them back.
OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
verify_runnable "both"
# Build a fresh single-vdev pool and populate it with small test files.
#
# Creates the 1G backing file $VDEV, sets up the pool on it, drains pending
# ZED events, and turns off compression, the primary cache and prefetch
# while using a 512-byte recordsize (presumably so that every later read of
# a test file reaches the vdev -- confirm).
#
# Sixteen 512-byte files are written because slow_io_degrade and
# slow_io_no_degrade read ${FILEPATH}1 through ${FILEPATH}16; creating fewer
# would make the extra reads fail silently (their stderr is discarded).
function do_setup
{
	log_must truncate -s 1G $VDEV
	default_setup_noexit $VDEV
	zed_events_drain
	log_must zfs set compression=off $TESTPOOL
	log_must zfs set primarycache=none $TESTPOOL
	log_must zfs set prefetch=none $TESTPOOL
	log_must zfs set recordsize=512 $TESTPOOL
	for i in {1..16}; do
		dd if=/dev/urandom of=${FILEPATH}$i bs=512 count=1 2>/dev/null
	done
	zpool sync
}
# Intermediate cleanup, called at the end of each sub-test: cancels all
# zinject handlers, destroys the test pool and removes the vdev backing
# file so that the next sub-test can call do_setup again.
function do_clean
{
log_must zinject -c all
log_must zpool destroy $TESTPOOL
log_must rm -f $VDEV
}
# Final cleanup, registered with log_onexit.
#
# Cancels any remaining zinject handlers, stops ZED, deletes the vdev
# backing file and writes the saved values back to both slow I/O tunables.
# A pool that still exists at this point means an earlier step failed, so
# its status and the "Diagnosis Engine" lines of the ZED log are printed
# before the pool is destroyed.
function cleanup
{
	typeset pool_status

	log_must zinject -c all

	if poolexists $TESTPOOL; then
		pool_status=$(zpool status -s $TESTPOOL)
		log_note "$pool_status"
		printf '%s\n' "=================== zed log search ==================="
		grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
		destroy_pool $TESTPOOL
	fi

	log_must zed_stop
	log_must rm -f $VDEV
	log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
	log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
}
# Start producing slow reads on the test vdev.
# Sets ZIO_SLOW_IO_MS to 10 and SLOW_IO_EVENTS_PER_SECOND to 1000, then adds
# a zinject delay handler (-D10:1) restricted to reads (-T read) on $VDEV.
# The pool is synced before and after these changes.
# NOTE(review): -D10:1 presumably means 10 ms of latency on 1 lane --
# confirm against zinject(8).
function start_slow_io
{
zpool sync
log_must set_tunable64 ZIO_SLOW_IO_MS 10
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1000
log_must zinject -d $VDEV -D10:1 -T read $TESTPOOL
zpool sync
}
# Stop producing slow reads: write the saved values back to both slow I/O
# tunables and cancel all zinject handlers.
function stop_slow_io
{
log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
log_must zinject -c all
}
# Sub-test: default ZED settings.
#
# Issues ten delayed reads a quarter of a second apart (about 2.5 seconds in
# total) without setting slow_io_n or slow_io_t on the vdev, then checks
# that the ZED log holds no zpool_vdev_degrade entries.
function default_degrade
{
	do_setup

	start_slow_io
	for i in 1 2 3 4 5 6 7 8 9 10; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.25
	done
	stop_slow_io

	log_note "$(zpool status -s $TESTPOOL)"
	# ZED may lag behind the kernel; wait before inspecting its log
	sleep 18
	log_note "$(zpool status -s $TESTPOOL)"

	degrades=$(grep "zpool_vdev_degrade" $ZEDLET_DIR/zed.log | wc -l)
	log_note $degrades vdev degrades in ZED log
	if [[ $degrades -ne 0 ]]; then
		log_fail "expecting no degrade events, found $degrades"
	fi

	do_clean
}
# Sub-test: vdev should degrade.
#
# Sets slow_io_n=5 and slow_io_t=60 on the vdev (5 events in 60 seconds),
# issues sixteen delayed reads half a second apart, then waits up to 60
# seconds for the kernel to report at least 5 delay events.  When enough
# events were produced the vdev must reach DEGRADED within 10 seconds;
# otherwise the check is skipped and a note is logged.
function slow_io_degrade
{
	typeset n
	typeset -i tries=0
	typeset -i events=0

	do_setup

	# a failed property set would invalidate the test, so do not ignore it
	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV

	start_slow_io
	for n in {1..16}; do
		dd if=${FILEPATH}$n of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# wait up to 60 seconds for kernel to produce at least 5 delay events
	#
	while [[ $tries -lt 60 ]]; do
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		[[ $events -ge 5 ]] && break
		tries=$((tries+1))
		sleep 1
	done
	log_note "$events delay events found"

	if [[ $events -ge 5 ]]; then
		log_must wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 10
	else
		log_note "not enough delay events, skipping DEGRADED check"
	fi

	do_clean
}
# Sub-test: vdev should not degrade.
#
# Sets slow_io_n=10 and slow_io_t=1 on the vdev (10 events in 1 second) and
# issues sixteen delayed reads spaced 0.5 seconds apart.  The vdev must not
# reach the DEGRADED state during the 45 second wait that follows.
function slow_io_no_degrade
{
	typeset n

	do_setup

	# If setting the properties failed silently the vdev would keep its
	# defaults and this negative test would pass without testing anything.
	log_must zpool set slow_io_n=10 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=1 $TESTPOOL $VDEV

	start_slow_io
	for n in {1..16}; do
		dd if=${FILEPATH}$n of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45

	do_clean
}
# Test driver: register cleanup to run on exit, drain pending events and
# start ZED, then run the three sub-tests in order.  Each sub-test creates
# and destroys its own pool via do_setup / do_clean.
log_assert "Test ZED slow io configurability"
log_onexit cleanup
log_must zed_events_drain
log_must zed_start
default_degrade
slow_io_degrade
slow_io_no_degrade
log_pass "Test ZED slow io configurability"
@@ -0,0 +1,177 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023, Klara Inc.
#
# DESCRIPTION:
# Verify that delay events from multiple vdevs don't degrade any vdev
#
# STRATEGY:
# 1. Create a pool with a 4 disk raidz vdev
# 2. Inject slow io errors
# 3. Verify that ZED detects slow I/Os but doesn't degrade any vdevs
#
. $STF_SUITE/include/libtest.shlib
# Paths and names used by this test.  $$ (this shell's PID) keeps the vdev
# backing file names unique per run.
TESTDIR="$TEST_BASE_DIR/zed_slow_io"
VDEV1="$TEST_BASE_DIR/vdevfile1.$$"
VDEV2="$TEST_BASE_DIR/vdevfile2.$$"
VDEV3="$TEST_BASE_DIR/vdevfile3.$$"
VDEV4="$TEST_BASE_DIR/vdevfile4.$$"
# Space-separated list of all four backing files; expanded unquoted so it
# splits into one word per vdev.
VDEVS="$VDEV1 $VDEV2 $VDEV3 $VDEV4"
TESTPOOL="slow_io_pool"
FILEPATH="$TESTDIR/slow_io.testfile"
# Tunable values captured before the test changes them; stop_slow_io and
# cleanup write them back.
OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
verify_runnable "both"
# Cleanup handler, registered with log_onexit.
#
# Cancels any remaining zinject handlers, stops ZED, deletes all vdev
# backing files and writes the saved values back to both slow I/O tunables.
# A pool that still exists at this point means an earlier step failed, so
# its status and the "Diagnosis Engine" lines of the ZED log are printed
# before the pool is destroyed.
function cleanup
{
	typeset pool_status

	log_must zinject -c all

	if poolexists $TESTPOOL; then
		pool_status=$(zpool status -s $TESTPOOL)
		log_note "$pool_status"
		printf '%s\n' "=================== zed log search ==================="
		grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
		destroy_pool $TESTPOOL
	fi

	log_must zed_stop
	log_must rm -f $VDEVS
	log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
	log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
}
# Start producing slow I/Os on every vdev of the pool.
#
# First sets slow_io_n=4 and slow_io_t=60 on each vdev, then sets
# ZIO_SLOW_IO_MS to 10 and SLOW_IO_EVENTS_PER_SECOND to 1000, and finally
# adds a zinject delay handler (-D10:1) for each vdev.  The pool is synced
# after the property changes and again after the handlers are installed.
function start_slow_io
{
	for disk in $VDEVS; do
		log_must zpool set slow_io_n=4 $TESTPOOL $disk
		log_must zpool set slow_io_t=60 $TESTPOOL $disk
	done
	zpool sync

	log_must set_tunable64 ZIO_SLOW_IO_MS 10
	log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1000

	for disk in $VDEVS; do
		log_must zinject -d $disk -D10:1 $TESTPOOL
	done
	zpool sync
}
# Stop producing slow I/Os: write the saved values back to both slow I/O
# tunables and cancel all zinject handlers.
function stop_slow_io
{
log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
log_must zinject -c all
}
# Verify that slow I/Os spread over several vdevs do not degrade any vdev.
#
# Builds a raidz pool from the four files in $VDEVS, writes a 20M file,
# reads it back while delays are injected on every vdev, and then:
#   1. waits up to 60 seconds for at least 50 delay events; if fewer show
#      up the test bails out (pool destroyed, no verdict on ZED);
#   2. waits up to 75 seconds for a "retiring case" line in the ZED log and
#      fails if none appears;
#   3. fails if the ZED log contains any zpool_vdev_degrade entry.
function multiple_slow_vdevs_test
{
	typeset -i waited=0
	typeset -i events=0
	typeset -i skips=0
	typeset -i degrades=0

	log_must truncate -s 1G $VDEVS
	default_raidz_setup_noexit $VDEVS

	log_must zpool events -c
	log_must zfs set compression=off $TESTPOOL
	log_must zfs set primarycache=none $TESTPOOL
	log_must zfs set recordsize=4K $TESTPOOL

	log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=20
	zpool sync

	#
	# Read the file with slow io injected on the disks
	# This will cause multiple errors on each disk to trip ZED SERD
	#
	#   pool: slow_io_pool
	#  state: ONLINE
	# config:
	#
	#   NAME                          STATE  READ WRITE CKSUM  SLOW
	#   slow_io_pool                  ONLINE    0     0     0     -
	#     raidz1-0                    ONLINE    0     0     0     -
	#       /var/tmp/vdevfile1.499278 ONLINE    0     0     0   113
	#       /var/tmp/vdevfile2.499278 ONLINE    0     0     0   109
	#       /var/tmp/vdevfile3.499278 ONLINE    0     0     0    96
	#       /var/tmp/vdevfile4.499278 ONLINE    0     0     0   109
	#
	start_slow_io
	dd if=$FILEPATH of=/dev/null bs=1M count=20 2>/dev/null
	stop_slow_io

	# count events available for processing
	while [[ $waited -lt 60 ]]; do
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		[[ $events -ge 50 ]] && break
		waited=$((waited+1))
		sleep 1
	done
	log_note "$events delay events found"
	if [[ $events -lt 50 ]]; then
		log_note "bailing: not enough events to complete the test"
		destroy_pool $TESTPOOL
		return
	fi

	#
	# give slow ZED a chance to process the delay events
	#
	waited=0
	while [[ $waited -lt 75 ]]; do
		skips=$(grep -c "retiring case" $ZEDLET_DIR/zed.log)
		[[ $skips -gt 0 ]] && break
		waited=$((waited+1))
		sleep 1
	done

	log_note $skips degrade skips in ZED log after $waited seconds
	[ $skips -gt 0 ] || log_fail "expecting to see skips"

	degrades=$(grep -c "zpool_vdev_degrade" $ZEDLET_DIR/zed.log)
	log_note $degrades vdev degrades in ZED log
	[ $degrades -eq 0 ] || \
	    log_fail "expecting no degrade events, found $degrades"

	destroy_pool $TESTPOOL
}
# Test driver: register cleanup to run on exit, drain pending events and
# start ZED, then run the single multi-vdev scenario.
log_assert "Test ZED slow io across multiple vdevs"
log_onexit cleanup
log_must zed_events_drain
log_must zed_start
multiple_slow_vdevs_test
log_pass "Test ZED slow io across multiple vdevs"
@@ -32,5 +32,6 @@ cleanup_devices $DISKS
zed_stop
zed_cleanup resilver_finish-start-scrub.sh
zed_events_drain
log_pass
@@ -28,6 +28,7 @@
verify_runnable "global"
zed_events_drain
zed_setup resilver_finish-start-scrub.sh
zed_start