Configure zed's diagnosis engine with vdev properties

Introduce four new vdev properties:
    checksum_n
    checksum_t
    io_n
    io_t

These properties can be used for configuring the thresholds of zed's
diagnosis engine and are interpreted as <N> events in <T> seconds.

When this property is set to a non-default value on a top-level vdev,
those thresholds will also apply to its leaf vdevs. This behavior can be
overridden by explicitly setting the property on the leaf vdev.

Note that these properties do not persist across vdev replacement. For
this reason, it is advisable to set the property on the top-level vdev
instead of the leaf vdev.

The default values for zed's diagnosis engine (10 events, 600 seconds)
remain unchanged.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Rob Wing <rob.wing@klarasystems.com>
Sponsored-by: Seagate Technology LLC
Closes #13805
This commit is contained in:
rob-wing
2023-01-23 12:14:25 -09:00
committed by GitHub
parent f091db9248
commit 69f024a56e
15 changed files with 618 additions and 24 deletions
+2
View File
@@ -1367,8 +1367,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/events/events_001_pos.ksh \
functional/events/events_002_pos.ksh \
functional/events/setup.ksh \
functional/events/zed_cksum_config.ksh \
functional/events/zed_cksum_reported.ksh \
functional/events/zed_fd_spill.ksh \
functional/events/zed_io_config.ksh \
functional/events/zed_rc_filter.ksh \
functional/exec/cleanup.ksh \
functional/exec/exec_001_pos.ksh \
@@ -0,0 +1,158 @@
#!/bin/ksh -p
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2022, Klara Inc.
#
# DESCRIPTION:
# Verify that vdev properties, checksum_n and checksum_t, work with ZED.
#
# STRATEGY:
# 1. Create a pool with single vdev
# 2. Set checksum_n/checksum_t to non-default values
# 3. Inject checksum errors
# 4. Verify that ZED degrades vdev
#
# Load the shared test library and the definitions common to the events tests.
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events_common.kshlib
# NOTE(review): verify_runnable is defined outside this file; "both"
# presumably means the test may run in global and local zones - confirm.
verify_runnable "both"
# Mount point of the test pool and the file that checksum errors are
# injected into.
MOUNTDIR="$TEST_BASE_DIR/checksum_mount"
FILEPATH="$MOUNTDIR/checksum_file"
# File-backed vdev; the name embeds the shell's PID ($$).
VDEV="$TEST_BASE_DIR/vdevfile.$$"
POOL="checksum_pool"
# Size handed to mkfile when $FILEPATH is created.
FILESIZE="10M"
#
# Exit handler: stop ZED, clear every zinject handler, destroy the test
# pool if it is still present, then remove the vdev file and mount directory.
#
function cleanup
{
	log_must zed_stop
	log_must zinject -c all

	# Each scenario destroys the pool itself, so it may already be gone.
	poolexists $POOL && destroy_pool $POOL

	log_must rm -fd $VDEV $MOUNTDIR
}
# Register cleanup() as the exit handler and state what is being tested.
log_onexit cleanup
log_assert "Test ZED checksum_N and checksum_T configurability"
#
# Create a fresh single-vdev pool mounted at $MOUNTDIR, clear the pool
# event list, empty the ZED debug log and set the dataset properties the
# scenarios rely on.
#
function do_setup
{
	typeset prop

	log_must zpool create -f -m $MOUNTDIR $POOL $VDEV
	log_must zpool events -c
	log_must truncate -s 0 $ZED_DEBUG_LOG

	# recordsize=512 matches the 512-byte stride the scenarios use
	# between successive reads.
	for prop in compression=off primarycache=none recordsize=512; do
		log_must zfs set $prop $POOL
	done
}
#
# Tear down one scenario: clear the injected faults, then destroy the pool.
#
function do_clean
{
	log_must zinject -c all
	log_must zpool destroy $POOL
}
#
# Require $VDEV in $POOL to reach the DEGRADED state.
# NOTE(review): the trailing 60 is presumably wait_vdev_state's timeout in
# seconds - confirm against its definition.
#
function must_degrade
{
	typeset want="DEGRADED" limit=60

	log_must wait_vdev_state $POOL $VDEV $want $limit
}
#
# Require $VDEV in $POOL to still be ONLINE after the ZED debug log has
# been waited on.
# NOTE(review): file_wait presumably blocks until the log has been quiet
# for the given number of seconds - confirm against its definition.
#
function mustnot_degrade
{
	typeset quiet=5 limit=60

	log_must file_wait $ZED_DEBUG_LOG $quiet
	log_must wait_vdev_state $POOL $VDEV "ONLINE" $limit
}
#
# Scenario: default ZED settings
#	checksum_n=10
#	checksum_t=600
# Fire 10 events; the vdev should degrade.
#
function default_degrade
{
	# Keep the loop state local so it cannot leak into other scenarios.
	typeset -i i blk

	do_setup

	log_must mkfile $FILESIZE $FILEPATH
	log_must zinject -a -t data -e checksum -T read -f 100 $FILEPATH

	# Read one byte at ten offsets 512 bytes apart. dd's exit status is
	# not checked and its stderr is discarded.
	for ((i = 0; i < 10; i++)); do
		blk=$((i * 512))
		dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null
	done

	must_degrade

	do_clean
}
#
# Scenario: checksum_t=1
# Fire 10 events over 2.5 seconds; the vdev should not degrade.
#
function checksum_t_no_degrade
{
	# Keep the loop state local so it cannot leak into other scenarios.
	typeset -i i blk

	do_setup

	log_must zpool set checksum_t=1 $POOL $VDEV
	log_must mkfile $FILESIZE $FILEPATH
	log_must zinject -a -t data -e checksum -T read -f 100 $FILEPATH

	# Read one byte at ten offsets 512 bytes apart, pausing 0.25s after
	# each read. dd's exit status is not checked and its stderr is
	# discarded.
	for ((i = 0; i < 10; i++)); do
		blk=$((i * 512))
		dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null
		sleep 0.25
	done

	mustnot_degrade

	do_clean
}
#
# Scenario: checksum_n=1
# Fire a single event; the vdev should degrade.
#
function checksum_n_degrade
{
	do_setup
	log_must zpool set checksum_n=1 $POOL $VDEV

	log_must mkfile $FILESIZE $FILEPATH
	log_must zinject -a -t data -e checksum -T read -f 100 $FILEPATH

	# One single-byte read. dd's exit status is not checked and its
	# stderr is discarded.
	dd if=$FILEPATH of=/dev/null bs=1 count=1 2>/dev/null

	must_degrade
	do_clean
}
# Create the backing file and mount directory once and start ZED, then run
# the three scenarios; each scenario creates and destroys its own pool.
log_must truncate -s $MINVDEVSIZE $VDEV
log_must mkdir -p $MOUNTDIR
log_must zed_start
default_degrade
checksum_n_degrade
checksum_t_no_degrade
log_pass "Test ZED checksum_N and checksum_T configurability"
+150
View File
@@ -0,0 +1,150 @@
#!/bin/ksh -p
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2022, Klara Inc.
#
# DESCRIPTION:
# Verify that vdev properties, io_n and io_t, work with ZED.
#
# STRATEGY:
# 1. Create a mirrored pool.
# 2. Set io_n/io_t to non-default values
# 3. Inject io errors
# 4. Verify that ZED faults the vdev
# Load the shared test library and the definitions common to the events tests.
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events_common.kshlib
# NOTE(review): verify_runnable is defined outside this file; "both"
# presumably means the test may run in global and local zones - confirm.
verify_runnable "both"
# Mount point of the test pool and the file that is read back during the test.
MOUNTDIR="$TEST_BASE_DIR/io_mount"
FILEPATH="$MOUNTDIR/io_file"
# Two file-backed vdevs forming the mirror; io errors are injected on $VDEV.
# The names embed the shell's PID ($$).
VDEV="$TEST_BASE_DIR/vdevfile.$$"
VDEV1="$TEST_BASE_DIR/vdevfile1.$$"
POOL="io_pool"
#
# Exit handler: stop ZED, clear every zinject handler, destroy the test
# pool if it is still present, remove both vdev files and the mount
# directory, and put the PREFETCH_DISABLE tunable back to its saved value.
#
function cleanup
{
	log_must zed_stop
	log_must zinject -c all
	if poolexists $POOL ; then
		destroy_pool $POOL
	fi
	log_must rm -fd $VDEV $VDEV1 $MOUNTDIR

	# Restore the tunable only when its original value was captured;
	# with an empty $zfsprefetch, set_tunable32 would be invoked with a
	# missing argument.
	if [[ -n $zfsprefetch ]]; then
		log_must set_tunable32 PREFETCH_DISABLE $zfsprefetch
	fi
}
# Register cleanup() as the exit handler and state what is being tested.
log_onexit cleanup
log_assert "Test ZED io_n and io_t configurability"
# Remember the current PREFETCH_DISABLE value so cleanup() can restore it,
# then set the tunable to 1 for the duration of the test.
zfsprefetch=$(get_tunable PREFETCH_DISABLE)
log_must set_tunable32 PREFETCH_DISABLE 1
#
# Create a fresh two-way mirror ($VDEV, $VDEV1) mounted at $MOUNTDIR, clear
# the pool event list, empty the ZED debug log and set the dataset
# properties the scenarios rely on.
#
function setup_pool
{
	typeset prop

	log_must zpool create -f -m $MOUNTDIR $POOL mirror $VDEV $VDEV1
	log_must zpool events -c
	log_must truncate -s 0 $ZED_DEBUG_LOG

	# recordsize=512 matches the 512-byte stride the scenarios use
	# between successive reads.
	for prop in compression=off primarycache=none recordsize=512; do
		log_must zfs set $prop $POOL
	done
}
#
# Tear down one scenario: clear the injected faults, then destroy the pool.
#
function do_clean
{
	log_must zinject -c all
	log_must zpool destroy $POOL
}
#
# Scenario: default ZED settings
#	io_n=10 (events)
#	io_t=600 (seconds)
# Fire 10 events over 2.5 seconds; $VDEV should become FAULTED.
#
function default_degrade
{
	# Keep the loop state local so it cannot leak into other scenarios.
	typeset -i i blk

	setup_pool

	log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=64
	log_must zinject -a -d $VDEV -e io -T read -f 100 $POOL

	# Read one byte at ten offsets 512 bytes apart, pausing 0.25s after
	# each read. dd's exit status is not checked and its stderr is
	# discarded.
	for ((i = 0; i < 10; i++)); do
		blk=$((i * 512))
		dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null
		sleep 0.25
	done

	log_must wait_vdev_state $POOL $VDEV "FAULTED" 60

	do_clean
}
#
# Scenario: io_n=1
# Fire a single event; $VDEV should become FAULTED.
#
function io_n_degrade
{
	setup_pool
	log_must zpool set io_n=1 $POOL $VDEV

	log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=64
	log_must zinject -a -d $VDEV -e io -T read -f 100 $POOL

	# One single-byte read. dd's exit status is not checked and its
	# stderr is discarded.
	dd if=$FILEPATH of=/dev/null bs=1 count=1 2>/dev/null

	log_must wait_vdev_state $POOL $VDEV "FAULTED" 60
	do_clean
}
#
# Scenario: io_t=1
# Fire 10 events over 2.5 seconds; $VDEV should stay ONLINE.
#
function io_t_nodegrade
{
	# Keep the loop state local so it cannot leak into other scenarios.
	typeset -i i blk

	setup_pool

	log_must zpool set io_t=1 $POOL $VDEV
	log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=64
	log_must zinject -a -d $VDEV -e io -T read -f 100 $POOL

	# Read one byte at ten offsets 512 bytes apart, pausing 0.25s after
	# each read. dd's exit status is not checked and its stderr is
	# discarded.
	for ((i = 0; i < 10; i++)); do
		blk=$((i * 512))
		dd if=$FILEPATH of=/dev/null bs=1 count=1 skip=$blk 2>/dev/null
		sleep 0.25
	done

	# NOTE(review): file_wait presumably blocks until the ZED debug log
	# has been quiet for the given number of seconds - confirm.
	log_must file_wait $ZED_DEBUG_LOG 30
	log_must wait_vdev_state $POOL $VDEV "ONLINE" 1

	do_clean
}
# Create both backing files and the mount directory once and start ZED, then
# run the three scenarios; each scenario creates and destroys its own pool.
log_must truncate -s $MINVDEVSIZE $VDEV
log_must truncate -s $MINVDEVSIZE $VDEV1
log_must mkdir -p $MOUNTDIR
log_must zed_start
default_degrade
io_n_degrade
io_t_nodegrade
log_pass "Test ZED io_n and io_t configurability"