mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 10:24:22 +03:00
Avoid fault diagnosis if multiple vdevs have errors
When multiple drives are throwing errors, it is likely not a drive failing but rather a failure above the drives, like a controller. The active cases context of the drive's peers is now considered when making a diagnosis. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@klarasystems.com> Closes #16531
This commit is contained in:
parent
f245541e24
commit
ddf5f34f06
@ -71,6 +71,7 @@ typedef struct zfs_case_data {
|
||||
uint64_t zc_ena;
|
||||
uint64_t zc_pool_guid;
|
||||
uint64_t zc_vdev_guid;
|
||||
uint64_t zc_parent_guid;
|
||||
int zc_pool_state;
|
||||
char zc_serd_checksum[MAX_SERDLEN];
|
||||
char zc_serd_io[MAX_SERDLEN];
|
||||
@ -181,10 +182,10 @@ zfs_case_unserialize(fmd_hdl_t *hdl, fmd_case_t *cp)
|
||||
}
|
||||
|
||||
/*
|
||||
* count other unique slow-io cases in a pool
|
||||
* Return count of other unique SERD cases under same vdev parent
|
||||
*/
|
||||
static uint_t
|
||||
zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
|
||||
zfs_other_serd_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
|
||||
{
|
||||
zfs_case_t *zcp;
|
||||
uint_t cases = 0;
|
||||
@ -206,10 +207,32 @@ zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
|
||||
|
||||
for (zcp = uu_list_first(zfs_cases); zcp != NULL;
|
||||
zcp = uu_list_next(zfs_cases, zcp)) {
|
||||
if (zcp->zc_data.zc_pool_guid == zfs_case->zc_pool_guid &&
|
||||
zcp->zc_data.zc_vdev_guid != zfs_case->zc_vdev_guid &&
|
||||
zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
|
||||
fmd_serd_active(hdl, zcp->zc_data.zc_serd_slow_io)) {
|
||||
zfs_case_data_t *zcd = &zcp->zc_data;
|
||||
|
||||
/*
|
||||
* must be same pool and parent vdev but different leaf vdev
|
||||
*/
|
||||
if (zcd->zc_pool_guid != zfs_case->zc_pool_guid ||
|
||||
zcd->zc_parent_guid != zfs_case->zc_parent_guid ||
|
||||
zcd->zc_vdev_guid == zfs_case->zc_vdev_guid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there is another active serd case besides zfs_case
|
||||
*
|
||||
* Only one serd engine will be assigned to the case
|
||||
*/
|
||||
if (zcd->zc_serd_checksum[0] == zfs_case->zc_serd_checksum[0] &&
|
||||
fmd_serd_active(hdl, zcd->zc_serd_checksum)) {
|
||||
cases++;
|
||||
}
|
||||
if (zcd->zc_serd_io[0] == zfs_case->zc_serd_io[0] &&
|
||||
fmd_serd_active(hdl, zcd->zc_serd_io)) {
|
||||
cases++;
|
||||
}
|
||||
if (zcd->zc_serd_slow_io[0] == zfs_case->zc_serd_slow_io[0] &&
|
||||
fmd_serd_active(hdl, zcd->zc_serd_slow_io)) {
|
||||
cases++;
|
||||
}
|
||||
}
|
||||
@ -502,6 +525,34 @@ zfs_ereport_when(fmd_hdl_t *hdl, nvlist_t *nvl, er_timeval_t *when)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the specified event in the SERD engine and return a
|
||||
* boolean value indicating whether or not the engine fired as
|
||||
* the result of inserting this event.
|
||||
*
|
||||
* When the pool has similar active cases on other vdevs, then
|
||||
* the fired state is disregarded and the case is retired.
|
||||
*/
|
||||
static int
|
||||
zfs_fm_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep,
|
||||
zfs_case_t *zcp, const char *err_type)
|
||||
{
|
||||
int fired = fmd_serd_record(hdl, name, ep);
|
||||
int peers = 0;
|
||||
|
||||
if (fired && (peers = zfs_other_serd_cases(hdl, &zcp->zc_data)) > 0) {
|
||||
fmd_hdl_debug(hdl, "pool %llu is tracking %d other %s cases "
|
||||
"-- skip faulting the vdev %llu",
|
||||
(u_longlong_t)zcp->zc_data.zc_pool_guid,
|
||||
peers, err_type,
|
||||
(u_longlong_t)zcp->zc_data.zc_vdev_guid);
|
||||
zfs_case_retire(hdl, zcp);
|
||||
fired = 0;
|
||||
}
|
||||
|
||||
return (fired);
|
||||
}
|
||||
|
||||
/*
|
||||
* Main fmd entry point.
|
||||
*/
|
||||
@ -510,7 +561,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
{
|
||||
zfs_case_t *zcp, *dcp;
|
||||
int32_t pool_state;
|
||||
uint64_t ena, pool_guid, vdev_guid;
|
||||
uint64_t ena, pool_guid, vdev_guid, parent_guid;
|
||||
uint64_t checksum_n, checksum_t;
|
||||
uint64_t io_n, io_t;
|
||||
er_timeval_t pool_load;
|
||||
@ -600,6 +651,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
if (nvlist_lookup_uint64(nvl,
|
||||
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
|
||||
vdev_guid = 0;
|
||||
if (nvlist_lookup_uint64(nvl,
|
||||
FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, &parent_guid) != 0)
|
||||
parent_guid = 0;
|
||||
if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) != 0)
|
||||
ena = 0;
|
||||
|
||||
@ -710,6 +764,7 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
data.zc_ena = ena;
|
||||
data.zc_pool_guid = pool_guid;
|
||||
data.zc_vdev_guid = vdev_guid;
|
||||
data.zc_parent_guid = parent_guid;
|
||||
data.zc_pool_state = (int)pool_state;
|
||||
|
||||
fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data));
|
||||
@ -872,8 +927,10 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
SEC2NSEC(io_t));
|
||||
zfs_case_serialize(zcp);
|
||||
}
|
||||
if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep))
|
||||
if (zfs_fm_serd_record(hdl, zcp->zc_data.zc_serd_io,
|
||||
ep, zcp, "io error")) {
|
||||
checkremove = B_TRUE;
|
||||
}
|
||||
} else if (fmd_nvl_class_match(hdl, nvl,
|
||||
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY))) {
|
||||
uint64_t slow_io_n, slow_io_t;
|
||||
@ -899,25 +956,10 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
}
|
||||
/* Pass event to SERD engine and see if this triggers */
|
||||
if (zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
|
||||
fmd_serd_record(hdl, zcp->zc_data.zc_serd_slow_io,
|
||||
ep)) {
|
||||
/*
|
||||
* Ignore a slow io diagnosis when other
|
||||
* VDEVs in the pool show signs of being slow.
|
||||
*/
|
||||
if (zfs_other_slow_cases(hdl, &zcp->zc_data)) {
|
||||
zfs_case_retire(hdl, zcp);
|
||||
fmd_hdl_debug(hdl, "pool %llu has "
|
||||
"multiple slow io cases -- skip "
|
||||
"degrading vdev %llu",
|
||||
(u_longlong_t)
|
||||
zcp->zc_data.zc_pool_guid,
|
||||
(u_longlong_t)
|
||||
zcp->zc_data.zc_vdev_guid);
|
||||
} else {
|
||||
zfs_case_solve(hdl, zcp,
|
||||
"fault.fs.zfs.vdev.slow_io");
|
||||
}
|
||||
zfs_fm_serd_record(hdl,
|
||||
zcp->zc_data.zc_serd_slow_io, ep, zcp, "slow io")) {
|
||||
zfs_case_solve(hdl, zcp,
|
||||
"fault.fs.zfs.vdev.slow_io");
|
||||
}
|
||||
} else if (fmd_nvl_class_match(hdl, nvl,
|
||||
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
|
||||
@ -968,8 +1010,9 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
|
||||
SEC2NSEC(checksum_t));
|
||||
zfs_case_serialize(zcp);
|
||||
}
|
||||
if (fmd_serd_record(hdl,
|
||||
zcp->zc_data.zc_serd_checksum, ep)) {
|
||||
if (zfs_fm_serd_record(hdl,
|
||||
zcp->zc_data.zc_serd_checksum, ep, zcp,
|
||||
"checksum")) {
|
||||
zfs_case_solve(hdl, zcp,
|
||||
"fault.fs.zfs.vdev.checksum");
|
||||
}
|
||||
|
@ -109,7 +109,7 @@ tags = ['functional', 'direct']
|
||||
[tests/functional/events:Linux]
|
||||
tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
|
||||
'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
|
||||
'zed_slow_io', 'zed_slow_io_many_vdevs']
|
||||
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
|
||||
tags = ['functional', 'events']
|
||||
|
||||
[tests/functional/fadvise:Linux]
|
||||
|
@ -1490,6 +1490,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/events/setup.ksh \
|
||||
functional/events/zed_cksum_config.ksh \
|
||||
functional/events/zed_cksum_reported.ksh \
|
||||
functional/events/zed_diagnose_multiple.ksh \
|
||||
functional/events/zed_fd_spill.ksh \
|
||||
functional/events/zed_io_config.ksh \
|
||||
functional/events/zed_rc_filter.ksh \
|
||||
|
168
tests/zfs-tests/tests/functional/events/zed_diagnose_multiple.ksh
Executable file
168
tests/zfs-tests/tests/functional/events/zed_diagnose_multiple.ksh
Executable file
@ -0,0 +1,168 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2024, Klara Inc.
|
||||
#
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that simultaneous io error events from multiple vdevs
|
||||
# doesn't generate a fault
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with a 4 disk raidz vdev
|
||||
# 2. Inject io errors
|
||||
# 3. Verify that ZED detects the errors but doesn't fault any vdevs
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
TESTDIR="$TEST_BASE_DIR/zed_error_multiple"
|
||||
VDEV1="$TEST_BASE_DIR/vdevfile1.$$"
|
||||
VDEV2="$TEST_BASE_DIR/vdevfile2.$$"
|
||||
VDEV3="$TEST_BASE_DIR/vdevfile3.$$"
|
||||
VDEV4="$TEST_BASE_DIR/vdevfile4.$$"
|
||||
VDEVS="$VDEV1 $VDEV2 $VDEV3 $VDEV4"
|
||||
TESTPOOL="zed_test_pool"
|
||||
FILEPATH="$TESTDIR/zed.testfile"
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
function cleanup
|
||||
{
|
||||
log_must zinject -c all
|
||||
|
||||
# if pool still exists then something failed so log additional info
|
||||
if poolexists $TESTPOOL ; then
|
||||
log_note "$(zpool status -s $TESTPOOL)"
|
||||
echo "=================== zed log search ==================="
|
||||
grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
|
||||
destroy_pool $TESTPOOL
|
||||
fi
|
||||
log_must zed_stop
|
||||
|
||||
log_must rm -f $VDEVS
|
||||
}
|
||||
|
||||
function start_io_errors
|
||||
{
|
||||
for vdev in $VDEVS
|
||||
do
|
||||
log_must zpool set io_n=4 $TESTPOOL $vdev
|
||||
log_must zpool set io_t=60 $TESTPOOL $vdev
|
||||
done
|
||||
zpool sync
|
||||
|
||||
for vdev in $VDEVS
|
||||
do
|
||||
log_must zinject -d $vdev -e io $TESTPOOL
|
||||
done
|
||||
zpool sync
|
||||
}
|
||||
|
||||
function multiple_slow_vdevs_test
|
||||
{
|
||||
log_must truncate -s 1G $VDEVS
|
||||
default_raidz_setup_noexit $VDEVS
|
||||
|
||||
log_must zpool events -c
|
||||
log_must zfs set compression=off $TESTPOOL
|
||||
log_must zfs set primarycache=none $TESTPOOL
|
||||
log_must zfs set recordsize=4K $TESTPOOL
|
||||
|
||||
log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=4
|
||||
zpool sync
|
||||
|
||||
#
|
||||
# Read the file with io errors injected on the disks
|
||||
# This will cause multiple errors on each disk to trip ZED SERD
|
||||
#
|
||||
# pool: zed_test_pool
|
||||
# state: ONLINE
|
||||
# status: One or more devices has experienced an unrecoverable error. An
|
||||
# attempt was made to correct the error. Applications are unaffected.
|
||||
# action: Determine if the device needs to be replaced, and clear the errors
|
||||
# using 'zpool clear' or replace the device with 'zpool replace'.
|
||||
# see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-9P
|
||||
# config:
|
||||
#
|
||||
# NAME STATE READ WRITE CKSUM
|
||||
# zed_test_pool ONLINE 0 0 0
|
||||
# raidz1-0 ONLINE 0 0 0
|
||||
# /var/tmp/vdevfile1.1547063 ONLINE 532 561 0
|
||||
# /var/tmp/vdevfile2.1547063 ONLINE 547 594 0
|
||||
# /var/tmp/vdevfile3.1547063 ONLINE 1.05K 1.10K 0
|
||||
# /var/tmp/vdevfile4.1547063 ONLINE 1.05K 1.00K 0
|
||||
#
|
||||
|
||||
start_io_errors
|
||||
dd if=$FILEPATH of=/dev/null bs=1M count=4 2>/dev/null
|
||||
log_must zinject -c all
|
||||
|
||||
# count io error events available for processing
|
||||
typeset -i i=0
|
||||
typeset -i events=0
|
||||
while [[ $i -lt 60 ]]; do
|
||||
events=$(zpool events | grep "ereport\.fs\.zfs.io" | wc -l)
|
||||
[[ $events -ge "50" ]] && break
|
||||
i=$((i+1))
|
||||
sleep 1
|
||||
done
|
||||
log_note "$events io error events found"
|
||||
if [[ $events -lt "50" ]]; then
|
||||
log_note "bailing: not enough events to complete the test"
|
||||
destroy_pool $TESTPOOL
|
||||
return
|
||||
fi
|
||||
|
||||
#
|
||||
# give slow ZED a chance to process the checkum events
|
||||
#
|
||||
typeset -i i=0
|
||||
typeset -i skips=0
|
||||
while [[ $i -lt 75 ]]; do
|
||||
skips=$(grep "retiring case" \
|
||||
$ZEDLET_DIR/zed.log | wc -l)
|
||||
[[ $skips -gt "0" ]] && break
|
||||
i=$((i+1))
|
||||
sleep 1
|
||||
done
|
||||
|
||||
log_note $skips fault skips in ZED log after $i seconds
|
||||
[ $skips -gt "0" ] || log_fail "expecting to see skips"
|
||||
|
||||
fault=$(grep "zpool_vdev_fault" $ZEDLET_DIR/zed.log | wc -l)
|
||||
log_note $fault vdev fault in ZED log
|
||||
[ $fault -eq "0" ] || \
|
||||
log_fail "expecting no fault events, found $fault"
|
||||
|
||||
destroy_pool $TESTPOOL
|
||||
}
|
||||
|
||||
log_assert "Test ZED io errors across multiple vdevs"
|
||||
log_onexit cleanup
|
||||
|
||||
log_must zed_events_drain
|
||||
log_must zed_start
|
||||
multiple_slow_vdevs_test
|
||||
|
||||
log_pass "Test ZED io errors across multiple vdevs"
|
@ -25,10 +25,10 @@
|
||||
#
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that delay events from multiple vdevs doesnt degrade
|
||||
# Verify that delay events from multiple vdevs doesn't degrade
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with a 3 disk raidz vdev
|
||||
# 1. Create a pool with a 4 disk raidz vdev
|
||||
# 2. Inject slow io errors
|
||||
# 3. Verify that ZED detects slow I/Os but doesn't degrade any vdevs
|
||||
#
|
||||
|
Loading…
Reference in New Issue
Block a user