Avoid posting duplicate zpool events

Duplicate io and checksum ereport events can misrepresent that
things are worse than they seem. Ideally the zpool events and the
corresponding vdev stat error counts in a zpool status should be
for unique errors -- not the same error being counted over and over.
This can be demonstrated in a simple example. With a single bad
block in a datafile and just 5 reads of the file we end up with a
degraded vdev, even though there is only one unique error in the pool.

The proposed solution to the above issue, is to eliminate duplicates
when posting events and when updating vdev error stats. We now save
recent error events of interest when posting events so that we can
easily check for duplicates when posting an error.

Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #10861
This commit is contained in:
Don Brady
2020-09-04 11:34:28 -06:00
committed by Brian Behlendorf
parent bd724261d2
commit 8afac5dc55
22 changed files with 799 additions and 79 deletions
+1 -1
View File
@@ -346,7 +346,7 @@ tags = ['functional', 'cli_root', 'zpool_detach']
[tests/functional/cli_root/zpool_events]
tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow',
'zpool_events_poolname', 'zpool_events_errors']
'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates']
tags = ['functional', 'cli_root', 'zpool_events']
[tests/functional/cli_root/zpool_export]
+1
View File
@@ -105,6 +105,7 @@ export SYSTEM_FILES_COMMON='arp
umask
umount
uname
uniq
uuidgen
vmstat
wait
+1
View File
@@ -82,6 +82,7 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
ZIO_SLOW_IO_MS zio.slow_io_ms zio_slow_io_ms
%%%%
while read name FreeBSD Linux; do
@@ -0,0 +1 @@
/ereports
@@ -1,4 +1,8 @@
include $(top_srcdir)/config/Rules.am
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_events
pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_events
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
@@ -6,8 +10,16 @@ dist_pkgdata_SCRIPTS = \
zpool_events_cliargs.ksh \
zpool_events_follow.ksh \
zpool_events_poolname.ksh \
zpool_events_errors.ksh
zpool_events_errors.ksh \
zpool_events_duplicates.ksh
dist_pkgdata_DATA = \
zpool_events.cfg \
zpool_events.kshlib
ereports_LDADD = \
$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
$(abs_top_builddir)/lib/libzfs/libzfs.la
pkgexec_PROGRAMS = ereports
ereports_SOURCES = ereports.c
@@ -0,0 +1,174 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2020 by Delphix. All rights reserved.
*/
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <libzfs.h>
#include <sys/zfs_ioctl.h>
#include <sys/nvpair.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
/*
* Command to output io and checksum ereport values, one per line.
* Used by zpool_events_duplicates.ksh to check for duplicate events.
*
* example output line:
*
* checksum "error_pool" 0x856dd01ce52e336 0x000034 0x000400 0x000a402c00
* 0x000004 0x000000 0x000000 0x000000 0x000001
*/
/*
* Our ereport duplicate criteria
*
* When the class and all of these values match, then an ereport is
* considered to be a duplicate.
*/
static const char *criteria_name[] = {
FM_EREPORT_PAYLOAD_ZFS_POOL,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY,
/* logical zio criteriai (optional) */
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
};
#define CRITERIA_NAMES_COUNT ARRAY_SIZE(criteria_name)
static void
print_ereport_line(nvlist_t *nvl)
{
char *class;
int last = CRITERIA_NAMES_COUNT - 1;
/*
* For the test case context, we only want to see 'io' and
* 'checksum' subclass. We skip 'data' to minimize the output.
*/
if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0 ||
strstr(class, "ereport.fs.zfs.") == NULL ||
strcmp(class, "ereport.fs.zfs.data") == 0) {
return;
}
(void) printf("%s\t", class + strlen("ereport.fs.zfs."));
for (int i = 0; i < CRITERIA_NAMES_COUNT; i++) {
nvpair_t *nvp;
uint32_t i32 = 0;
uint64_t i64 = 0;
char *str = NULL;
if (nvlist_lookup_nvpair(nvl, criteria_name[i], &nvp) != 0) {
/* print a proxy for optional criteria */
(void) printf("--------");
(void) printf("%c", i == last ? '\n' : '\t');
continue;
}
switch (nvpair_type(nvp)) {
case DATA_TYPE_STRING:
(void) nvpair_value_string(nvp, &str);
(void) printf("\"%s\"", str ? str : "<NULL>");
break;
case DATA_TYPE_INT32:
(void) nvpair_value_int32(nvp, (void *)&i32);
(void) printf("0x%06x", i32);
break;
case DATA_TYPE_UINT32:
(void) nvpair_value_uint32(nvp, &i32);
(void) printf("0x%06x", i32);
break;
case DATA_TYPE_INT64:
(void) nvpair_value_int64(nvp, (void *)&i64);
(void) printf("0x%06llx", (u_longlong_t)i64);
break;
case DATA_TYPE_UINT64:
(void) nvpair_value_uint64(nvp, &i64);
if (strcmp(FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
criteria_name[i]) == 0)
(void) printf("0x%010llx", (u_longlong_t)i64);
else
(void) printf("0x%06llx", (u_longlong_t)i64);
break;
default:
(void) printf("<unknown>");
break;
}
(void) printf("%c", i == last ? '\n' : '\t');
}
}
static void
ereports_dump(libzfs_handle_t *zhdl, int zevent_fd)
{
nvlist_t *nvl;
int ret, dropped;
while (1) {
ret = zpool_events_next(zhdl, &nvl, &dropped, ZEVENT_NONBLOCK,
zevent_fd);
if (ret || nvl == NULL)
break;
if (dropped > 0)
(void) fprintf(stdout, "dropped %d events\n", dropped);
print_ereport_line(nvl);
(void) fflush(stdout);
nvlist_free(nvl);
}
}
/* ARGSUSED */
int
main(int argc, char **argv)
{
libzfs_handle_t *hdl;
int fd;
hdl = libzfs_init();
if (hdl == NULL) {
(void) fprintf(stderr, "libzfs_init: %s\n", strerror(errno));
exit(2);
}
fd = open(ZFS_DEV, O_RDWR);
if (fd < 0) {
(void) fprintf(stderr, "open: %s\n", strerror(errno));
libzfs_fini(hdl);
exit(2);
}
ereports_dump(hdl, fd);
(void) close(fd);
libzfs_fini(hdl);
return (0);
}
@@ -0,0 +1,155 @@
#!/bin/ksh -p
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
# Copyright (c) 2020 by Delphix. All rights reserved.
#
# DESCRIPTION:
# Verify that duplicate I/O ereport errors are not posted
#
# STRATEGY:
# 1. Create a mirror pool
# 2. Inject duplicate read/write IO errors and checksum errors
# 3. Verify there are no duplicate events being posted
#
. $STF_SUITE/include/libtest.shlib
verify_runnable "both"
MOUNTDIR=$TEST_BASE_DIR/mount
FILEPATH=$MOUNTDIR/badfile
VDEV1=$TEST_BASE_DIR/vfile1
VDEV2=$TEST_BASE_DIR/vfile2
POOL=error_pool
FILESIZE="10M"
OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX)
EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports"
duplicates=false
function cleanup
{
log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX
log_must zinject -c all
if poolexists $POOL ; then
destroy_pool $POOL
fi
log_must rm -f $VDEV1 $VDEV2
}
log_assert "Duplicate I/O ereport errors are not posted"
log_note "zevent retain max setting: $RETAIN_MAX"
log_onexit cleanup
# Set our threshold high to avoid dropping events.
set_tunable64 ZEVENT_LEN_MAX 20000
log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2
log_must mkdir -p $MOUNTDIR
#
# $1: test type - corrupt (checksum error), io
# $2: read, write
function do_dup_test
{
ERR=$1
RW=$2
log_note "Testing $ERR $RW ereports"
log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL mirror $VDEV1 $VDEV2
log_must zpool events -c
log_must zfs set compression=off $POOL
if [ "$RW" == "read" ] ; then
log_must mkfile $FILESIZE $FILEPATH
# unmount and mount filesystems to purge file from ARC
# to force reads to go through error inject handler
log_must zfs unmount $POOL
log_must zfs mount $POOL
# all reads from this file get an error
if [ "$ERR" == "corrupt" ] ; then
log_must zinject -a -t data -e checksum -T read $FILEPATH
else
log_must zinject -a -t data -e io -T read $FILEPATH
fi
# Read the file a few times to generate some
# duplicate errors of the same blocks
# shellcheck disable=SC2034
for i in {1..15}; do
dd if=$FILEPATH of=/dev/null bs=128K > /dev/null 2>&1
done
log_must zinject -c all
fi
log_must zinject -d $VDEV1 -e $ERR -T $RW -f 100 $POOL
if [ "$RW" == "write" ] ; then
log_must mkfile $FILESIZE $FILEPATH
log_must zpool sync $POOL
else
# scrub twice to generate some duplicates
log_must zpool scrub $POOL
log_must zpool wait -t scrub $POOL
log_must zpool scrub $POOL
log_must zpool wait -t scrub $POOL
fi
log_must zinject -c all
# Wait for the pool to settle down and finish resilvering (if
# necessary). We want the errors to stop incrementing before we
# check for duplicates.
zpool wait -t resilver $POOL
ereports="$($EREPORTS | sort)"
actual=$(echo "$ereports" | wc -l)
unique=$(echo "$ereports" | uniq | wc -l)
log_note "$actual total $ERR $RW ereports where $unique were unique"
if [ $actual -gt $unique ] ; then
log_note "UNEXPECTED -- $((actual-unique)) duplicate $ERR $RW ereports"
echo "$ereports"
duplicates=true
fi
log_must zpool destroy $POOL
}
do_dup_test "corrupt" "read"
do_dup_test "io" "read"
do_dup_test "io" "write"
if $duplicates; then
log_fail "FAILED -- Duplicate I/O ereport errors encountered"
else
log_pass "Duplicate I/O ereport errors are not posted"
fi