diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 6c3295353..3cfde40a7 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -458,10 +458,11 @@ struct vdev { kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */ /* - * We rate limit ZIO delay and ZIO checksum events, since they + * We rate limit ZIO delay, deadman, and checksum events, since they * can flood ZED with tons of events when a drive is acting up. */ zfs_ratelimit_t vdev_delay_rl; + zfs_ratelimit_t vdev_deadman_rl; zfs_ratelimit_t vdev_checksum_rl; }; diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index cda4661fe..935c8afb0 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1695,7 +1695,8 @@ Default value: \fB64\fR. \fBzfs_slow_io_events_per_second\fR (int) .ad .RS 12n -Rate limit delay zevents (which report slow I/Os) to this many per second. +Rate limit delay and deadman zevents (which report slow I/Os) to this many per +second. .sp Default value: 20 .RE diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 69e44b48e..d00782d93 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -625,6 +625,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) */ zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second, 1); + zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second, + 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksum_events_per_second, 1); @@ -1106,6 +1108,7 @@ vdev_free(vdev_t *vd) cv_destroy(&vd->vdev_rebuild_cv); zfs_ratelimit_fini(&vd->vdev_delay_rl); + zfs_ratelimit_fini(&vd->vdev_deadman_rl); zfs_ratelimit_fini(&vd->vdev_checksum_rl); if (vd == spa->spa_root_vdev) diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index f0f953405..60e631567 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -395,8 +395,8 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector) } /* - * We want to rate limit ZIO delay and checksum events so as to not - * flood ZED when a disk is acting up. + * We want to rate limit ZIO delay, deadman, and checksum events so as to not + * flood zevent consumers when a disk is acting up. * * Returns 1 if we're ratelimiting, 0 if not. */ @@ -405,11 +405,13 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd) { int rc = 0; /* - * __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we + * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we * are. Invert it to get our return value. */ if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) { rc = !zfs_ratelimit(&vd->vdev_delay_rl); + } else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) { + rc = !zfs_ratelimit(&vd->vdev_deadman_rl); } else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) { rc = !zfs_ratelimit(&vd->vdev_checksum_rl); } diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index cc2414550..32be9ef05 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -577,6 +577,12 @@ tags = ['functional', 'cp_files'] tests = ['ctime_001_pos' ] tags = ['functional', 'ctime'] +[tests/functional/deadman] +tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio'] +pre = +post = +tags = ['functional', 'deadman'] + [tests/functional/delegate] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 9f6bd856a..4952f3013 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -85,12 +85,6 @@ tags = ['functional', 'cli_root', 'zpool_split'] tests = ['compress_004_pos'] tags = ['functional', 'compression'] -[tests/functional/deadman:Linux] -tests = ['deadman_sync', 'deadman_zio'] -pre = -post = -tags = ['functional', 'deadman'] - [tests/functional/devices:Linux] tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos'] tags = ['functional', 'devices'] diff --git a/tests/zfs-tests/tests/functional/deadman/Makefile.am b/tests/zfs-tests/tests/functional/deadman/Makefile.am index 7b70ca09d..097f23e88 100644 --- a/tests/zfs-tests/tests/functional/deadman/Makefile.am +++ b/tests/zfs-tests/tests/functional/deadman/Makefile.am @@ -1,5 +1,6 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman dist_pkgdata_SCRIPTS = \ + deadman_ratelimit.ksh \ deadman_sync.ksh \ deadman_zio.ksh diff --git a/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh b/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh new file mode 100755 index 000000000..469117a56 --- /dev/null +++ b/tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Portions Copyright 2021 iXsystems, Inc. +# + +# DESCRIPTION: +# Verify spa deadman events are rate limited +# +# STRATEGY: +# 1. Reduce the zfs_slow_io_events_per_second to 1. +# 2. Reduce the zfs_deadman_ziotime_ms to 1ms. +# 3. Write data to a pool and read it back. +# 4. Verify deadman events have been produced at a reasonable rate. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/deadman/deadman.cfg + +verify_runnable "both" + +function cleanup +{ + zinject -c all + default_cleanup_noexit + + set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS + set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT +} + +log_assert "Verify spa deadman events are rate limited" +log_onexit cleanup + +OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND) +log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1 +log_must set_tunable64 DEADMAN_ZIOTIME_MS 1 + +# Create a new pool in order to use the updated deadman settings. +default_setup_noexit $DISK1 +log_must zpool events -c + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +log_must file_write -b 1048576 -c 8 -o create -d 0 -f $mntpnt/file +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +log_must zinject -d $DISK1 -D 5:1 $TESTPOOL +log_must dd if=$mntpnt/file of=$TEST_BASE_DIR/devnull oflag=sync + +events=$(zpool events $TESTPOOL | grep -c ereport.fs.zfs.deadman) +log_note "events=$events" +if [ "$events" -lt 1 ]; then + log_fail "Expect >= 1 deadman events, $events found" +fi +if [ "$events" -gt 10 ]; then + log_fail "Expect <= 10 deadman events, $events found" +fi + +log_pass "Verify spa deadman events are rate limited" diff --git a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh index 5d803af85..b0b03f5d5 100755 --- a/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh +++ b/tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh @@ -73,7 +73,11 @@ log_must zinject -c all log_must zpool sync # Log txg sync times for reference and the zpool event summary. -log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs +if is_freebsd; then + log_must sysctl -n kstat.zfs.$TESTPOOL.txgs +else + log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs +fi log_must zpool events # Verify at least 5 deadman events were logged. The first after 5 seconds,