diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index aa7421261..830c8455b 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -387,6 +387,7 @@ typedef enum { VDEV_PROP_SLOW_IOS, VDEV_PROP_SIT_OUT, VDEV_PROP_AUTOSIT, + VDEV_PROP_SLOW_IO_EVENTS, VDEV_NUM_PROPS } vdev_prop_t; diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 5a8c2f846..afaa40134 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -470,6 +470,7 @@ struct vdev { uint64_t vdev_checksum_t; uint64_t vdev_io_n; uint64_t vdev_io_t; + boolean_t vdev_slow_io_events; uint64_t vdev_slow_io_n; uint64_t vdev_slow_io_t; }; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index f988d27a2..232265237 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -6026,7 +6026,8 @@ - + + diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7 index 0fb28d7db..b54abcd3e 100644 --- a/man/man7/vdevprops.7 +++ b/man/man7/vdevprops.7 @@ -45,7 +45,7 @@ section, below. Every vdev has a set of properties that export statistics about the vdev as well as control various behaviors. Properties are not inherited from top-level vdevs, with the exception of -checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t. +checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t. .Pp The values of numeric properties can be specified using human-readable suffixes .Po for example, @@ -149,6 +149,11 @@ For .Sy OpenZFS on FreeBSD defaults see .Xr zfsd 8 . +.It Sy slow_io_events +This property controls whether slow I/O events are generated. +Even when disabled, slow I/Os will be included in the +.Nm zpool Cm status Fl s +output. 
.It Sy comment A text comment up to 8192 characters long .It Sy bootsize diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 07819ba2b..4826237b2 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -481,6 +481,9 @@ vdev_prop_init(void) zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table, sfeatures); + zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events", + B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", + "SLOW_IO_EVENTS", boolean_table, sfeatures); /* hidden properties */ zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING, diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index c8d728038..2a4d18762 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -448,6 +448,23 @@ vdev_get_nparity(vdev_t *vd) return (nparity); } +static int +vdev_prop_get_objid(vdev_t *vd, uint64_t *objid) +{ + + if (vd->vdev_root_zap != 0) { + *objid = vd->vdev_root_zap; + } else if (vd->vdev_top_zap != 0) { + *objid = vd->vdev_top_zap; + } else if (vd->vdev_leaf_zap != 0) { + *objid = vd->vdev_leaf_zap; + } else { + return (EINVAL); + } + + return (0); +} + static int vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value) { @@ -456,25 +473,29 @@ vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value) uint64_t objid; int err; - if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; - } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; - } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; - } else { + if (vdev_prop_get_objid(vd, &objid) != 0) return (EINVAL); - } err = zap_lookup(mos, objid, vdev_prop_to_name(prop), sizeof (uint64_t), 1, value); - if (err == ENOENT) *value = vdev_prop_default_numeric(prop); return (err); } +static int +vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue) +{ + /* Seed with the default so *bvalue is defined if the lookup fails. */ + uint64_t ivalue = vdev_prop_default_numeric(prop); + int err = vdev_prop_get_int(vd, prop, &ivalue); + + *bvalue 
= ivalue != 0; + + return (err); +} + /* * Get the number of data disks for a top-level vdev. */ @@ -737,8 +758,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) */ vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N); vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T); + vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N); vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T); + + vd->vdev_slow_io_events = vdev_prop_default_numeric( + VDEV_PROP_SLOW_IO_EVENTS); vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N); vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T); @@ -3931,6 +3956,11 @@ vdev_load(vdev_t *vd) vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " "failed [error=%d]", (u_longlong_t)zapobj, error); + error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS, + &vd->vdev_slow_io_events); + if (error && error != ENOENT) + vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " + "failed [error=%d]", (u_longlong_t)zapobj, error); error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N, &vd->vdev_slow_io_n); if (error && error != ENOENT) @@ -5980,15 +6010,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx) /* * Set vdev property values in the vdev props mos object. 
*/ - if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; - } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; - } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; - } else { + if (vdev_prop_get_objid(vd, &objid) != 0) panic("unexpected vdev type"); - } mutex_enter(&spa->spa_props_lock); @@ -6215,6 +6238,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) } vd->vdev_io_t = intval; break; + case VDEV_PROP_SLOW_IO_EVENTS: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_slow_io_events = intval != 0; + break; case VDEV_PROP_SLOW_IO_N: if (nvpair_value_uint64(elem, &intval) != 0) { error = EINVAL; @@ -6256,6 +6286,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) nvpair_t *elem = NULL; nvlist_t *nvprops = NULL; uint64_t intval = 0; + boolean_t boolval = 0; char *strval = NULL; const char *propname = NULL; vdev_prop_t prop; @@ -6269,15 +6300,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops); - if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; - } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; - } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; - } else { + if (vdev_prop_get_objid(vd, &objid) != 0) return (SET_ERROR(EINVAL)); - } ASSERT(objid != 0); mutex_enter(&spa->spa_props_lock); @@ -6622,6 +6646,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) intval, src); break; + case VDEV_PROP_SLOW_IO_EVENTS: + err = vdev_prop_get_bool(vd, prop, &boolval); + if (err && err != ENOENT) + break; + + src = ZPROP_SRC_LOCAL; + if (boolval == vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + + vdev_prop_add_list(outnvl, propname, NULL, + boolval, src); + break; case VDEV_PROP_CHECKSUM_N: case VDEV_PROP_CHECKSUM_T: case VDEV_PROP_IO_N: diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 96c722731..4a0d41c24 100644 --- 
a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop) case VDEV_PROP_IO_T: propval = vd->vdev_io_t; break; + case VDEV_PROP_SLOW_IO_EVENTS: + propval = vd->vdev_slow_io_events; + break; case VDEV_PROP_SLOW_IO_N: propval = vd->vdev_slow_io_n; break; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 4f3ecdc35..74373f759 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -5569,9 +5569,12 @@ zio_done(zio_t *zio) zio->io_vd->vdev_stat.vs_slow_ios++; mutex_exit(&zio->io_vd->vdev_stat_lock); - (void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY, - zio->io_spa, zio->io_vd, &zio->io_bookmark, - zio, 0); + if (zio->io_vd->vdev_slow_io_events) { + (void) zfs_ereport_post( + FM_EREPORT_ZFS_DELAY, + zio->io_spa, zio->io_vd, + &zio->io_bookmark, zio, 0); + } } } } diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg index ccb5e9c15..6d9aa2868 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg @@ -71,6 +71,7 @@ typeset -a properties=( checksum_t io_n io_t + slow_io_events slow_io_n slow_io_t trim_support diff --git a/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh b/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh index 0c68530ee..570c3b0c6 100755 --- a/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh +++ b/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh @@ -23,6 +23,7 @@ # # Copyright (c) 2023, Klara Inc. 
+# Copyright (c) 2025, Mariusz Zaborski # # DESCRIPTION: @@ -140,8 +141,8 @@ function slow_io_degrade { do_setup - zpool set slow_io_n=5 $TESTPOOL $VDEV - zpool set slow_io_t=60 $TESTPOOL $VDEV + log_must zpool set slow_io_n=5 $TESTPOOL $VDEV + log_must zpool set slow_io_t=60 $TESTPOOL $VDEV start_slow_io for i in {1..16}; do @@ -193,6 +194,44 @@ function slow_io_no_degrade do_clean } +# Set slow_io_n, slow_io_t to 5 events in 60 seconds and disable +# slow io events, then fire more than 5 slow I/Os. +# Should not degrade. +function slow_io_degrade_disabled +{ + do_setup + + log_must zpool set slow_io_n=5 $TESTPOOL $VDEV + log_must zpool set slow_io_t=60 $TESTPOOL $VDEV + log_must zpool set slow_io_events=off $TESTPOOL $VDEV + + start_slow_io + for i in {1..16}; do + dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null + sleep 0.5 + done + stop_slow_io + zpool sync + + # + # wait up to 60 seconds to confirm zfs.delay was not generated. + # + typeset -i i=0 + typeset -i events=0 + while [[ $i -lt 60 && $events -eq 0 ]]; do + events=$(zpool events | grep "ereport\.fs\.zfs\.delay" | wc -l) + i=$((i+1)) + sleep 1 + done + log_note "$events delay events found" + + [ $events -eq "0" ] || \ + log_fail "expecting no delay events, found $events" + + log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45 + do_clean +} + log_assert "Test ZED slow io configurability" log_onexit cleanup @@ -202,5 +241,6 @@ log_must zed_start default_degrade slow_io_degrade slow_io_no_degrade +slow_io_degrade_disabled log_pass "Test ZED slow io configurability"