Add knob to disable slow io notifications

Introduce a new vdev property `VDEV_PROP_SLOW_IO_EVENTS`
(`slow_io_events`) that allows users to disable notifications for
slow devices.
Turning it off prevents ZED and/or ZFSD from degrading the pool due
to slow I/O.

Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Mariusz Zaborski <oshogbo@FreeBSD.org>
Closes 17477
This commit is contained in:
Mariusz Zaborski 2025-11-11 19:42:17 +01:00 committed by Brian Behlendorf
parent 41878d57ea
commit 1e8c96d7d5
10 changed files with 127 additions and 32 deletions

View File

@ -387,6 +387,7 @@ typedef enum {
VDEV_PROP_SLOW_IOS,
VDEV_PROP_SIT_OUT,
VDEV_PROP_AUTOSIT,
VDEV_PROP_SLOW_IO_EVENTS,
VDEV_NUM_PROPS
} vdev_prop_t;

View File

@ -470,6 +470,7 @@ struct vdev {
uint64_t vdev_checksum_t;
uint64_t vdev_io_n;
uint64_t vdev_io_t;
boolean_t vdev_slow_io_events;
uint64_t vdev_slow_io_n;
uint64_t vdev_slow_io_t;
};

View File

@ -6026,7 +6026,8 @@
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
<enumerator name='VDEV_PROP_SIT_OUT' value='52'/>
<enumerator name='VDEV_PROP_AUTOSIT' value='53'/>
<enumerator name='VDEV_NUM_PROPS' value='54'/>
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='54'/>
<enumerator name='VDEV_NUM_PROPS' value='55'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

View File

@ -45,7 +45,7 @@ section, below.
Every vdev has a set of properties that export statistics about the vdev
as well as control various behaviors.
Properties are not inherited from top-level vdevs, with the exception of
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t.
.Pp
The values of numeric properties can be specified using human-readable suffixes
.Po for example,
@ -149,6 +149,12 @@ For
.Sy OpenZFS on FreeBSD
defaults see
.Xr zfsd 8 .
.It Sy slow_io_events
The
.Sy slow_io_events
property controls whether slow I/O events are generated.
Even when disabled, slow I/Os will be included in the
.Nm zpool Cm status Fl s
output.
.It Sy comment
A text comment up to 8192 characters long
.It Sy bootsize

View File

@ -481,6 +481,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
sfeatures);
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
"SLOW_IO_EVENTS", boolean_table, sfeatures);
/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,

View File

@ -448,6 +448,23 @@ vdev_get_nparity(vdev_t *vd)
return (nparity);
}
/*
 * Select the ZAP object used for this vdev's properties.
 *
 * The root ZAP is preferred, then the top-level ZAP, then the leaf ZAP;
 * the first one that is non-zero wins.  On success the chosen object id
 * is stored in *objid and 0 is returned.  If the vdev has none of the
 * three, EINVAL is returned and *objid is left untouched.
 */
static int
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
{
	uint64_t zap = vd->vdev_root_zap;

	if (zap == 0)
		zap = vd->vdev_top_zap;
	if (zap == 0)
		zap = vd->vdev_leaf_zap;
	if (zap == 0)
		return (EINVAL);

	*objid = zap;
	return (0);
}
static int
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
{
@ -456,25 +473,29 @@ vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
uint64_t objid;
int err;
if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
if (vdev_prop_get_objid(vd, &objid) != 0)
return (EINVAL);
}
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
sizeof (uint64_t), 1, value);
if (err == ENOENT)
*value = vdev_prop_default_numeric(prop);
return (err);
}
/*
 * Look up a vdev property and report it as a boolean.
 *
 * The property is read as an integer through vdev_prop_get_int(); any
 * non-zero value is reported as true.  *bvalue is written only when the
 * lookup succeeds or returns ENOENT (vdev_prop_get_int() substitutes the
 * property default in that case).  On any other error the integer may
 * never have been filled in, so *bvalue is left as the caller had it
 * instead of being derived from an indeterminate value.
 *
 * Returns the error code from vdev_prop_get_int() unchanged.
 */
static int
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
{
	uint64_t ivalue = 0;
	int err;

	err = vdev_prop_get_int(vd, prop, &ivalue);
	if (err == 0 || err == ENOENT)
		*bvalue = (ivalue != 0);

	return (err);
}
/*
* Get the number of data disks for a top-level vdev.
*/
@ -737,8 +758,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
*/
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
vd->vdev_slow_io_events = vdev_prop_default_numeric(
VDEV_PROP_SLOW_IO_EVENTS);
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
@ -3931,6 +3956,11 @@ vdev_load(vdev_t *vd)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
&vd->vdev_slow_io_events);
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
&vd->vdev_slow_io_n);
if (error && error != ENOENT)
@ -5980,15 +6010,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
/*
* Set vdev property values in the vdev props mos object.
*/
if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
if (vdev_prop_get_objid(vd, &objid) != 0)
panic("unexpected vdev type");
}
mutex_enter(&spa->spa_props_lock);
@ -6215,6 +6238,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_io_t = intval;
break;
case VDEV_PROP_SLOW_IO_EVENTS:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_slow_io_events = intval != 0;
break;
case VDEV_PROP_SLOW_IO_N:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
@ -6256,6 +6286,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvpair_t *elem = NULL;
nvlist_t *nvprops = NULL;
uint64_t intval = 0;
boolean_t boolval = 0;
char *strval = NULL;
const char *propname = NULL;
vdev_prop_t prop;
@ -6269,15 +6300,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
if (vdev_prop_get_objid(vd, &objid) != 0)
return (SET_ERROR(EINVAL));
}
ASSERT(objid != 0);
mutex_enter(&spa->spa_props_lock);
@ -6622,6 +6646,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
intval, src);
break;
case VDEV_PROP_SLOW_IO_EVENTS:
err = vdev_prop_get_bool(vd, prop, &boolval);
if (err && err != ENOENT)
break;
src = ZPROP_SRC_LOCAL;
if (boolval == vdev_prop_default_numeric(prop))
src = ZPROP_SRC_DEFAULT;
vdev_prop_add_list(outnvl, propname, NULL,
boolval, src);
break;
case VDEV_PROP_CHECKSUM_N:
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:

View File

@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
case VDEV_PROP_IO_T:
propval = vd->vdev_io_t;
break;
case VDEV_PROP_SLOW_IO_EVENTS:
propval = vd->vdev_slow_io_events;
break;
case VDEV_PROP_SLOW_IO_N:
propval = vd->vdev_slow_io_n;
break;

View File

@ -5569,9 +5569,12 @@ zio_done(zio_t *zio)
zio->io_vd->vdev_stat.vs_slow_ios++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd, &zio->io_bookmark,
zio, 0);
if (zio->io_vd->vdev_slow_io_events) {
(void) zfs_ereport_post(
FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd,
&zio->io_bookmark, zio, 0);
}
}
}
}

View File

@ -71,6 +71,7 @@ typeset -a properties=(
checksum_t
io_n
io_t
slow_io_events
slow_io_n
slow_io_t
trim_support

View File

@ -23,6 +23,7 @@
#
# Copyright (c) 2023, Klara Inc.
# Copyright (c) 2025, Mariusz Zaborski <oshogbo@FreeBSD.org>
#
# DESCRIPTION:
@ -140,8 +141,8 @@ function slow_io_degrade
{
do_setup
zpool set slow_io_n=5 $TESTPOOL $VDEV
zpool set slow_io_t=60 $TESTPOOL $VDEV
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
start_slow_io
for i in {1..16}; do
@ -193,6 +194,44 @@ function slow_io_no_degrade
do_clean
}
# Set slow_io_n/slow_io_t to 5 events in 60 seconds, turn slow_io_events
# off for the vdev, then generate more than 5 slow I/Os.
# No zfs.delay ereport should be posted and the vdev should not degrade.
function slow_io_degrade_disabled
{
	do_setup

	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_events=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Poll for up to 60 seconds to confirm that zfs.delay was not
	# generated.  Stop polling as soon as one is seen: the check below
	# fails in that case, so waiting any longer gains nothing.
	#
	typeset -i i=0
	typeset -i events=0
	while [[ $i -lt 60 ]]; do
		# Every dot is escaped so only the literal ereport class matches.
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		[[ $events -ne 0 ]] && break
		i=$((i+1))
		sleep 1
	done
	log_note "$events delay events found"

	[ $events -eq "0" ] || \
	    log_fail "expecting no delay events, found $events"

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45

	do_clean
}
log_assert "Test ZED slow io configurability"
log_onexit cleanup
@ -202,5 +241,6 @@ log_must zed_start
default_degrade
slow_io_degrade
slow_io_no_degrade
slow_io_degrade_disabled
log_pass "Test ZED slow io configurability"