mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-01-14 01:02:04 +03:00
Add knob to disable slow io notifications
Introduce a new vdev property `VDEV_PROP_SLOW_IO_EVENTS` (`slow_io_events`) that allows users to disable notifications for slow devices. This prevents ZED and/or ZFSD from degrading the pool due to slow I/O. Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Mariusz Zaborski <oshogbo@FreeBSD.org> Closes 17477
This commit is contained in:
parent
41878d57ea
commit
1e8c96d7d5
@ -387,6 +387,7 @@ typedef enum {
|
||||
VDEV_PROP_SLOW_IOS,
|
||||
VDEV_PROP_SIT_OUT,
|
||||
VDEV_PROP_AUTOSIT,
|
||||
VDEV_PROP_SLOW_IO_EVENTS,
|
||||
VDEV_NUM_PROPS
|
||||
} vdev_prop_t;
|
||||
|
||||
|
||||
@ -470,6 +470,7 @@ struct vdev {
|
||||
uint64_t vdev_checksum_t;
|
||||
uint64_t vdev_io_n;
|
||||
uint64_t vdev_io_t;
|
||||
boolean_t vdev_slow_io_events;
|
||||
uint64_t vdev_slow_io_n;
|
||||
uint64_t vdev_slow_io_t;
|
||||
};
|
||||
|
||||
@ -6026,7 +6026,8 @@
|
||||
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
|
||||
<enumerator name='VDEV_PROP_SIT_OUT' value='52'/>
|
||||
<enumerator name='VDEV_PROP_AUTOSIT' value='53'/>
|
||||
<enumerator name='VDEV_NUM_PROPS' value='54'/>
|
||||
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='54'/>
|
||||
<enumerator name='VDEV_NUM_PROPS' value='55'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
|
||||
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
|
||||
|
||||
@ -45,7 +45,7 @@ section, below.
|
||||
Every vdev has a set of properties that export statistics about the vdev
|
||||
as well as control various behaviors.
|
||||
Properties are not inherited from top-level vdevs, with the exception of
|
||||
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
|
||||
checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t.
|
||||
.Pp
|
||||
The values of numeric properties can be specified using human-readable suffixes
|
||||
.Po for example,
|
||||
@ -149,6 +149,12 @@ For
|
||||
.Sy OpenZFS on FreeBSD
|
||||
defaults see
|
||||
.Xr zfsd 8 .
|
||||
.It Sy slow_io_events
|
||||
The
|
||||
property controls whether slow I/O events are generated.
|
||||
Even when disabled, slow I/Os will be included in the
|
||||
.Nm zpool Cm status Fl s
|
||||
output.
|
||||
.It Sy comment
|
||||
A text comment up to 8192 characters long
|
||||
.It Sy bootsize
|
||||
|
||||
@ -481,6 +481,9 @@ vdev_prop_init(void)
|
||||
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
|
||||
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
|
||||
sfeatures);
|
||||
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
|
||||
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
|
||||
"SLOW_IO_EVENTS", boolean_table, sfeatures);
|
||||
|
||||
/* hidden properties */
|
||||
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
|
||||
@ -448,6 +448,23 @@ vdev_get_nparity(vdev_t *vd)
|
||||
return (nparity);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
|
||||
{
|
||||
|
||||
if (vd->vdev_root_zap != 0) {
|
||||
*objid = vd->vdev_root_zap;
|
||||
} else if (vd->vdev_top_zap != 0) {
|
||||
*objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
*objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
|
||||
{
|
||||
@ -456,25 +473,29 @@ vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
|
||||
uint64_t objid;
|
||||
int err;
|
||||
|
||||
if (vd->vdev_root_zap != 0) {
|
||||
objid = vd->vdev_root_zap;
|
||||
} else if (vd->vdev_top_zap != 0) {
|
||||
objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
if (vdev_prop_get_objid(vd, &objid) != 0)
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
|
||||
sizeof (uint64_t), 1, value);
|
||||
|
||||
if (err == ENOENT)
|
||||
*value = vdev_prop_default_numeric(prop);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
|
||||
{
|
||||
int err;
|
||||
uint64_t ivalue;
|
||||
|
||||
err = vdev_prop_get_int(vd, prop, &ivalue);
|
||||
*bvalue = ivalue != 0;
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the number of data disks for a top-level vdev.
|
||||
*/
|
||||
@ -737,8 +758,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
*/
|
||||
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
|
||||
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
|
||||
|
||||
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
|
||||
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
|
||||
|
||||
vd->vdev_slow_io_events = vdev_prop_default_numeric(
|
||||
VDEV_PROP_SLOW_IO_EVENTS);
|
||||
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
|
||||
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
|
||||
|
||||
@ -3931,6 +3956,11 @@ vdev_load(vdev_t *vd)
|
||||
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
|
||||
"failed [error=%d]", (u_longlong_t)zapobj, error);
|
||||
|
||||
error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
|
||||
&vd->vdev_slow_io_events);
|
||||
if (error && error != ENOENT)
|
||||
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
|
||||
"failed [error=%d]", (u_longlong_t)zapobj, error);
|
||||
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
|
||||
&vd->vdev_slow_io_n);
|
||||
if (error && error != ENOENT)
|
||||
@ -5980,15 +6010,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
||||
/*
|
||||
* Set vdev property values in the vdev props mos object.
|
||||
*/
|
||||
if (vd->vdev_root_zap != 0) {
|
||||
objid = vd->vdev_root_zap;
|
||||
} else if (vd->vdev_top_zap != 0) {
|
||||
objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
if (vdev_prop_get_objid(vd, &objid) != 0)
|
||||
panic("unexpected vdev type");
|
||||
}
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
@ -6215,6 +6238,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
}
|
||||
vd->vdev_io_t = intval;
|
||||
break;
|
||||
case VDEV_PROP_SLOW_IO_EVENTS:
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
vd->vdev_slow_io_events = intval != 0;
|
||||
break;
|
||||
case VDEV_PROP_SLOW_IO_N:
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
error = EINVAL;
|
||||
@ -6256,6 +6286,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
nvpair_t *elem = NULL;
|
||||
nvlist_t *nvprops = NULL;
|
||||
uint64_t intval = 0;
|
||||
boolean_t boolval = 0;
|
||||
char *strval = NULL;
|
||||
const char *propname = NULL;
|
||||
vdev_prop_t prop;
|
||||
@ -6269,15 +6300,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
|
||||
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
|
||||
|
||||
if (vd->vdev_root_zap != 0) {
|
||||
objid = vd->vdev_root_zap;
|
||||
} else if (vd->vdev_top_zap != 0) {
|
||||
objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
if (vdev_prop_get_objid(vd, &objid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
ASSERT(objid != 0);
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
@ -6622,6 +6646,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
intval, src);
|
||||
break;
|
||||
|
||||
case VDEV_PROP_SLOW_IO_EVENTS:
|
||||
err = vdev_prop_get_bool(vd, prop, &boolval);
|
||||
if (err && err != ENOENT)
|
||||
break;
|
||||
|
||||
src = ZPROP_SRC_LOCAL;
|
||||
if (boolval == vdev_prop_default_numeric(prop))
|
||||
src = ZPROP_SRC_DEFAULT;
|
||||
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
boolval, src);
|
||||
break;
|
||||
case VDEV_PROP_CHECKSUM_N:
|
||||
case VDEV_PROP_CHECKSUM_T:
|
||||
case VDEV_PROP_IO_N:
|
||||
|
||||
@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
|
||||
case VDEV_PROP_IO_T:
|
||||
propval = vd->vdev_io_t;
|
||||
break;
|
||||
case VDEV_PROP_SLOW_IO_EVENTS:
|
||||
propval = vd->vdev_slow_io_events;
|
||||
break;
|
||||
case VDEV_PROP_SLOW_IO_N:
|
||||
propval = vd->vdev_slow_io_n;
|
||||
break;
|
||||
|
||||
@ -5569,9 +5569,12 @@ zio_done(zio_t *zio)
|
||||
zio->io_vd->vdev_stat.vs_slow_ios++;
|
||||
mutex_exit(&zio->io_vd->vdev_stat_lock);
|
||||
|
||||
(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
|
||||
zio->io_spa, zio->io_vd, &zio->io_bookmark,
|
||||
zio, 0);
|
||||
if (zio->io_vd->vdev_slow_io_events) {
|
||||
(void) zfs_ereport_post(
|
||||
FM_EREPORT_ZFS_DELAY,
|
||||
zio->io_spa, zio->io_vd,
|
||||
&zio->io_bookmark, zio, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -71,6 +71,7 @@ typeset -a properties=(
|
||||
checksum_t
|
||||
io_n
|
||||
io_t
|
||||
slow_io_events
|
||||
slow_io_n
|
||||
slow_io_t
|
||||
trim_support
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
|
||||
#
|
||||
# Copyright (c) 2023, Klara Inc.
|
||||
# Copyright (c) 2025, Mariusz Zaborski <oshogbo@FreeBSD.org>
|
||||
#
|
||||
|
||||
# DESCRIPTION:
|
||||
@ -140,8 +141,8 @@ function slow_io_degrade
|
||||
{
|
||||
do_setup
|
||||
|
||||
zpool set slow_io_n=5 $TESTPOOL $VDEV
|
||||
zpool set slow_io_t=60 $TESTPOOL $VDEV
|
||||
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
|
||||
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
|
||||
|
||||
start_slow_io
|
||||
for i in {1..16}; do
|
||||
@ -193,6 +194,44 @@ function slow_io_no_degrade
|
||||
do_clean
|
||||
}
|
||||
|
||||
# Set slow_io_n/slow_io_t to 5 events in 60 seconds and turn the
# slow_io_events property off, then issue 16 reads while slow I/O
# injection is active (more than the 5-event threshold).
# With event generation disabled no zfs.delay ereport may appear and the
# vdev should not degrade.
function slow_io_degrade_disabled
{
	do_setup

	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_events=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Watch for up to 60 seconds to confirm that zfs.delay was not
	# generated.  A single sighting is already a failure, so stop
	# polling as soon as one shows up instead of waiting out the
	# full minute.
	#
	typeset -i i=0
	typeset -i events=0
	while [[ $i -lt 60 ]]; do
		# Every dot is escaped so only the literal class name matches.
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		[[ $events -gt 0 ]] && break
		i=$((i+1))
		sleep 1
	done
	log_note "$events delay events found"

	[[ $events -eq 0 ]] || \
	    log_fail "expecting no delay events, found $events"

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
	do_clean
}
|
||||
|
||||
log_assert "Test ZED slow io configurability"
|
||||
log_onexit cleanup
|
||||
|
||||
@ -202,5 +241,6 @@ log_must zed_start
|
||||
default_degrade
|
||||
slow_io_degrade
|
||||
slow_io_no_degrade
|
||||
slow_io_degrade_disabled
|
||||
|
||||
log_pass "Test ZED slow io configurability"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user