Add slow disk diagnosis to ZED

Slow disk response times can be indicative of a failing drive. ZFS
currently tracks slow I/Os (slower than zio_slow_io_ms) and generates
events (ereport.fs.zfs.delay).  However, no action is taken by ZED,
like is done for checksum or I/O errors.  This change adds slow disk
diagnosis to ZED which is opt-in using new VDEV properties:
  VDEV_PROP_SLOW_IO_N
  VDEV_PROP_SLOW_IO_T

If multiple VDEVs in a pool are undergoing slow I/Os, then it skips
the zpool_vdev_degrade().

Sponsored-By: OpenDrives Inc.
Sponsored-By: Klara Inc.
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Rob Wing <rob.wing@klarasystems.com>
Signed-off-by: Don Brady <don.brady@klarasystems.com>
Closes #15469
This commit is contained in:
Don Brady
2024-02-08 10:19:52 -07:00
committed by GitHub
parent 229b9f4ed0
commit cbe882298e
29 changed files with 655 additions and 71 deletions
+30
View File
@@ -677,6 +677,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
@@ -3755,6 +3757,18 @@ vdev_load(vdev_t *vd)
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
&vd->vdev_slow_io_n);
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_T,
&vd->vdev_slow_io_t);
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
}
/*
@@ -5970,6 +5984,20 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_io_t = intval;
break;
case VDEV_PROP_SLOW_IO_N:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_slow_io_n = intval;
break;
case VDEV_PROP_SLOW_IO_T:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_slow_io_t = intval;
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@@ -6313,6 +6341,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
case VDEV_PROP_IO_T:
case VDEV_PROP_SLOW_IO_N:
case VDEV_PROP_SLOW_IO_T:
err = vdev_prop_get_int(vd, prop, &intval);
if (err && err != ENOENT)
break;
+26
View File
@@ -222,6 +222,12 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
case VDEV_PROP_IO_T:
propval = vd->vdev_io_t;
break;
case VDEV_PROP_SLOW_IO_N:
propval = vd->vdev_slow_io_n;
break;
case VDEV_PROP_SLOW_IO_T:
propval = vd->vdev_slow_io_t;
break;
default:
propval = propdef;
break;
@@ -741,6 +747,26 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
NULL);
}
if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
uint64_t slow_io_n, slow_io_t;
slow_io_n = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_N);
if (slow_io_n != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N))
fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N,
DATA_TYPE_UINT64,
slow_io_n,
NULL);
slow_io_t = vdev_prop_get_inherited(vd, VDEV_PROP_SLOW_IO_T);
if (slow_io_t != vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T))
fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T,
DATA_TYPE_UINT64,
slow_io_t,
NULL);
}
mutex_exit(&spa->spa_errlist_lock);
*ereport_out = ereport;
+4
View File
@@ -605,6 +605,10 @@ zio_handle_io_delay(zio_t *zio)
if (vd->vdev_guid != handler->zi_record.zi_guid)
continue;
if (handler->zi_record.zi_iotype != ZIO_TYPES &&
handler->zi_record.zi_iotype != zio->io_type)
continue;
/*
* Defensive; should never happen as the array allocation
* occurs prior to inserting this handler on the list.