mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 19:04:45 +03:00
Add illumos FMD ZFS logic to ZED -- phase 2
The phase 2 work primarily entails the Diagnosis Engine and the Retire Agent modules. It also includes infrastructure to support a crude FMD environment to host these modules. The Diagnosis Engine consumes I/O and checksum ereports and feeds them into a SERD engine which will generate a corresponding fault diagnosis when the SERD engine fires. All the diagnosis state data is collected into cases, one case per vdev being tracked. The Retire Agent responds to diagnosed faults by isolating the faulty VDEV. It will notify the ZFS kernel module of the new VDEV state (degraded or faulted). This agent is also responsible for managing hot spares across pools. When it encounters a device fault or a device removal it replaces the device with an appropriate spare if available. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@intel.com> Closes #5343
This commit is contained in:
committed by
Brian Behlendorf
parent
f4bae2ed63
commit
976246fadd
@@ -80,7 +80,7 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
|
||||
if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
|
||||
zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval);
|
||||
|
||||
(void) zfs_slm_event(class, subclass, nvl);
|
||||
(void) zfs_agent_post_event(class, subclass, nvl);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -213,8 +213,6 @@ zed_udev_monitor(void *arg)
|
||||
strcmp(type, "disk") == 0 &&
|
||||
part != NULL && part[0] != '\0') {
|
||||
/* skip and wait for partition event */
|
||||
zed_log_msg(LOG_INFO, "zed_udev_monitor: %s waiting "
|
||||
"for slice", udev_device_get_devnode(dev));
|
||||
udev_device_unref(dev);
|
||||
continue;
|
||||
}
|
||||
@@ -297,12 +295,19 @@ zed_udev_monitor(void *arg)
|
||||
* dev are the same name (i.e. /dev/dm-5), then
|
||||
* there is no real underlying disk for this
|
||||
* multipath device, and so this "change" event
|
||||
* really a multipath removal.
|
||||
* really is a multipath removal.
|
||||
*/
|
||||
class = EC_DEV_ADD;
|
||||
subclass = ESC_DISK;
|
||||
} else {
|
||||
/* multipath remove, ignore it. */
|
||||
tmp = (char *)
|
||||
udev_device_get_property_value(dev,
|
||||
"DM_NR_VALID_PATHS");
|
||||
/* treat as a multipath remove */
|
||||
if (tmp != NULL && strcmp(tmp, "0") == 0) {
|
||||
class = EC_DEV_REMOVE;
|
||||
subclass = ESC_DISK;
|
||||
}
|
||||
}
|
||||
free(tmp2);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user