Add illumos FMD ZFS logic to ZED -- phase 2

The phase 2 work primarily entails the Diagnosis Engine and
the Retire Agent modules. It also includes infrastructure
to support a crude FMD environment to host these modules.

The Diagnosis Engine consumes I/O and checksum ereports and
feeds them into a SERD engine which will generate a corres-
ponding fault diagnosis when the SERD engine fires. All the
diagnosis state data is collected into cases, one case per
vdev being tracked.

The Retire Agent responds to diagnosed faults by isolating
the faulty VDEV. It will notify the ZFS kernel module of
the new VDEV state (degraded or faulted). This agent is
also responsible for managing hot spares across pools.
When it encounters a device fault or a device removal it
replaces the device with an appropriate spare if available.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@intel.com>
Closes #5343
This commit is contained in:
Don Brady
2016-11-07 16:01:38 -07:00
committed by Brian Behlendorf
parent f4bae2ed63
commit 976246fadd
17 changed files with 3597 additions and 343 deletions
+18 -31
View File
@@ -168,7 +168,7 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data)
* operation when finished). If this succeeds, then we're done. If it fails,
* and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
* but that the label was not what we expected. If the 'autoreplace' property
* is not set, then we relabel the disk (if specified), and attempt a 'zpool
* is enabled, then we relabel the disk (if specified), and attempt a 'zpool
* replace'. If the online is successful, but the new state is something else
* (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
* race, and we should avoid attempting to relabel the disk.
@@ -261,16 +261,15 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
}
/*
* If the pool doesn't have the autoreplace property set, then attempt
* a true online (without the unspare flag), which will trigger a FMA
* fault.
* If the pool doesn't have the autoreplace property set, then use
* vdev online to trigger a FMA fault by posting an ereport.
*/
if (!is_dm && (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
!wholedisk || physpath == NULL)) {
if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
!(wholedisk || is_dm) || (physpath == NULL)) {
(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
&newstate);
zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
fullpath, libzfs_error_description(g_zfshdl));
zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
"not a whole disk for '%s'", fullpath);
return;
}
@@ -291,12 +290,6 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
return;
}
if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL)) {
zed_log_msg(LOG_INFO, "%s: Autoreplace is not enabled on this"
" pool, ignore disk.", __func__);
return;
}
/* Only autoreplace bad disks */
if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
(vs->vs_state != VDEV_STATE_FAULTED) &&
@@ -369,9 +362,13 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
found = B_TRUE;
break;
}
zed_log_msg(LOG_INFO, "zpool_label_disk: %s != %s",
physpath, device->pd_physpath);
}
if (!found) {
/* unexpected partition slice encountered */
zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
fullpath);
(void) zpool_vdev_online(zhp, fullpath,
ZFS_ONLINE_FORCEFAULT, &newstate);
return;
@@ -656,14 +653,10 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
* 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
*
* For disks, we only want to pay attention to vdevs marked as whole
* disks. For multipath devices does whole disk apply? (TBD).
* disks or are a multipath device.
*/
if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL) {
if (!is_slice) {
(void) devphys_iter(devpath, devid, zfs_process_add,
is_slice);
}
}
if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
return (0);
}
@@ -849,9 +842,9 @@ zfs_enum_pools(void *arg)
* For now, each agent has it's own libzfs instance
*/
int
zfs_slm_init(libzfs_handle_t *zfs_hdl)
zfs_slm_init()
{
if ((g_zfshdl = libzfs_init()) == NULL)
if ((g_zfshdl = __libzfs_init()) == NULL)
return (-1);
/*
@@ -863,6 +856,7 @@ zfs_slm_init(libzfs_handle_t *zfs_hdl)
if (pthread_create(&g_zfs_tid, NULL, zfs_enum_pools, NULL) != 0) {
list_destroy(&g_pool_list);
__libzfs_fini(g_zfshdl);
return (-1);
}
@@ -903,19 +897,12 @@ zfs_slm_fini()
}
list_destroy(&g_device_list);
libzfs_fini(g_zfshdl);
__libzfs_fini(g_zfshdl);
}
void
zfs_slm_event(const char *class, const char *subclass, nvlist_t *nvl)
{
static pthread_mutex_t serialize = PTHREAD_MUTEX_INITIALIZER;
/*
* Serialize incoming events from zfs or libudev sources
*/
(void) pthread_mutex_lock(&serialize);
zed_log_msg(LOG_INFO, "zfs_slm_event: %s.%s", class, subclass);
(void) zfs_slm_deliver_event(class, subclass, nvl);
(void) pthread_mutex_unlock(&serialize);
}