Add illumos FMD ZFS logic to ZED -- phase 2

The phase 2 work primarily entails the Diagnosis Engine and the Retire Agent modules. It also includes infrastructure to support a crude FMD environment to host these modules. The Diagnosis Engine consumes I/O and checksum ereports and feeds them into a SERD engine which will generate a corresponding fault diagnosis when the SERD engine fires. All the diagnosis state data is collected into cases, one case per vdev being tracked. The Retire Agent responds to diagnosed faults by isolating the faulty VDEV. It will notify the ZFS kernel module of the new VDEV state (degraded or faulted). This agent is also responsible for managing hot spares across pools. When it encounters a device fault or a device removal it replaces the device with an appropriate spare if available. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@intel.com> Closes #5343
2026-05-25 11:47:43 +03:00 · 2016-11-07 16:01:38 -07:00
parent f4bae2ed63
commit 976246fadd
17 changed files with 3597 additions and 343 deletions
@@ -168,7 +168,7 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data)
 * operation when finished).  If this succeeds, then we're done.  If it fails,
 * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
 * but that the label was not what we expected.  If the 'autoreplace' property
- * is not set, then we relabel the disk (if specified), and attempt a 'zpool
+ * is enabled, then we relabel the disk (if specified), and attempt a 'zpool
 * replace'.  If the online is successful, but the new state is something else
 * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
 * race, and we should avoid attempting to relabel the disk.
@@ -261,16 +261,15 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	}

 	/*
-	 * If the pool doesn't have the autoreplace property set, then attempt
-	 * a true online (without the unspare flag), which will trigger a FMA
-	 * fault.
+	 * If the pool doesn't have the autoreplace property set, then use
+	 * vdev online to trigger a FMA fault by posting an ereport.
 	 */
-	if (!is_dm && (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
-	    !wholedisk || physpath == NULL)) {
+	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
+	    !(wholedisk || is_dm) || (physpath == NULL)) {
 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
 		    &newstate);
-		zed_log_msg(LOG_INFO, "  zpool_vdev_online: %s FORCEFAULT (%s)",
-		    fullpath, libzfs_error_description(g_zfshdl));
+		zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
+		    "not a whole disk for '%s'", fullpath);
 		return;
 	}

@@ -291,12 +290,6 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 		return;
 	}

-	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL)) {
-		zed_log_msg(LOG_INFO, "%s: Autoreplace is not enabled on this"
-		    " pool, ignore disk.", __func__);
-		return;
-	}
-
 	/* Only autoreplace bad disks */
 	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
 	    (vs->vs_state != VDEV_STATE_FAULTED) &&
@@ -369,9 +362,13 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 				found = B_TRUE;
 				break;
 			}
+			zed_log_msg(LOG_INFO, "zpool_label_disk: %s != %s",
+			    physpath, device->pd_physpath);
 		}
 		if (!found) {
 			/* unexpected partition slice encountered */
+			zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
+			    fullpath);
 			(void) zpool_vdev_online(zhp, fullpath,
 			    ZFS_ONLINE_FORCEFAULT, &newstate);
 			return;
@@ -656,14 +653,10 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
 	 * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
 	 *
 	 * For disks, we only want to pay attention to vdevs marked as whole
-	 * disks.  For multipath devices does whole disk apply? (TBD).
+	 * disks or are a multipath device.
 	 */
-	if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL) {
-		if (!is_slice) {
-			(void) devphys_iter(devpath, devid, zfs_process_add,
-			    is_slice);
-		}
-	}
+	if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
+		(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);

 	return (0);
 }
@@ -849,9 +842,9 @@ zfs_enum_pools(void *arg)
 * For now, each agent has it's own libzfs instance
 */
 int
-zfs_slm_init(libzfs_handle_t *zfs_hdl)
+zfs_slm_init()
 {
-	if ((g_zfshdl = libzfs_init()) == NULL)
+	if ((g_zfshdl = __libzfs_init()) == NULL)
 		return (-1);

 	/*
@@ -863,6 +856,7 @@ zfs_slm_init(libzfs_handle_t *zfs_hdl)

 	if (pthread_create(&g_zfs_tid, NULL, zfs_enum_pools, NULL) != 0) {
 		list_destroy(&g_pool_list);
+		__libzfs_fini(g_zfshdl);
 		return (-1);
 	}

@@ -903,19 +897,12 @@ zfs_slm_fini()
 	}
 	list_destroy(&g_device_list);

-	libzfs_fini(g_zfshdl);
+	__libzfs_fini(g_zfshdl);
 }

 void
 zfs_slm_event(const char *class, const char *subclass, nvlist_t *nvl)
 {
-	static pthread_mutex_t serialize = PTHREAD_MUTEX_INITIALIZER;
-
-	/*
-	 * Serialize incoming events from zfs or libudev sources
-	 */
-	(void) pthread_mutex_lock(&serialize);
 	zed_log_msg(LOG_INFO, "zfs_slm_event: %s.%s", class, subclass);
 	(void) zfs_slm_deliver_event(class, subclass, nvl);
-	(void) pthread_mutex_unlock(&serialize);
 }