mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Probe vdevs before marking removed
Before allowing the ZED to mark a vdev as REMOVED due to a hotplug event, confirm that it is non-responsive by probing it. Any device which can be successfully probed should be left ONLINE to prevent a healthy pool from being incorrectly SUSPENDED. This may occur in at least the following two scenarios. 1) Drive expansion (zpool online -e) in VMware environments. If, during the partition resize operation, a partition is removed and re-created, then udev will send a removed event. 2) Re-scanning the namespaces of an NVMe device (nvme ns-rescan) may result in a udev remove and add event being delivered. Finally, update the ZED to only kick in a spare when the removal was successful. Reviewed-by: Ameer Hamza <ahamza@ixsystems.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #14859 Closes #14861
This commit is contained in:
		
							parent
							
								
									c2f0aaeb3c
								
							
						
					
					
						commit
						e2176f12a9
					
				| @ -444,14 +444,16 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, | |||||||
| 			return; | 			return; | ||||||
| 
 | 
 | ||||||
| 		/* Remove the vdev since device is unplugged */ | 		/* Remove the vdev since device is unplugged */ | ||||||
|  | 		int remove_status = 0; | ||||||
| 		if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { | 		if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) { | ||||||
| 			int status = zpool_vdev_remove_wanted(zhp, devname); | 			remove_status = zpool_vdev_remove_wanted(zhp, devname); | ||||||
| 			fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'" | 			fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'" | ||||||
| 			    ", ret:%d", devname, status); | 			    ", err:%d", devname, libzfs_errno(zhdl)); | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		/* Replace the vdev with a spare if its not a l2arc */ | 		/* Replace the vdev with a spare if its not a l2arc */ | ||||||
| 		if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") || | 		if (!l2arc && !remove_status && | ||||||
|  | 		    (!fmd_prop_get_int32(hdl, "spare_on_remove") || | ||||||
| 		    replace_with_spare(hdl, zhp, vdev) == B_FALSE)) { | 		    replace_with_spare(hdl, zhp, vdev) == B_FALSE)) { | ||||||
| 			/* Could not handle with spare */ | 			/* Could not handle with spare */ | ||||||
| 			fmd_hdl_debug(hdl, "no spare for '%s'", devname); | 			fmd_hdl_debug(hdl, "no spare for '%s'", devname); | ||||||
|  | |||||||
| @ -3994,11 +3994,18 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid) | |||||||
| 		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV))); | 		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV))); | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * If the vdev is already removed, then don't do anything. | 	 * If the vdev is already removed, or expanding which can trigger | ||||||
|  | 	 * repartition add/remove events, then don't do anything. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (vd->vdev_removed) | 	if (vd->vdev_removed || vd->vdev_expanding) | ||||||
| 		return (spa_vdev_state_exit(spa, NULL, 0)); | 		return (spa_vdev_state_exit(spa, NULL, 0)); | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Confirm the vdev has been removed, otherwise don't do anything. | ||||||
|  | 	 */ | ||||||
|  | 	if (vd->vdev_ops->vdev_op_leaf && !zio_wait(vdev_probe(vd, NULL))) | ||||||
|  | 		return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST))); | ||||||
|  | 
 | ||||||
| 	vd->vdev_remove_wanted = B_TRUE; | 	vd->vdev_remove_wanted = B_TRUE; | ||||||
| 	spa_async_request(spa, SPA_ASYNC_REMOVE); | 	spa_async_request(spa, SPA_ASYNC_REMOVE); | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Brian Behlendorf
						Brian Behlendorf