mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
zed: Ensure spare activation after kernel-initiated device removal
In addition to hotplug events, the kernel may also mark a failing vdev as REMOVED. This was observed in a customer report and reproduced by forcing the NVMe host driver to disable the device after a failed reset due to command timeout. In such cases, the spare was not activated because the device had already transitioned to a REMOVED state before zed processed the event. To address this, explicitly attempt hot spare activation when the kernel marks a device as REMOVED. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Ameer Hamza <ahamza@ixsystems.com> Closes #17187
This commit is contained in:
@@ -404,6 +404,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
||||
(state == VDEV_STATE_REMOVED || state == VDEV_STATE_FAULTED))) {
|
||||
const char *devtype;
|
||||
char *devname;
|
||||
boolean_t skip_removal = B_FALSE;
|
||||
|
||||
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
|
||||
&devtype) == 0) {
|
||||
@@ -441,18 +442,28 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
|
||||
nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c);
|
||||
|
||||
if (vs->vs_state == VDEV_STATE_OFFLINE)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If state removed is requested for already removed vdev,
|
||||
* its a loopback event from spa_async_remove(). Just
|
||||
* ignore it.
|
||||
*/
|
||||
if ((vs->vs_state == VDEV_STATE_REMOVED && state ==
|
||||
VDEV_STATE_REMOVED) || vs->vs_state == VDEV_STATE_OFFLINE)
|
||||
return;
|
||||
if ((vs->vs_state == VDEV_STATE_REMOVED &&
|
||||
state == VDEV_STATE_REMOVED)) {
|
||||
if (strcmp(class, "resource.fs.zfs.removed") == 0 &&
|
||||
nvlist_exists(nvl, "by_kernel")) {
|
||||
skip_removal = B_TRUE;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove the vdev since device is unplugged */
|
||||
int remove_status = 0;
|
||||
if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
|
||||
if (!skip_removal && (l2arc ||
|
||||
(strcmp(class, "resource.fs.zfs.removed") == 0))) {
|
||||
remove_status = zpool_vdev_remove_wanted(zhp, devname);
|
||||
fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
|
||||
", err:%d", devname, libzfs_errno(zhdl));
|
||||
|
||||
Reference in New Issue
Block a user