mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 18:34:22 +03:00
zed: Control NVMe fault LEDs
The ZED code currently can only turn on the fault LED for a faulted disk in a JBOD enclosure. This extends support for faulted NVMe disks as well. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #12648 Closes #12695
This commit is contained in:
parent
e39fe05b69
commit
ae70d628ff
@ -29,7 +29,8 @@
|
|||||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||||
|
|
||||||
if [ ! -d /sys/class/enclosure ] ; then
|
if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then
|
||||||
|
# No JBOD enclosure or NVMe slots
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -92,6 +93,29 @@ check_and_set_led()
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# JBOD enclosures and NVMe drives expose their fault LED under different
# sysfs file names.
#
# In a JBOD enclosure slot the file is named 'fault', for example:
#
#	/sys/class/enclosure/0:0:1:0/SLOT 10/fault
#
# In an NVMe PCI slot the file is named 'attention', for example:
#
#	/sys/bus/pci/slots/0/attention
#
# path_to_led (dir)
#
# Print the full path of the fault LED file found in the given enclosure/slot
# directory.  'fault' is preferred when both files exist; nothing is printed
# when neither file exists.
#
path_to_led()
{
	dir=$1

	# Probe the candidate file names in priority order.
	for _led_name in fault attention ; do
		if [ -f "$dir/$_led_name" ] ; then
			echo "$dir/$_led_name"
			return
		fi
	done
}
|
||||||
|
|
||||||
state_to_val()
|
state_to_val()
|
||||||
{
|
{
|
||||||
state="$1"
|
state="$1"
|
||||||
@ -105,6 +129,38 @@ state_to_val()
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
# nvme_dev_to_slot (dev)
#
# Given an NVMe device name like 'nvme0n1', print its PCI slot directory
# (like "/sys/bus/pci/slots/0") on stdout.
#
# Nothing is printed if the device has no readable 'address' file (for
# example because it is not an NVMe device) or if no numbered slot
# directory has a matching address.
#
nvme_dev_to_slot()
{
	dev="$1"

	# Get the address "0000:01:00.0".  This function is also tried for
	# disks that are not NVMe, which have no such file, so keep cat quiet
	# and stop early instead of scanning every slot for nothing.
	address=$(cat "/sys/class/block/$dev/device/address" 2>/dev/null)
	if [ -z "$address" ] ; then
		return 0
	fi

	# For each /sys/bus/pci/slots subdir that is an actual number
	# (rather than weird directories like "1-3/").  If the directory is
	# missing the glob stays literal ("*") and is skipped by the same test.
	for slot_dir in /sys/bus/pci/slots/* ; do
		case "${slot_dir##*/}" in
		''|*[!0-9]*)
			continue
			;;
		esac

		# An empty address would otherwise match every device, so
		# skip slots whose address cannot be read.
		this_address=$(cat "$slot_dir/address" 2>/dev/null)
		if [ -z "$this_address" ] ; then
			continue
		fi

		# The format of address is a little different between
		# /sys/class/block/$dev/device/address and
		# /sys/bus/pci/slots/
		#
		# address=      "0000:01:00.0"
		# this_address = "0000:01:00"
		#
		# Compare literally (not as a regex): the device address must
		# be the slot address, optionally followed by ".<function>".
		case "$address" in
		"$this_address"|"$this_address".*)
			echo "$slot_dir"
			break
			;;
		esac
	done
}
|
||||||
|
|
||||||
|
|
||||||
# process_pool (pool)
|
# process_pool (pool)
|
||||||
#
|
#
|
||||||
# Iterate through a pool and set the vdevs' enclosure slot LEDs to
|
# Iterate through a pool and set the vdevs' enclosure slot LEDs to
|
||||||
@ -134,6 +190,11 @@ process_pool()
|
|||||||
# Get dev name (like 'sda')
|
# Get dev name (like 'sda')
|
||||||
dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
|
dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
|
||||||
vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
|
vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
|
||||||
|
if [ ! -d "$vdev_enc_sysfs_path" ] ; then
|
||||||
|
# This is not a JBOD disk, but it could be a PCI NVMe drive
|
||||||
|
vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev")
|
||||||
|
fi
|
||||||
|
|
||||||
current_val=$(echo "$therest" | awk '{print $NF}')
|
current_val=$(echo "$therest" | awk '{print $NF}')
|
||||||
|
|
||||||
if [ "$current_val" != "0" ] ; then
|
if [ "$current_val" != "0" ] ; then
|
||||||
@ -145,9 +206,10 @@ process_pool()
|
|||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then
|
led_path=$(path_to_led "$vdev_enc_sysfs_path")
|
||||||
|
if [ ! -e "$led_path" ] ; then
|
||||||
rc=3
|
rc=3
|
||||||
zed_log_msg "vdev $vdev '$file/fault' doesn't exist"
|
zed_log_msg "vdev $vdev '$led_path' doesn't exist"
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -158,7 +220,7 @@ process_pool()
|
|||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then
|
if ! check_and_set_led "$led_path" "$val"; then
|
||||||
rc=3
|
rc=3
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
@ -169,7 +231,8 @@ if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; the
|
|||||||
# Got a statechange for an individual vdev
|
# Got a statechange for an individual vdev
|
||||||
val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
|
val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
|
||||||
vdev=$(basename "$ZEVENT_VDEV_PATH")
|
vdev=$(basename "$ZEVENT_VDEV_PATH")
|
||||||
check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val"
|
ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH")
|
||||||
|
check_and_set_led "$ledpath" "$val"
|
||||||
else
|
else
|
||||||
# Process the entire pool
|
# Process the entire pool
|
||||||
poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")
|
poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")
|
||||||
|
@ -106,8 +106,8 @@
|
|||||||
|
|
||||||
##
|
##
|
||||||
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
|
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
|
||||||
# device mapper and multipath devices as well. Your enclosure must be
|
# device mapper and multipath devices as well. This works with JBOD enclosures
|
||||||
# supported by the Linux SES driver for this to work.
|
# and NVMe PCI drives (assuming they're supported by Linux in sysfs).
|
||||||
#
|
#
|
||||||
ZED_USE_ENCLOSURE_LEDS=1
|
ZED_USE_ENCLOSURE_LEDS=1
|
||||||
|
|
||||||
|
@ -41,7 +41,13 @@ for i in $scripts ; do
|
|||||||
val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
|
val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
|
||||||
;;
|
;;
|
||||||
fault_led)
|
fault_led)
|
||||||
val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
|
# JBODs fault LED is called 'fault', NVMe fault LED is called
|
||||||
|
# 'attention'.
|
||||||
|
if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then
|
||||||
|
val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
|
||||||
|
elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then
|
||||||
|
val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null)
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
locate_led)
|
locate_led)
|
||||||
val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)
|
val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)
|
||||||
|
@ -154,18 +154,148 @@ zfs_strip_path(char *path)
|
|||||||
return (strrchr(path, '/') + 1);
|
return (strrchr(path, '/') + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Read the first line of a sysfs file into an allocated buffer and remove
 * the last newline (if any).
 *
 * This is useful for reading sysfs files that return a single string.
 *
 * filepath: path of the file to read.
 *
 * Returns an allocated string on success.  Returns NULL if the file cannot
 * be opened, if nothing can be read from it, or if the copy cannot be
 * allocated.  The returned buffer must be freed by the caller.
 */
static char *
zfs_read_sysfs_file(const char *filepath)
{
	char buf[4096]; /* all sysfs files report 4k size */
	char *str = NULL;
	FILE *fp;

	fp = fopen(filepath, "r");
	if (fp == NULL)
		return (NULL);

	if (fgets(buf, sizeof (buf), fp) != NULL) {
		/*
		 * Remove the last newline (if any).  'len' is zero when the
		 * data starts with a NUL byte, so it must be checked before
		 * buf[len - 1] is examined.
		 */
		size_t len = strlen(buf);

		if (len > 0 && buf[len - 1] == '\n')
			buf[len - 1] = '\0';

		str = strdup(buf);
	}

	(void) fclose(fp);

	return (str);
}
|
||||||
|
|
||||||
|
/*
 * Given a dev name like "nvme0n1", return the full PCI slot sysfs path to
 * the drive (in /sys/bus/pci/slots).
 *
 * For example:
 *	dev_name: "nvme0n1"
 *	returns: "/sys/bus/pci/slots/0"
 *
 * 'dev_name' must be an NVMe device; any leading path (like "/dev/") is
 * ignored.  NULL is returned for names that do not start with "nvme".
 *
 * Returned string must be freed.  Returns NULL on error or no sysfs path.
 */
static char *
zfs_get_pci_slots_sys_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *address1 = NULL;
	char *address2 = NULL;
	char *path = NULL;
	char buf[MAXPATHLEN];
	char *tmp;
	int match;

	/*
	 * If they preface 'dev_name' with a path (like "/dev") then strip
	 * it off.
	 */
	tmp = strrchr(dev_name, '/');
	if (tmp != NULL)
		dev_name = tmp + 1;    /* +1 since we want the chr after '/' */

	if (strncmp("nvme", dev_name, 4) != 0)
		return (NULL);

	(void) snprintf(buf, sizeof (buf), "/sys/block/%s/device/address",
	    dev_name);

	address1 = zfs_read_sysfs_file(buf);
	if (!address1)
		return (NULL);

	/*
	 * /sys/block/nvme0n1/device/address format will
	 * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be
	 * "0000:01:00".  Just NULL terminate at the '.' so they match.
	 */
	tmp = strrchr(address1, '.');
	if (tmp != NULL)
		*tmp = '\0';

	dp = opendir("/sys/bus/pci/slots/");
	if (dp == NULL) {
		free(address1);
		return (NULL);
	}

	/*
	 * Look through all the /sys/bus/pci/slots/ subdirs
	 */
	while ((ep = readdir(dp))) {
		/*
		 * We only care about directory names that are a single number.
		 * Sometimes there's other directories like
		 * "/sys/bus/pci/slots/0-3/" in there - skip those.
		 */
		if (!zfs_isnumber(ep->d_name))
			continue;

		(void) snprintf(buf, sizeof (buf),
		    "/sys/bus/pci/slots/%s/address", ep->d_name);

		address2 = zfs_read_sysfs_file(buf);
		if (!address2)
			continue;

		match = (strcmp(address1, address2) == 0);
		free(address2);
		if (!match)
			continue;

		/* Addresses match, we're all done */
		if (asprintf(&path, "/sys/bus/pci/slots/%s",
		    ep->d_name) == -1) {
			/*
			 * 'path' is undefined after a failed asprintf(), so
			 * reset it to keep the NULL-on-error contract.  There
			 * is nothing else to release here: 'tmp' points into
			 * 'address1', which is freed once below.
			 */
			path = NULL;
			continue;
		}
		break;
	}

	(void) closedir(dp);
	free(address1);

	return (path);
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Given a dev name like "sda", return the full enclosure sysfs path to
|
* Given a dev name like "sda", return the full enclosure sysfs path to
|
||||||
* the disk. You can also pass in the name with "/dev" prepended
|
* the disk. You can also pass in the name with "/dev" prepended
|
||||||
* to it (like /dev/sda).
|
* to it (like /dev/sda). This works for both JBODs and NVMe PCI devices.
|
||||||
*
|
*
|
||||||
* For example, disk "sda" in enclosure slot 1:
|
* For example, disk "sda" in enclosure slot 1:
|
||||||
* dev: "sda"
|
* dev_name: "sda"
|
||||||
* returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
|
* returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
|
||||||
*
|
*
|
||||||
|
* Or:
|
||||||
|
*
|
||||||
|
* dev_name: "nvme0n1"
|
||||||
|
* returns: "/sys/bus/pci/slots/0"
|
||||||
|
*
|
||||||
* 'dev' must be a non-devicemapper device.
|
 * 'dev_name' must be a non-devicemapper device.
|
||||||
*
|
*
|
||||||
* Returned string must be freed.
|
* Returned string must be freed. Returns NULL on error.
|
||||||
*/
|
*/
|
||||||
char *
|
char *
|
||||||
zfs_get_enclosure_sysfs_path(const char *dev_name)
|
zfs_get_enclosure_sysfs_path(const char *dev_name)
|
||||||
@ -252,6 +382,16 @@ end:
|
|||||||
if (dp != NULL)
|
if (dp != NULL)
|
||||||
closedir(dp);
|
closedir(dp);
|
||||||
|
|
||||||
|
if (!path) {
|
||||||
|
/*
|
||||||
|
* This particular disk isn't in a JBOD. It could be an NVMe
|
||||||
|
* drive. If so, look up the NVMe device's path in
|
||||||
|
 * /sys/bus/pci/slots/. Within that directory is an 'attention'
|
||||||
|
* file which controls the NVMe fault LED.
|
||||||
|
*/
|
||||||
|
path = zfs_get_pci_slots_sys_path(dev_name);
|
||||||
|
}
|
||||||
|
|
||||||
return (path);
|
return (path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <libzutil.h>
|
#include <libzutil.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return B_TRUE if "str" is a number string, B_FALSE otherwise.
|
* Return B_TRUE if "str" is a number string, B_FALSE otherwise.
|
||||||
@ -42,6 +43,14 @@ zfs_isnumber(const char *str)
|
|||||||
if (!(isdigit(*str) || (*str == '.')))
|
if (!(isdigit(*str) || (*str == '.')))
|
||||||
return (B_FALSE);
|
return (B_FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Numbers should not end with a period ("." ".." or "5." are
|
||||||
|
* not valid)
|
||||||
|
*/
|
||||||
|
if (str[strlen(str) - 1] == '.') {
|
||||||
|
return (B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
return (B_TRUE);
|
return (B_TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user