mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-29 12:29:35 +03:00
zed: Control NVMe fault LEDs
The ZED code currently can only turn on the fault LED for a faulted disk in a JBOD enclosure. This extends support for faulted NVMe disks as well. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #12648 Closes #12695
This commit is contained in:
parent
22b0891dbb
commit
1fca958615
@ -29,7 +29,8 @@
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
if [ ! -d /sys/class/enclosure ] ; then
|
||||
if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then
|
||||
# No JBOD enclosure or NVMe slots
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@ -92,6 +93,29 @@ check_and_set_led()
|
||||
done
|
||||
}
|
||||
|
||||
# JBOD enclosures and NVMe PCI slots expose their fault LED under
# different sysfs file names.
#
# JBOD: the file is named 'fault', for example:
#
#	/sys/class/enclosure/0:0:1:0/SLOT 10/fault
#
# NVMe: the file is named 'attention', for example:
#
#	/sys/bus/pci/slots/0/attention
#
# path_to_led (dir)
#
# Print the full path of the fault LED file found in the enclosure/slot
# directory 'dir'.  'fault' is preferred when both files exist.  Nothing
# is printed when neither file is present.
#
path_to_led()
{
	dir=$1

	# JBOD-style LED takes precedence.
	if [ -f "$dir/fault" ] ; then
		echo "$dir/fault"
		return
	fi

	# Otherwise fall back to the NVMe-style LED, if there is one.
	if [ -f "$dir/attention" ] ; then
		echo "$dir/attention"
	fi
}
|
||||
|
||||
state_to_val()
|
||||
{
|
||||
state="$1"
|
||||
@ -105,6 +129,38 @@ state_to_val()
|
||||
esac
|
||||
}
|
||||
|
||||
#
# Given a nvme name like 'nvme0n1', pass back its slot directory
# like "/sys/bus/pci/slots/0"
#
# Prints nothing if the device has no readable PCI address in sysfs or if
# no numbered slot reports a matching address.
#
nvme_dev_to_slot()
{
	dev="$1"

	# Get the address "0000:01:00.0".  This is also called for disks that
	# are not NVMe and have no 'address' file, so stay quiet and bail out
	# rather than print an error and compare against an empty string.
	address=$(cat "/sys/class/block/$dev/device/address" 2>/dev/null)
	[ -n "$address" ] || return 0

	# For each /sys/bus/pci/slots subdir that is an actual number
	# (rather than weird directories like "1-3/").  An unmatched glob
	# leaves a literal '*' behind, which the same check rejects.
	for i in /sys/bus/pci/slots/* ; do
		case "${i##*/}" in
		''|*[!0-9]*) continue ;;
		esac

		# An unreadable or empty slot address must never match;
		# an empty prefix would otherwise match every device.
		this_address=$(cat "$i/address" 2>/dev/null)
		[ -n "$this_address" ] || continue

		# The format of address is a little different between
		# /sys/class/block/$dev/device/address and
		# /sys/bus/pci/slots/
		#
		# address=           "0000:01:00.0"
		# this_address =     "0000:01:00"
		#
		# So do a literal prefix comparison (no regex interpretation
		# of the address characters).
		case "$address" in
		"$this_address"*)
			echo "$i"
			break
			;;
		esac
	done
}
|
||||
|
||||
|
||||
# process_pool (pool)
|
||||
#
|
||||
# Iterate through a pool and set the vdevs' enclosure slot LEDs to
|
||||
@ -134,6 +190,11 @@ process_pool()
|
||||
# Get dev name (like 'sda')
|
||||
dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
|
||||
vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
|
||||
if [ ! -d "$vdev_enc_sysfs_path" ] ; then
|
||||
# This is not a JBOD disk, but it could be a PCI NVMe drive
|
||||
vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev")
|
||||
fi
|
||||
|
||||
current_val=$(echo "$therest" | awk '{print $NF}')
|
||||
|
||||
if [ "$current_val" != "0" ] ; then
|
||||
@ -145,9 +206,10 @@ process_pool()
|
||||
continue
|
||||
fi
|
||||
|
||||
if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then
|
||||
led_path=$(path_to_led "$vdev_enc_sysfs_path")
|
||||
if [ ! -e "$led_path" ] ; then
|
||||
rc=3
|
||||
zed_log_msg "vdev $vdev '$file/fault' doesn't exist"
|
||||
zed_log_msg "vdev $vdev '$led_path' doesn't exist"
|
||||
continue
|
||||
fi
|
||||
|
||||
@ -158,7 +220,7 @@ process_pool()
|
||||
continue
|
||||
fi
|
||||
|
||||
if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then
|
||||
if ! check_and_set_led "$led_path" "$val"; then
|
||||
rc=3
|
||||
fi
|
||||
done
|
||||
@ -169,7 +231,8 @@ if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; the
|
||||
# Got a statechange for an individual vdev
|
||||
val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
|
||||
vdev=$(basename "$ZEVENT_VDEV_PATH")
|
||||
check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val"
|
||||
ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH")
|
||||
check_and_set_led "$ledpath" "$val"
|
||||
else
|
||||
# Process the entire pool
|
||||
poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")
|
||||
|
@ -89,8 +89,8 @@
|
||||
|
||||
##
|
||||
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
|
||||
# device mapper and multipath devices as well. Your enclosure must be
|
||||
# supported by the Linux SES driver for this to work.
|
||||
# device mapper and multipath devices as well. This works with JBOD enclosures
|
||||
# and NVMe PCI drives (assuming they're supported by Linux in sysfs).
|
||||
#
|
||||
ZED_USE_ENCLOSURE_LEDS=1
|
||||
|
||||
|
@ -41,7 +41,13 @@ for i in $scripts ; do
|
||||
val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
|
||||
;;
|
||||
fault_led)
|
||||
val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
|
||||
# JBODs fault LED is called 'fault', NVMe fault LED is called
|
||||
# 'attention'.
|
||||
if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then
|
||||
val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
|
||||
elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then
|
||||
val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null)
|
||||
fi
|
||||
;;
|
||||
locate_led)
|
||||
val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)
|
||||
|
@ -154,18 +154,148 @@ zfs_strip_path(char *path)
|
||||
return (strrchr(path, '/') + 1);
|
||||
}
|
||||
|
||||
/*
 * Read the first line of a sysfs file into an allocated buffer and remove
 * its trailing newline (if any).
 *
 * This is useful for reading sysfs files that return a single string.
 *
 * filepath: path of the file to read.
 *
 * Returns an allocated string on success.  Returns NULL if the file cannot
 * be opened, nothing can be read from it, or the allocation fails.  The
 * returned buffer must be freed by the caller.
 */
static char *
zfs_read_sysfs_file(const char *filepath)
{
	char buf[4096];	/* all sysfs files report 4k size */
	char *str = NULL;
	FILE *fp;

	fp = fopen(filepath, "r");
	if (fp == NULL)
		return (NULL);

	if (fgets(buf, sizeof (buf), fp) != NULL) {
		/*
		 * Terminate the string at the newline.  strcspn() returns the
		 * string length when there is no newline, so this is a no-op
		 * in that case and never indexes before the start of 'buf',
		 * even when the data read begins with a NUL byte.
		 */
		buf[strcspn(buf, "\n")] = '\0';
		str = strdup(buf);
	}

	(void) fclose(fp);

	return (str);
}
|
||||
|
||||
/*
 * Given a dev name like "nvme0n1", return the full PCI slot sysfs path to
 * the drive (in /sys/bus/pci/slots).
 *
 * For example:
 *     dev_name: "nvme0n1"
 *     returns: "/sys/bus/pci/slots/0"
 *
 * 'dev_name' must be an NVMe device (its name must start with "nvme").  A
 * leading path (like "/dev/") is stripped before the lookup.
 *
 * Returned string must be freed.  Returns NULL on error or no sysfs path.
 */
static char *
zfs_get_pci_slots_sys_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *address1 = NULL;
	char *address2 = NULL;
	char *path = NULL;
	char buf[MAXPATHLEN];
	const char *slash;
	char *dot;
	int match;

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	slash = strrchr(dev_name, '/');
	if (slash != NULL)
		dev_name = slash + 1; /* +1 since we want the chr after '/' */

	if (strncmp("nvme", dev_name, 4) != 0)
		return (NULL);

	(void) snprintf(buf, sizeof (buf), "/sys/block/%s/device/address",
	    dev_name);

	address1 = zfs_read_sysfs_file(buf);
	if (address1 == NULL)
		return (NULL);

	/*
	 * /sys/block/nvme0n1/device/address format will
	 * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be
	 * "0000:01:00".  Just NULL terminate at the '.' so they match.
	 */
	dot = strrchr(address1, '.');
	if (dot != NULL)
		*dot = '\0';

	dp = opendir("/sys/bus/pci/slots/");
	if (dp == NULL) {
		free(address1);
		return (NULL);
	}

	/*
	 * Look through all the /sys/bus/pci/slots/ subdirs
	 */
	while ((ep = readdir(dp)) != NULL) {
		/*
		 * We only care about directory names that are a single number.
		 * Sometimes there's other directories like
		 * "/sys/bus/pci/slots/0-3/" in there - skip those.
		 */
		if (!zfs_isnumber(ep->d_name))
			continue;

		(void) snprintf(buf, sizeof (buf),
		    "/sys/bus/pci/slots/%s/address", ep->d_name);

		address2 = zfs_read_sysfs_file(buf);
		if (address2 == NULL)
			continue;

		match = (strcmp(address1, address2) == 0);
		free(address2);
		if (!match)
			continue;

		/*
		 * Addresses match, we're all done.  On allocation failure
		 * the contents of 'path' are not defined, so reset it to
		 * NULL and report the error to the caller.  Nothing else
		 * needs freeing here: 'address1' is released below.
		 */
		if (asprintf(&path, "/sys/bus/pci/slots/%s",
		    ep->d_name) == -1)
			path = NULL;
		break;
	}

	(void) closedir(dp);
	free(address1);

	return (path);
}
|
||||
|
||||
/*
|
||||
* Given a dev name like "sda", return the full enclosure sysfs path to
|
||||
* the disk. You can also pass in the name with "/dev" prepended
|
||||
* to it (like /dev/sda).
|
||||
* to it (like /dev/sda). This works for both JBODs and NVMe PCI devices.
|
||||
*
|
||||
* For example, disk "sda" in enclosure slot 1:
|
||||
* dev: "sda"
|
||||
* dev_name: "sda"
|
||||
* returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
|
||||
*
|
||||
* Or:
|
||||
*
|
||||
* dev_name: "nvme0n1"
|
||||
* returns: "/sys/bus/pci/slots/0"
|
||||
*
|
||||
* 'dev_name' must be a non-devicemapper device.
|
||||
*
|
||||
* Returned string must be freed.
|
||||
* Returned string must be freed. Returns NULL on error.
|
||||
*/
|
||||
char *
|
||||
zfs_get_enclosure_sysfs_path(const char *dev_name)
|
||||
@ -252,6 +382,16 @@ end:
|
||||
if (dp != NULL)
|
||||
closedir(dp);
|
||||
|
||||
if (!path) {
|
||||
/*
|
||||
* This particular disk isn't in a JBOD. It could be an NVMe
|
||||
* drive. If so, look up the NVMe device's path in
|
||||
* /sys/bus/pci/slots/. Within that directory is an 'attention'
|
||||
* file which controls the NVMe fault LED.
|
||||
*/
|
||||
path = zfs_get_pci_slots_sys_path(dev_name);
|
||||
}
|
||||
|
||||
return (path);
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <libzutil.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Return B_TRUE if "str" is a number string, B_FALSE otherwise.
|
||||
@ -42,6 +43,14 @@ zfs_isnumber(const char *str)
|
||||
if (!(isdigit(*str) || (*str == '.')))
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* Numbers should not end with a period ("." ".." or "5." are
|
||||
* not valid)
|
||||
*/
|
||||
if (str[strlen(str) - 1] == '.') {
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user