mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	zed: Control NVMe fault LEDs
The ZED code currently can only turn on the fault LED for a faulted disk in a JBOD enclosure. This extends support for faulted NVMe disks as well. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #12648 Closes #12695
This commit is contained in:
		
							parent
							
								
									6de5c440fa
								
							
						
					
					
						commit
						4ba1a6227a
					
				@ -29,7 +29,8 @@
 | 
			
		||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
 | 
			
		||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
 | 
			
		||||
 | 
			
		||||
if [ ! -d /sys/class/enclosure ] ; then
 | 
			
		||||
if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then
 | 
			
		||||
	# No JBOD enclosure or NVMe slots
 | 
			
		||||
	exit 1
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
@ -92,6 +93,29 @@ check_and_set_led()
 | 
			
		||||
	done
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# Fault LEDs for JBODs and NVMe drives are handled a little differently.
 | 
			
		||||
#
 | 
			
		||||
# On JBODs the fault LED is called 'fault' and on a path like this:
 | 
			
		||||
#
 | 
			
		||||
#   /sys/class/enclosure/0:0:1:0/SLOT 10/fault
 | 
			
		||||
#
 | 
			
		||||
# On NVMe it's called 'attention' and on a path like this:
 | 
			
		||||
#
 | 
			
		||||
#   /sys/bus/pci/slots/0/attention
 | 
			
		||||
#
 | 
			
		||||
# This function returns the full path to the fault LED file for a given
 | 
			
		||||
# enclosure/slot directory.
 | 
			
		||||
#
 | 
			
		||||
path_to_led()
{
	dir=$1

	# JBOD enclosure slots expose the fault LED as a 'fault' file.
	if [ -f "$dir/fault" ] ; then
		echo "$dir/fault"
		return
	fi

	# NVMe PCI slots expose it as an 'attention' file.  Nothing is
	# printed when neither file exists.
	if [ -f "$dir/attention" ] ; then
		echo "$dir/attention"
	fi
}
 | 
			
		||||
 | 
			
		||||
state_to_val()
 | 
			
		||||
{
 | 
			
		||||
	state="$1"
 | 
			
		||||
@ -105,6 +129,38 @@ state_to_val()
 | 
			
		||||
	esac
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#
# Given a nvme name like 'nvme0n1', pass back its slot directory
# like "/sys/bus/pci/slots/0"
#
# Prints nothing (and returns success) when the device has no readable
# PCI address in sysfs or when no numbered slot has a matching address.
#
nvme_dev_to_slot()
{
	dev="$1"

	# Get the address "0000:01:00.0".  The file may be missing for
	# devices that are not PCI NVMe drives; in that case stay quiet
	# instead of letting 'cat' complain and then comparing an empty
	# address against every slot.
	address=$(cat "/sys/class/block/$dev/device/address" 2>/dev/null)
	if [ -z "$address" ] ; then
		return 0
	fi

	# The format of address is a little different between
	# /sys/class/block/$dev/device/address and
	# /sys/bus/pci/slots/
	#
	# address=           "0000:01:00.0"
	# this_address =     "0000:01:00"
	#
	# Drop the trailing ".<function>" so the two can be compared for
	# exact equality rather than by regex prefix match.
	address="${address%.*}"

	# For each /sys/bus/pci/slots subdir that is an actual number
	# (rather than weird directories like "1-3/").
	for i in /sys/bus/pci/slots/* ; do
		i="${i##*/}"
		case "$i" in
		''|*[!0-9]*)
			continue
			;;
		esac

		this_address=$(cat "/sys/bus/pci/slots/$i/address" 2>/dev/null)
		if [ "$address" = "$this_address" ] ; then
			echo "/sys/bus/pci/slots/$i"
			break
		fi
	done
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# process_pool (pool)
 | 
			
		||||
#
 | 
			
		||||
# Iterate through a pool and set the vdevs' enclosure slot LEDs to
 | 
			
		||||
@ -134,6 +190,11 @@ process_pool()
 | 
			
		||||
		# Get dev name (like 'sda')
 | 
			
		||||
		dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
 | 
			
		||||
		vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
 | 
			
		||||
		if [ ! -d "$vdev_enc_sysfs_path" ] ; then
 | 
			
		||||
			# This is not a JBOD disk, but it could be a PCI NVMe drive
 | 
			
		||||
			vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev")
 | 
			
		||||
		fi
 | 
			
		||||
 | 
			
		||||
		current_val=$(echo "$therest" | awk '{print $NF}')
 | 
			
		||||
 | 
			
		||||
		if [ "$current_val" != "0" ] ; then
 | 
			
		||||
@ -145,9 +206,10 @@ process_pool()
 | 
			
		||||
			continue
 | 
			
		||||
		fi
 | 
			
		||||
 | 
			
		||||
		if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then
 | 
			
		||||
		led_path=$(path_to_led "$vdev_enc_sysfs_path")
 | 
			
		||||
		if [ ! -e "$led_path" ] ; then
 | 
			
		||||
			rc=3
 | 
			
		||||
			zed_log_msg "vdev $vdev '$file/fault' doesn't exist"
 | 
			
		||||
			zed_log_msg "vdev $vdev '$led_path' doesn't exist"
 | 
			
		||||
			continue
 | 
			
		||||
		fi
 | 
			
		||||
 | 
			
		||||
@ -158,7 +220,7 @@ process_pool()
 | 
			
		||||
			continue
 | 
			
		||||
		fi
 | 
			
		||||
 | 
			
		||||
		if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then
 | 
			
		||||
		if ! check_and_set_led "$led_path" "$val"; then
 | 
			
		||||
			rc=3
 | 
			
		||||
		fi
 | 
			
		||||
	done
 | 
			
		||||
@ -169,7 +231,8 @@ if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; the
 | 
			
		||||
	# Got a statechange for an individual vdev
 | 
			
		||||
	val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
 | 
			
		||||
	vdev=$(basename "$ZEVENT_VDEV_PATH")
 | 
			
		||||
	check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val"
 | 
			
		||||
	ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH")
 | 
			
		||||
	check_and_set_led "$ledpath" "$val"
 | 
			
		||||
else
 | 
			
		||||
	# Process the entire pool
 | 
			
		||||
	poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")
 | 
			
		||||
 | 
			
		||||
@ -89,8 +89,8 @@
 | 
			
		||||
 | 
			
		||||
##
 | 
			
		||||
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED.  This works for
 | 
			
		||||
# device mapper and multipath devices as well.  Your enclosure must be
 | 
			
		||||
# supported by the Linux SES driver for this to work.
 | 
			
		||||
# device mapper and multipath devices as well.  This works with JBOD enclosures
 | 
			
		||||
# and NVMe PCI drives (assuming they're supported by Linux in sysfs).
 | 
			
		||||
#
 | 
			
		||||
ZED_USE_ENCLOSURE_LEDS=1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -41,7 +41,13 @@ for i in $scripts ; do
 | 
			
		||||
		val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
 | 
			
		||||
		;;
 | 
			
		||||
	fault_led)
 | 
			
		||||
		# JBODs fault LED is called 'fault', NVMe fault LED is called
 | 
			
		||||
		# 'attention'.
 | 
			
		||||
		if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then
 | 
			
		||||
			val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
 | 
			
		||||
		elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then
 | 
			
		||||
			val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null)
 | 
			
		||||
		fi
 | 
			
		||||
		;;
 | 
			
		||||
	locate_led)
 | 
			
		||||
		val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)
 | 
			
		||||
 | 
			
		||||
@ -154,18 +154,148 @@ zfs_strip_path(char *path)
 | 
			
		||||
	return (strrchr(path, '/') + 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Read the first line of a sysfs file into an allocated buffer and remove
 * the trailing newline (if any).
 *
 * This is useful for reading sysfs files that return a single string.  At
 * most sizeof (buf) - 1 bytes are read.  Return an allocated string pointer
 * on success, NULL if the file cannot be opened, nothing could be read, or
 * the copy could not be allocated.  Returned buffer must be freed by the
 * user.
 */
static char *
zfs_read_sysfs_file(char *filepath)
{
	char buf[4096];	/* all sysfs files report 4k size */
	char *str = NULL;

	FILE *fp = fopen(filepath, "r");
	if (fp == NULL) {
		return (NULL);
	}
	if (fgets(buf, sizeof (buf), fp) == buf) {
		/*
		 * Remove the last newline (if any).  The length check keeps
		 * us from indexing buf[-1] if the line starts with a NUL
		 * byte and strlen() therefore reports zero.
		 */
		size_t len = strlen(buf);
		if (len > 0 && buf[len - 1] == '\n') {
			buf[len - 1] = '\0';
		}
		str = strdup(buf);
	}

	(void) fclose(fp);

	return (str);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Given a dev name like "nvme0n1", return the full PCI slot sysfs path to
 * the drive (in /sys/bus/pci/slots).
 *
 * For example:
 *     dev:            "nvme0n1"
 *     returns:        "/sys/bus/pci/slots/0"
 *
 * 'dev' must be an NVMe device; any leading path (like "/dev/") is ignored.
 *
 * Returned string must be freed.  Returns NULL on error or no sysfs path.
 */
static char *
zfs_get_pci_slots_sys_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *address1 = NULL;
	char *address2 = NULL;
	char *path = NULL;
	char buf[MAXPATHLEN];
	char *tmp;

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	tmp = strrchr(dev_name, '/');
	if (tmp != NULL)
		dev_name = tmp + 1;    /* +1 since we want the chr after '/' */

	if (strncmp("nvme", dev_name, 4) != 0)
		return (NULL);

	(void) snprintf(buf, sizeof (buf), "/sys/block/%s/device/address",
	    dev_name);

	address1 = zfs_read_sysfs_file(buf);
	if (!address1)
		return (NULL);

	/*
	 * /sys/block/nvme0n1/device/address format will
	 * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be
	 * "0000:01:00".  Just NULL terminate at the '.' so they match.
	 */
	tmp = strrchr(address1, '.');
	if (tmp != NULL)
		*tmp = '\0';

	dp = opendir("/sys/bus/pci/slots/");
	if (dp == NULL) {
		free(address1);
		return (NULL);
	}

	/*
	 * Look through all the /sys/bus/pci/slots/ subdirs
	 */
	while ((ep = readdir(dp))) {
		int match;

		/*
		 * We only care about directory names that are a single number.
		 * Sometimes there's other directories like
		 * "/sys/bus/pci/slots/0-3/" in there - skip those.
		 */
		if (!zfs_isnumber(ep->d_name))
			continue;

		(void) snprintf(buf, sizeof (buf),
		    "/sys/bus/pci/slots/%s/address", ep->d_name);

		address2 = zfs_read_sysfs_file(buf);
		if (!address2)
			continue;

		match = (strcmp(address1, address2) == 0);
		free(address2);
		if (!match)
			continue;

		/* Addresses match, we're all done */
		if (asprintf(&path, "/sys/bus/pci/slots/%s",
		    ep->d_name) == -1) {
			/*
			 * Nothing was allocated, so there is nothing to
			 * free here ('tmp' only ever points into the middle
			 * of another string and must never be freed).  The
			 * contents of 'path' are undefined after a failed
			 * asprintf(), so reset it before returning.
			 */
			path = NULL;
		}
		break;
	}

	(void) closedir(dp);
	free(address1);

	return (path);
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Given a dev name like "sda", return the full enclosure sysfs path to
 | 
			
		||||
 * the disk.  You can also pass in the name with "/dev" prepended
 | 
			
		||||
 * to it (like /dev/sda).
 | 
			
		||||
 * to it (like /dev/sda).  This works for both JBODs and NVMe PCI devices.
 | 
			
		||||
 *
 | 
			
		||||
 * For example, disk "sda" in enclosure slot 1:
 | 
			
		||||
 *     dev:            "sda"
 | 
			
		||||
 *     dev_name:       "sda"
 | 
			
		||||
 *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
 | 
			
		||||
 *
 | 
			
		||||
 * Or:
 | 
			
		||||
 *
 | 
			
		||||
 *      dev_name:   "nvme0n1"
 | 
			
		||||
 *      returns:    "/sys/bus/pci/slots/0"
 | 
			
		||||
 *
 | 
			
		||||
 * 'dev' must be a non-devicemapper device.
 | 
			
		||||
 *
 | 
			
		||||
 * Returned string must be freed.
 | 
			
		||||
 * Returned string must be freed.  Returns NULL on error.
 | 
			
		||||
 */
 | 
			
		||||
char *
 | 
			
		||||
zfs_get_enclosure_sysfs_path(const char *dev_name)
 | 
			
		||||
@ -252,6 +382,16 @@ end:
 | 
			
		||||
	if (dp != NULL)
 | 
			
		||||
		closedir(dp);
 | 
			
		||||
 | 
			
		||||
	if (!path) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * This particular disk isn't in a JBOD.  It could be an NVMe
 | 
			
		||||
		 * drive. If so, look up the NVMe device's path in
 | 
			
		||||
		 * /sys/bus/pci/slots/. Within that directory is a 'attention'
 | 
			
		||||
		 * file which controls the NVMe fault LED.
 | 
			
		||||
		 */
 | 
			
		||||
		path = zfs_get_pci_slots_sys_path(dev_name);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return (path);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -27,6 +27,7 @@
 | 
			
		||||
#include <math.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <libzutil.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Return B_TRUE if "str" is a number string, B_FALSE otherwise.
 | 
			
		||||
@ -42,6 +43,14 @@ zfs_isnumber(const char *str)
 | 
			
		||||
		if (!(isdigit(*str) || (*str == '.')))
 | 
			
		||||
			return (B_FALSE);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Numbers should not end with a period ("." ".." or "5." are
 | 
			
		||||
	 * not valid)
 | 
			
		||||
	 */
 | 
			
		||||
	if (str[strlen(str) - 1] == '.') {
 | 
			
		||||
		return (B_FALSE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return (B_TRUE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user