mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	zed: Control NVMe fault LEDs
The ZED code currently can only turn on the fault LED for a faulted disk in a JBOD enclosure. This extends support for faulted NVMe disks as well. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #12648 Closes #12695
This commit is contained in:
		
							parent
							
								
									22b0891dbb
								
							
						
					
					
						commit
						1fca958615
					
				| @ -29,7 +29,8 @@ | |||||||
| [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" | [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" | ||||||
| . "${ZED_ZEDLET_DIR}/zed-functions.sh" | . "${ZED_ZEDLET_DIR}/zed-functions.sh" | ||||||
| 
 | 
 | ||||||
| if [ ! -d /sys/class/enclosure ] ; then | if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then | ||||||
|  | 	# No JBOD enclosure or NVMe slots | ||||||
| 	exit 1 | 	exit 1 | ||||||
| fi | fi | ||||||
| 
 | 
 | ||||||
| @ -92,6 +93,29 @@ check_and_set_led() | |||||||
| 	done | 	done | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
# Fault LEDs for JBODs and NVMe drives are handled a little differently.
#
# On JBODs the fault LED is called 'fault' and on a path like this:
#
#   /sys/class/enclosure/0:0:1:0/SLOT 10/fault
#
# On NVMe it's called 'attention' and on a path like this:
#
#   /sys/bus/pci/slots/0/attention
#
# path_to_led (dir)
#
# Print the full path to the fault LED file for the given enclosure/slot
# directory.  'fault' is preferred over 'attention' when both exist.  Nothing
# is printed when the directory contains neither file.
#
path_to_led()
{
	dir="$1"

	# printf rather than echo so the path is emitted verbatim.
	if [ -f "$dir/fault" ] ; then
		printf '%s\n' "$dir/fault"
	elif [ -f "$dir/attention" ] ; then
		printf '%s\n' "$dir/attention"
	fi
}
|  | 
 | ||||||
| state_to_val() | state_to_val() | ||||||
| { | { | ||||||
| 	state="$1" | 	state="$1" | ||||||
| @ -105,6 +129,38 @@ state_to_val() | |||||||
| 	esac | 	esac | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
#
# nvme_dev_to_slot (dev)
#
# Given an NVMe device name like 'nvme0n1', print its PCI slot directory
# (like "/sys/bus/pci/slots/0").  Nothing is printed if the device has no
# PCI address in sysfs or if no slot matches that address.
#
nvme_dev_to_slot()
{
	dev="$1"

	# Get the address "0000:01:00.0".  Devices that are not NVMe have no
	# such file; without an address there is nothing to match against.
	address=$(cat "/sys/class/block/$dev/device/address" 2>/dev/null)
	if [ -z "$address" ] ; then
		return 0
	fi

	for slot in /sys/bus/pci/slots/* ; do
		# Only consider subdirs whose name is an actual number
		# (rather than weird directories like "1-3/").  This also
		# skips the unexpanded glob when there are no slots at all.
		case "${slot##*/}" in
		''|*[!0-9]*) continue ;;
		esac

		# An unreadable or empty slot address would be a prefix of
		# every device address, so it must never count as a match.
		this_address=$(cat "$slot/address" 2>/dev/null)
		if [ -z "$this_address" ] ; then
			continue
		fi

		# The format of address is a little different between
		# /sys/class/block/$dev/device/address and
		# /sys/bus/pci/slots/
		#
		# address=           "0000:01:00.0"
		# this_address =     "0000:01:00"
		#
		# so do a literal prefix comparison.
		case "$address" in
		"$this_address"*)
			echo "$slot"
			break
			;;
		esac
	done
}
|  | 
 | ||||||
|  | 
 | ||||||
| # process_pool (pool) | # process_pool (pool) | ||||||
| # | # | ||||||
| # Iterate through a pool and set the vdevs' enclosure slot LEDs to | # Iterate through a pool and set the vdevs' enclosure slot LEDs to | ||||||
| @ -134,6 +190,11 @@ process_pool() | |||||||
| 		# Get dev name (like 'sda') | 		# Get dev name (like 'sda') | ||||||
| 		dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')") | 		dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')") | ||||||
| 		vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*) | 		vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*) | ||||||
|  | 		if [ ! -d "$vdev_enc_sysfs_path" ] ; then | ||||||
|  | 			# This is not a JBOD disk, but it could be a PCI NVMe drive | ||||||
|  | 			vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev") | ||||||
|  | 		fi | ||||||
|  | 
 | ||||||
| 		current_val=$(echo "$therest" | awk '{print $NF}') | 		current_val=$(echo "$therest" | awk '{print $NF}') | ||||||
| 
 | 
 | ||||||
| 		if [ "$current_val" != "0" ] ; then | 		if [ "$current_val" != "0" ] ; then | ||||||
| @ -145,9 +206,10 @@ process_pool() | |||||||
| 			continue | 			continue | ||||||
| 		fi | 		fi | ||||||
| 
 | 
 | ||||||
| 		if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then | 		led_path=$(path_to_led "$vdev_enc_sysfs_path") | ||||||
|  | 		if [ ! -e "$led_path" ] ; then | ||||||
| 			rc=3 | 			rc=3 | ||||||
| 			zed_log_msg "vdev $vdev '$file/fault' doesn't exist" | 			zed_log_msg "vdev $vdev '$led_path' doesn't exist" | ||||||
| 			continue | 			continue | ||||||
| 		fi | 		fi | ||||||
| 
 | 
 | ||||||
| @ -158,7 +220,7 @@ process_pool() | |||||||
| 			continue | 			continue | ||||||
| 		fi | 		fi | ||||||
| 
 | 
 | ||||||
| 		if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then | 		if ! check_and_set_led "$led_path" "$val"; then | ||||||
| 			rc=3 | 			rc=3 | ||||||
| 		fi | 		fi | ||||||
| 	done | 	done | ||||||
| @ -169,7 +231,8 @@ if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; the | |||||||
| 	# Got a statechange for an individual vdev | 	# Got a statechange for an individual vdev | ||||||
| 	val=$(state_to_val "$ZEVENT_VDEV_STATE_STR") | 	val=$(state_to_val "$ZEVENT_VDEV_STATE_STR") | ||||||
| 	vdev=$(basename "$ZEVENT_VDEV_PATH") | 	vdev=$(basename "$ZEVENT_VDEV_PATH") | ||||||
| 	check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val" | 	ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH") | ||||||
|  | 	check_and_set_led "$ledpath" "$val" | ||||||
| else | else | ||||||
| 	# Process the entire pool | 	# Process the entire pool | ||||||
| 	poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID") | 	poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID") | ||||||
|  | |||||||
| @ -89,8 +89,8 @@ | |||||||
| 
 | 
 | ||||||
| ## | ## | ||||||
| # Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED.  This works for | # Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED.  This works for | ||||||
| # device mapper and multipath devices as well.  Your enclosure must be | # device mapper and multipath devices as well.  This works with JBOD enclosures | ||||||
| # supported by the Linux SES driver for this to work. | # and NVMe PCI drives (assuming they're supported by Linux in sysfs). | ||||||
| # | # | ||||||
| ZED_USE_ENCLOSURE_LEDS=1 | ZED_USE_ENCLOSURE_LEDS=1 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -41,7 +41,13 @@ for i in $scripts ; do | |||||||
| 		val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null) | 		val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null) | ||||||
| 		;; | 		;; | ||||||
| 	fault_led) | 	fault_led) | ||||||
|  | 		# JBODs fault LED is called 'fault', NVMe fault LED is called | ||||||
|  | 		# 'attention'. | ||||||
|  | 		if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then | ||||||
| 			val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null) | 			val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null) | ||||||
|  | 		elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then | ||||||
|  | 			val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null) | ||||||
|  | 		fi | ||||||
| 		;; | 		;; | ||||||
| 	locate_led) | 	locate_led) | ||||||
| 		val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null) | 		val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null) | ||||||
|  | |||||||
| @ -154,18 +154,148 @@ zfs_strip_path(char *path) | |||||||
| 	return (strrchr(path, '/') + 1); | 	return (strrchr(path, '/') + 1); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/*
 * Read the first line of a sysfs file into an allocated buffer and remove the
 * trailing newline (if any).
 *
 * This is useful for reading sysfs files that return a single string.
 *
 * Returns an allocated string on success.  Returns NULL if the file cannot
 * be opened, nothing can be read from it, or the allocation fails.  The
 * returned buffer must be freed by the caller.
 */
static char *
zfs_read_sysfs_file(char *filepath)
{
	char buf[4096];	/* all sysfs files report 4k size */
	char *str = NULL;
	size_t len;
	FILE *fp;

	fp = fopen(filepath, "r");
	if (fp == NULL)
		return (NULL);

	if (fgets(buf, sizeof (buf), fp) != NULL) {
		/*
		 * Remove the last newline (if any).  'len' is zero when the
		 * first byte read is a NUL, so guard the index.
		 */
		len = strlen(buf);
		if (len > 0 && buf[len - 1] == '\n')
			buf[len - 1] = '\0';
		str = strdup(buf);
	}

	(void) fclose(fp);

	return (str);
}
|  | 
 | ||||||
/*
 * Given a dev name like "nvme0n1", return the full PCI slot sysfs path to
 * the drive (in /sys/bus/pci/slots).
 *
 * For example:
 *     dev_name:       "nvme0n1"
 *     returns:        "/sys/bus/pci/slots/0"
 *
 * 'dev_name' must be an NVMe device; a leading path (like "/dev/") is
 * ignored.
 *
 * Returned string must be freed.  Returns NULL on error or no sysfs path.
 */
static char *
zfs_get_pci_slots_sys_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *address1 = NULL;
	char *address2 = NULL;
	char *path = NULL;
	char buf[MAXPATHLEN];
	char *tmp;
	int match;

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	tmp = strrchr(dev_name, '/');
	if (tmp != NULL)
		dev_name = tmp + 1;    /* +1 since we want the chr after '/' */

	if (strncmp("nvme", dev_name, 4) != 0)
		return (NULL);

	(void) snprintf(buf, sizeof (buf), "/sys/block/%s/device/address",
	    dev_name);

	address1 = zfs_read_sysfs_file(buf);
	if (address1 == NULL)
		return (NULL);

	/*
	 * /sys/block/nvme0n1/device/address format will
	 * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be
	 * "0000:01:00".  Just NULL terminate at the '.' so they match.
	 *
	 * Note that 'tmp' points into the middle of 'address1' here; it is
	 * not a separate allocation and must never be freed.
	 */
	tmp = strrchr(address1, '.');
	if (tmp != NULL)
		*tmp = '\0';

	dp = opendir("/sys/bus/pci/slots/");
	if (dp == NULL) {
		free(address1);
		return (NULL);
	}

	/*
	 * Look through all the /sys/bus/pci/slots/ subdirs
	 */
	while ((ep = readdir(dp)) != NULL) {
		/*
		 * We only care about directory names that are a single number.
		 * Sometimes there's other directories like
		 * "/sys/bus/pci/slots/0-3/" in there - skip those.
		 */
		if (!zfs_isnumber(ep->d_name))
			continue;

		(void) snprintf(buf, sizeof (buf),
		    "/sys/bus/pci/slots/%s/address", ep->d_name);

		address2 = zfs_read_sysfs_file(buf);
		if (address2 == NULL)
			continue;

		match = (strcmp(address1, address2) == 0);
		free(address2);
		if (!match)
			continue;

		/*
		 * Addresses match, we're all done.  On failure asprintf()
		 * leaves 'path' undefined, so reset it to NULL and report
		 * the error to the caller.
		 */
		if (asprintf(&path, "/sys/bus/pci/slots/%s",
		    ep->d_name) == -1)
			path = NULL;
		break;
	}

	(void) closedir(dp);
	free(address1);

	return (path);
}
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Given a dev name like "sda", return the full enclosure sysfs path to |  * Given a dev name like "sda", return the full enclosure sysfs path to | ||||||
|  * the disk.  You can also pass in the name with "/dev" prepended |  * the disk.  You can also pass in the name with "/dev" prepended | ||||||
|  * to it (like /dev/sda). |  * to it (like /dev/sda).  This works for both JBODs and NVMe PCI devices. | ||||||
|  * |  * | ||||||
|  * For example, disk "sda" in enclosure slot 1: |  * For example, disk "sda" in enclosure slot 1: | ||||||
|  *     dev:            "sda" |  *     dev_name:       "sda" | ||||||
|  *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1" |  *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1" | ||||||
|  * |  * | ||||||
|  |  * Or: | ||||||
|  |  * | ||||||
|  |  *      dev_name:   "nvme0n1" | ||||||
|  |  *      returns:    "/sys/bus/pci/slots/0" | ||||||
|  |  * | ||||||
|  * 'dev' must be a non-devicemapper device. |  * 'dev' must be a non-devicemapper device. | ||||||
|  * |  * | ||||||
|  * Returned string must be freed. |  * Returned string must be freed.  Returns NULL on error. | ||||||
|  */ |  */ | ||||||
| char * | char * | ||||||
| zfs_get_enclosure_sysfs_path(const char *dev_name) | zfs_get_enclosure_sysfs_path(const char *dev_name) | ||||||
| @ -252,6 +382,16 @@ end: | |||||||
| 	if (dp != NULL) | 	if (dp != NULL) | ||||||
| 		closedir(dp); | 		closedir(dp); | ||||||
| 
 | 
 | ||||||
|  | 	if (!path) { | ||||||
|  | 		/*
 | ||||||
|  | 		 * This particular disk isn't in a JBOD.  It could be an NVMe | ||||||
|  | 		 * drive. If so, look up the NVMe device's path in | ||||||
|  | 		 * /sys/bus/pci/slots/. Within that directory is an 'attention' | ||||||
|  | 		 * file which controls the NVMe fault LED. | ||||||
|  | 		 */ | ||||||
|  | 		path = zfs_get_pci_slots_sys_path(dev_name); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	return (path); | 	return (path); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -27,6 +27,7 @@ | |||||||
| #include <math.h> | #include <math.h> | ||||||
| #include <stdio.h> | #include <stdio.h> | ||||||
| #include <libzutil.h> | #include <libzutil.h> | ||||||
|  | #include <string.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Return B_TRUE if "str" is a number string, B_FALSE otherwise. |  * Return B_TRUE if "str" is a number string, B_FALSE otherwise. | ||||||
| @ -42,6 +43,14 @@ zfs_isnumber(const char *str) | |||||||
| 		if (!(isdigit(*str) || (*str == '.'))) | 		if (!(isdigit(*str) || (*str == '.'))) | ||||||
| 			return (B_FALSE); | 			return (B_FALSE); | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Numbers should not end with a period ("." ".." or "5." are | ||||||
|  | 	 * not valid) | ||||||
|  | 	 */ | ||||||
|  | 	if (str[strlen(str) - 1] == '.') { | ||||||
|  | 		return (B_FALSE); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	return (B_TRUE); | 	return (B_TRUE); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Tony Hutter
						Tony Hutter