Turn on/off enclosure slot fault LED even when disk isn't present

Previously when a drive faulted, the statechange-led.sh script would look up
the drive's LED sysfs entry in /sys/block/sd*/device/enclosure_device, and
turn it on.  During testing we noticed that if you pulled out a drive, or if
the drive was so badly broken that it no longer appeared to Linux, the
/sys/block/sd* path would be removed, and the script could not look up the
LED entry.

To fix this, this patch looks up the disk's more persistent
"/sys/class/enclosure/X:X:X:X/Slot N" LED sysfs path at pool import.  It then
passes that path to the statechange-led script to use, rather than having the
script look it up on the fly.  This allows the script to turn on/off the slot
LEDs even when the drive is missing.

Closes #5309 
Closes #2375
This commit is contained in:
Tony Hutter 2016-10-24 10:45:59 -07:00 committed by Brian Behlendorf
parent a85cefa35c
commit 1bbd877049
13 changed files with 217 additions and 82 deletions

View File

@ -185,7 +185,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
uint64_t wholedisk = 0ULL;
uint64_t offline = 0ULL;
uint64_t guid = 0ULL;
char *physpath = NULL, *new_devid = NULL;
char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
char rawpath[PATH_MAX], fullpath[PATH_MAX];
char devpath[PATH_MAX];
int ret;
@ -206,6 +206,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
}
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
&enc_sysfs_path);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);
@ -214,7 +216,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
return; /* don't intervene if it was taken offline */
#ifdef HAVE_LIBDEVMAPPER
is_dm = dev_is_dm(path);
is_dm = zfs_dev_is_dm(path);
#endif
zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
" wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
@ -402,6 +404,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 ||
(physpath != NULL && nvlist_add_string(newvd,
ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
nvlist_add_string(newvd, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
enc_sysfs_path) != 0 ||
nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,

View File

@ -2,8 +2,13 @@
#
# Turn off/on the VDEV's enclosure fault LEDs when the pool's state changes.
#
# Turn LED on if the VDEV becomes faulted/degraded, and turn it back off when
# it's healthy again. This requires that your enclosure be supported by the
# Turn LED on if the VDEV becomes faulted or degraded, and turn it back off
# when it's online again. It will also turn on the LED (or keep it on) if
# the drive becomes unavailable, unless the drive was previously in an
# online state (online->unavail is a normal state transition during an
# autoreplace).
#
# This script requires that your enclosure be supported by the
# Linux SCSI enclosure services (ses) driver. The script will do nothing
# if you have no enclosure, or if your enclosure isn't supported.
#
@ -13,76 +18,82 @@
# 0: enclosure led successfully set
# 1: enclosure leds not available
# 2: enclosure leds administratively disabled
# 3: ZED built without libdevmapper
# 3: ZED didn't pass enclosure sysfs path
# 4: Enclosure sysfs path doesn't exist
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
# ZEVENT_VDEV_UPATH will not be present if ZFS is not built with libdevmapper
[ -n "${ZEVENT_VDEV_UPATH}" ] || exit 3
if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then
exit 2
fi
if [ ! -d /sys/class/enclosure ] ; then
exit 1
fi
if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then
exit 2
fi
[ -n "${ZEVENT_VDEV_ENC_SYSFS_PATH}" ] || exit 3
[ -e "${ZEVENT_VDEV_ENC_SYSFS_PATH}/fault" ] || exit 4
# Turn on/off enclosure LEDs
function led
{
name=$1
file="$1/fault"
val=$2
# We want to check the current state first, since writing to the
# 'fault' entry always causes a SES command, even if the
# current state is already what you want.
if [ -e /sys/block/$name/device/enclosure_device*/fault ] ; then
# We have to do some monkey business to deal with spaces in
# enclosure_device names. I've seen horrible things like this:
#
# '/sys/block/sdfw/device/enclosure_device:SLOT 43 41 /fault'
#
# ...so escape all spaces.
file=`ls /sys/block/$name/device/enclosure_device*/fault | sed 's/\s/\\ /g'`
current=$(cat "${file}")
current=`cat "$file"`
# On some enclosures if you write 1 to fault, and read it back,
# it will return 2. Treat all non-zero values as 1 for
# simplicity.
if [ "$current" != "0" ] ; then
current=1
fi
# On some enclosures if you write 1 to fault, and read it back,
# it will return 2. Treat all non-zero values as 1 for
# simplicity.
if [ "$current" != "0" ] ; then
current=1
fi
if [ "$current" != "$val" ] ; then
# Set the value twice. I've seen enclosures that were
# flakey about setting it the first time.
echo $val > "$file"
echo $val > "$file"
fi
if [ "$current" != "$val" ] ; then
# Set the value twice. I've seen enclosures that were
# flakey about setting it the first time.
echo "$val" > "$file"
echo "$val" > "$file"
fi
}
# Decide whether to turn on/off an LED based on the state
# Pass in path name and fault string ("ONLINE"/"FAULTED"/"DEGRADED"...etc)
#
# We only turn on LEDs when a drive becomes FAULTED, DEGRADED, or UNAVAIL and
# only turn it off when it comes back ONLINE. All other states are ignored, and
# keep the previous LED state.
function process {
# path=/dev/sda, fault=
path=$1
path="$1"
fault=$2
name=`basename $path`
if [ -z "$name" ] ; then
return
fi
prev=$3
if [ "$fault" == "FAULTED" ] || [ "$fault" == "DEGRADED" ] ; then
led $name 1
else
led $name 0
led "$path" 1
elif [ "$fault" == "UNAVAIL" ] && [ "$prev" != "ONLINE" ] ; then
# For the most part, UNAVAIL should turn on the LED. However,
# during an autoreplace, we see our new drive go online,
# followed by our "old" drive going ONLINE->UNAVAIL. Since the
# "old" drive has the same slot information, we want to ignore
# the ONLINE->UNAVAIL event.
#
# NAME STATE READ WRITE CKSUM
# mypool3 DEGRADED 0 0 0
# mirror-0 DEGRADED 0 0 0
# A1 ONLINE 0 0 0
# A2 ONLINE 0 880 0
# replacing-3 UNAVAIL 0 0 0
# old UNAVAIL 0 2.93K 0 corrupted data
# A3 ONLINE 0 0 156 (resilvering)
led "$path" 1
elif [ "$fault" == "ONLINE" ] ; then
led "$path" 0
fi
}
process "$ZEVENT_VDEV_UPATH" "$ZEVENT_VDEV_STATE_STR"
process "$ZEVENT_VDEV_ENC_SYSFS_PATH" "$ZEVENT_VDEV_STATE_STR" \
"$ZEVENT_VDEV_LASTSTATE_STR"

View File

@ -286,7 +286,7 @@ zed_udev_monitor(void *arg)
udev_device_get_property_value(dev, "DM_UUID") &&
udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) {
tmp = (char *) udev_device_get_devnode(dev);
tmp2 = get_underlying_path(NULL, tmp);
tmp2 = zfs_get_underlying_path(tmp);
if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) {
/*
* We have a real underlying device, which

View File

@ -843,23 +843,6 @@ _zed_internal_event(const char *class, nvlist_t *nvl)
}
}
/*
 * If the event payload [nvl] carries a vdev path, resolve it with
 * get_underlying_path() and, when that yields a result, export it to the
 * zedlet environment as the "VDEV_UPATH" variable.  Nothing is added when
 * the payload has no vdev path or no underlying path is returned.
 */
static void
_zed_event_add_upath(uint64_t eid, zed_strings_t *zsp, nvlist_t *nvl)
{
	char *vdev_path = NULL;
	char *underlying = NULL;

	/* No vdev path in this event: nothing to resolve. */
	if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
	    &vdev_path) != 0)
		return;

	underlying = get_underlying_path(NULL, vdev_path);
	if (underlying == NULL)
		return;

	_zed_event_add_var(eid, zsp, ZEVENT_VAR_PREFIX, "VDEV_UPATH",
	    "%s", underlying);

	/* The resolved path is released here once it has been exported. */
	free(underlying);
}
/*
* Service the next zevent, blocking until one is available.
*/
@ -932,16 +915,6 @@ zed_event_service(struct zed_conf *zcp)
_zed_event_add_time_strings(eid, zsp, etime);
/*
* If a VDEV is included, resolve its path to the "underlying
* device". This is useful for resolving device mapper and
* multipath devices to their underlying /dev/sd* devices.
* For example, if you have a DM or multipath VDEV
* (/dev/mapper/mpatha) that points to one or more /dev/sd*
* devices, this will return the first of its devices.
*/
_zed_event_add_upath(eid, zsp, nvl);
zed_exec_process(eid, class, subclass,
zcp->zedlet_dir, zcp->zedlets, zsp, zcp->zevent_fd);

View File

@ -280,8 +280,9 @@ extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
extern int zpool_label_disk_wait(char *, int);
extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
int dev_is_dm(char *devname);
char *get_underlying_path(libzfs_handle_t *hdl, char *dev_name);
int zfs_dev_is_dm(char *dev_name);
char *zfs_get_underlying_path(char *dev_name);
char *zfs_get_enclosure_sysfs_path(char *dev_name);
/*
* Functions to manage pool properties

View File

@ -58,6 +58,7 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH "vdev_physpath"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state"

View File

@ -592,6 +592,9 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo"
#define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo"
/* vdev enclosure sysfs path */
#define ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path"
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"

View File

@ -213,6 +213,7 @@ struct vdev {
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
char *vdev_physpath; /* vdev device path (if any) */
char *vdev_enc_sysfs_path; /* enclosure sysfs path */
char *vdev_fru; /* physical FRU location */
uint64_t vdev_not_present; /* not present during import */
uint64_t vdev_unspare; /* unspare when resilvering done */

View File

@ -64,6 +64,7 @@
#include <blkid/blkid.h>
#include "libzfs.h"
#include "libzfs_impl.h"
#include <libzfs.h>
/*
* Intermediate structures used to gather configuration information.
@ -437,6 +438,10 @@ no_dev:
*
* multipath device node example:
* devid: 'dm-uuid-mpath-35000c5006304de3f'
*
* We also store the enclosure sysfs path for turning on enclosure LEDs
* (if applicable):
* vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
*/
void
update_vdev_config_dev_strs(nvlist_t *nv)
@ -444,6 +449,7 @@ update_vdev_config_dev_strs(nvlist_t *nv)
vdev_dev_strs_t vds;
char *env, *type, *path;
uint64_t wholedisk = 0;
char *upath, *spath;
/*
* For the benefit of legacy ZFS implementations, allow
@ -470,6 +476,7 @@ update_vdev_config_dev_strs(nvlist_t *nv)
!strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
return;
}
@ -490,10 +497,20 @@ update_vdev_config_dev_strs(nvlist_t *nv)
(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
vds.vds_devphys);
}
/* Add enclosure sysfs path (if disk is in an enclosure) */
upath = zfs_get_underlying_path(path);
spath = zfs_get_enclosure_sysfs_path(upath);
if (spath)
nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
spath);
free(upath);
free(spath);
} else {
/* clear out any stale entries */
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
}
}
#else

View File

@ -4407,7 +4407,7 @@ static char * dm_get_underlying_path(char *dm_name)
goto end;
if ((asprintf(&tmp, "/dev/block/%d:%d", child_info->major,
child_info->minor) == -1) || !tmp)
child_info->minor) == -1) || tmp == NULL)
goto end;
/* Further translate /dev/block/ name into the normal name */
@ -4430,11 +4430,11 @@ end:
* Return 0 if not.
*/
int
dev_is_dm(char *devname)
zfs_dev_is_dm(char *dev_name)
{
char *tmp;
tmp = dm_get_underlying_path(devname);
tmp = dm_get_underlying_path(dev_name);
if (!tmp)
return (0);
@ -4483,7 +4483,7 @@ dev_is_dm(char *devname)
* NOTE: The returned name string must be *freed*.
*/
char *
get_underlying_path(libzfs_handle_t *hdl, char *dev_name)
zfs_get_underlying_path(char *dev_name)
{
char *name = NULL;
char *tmp;
@ -4504,3 +4504,108 @@ get_underlying_path(libzfs_handle_t *hdl, char *dev_name)
return (name);
}
/*
 * Given a dev name like "sda", return the full enclosure sysfs path to
 * the disk.  You can also pass in the name with "/dev" prepended
 * to it (like /dev/sda); everything up to and including the last '/' is
 * ignored.
 *
 * For example, disk "sda" in enclosure slot 1:
 *	dev_name:	"sda"
 *	returns:	"/sys/class/enclosure/1:0:3:0/Slot 1"
 *
 * 'dev_name' must be a non-devicemapper device.
 *
 * Returns NULL if 'dev_name' is NULL, if /sys/block/<dev>/device cannot be
 * opened, if it contains no usable "enclosure_device" link, or if memory
 * allocation fails.
 *
 * Returned string must be freed.
 */
char *
zfs_get_enclosure_sysfs_path(char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	char *devdir = NULL;	/* "/sys/block/<dev>/device" */
	char *linkname = NULL;	/* full path of the enclosure_device link */
	char *enc;
	char *path = NULL;
	ssize_t size;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev_name' with a path (like "/dev"), strip it */
	enc = strrchr(dev_name, '/');
	if (enc != NULL)
		dev_name = enc + 1; /* +1 since we want the chr after '/' */

	if (asprintf(&devdir, "/sys/block/%s/device", dev_name) == -1) {
		/* If asprintf() fails, 'devdir' is undefined */
		devdir = NULL;
		goto end;
	}

	/* On failure fall through to 'end' so that 'devdir' is still freed */
	dp = opendir(devdir);
	if (dp == NULL)
		goto end;

	/*
	 * Look through all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp)) != NULL) {
		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		if (asprintf(&linkname, "%s/%s", devdir, ep->d_name) == -1) {
			linkname = NULL;
			break;
		}

		size = readlink(linkname, buf, sizeof (buf));

		/* The link name is no longer needed once it has been read */
		free(linkname);
		linkname = NULL;

		/* Did readlink fail or crop the link name? */
		if (size == -1 || (size_t)size >= sizeof (buf))
			break;

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[size] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		enc = strstr(buf, "enclosure");
		if (enc == NULL)
			break;

		if (asprintf(&path, "/sys/class/%s", enc) == -1) {
			/* If asprintf() fails, 'path' is undefined */
			path = NULL;
		}

		/*
		 * Stop at the first enclosure link, whether or not the
		 * allocation succeeded; scanning further entries could
		 * overwrite (and leak) 'path'.
		 */
		break;
	}

end:
	free(linkname);
	free(devdir);

	if (dp != NULL)
		closedir(dp);

	return (path);
}

View File

@ -488,6 +488,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
&vd->vdev_physpath) == 0)
vd->vdev_physpath = spa_strdup(vd->vdev_physpath);
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
&vd->vdev_enc_sysfs_path) == 0)
vd->vdev_enc_sysfs_path = spa_strdup(vd->vdev_enc_sysfs_path);
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0)
vd->vdev_fru = spa_strdup(vd->vdev_fru);
@ -673,6 +678,10 @@ vdev_free(vdev_t *vd)
spa_strfree(vd->vdev_devid);
if (vd->vdev_physpath)
spa_strfree(vd->vdev_physpath);
if (vd->vdev_enc_sysfs_path)
spa_strfree(vd->vdev_enc_sysfs_path);
if (vd->vdev_fru)
spa_strfree(vd->vdev_fru);

View File

@ -375,6 +375,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
vd->vdev_physpath);
if (vd->vdev_enc_sysfs_path != NULL)
fnvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
vd->vdev_enc_sysfs_path);
if (vd->vdev_fru != NULL)
fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru);

View File

@ -986,6 +986,12 @@ zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate)
FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH,
vd->vdev_physpath);
}
if (vd->vdev_enc_sysfs_path) {
(void) nvlist_add_string(aux,
FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH,
vd->vdev_enc_sysfs_path);
}
(void) nvlist_add_uint64(aux,
FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE, laststate);
}