diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index f7740ad2a..a073956b2 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -185,7 +185,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) uint64_t wholedisk = 0ULL; uint64_t offline = 0ULL; uint64_t guid = 0ULL; - char *physpath = NULL, *new_devid = NULL; + char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL; char rawpath[PATH_MAX], fullpath[PATH_MAX]; char devpath[PATH_MAX]; int ret; @@ -206,6 +206,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) } (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath); + (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &enc_sysfs_path); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid); @@ -214,7 +216,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) return; /* don't intervene if it was taken offline */ #ifdef HAVE_LIBDEVMAPPER - is_dm = dev_is_dm(path); + is_dm = zfs_dev_is_dm(path); #endif zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'" " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path, @@ -402,6 +404,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 || (physpath != NULL && nvlist_add_string(newvd, ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) || + (enc_sysfs_path != NULL && nvlist_add_string(newvd, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, enc_sysfs_path) != 0) || nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd, diff --git a/cmd/zed/zed.d/statechange-led.sh b/cmd/zed/zed.d/statechange-led.sh index ca911d2b9..af1a14a9a 100755 ---
a/cmd/zed/zed.d/statechange-led.sh +++ b/cmd/zed/zed.d/statechange-led.sh @@ -2,8 +2,13 @@ # # Turn off/on the VDEV's enclosure fault LEDs when the pool's state changes. # -# Turn LED on if the VDEV becomes faulted/degraded, and turn it back off when -# it's healthy again. This requires that your enclosure be supported by the +# Turn LED on if the VDEV becomes faulted or degraded, and turn it back off +# when it's online again. It will also turn on the LED (or keep it on) if +# the drive becomes unavailable, unless the drive was previously in an +# online state (online->unavail is a normal state transition during an +# autoreplace). +# +# This script requires that your enclosure be supported by the # Linux SCSI enclosure services (ses) driver. The script will do nothing # if you have no enclosure, or if your enclosure isn't supported. # @@ -13,76 +18,82 @@ # 0: enclosure led successfully set # 1: enclosure leds not not available # 2: enclosure leds administratively disabled -# 3: ZED built without libdevmapper +# 3: ZED didn't pass enclosure sysfs path +# 4: Enclosure sysfs path doesn't exist [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" . "${ZED_ZEDLET_DIR}/zed-functions.sh" -# ZEVENT_VDEV_UPATH will not be present if ZFS is not built with libdevmapper -[ -n "${ZEVENT_VDEV_UPATH}" ] || exit 3 - -if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then - exit 2 -fi - if [ ! -d /sys/class/enclosure ] ; then exit 1 fi +if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then + exit 2 +fi + +[ -n "${ZEVENT_VDEV_ENC_SYSFS_PATH}" ] || exit 3 + +[ -e "${ZEVENT_VDEV_ENC_SYSFS_PATH}/fault" ] || exit 4 + # Turn on/off enclosure LEDs function led { - name=$1 + file="$1/fault" val=$2 # We want to check the current state first, since writing to the # 'fault' entry always always causes a SES command, even if the # current state is already what you want.
- if [ -e /sys/block/$name/device/enclosure_device*/fault ] ; then - # We have to do some monkey business to deal with spaces in - # enclosure_device names. I've seen horrible things like this: - # - # '/sys/block/sdfw/device/enclosure_device:SLOT 43 41 /fault' - # - # ...so escape all spaces. - file=`ls /sys/block/$name/device/enclosure_device*/fault | sed 's/\s/\\ /g'` + current=$(cat "${file}") - current=`cat "$file"` + # On some enclosures if you write 1 to fault, and read it back, + # it will return 2. Treat all non-zero values as 1 for + # simplicity. + if [ "$current" != "0" ] ; then + current=1 + fi - # On some enclosures if you write 1 to fault, and read it back, - # it will return 2. Treat all non-zero values as 1 for - # simplicity. - if [ "$current" != "0" ] ; then - current=1 - fi - - if [ "$current" != "$val" ] ; then - # Set the value twice. I've seen enclosures that were - # flakey about setting it the first time. - echo $val > "$file" - echo $val > "$file" - fi + if [ "$current" != "$val" ] ; then + # Set the value twice. I've seen enclosures that were + # flakey about setting it the first time. + echo "$val" > "$file" + echo "$val" > "$file" fi } # Decide whether to turn on/off an LED based on the state # Pass in path name and fault string ("ONLINE"/"FAULTED"/"DEGRADED"...etc) +# +# We only turn on LEDs when a drive becomes FAULTED, DEGRADED, or UNAVAIL and +# only turn it off when it comes back ONLINE. All other states are ignored, and +# keep the previous LED state. function process { - # path=/dev/sda, fault= - - path=$1 + path="$1" fault=$2 - name=`basename $path` - - if [ -z "$name" ] ; then - return - fi - + prev=$3 if [ "$fault" == "FAULTED" ] || [ "$fault" == "DEGRADED" ] ; then - led $name 1 - else - led $name 0 + led "$path" 1 + elif [ "$fault" == "UNAVAIL" ] && [ "$prev" != "ONLINE" ] ; then + # For the most part, UNAVAIL should turn on the LED.
However, + # during an autoreplace, we see our new drive go online, + # followed by our "old" drive going ONLINE->UNAVAIL. Since the + # "old" drive has the same slot information, we want to ignore + # the ONLINE->UNAVAIL event. + # + # NAME STATE READ WRITE CKSUM + # mypool3 DEGRADED 0 0 0 + # mirror-0 DEGRADED 0 0 0 + # A1 ONLINE 0 0 0 + # A2 ONLINE 0 880 0 + # replacing-3 UNAVAIL 0 0 0 + # old UNAVAIL 0 2.93K 0 corrupted data + # A3 ONLINE 0 0 156 (resilvering) + led "$path" 1 + elif [ "$fault" == "ONLINE" ] ; then + led "$path" 0 fi } -process "$ZEVENT_VDEV_UPATH" "$ZEVENT_VDEV_STATE_STR" +process "$ZEVENT_VDEV_ENC_SYSFS_PATH" "$ZEVENT_VDEV_STATE_STR" \ + "$ZEVENT_VDEV_LASTSTATE_STR" diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c index 691024181..b5f57508e 100644 --- a/cmd/zed/zed_disk_event.c +++ b/cmd/zed/zed_disk_event.c @@ -286,7 +286,7 @@ zed_udev_monitor(void *arg) udev_device_get_property_value(dev, "DM_UUID") && udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) { tmp = (char *) udev_device_get_devnode(dev); - tmp2 = get_underlying_path(NULL, tmp); + tmp2 = zfs_get_underlying_path(tmp); if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) { /* * We have a real underlying device, which diff --git a/cmd/zed/zed_event.c b/cmd/zed/zed_event.c index 51f4f99c5..2c97b7115 100644 --- a/cmd/zed/zed_event.c +++ b/cmd/zed/zed_event.c @@ -843,23 +843,6 @@ _zed_internal_event(const char *class, nvlist_t *nvl) } } -static void -_zed_event_add_upath(uint64_t eid, zed_strings_t *zsp, nvlist_t *nvl) -{ - char *path = NULL; - char *upath = NULL; - if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, - &path) == 0) { - upath = get_underlying_path(NULL, path); - if (upath) { - _zed_event_add_var(eid, zsp, ZEVENT_VAR_PREFIX, - "VDEV_UPATH", - "%s", upath); - free(upath); - } - } -} - /* * Service the next zevent, blocking until one is available. 
*/ @@ -932,16 +915,6 @@ zed_event_service(struct zed_conf *zcp) _zed_event_add_time_strings(eid, zsp, etime); - /* - * If a VDEV is included, resolve it's path to the "underlying - * device". This is useful for resolving device mapper and - * multipath devices to their underlying /dev/sd* devices. - * For example, if you have a DM or multipath VDEV - * (/dev/mapper/mpatha) that points to one or more /dev/sd* - * devices, this will return the first of its devices. - */ - _zed_event_add_upath(eid, zsp, nvl); - zed_exec_process(eid, class, subclass, zcp->zedlet_dir, zcp->zedlets, zsp, zcp->zevent_fd); diff --git a/include/libzfs.h b/include/libzfs.h index 089cb8bc4..d4962dec6 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -280,8 +280,9 @@ extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, extern int zpool_label_disk_wait(char *, int); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); -int dev_is_dm(char *devname); -char *get_underlying_path(libzfs_handle_t *hdl, char *dev_name); +int zfs_dev_is_dm(char *dev_name); +char *zfs_get_underlying_path(char *dev_name); +char *zfs_get_enclosure_sysfs_path(char *dev_name); /* * Functions to manage pool properties diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h index 25510f8ca..7a8c36ea2 100644 --- a/include/sys/fm/fs/zfs.h +++ b/include/sys/fm/fs/zfs.h @@ -58,6 +58,7 @@ extern "C" { #define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH "vdev_physpath" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state" diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index c51d190c7..d1d0a275d 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -592,6 +592,9 @@ typedef struct 
zpool_rewind_policy { #define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo" #define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo" +/* vdev enclosure sysfs path */ +#define ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path" + #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index bdf8498fa..b9a2d181b 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -213,6 +213,7 @@ struct vdev { char *vdev_path; /* vdev path (if any) */ char *vdev_devid; /* vdev devid (if any) */ char *vdev_physpath; /* vdev device path (if any) */ + char *vdev_enc_sysfs_path; /* enclosure sysfs path */ char *vdev_fru; /* physical FRU location */ uint64_t vdev_not_present; /* not present during import */ uint64_t vdev_unspare; /* unspare when resilvering done */ diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index e76f7432a..3de7fd73d 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -64,6 +64,7 @@ #include #include "libzfs.h" #include "libzfs_impl.h" +#include /* * Intermediate structures used to gather configuration information. 
@@ -437,6 +438,10 @@ no_dev: * * multipath device node example: * devid: 'dm-uuid-mpath-35000c5006304de3f' + * + * We also store the enclosure sysfs path for turning on enclosure LEDs + * (if applicable): + * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' */ void update_vdev_config_dev_strs(nvlist_t *nv) @@ -444,6 +449,7 @@ update_vdev_config_dev_strs(nvlist_t *nv) vdev_dev_strs_t vds; char *env, *type, *path; uint64_t wholedisk = 0; + char *upath, *spath; /* * For the benefit of legacy ZFS implementations, allow @@ -470,6 +476,7 @@ update_vdev_config_dev_strs(nvlist_t *nv) !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) { (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); return; } @@ -490,10 +497,23 @@ update_vdev_config_dev_strs(nvlist_t *nv) (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, vds.vds_devphys); } + + /* Set the enclosure sysfs path, or clear a stale one if none */ + upath = zfs_get_underlying_path(path); + spath = zfs_get_enclosure_sysfs_path(upath); + if (spath) + (void) nvlist_add_string(nv, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, spath); + else + (void) nvlist_remove_all(nv, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + free(upath); + free(spath); } else { /* clear out any stale entries */ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH); + (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); } } #else diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index ebca76834..0641c1844 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -4407,7 +4407,7 @@ static char * dm_get_underlying_path(char *dm_name) goto end; if ((asprintf(&tmp, "/dev/block/%d:%d", child_info->major, - child_info->minor) == -1) || !tmp) + child_info->minor) == -1) || tmp == NULL) goto end; /* Further translate /dev/block/ name into the normal name */ @@ -4430,11 +4430,11 @@ end: * Return 0 if not.
*/ int -dev_is_dm(char *devname) +zfs_dev_is_dm(char *dev_name) { char *tmp; - tmp = dm_get_underlying_path(devname); + tmp = dm_get_underlying_path(dev_name); if (!tmp) return (0); @@ -4483,7 +4483,7 @@ dev_is_dm(char *devname) * NOTE: The returned name string must be *freed*. */ char * -get_underlying_path(libzfs_handle_t *hdl, char *dev_name) +zfs_get_underlying_path(char *dev_name) { char *name = NULL; char *tmp; @@ -4504,3 +4504,108 @@ get_underlying_path(libzfs_handle_t *hdl, char *dev_name) return (name); } + +/* + * Given a dev name like "sda", return the full enclosure sysfs path to + * the disk. You can also pass in the name with "/dev" prepended + * to it (like /dev/sda). + * + * For example, disk "sda" in enclosure slot 1: + * dev_name: "sda" + * returns: "/sys/class/enclosure/1:0:3:0/Slot 1" + * + * 'dev_name' must be a non-devicemapper device. + * + * Returns NULL if no enclosure is found. Returned string must be freed. + */ +char * +zfs_get_enclosure_sysfs_path(char *dev_name) +{ + DIR *dp = NULL; + struct dirent *ep; + char buf[MAXPATHLEN]; + char *tmp1 = NULL; + char *tmp2 = NULL; + char *tmp3 = NULL; + char *path = NULL; + ssize_t size; + int tmpsize; + + if (!dev_name) + return (NULL); + + /* Strip any leading directory (like "/dev") from 'dev_name' */ + tmp1 = strrchr(dev_name, '/'); + if (tmp1) + dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */ + + tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name); + if (tmpsize == -1 || tmp1 == NULL) { + tmp1 = NULL; + goto end; + } + + dp = opendir(tmp1); + if (dp == NULL) { + /* Keep tmp1 set so that it is freed at 'end' */ + goto end; + } + + /* + * Look through all sysfs entries in /sys/block/<dev>/device for + * the enclosure symlink.
+ */ + while ((ep = readdir(dp))) { + /* Ignore everything that's not our enclosure_device link */ + if (!strstr(ep->d_name, "enclosure_device")) + continue; + + if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1) { + tmp2 = NULL; /* undefined on failure; freed at 'end' */ + break; + } + + size = readlink(tmp2, buf, sizeof (buf)); + + /* Did readlink fail or crop the link name? */ + if (size == -1 || (size_t)size >= sizeof (buf)) { + free(tmp2); + tmp2 = NULL; /* To make free() at the end a NOP */ + break; + } + + /* + * We got a valid link. readlink() doesn't terminate strings + * so we have to do it. + */ + buf[size] = '\0'; + + /* + * Our link will look like: + * + * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1" + * + * We want to grab the "enclosure/1:0:3:0/SLOT 1" part + */ + tmp3 = strstr(buf, "enclosure"); + if (tmp3 == NULL) + break; + + if (asprintf(&path, "/sys/class/%s", tmp3) == -1) { + /* If asprintf() fails, 'path' is undefined */ + path = NULL; + break; + } + + break; /* stop at the first link so tmp2/path can't leak */ + } + +end: + free(tmp2); + free(tmp1); + + if (dp) + closedir(dp); + + return (path); +} diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index f7e91430f..8a4d48a1d 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -488,6 +488,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH, &vd->vdev_physpath) == 0) vd->vdev_physpath = spa_strdup(vd->vdev_physpath); + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &vd->vdev_enc_sysfs_path) == 0) + vd->vdev_enc_sysfs_path = spa_strdup(vd->vdev_enc_sysfs_path); + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0) vd->vdev_fru = spa_strdup(vd->vdev_fru); @@ -673,6 +678,10 @@ vdev_free(vdev_t *vd) spa_strfree(vd->vdev_devid); if (vd->vdev_physpath) spa_strfree(vd->vdev_physpath); + + if (vd->vdev_enc_sysfs_path) + spa_strfree(vd->vdev_enc_sysfs_path); + if (vd->vdev_fru) spa_strfree(vd->vdev_fru); diff --git a/module/zfs/vdev_label.c
b/module/zfs/vdev_label.c index 95bf4392f..4edbfa41e 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -375,6 +375,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, vd->vdev_physpath); + if (vd->vdev_enc_sysfs_path != NULL) + fnvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + vd->vdev_enc_sysfs_path); + if (vd->vdev_fru != NULL) fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru); diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 15dbdc2a0..0f8ac28c1 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -986,6 +986,12 @@ zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate) FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH, vd->vdev_physpath); } + if (vd->vdev_enc_sysfs_path) { + (void) nvlist_add_string(aux, + FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH, + vd->vdev_enc_sysfs_path); + } + (void) nvlist_add_uint64(aux, FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE, laststate); }