mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Turn on/off enclosure slot fault LED even when disk isn't present
Previously, when a drive faulted, the statechange-led.sh script would look up the drive's LED sysfs entry in /sys/block/sd*/device/enclosure_device, and turn it on. During testing we noticed that if you pulled out a drive, or if the drive was so badly broken that it no longer appeared to Linux, the /sys/block/sd* path would be removed, and the script could not look up the LED entry. To fix this, this patch looks up the disk's more persistent "/sys/class/enclosure/X:X:X:X/Slot N" LED sysfs path at pool import. It then passes that path to the statechange-led script to use, rather than having the script look it up on the fly. This allows the script to turn on/off the slot LEDs even when the drive is missing. Closes #5309 Closes #2375
This commit is contained in:
committed by
Brian Behlendorf
parent
a85cefa35c
commit
1bbd877049
@@ -185,7 +185,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||
uint64_t wholedisk = 0ULL;
|
||||
uint64_t offline = 0ULL;
|
||||
uint64_t guid = 0ULL;
|
||||
char *physpath = NULL, *new_devid = NULL;
|
||||
char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
|
||||
char rawpath[PATH_MAX], fullpath[PATH_MAX];
|
||||
char devpath[PATH_MAX];
|
||||
int ret;
|
||||
@@ -206,6 +206,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||
}
|
||||
|
||||
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
|
||||
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
|
||||
&enc_sysfs_path);
|
||||
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
|
||||
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
|
||||
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);
|
||||
@@ -214,7 +216,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||
return; /* don't intervene if it was taken offline */
|
||||
|
||||
#ifdef HAVE_LIBDEVMAPPER
|
||||
is_dm = dev_is_dm(path);
|
||||
is_dm = zfs_dev_is_dm(path);
|
||||
#endif
|
||||
zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
|
||||
" wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
|
||||
@@ -402,6 +404,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
||||
nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 ||
|
||||
(physpath != NULL && nvlist_add_string(newvd,
|
||||
ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
|
||||
nvlist_add_string(newvd, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
|
||||
enc_sysfs_path) != 0 ||
|
||||
nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
|
||||
nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
|
||||
nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
|
||||
|
||||
@@ -2,8 +2,13 @@
|
||||
#
|
||||
# Turn off/on the VDEV's enclosure fault LEDs when the pool's state changes.
|
||||
#
|
||||
# Turn LED on if the VDEV becomes faulted/degraded, and turn it back off when
|
||||
# it's healthy again. This requires that your enclosure be supported by the
|
||||
# Turn LED on if the VDEV becomes faulted or degraded, and turn it back off
|
||||
# when it's online again. It will also turn on the LED (or keep it on) if
|
||||
# the drive becomes unavailable, unless the drive was previously in an
|
||||
# online state (online->unavail is a normal state transition during an
|
||||
# autoreplace).
|
||||
#
|
||||
# This script requires that your enclosure be supported by the
|
||||
# Linux SCSI enclosure services (ses) driver. The script will do nothing
|
||||
# if you have no enclosure, or if your enclosure isn't supported.
|
||||
#
|
||||
@@ -13,76 +18,82 @@
|
||||
# 0: enclosure led successfully set
|
||||
# 1: enclosure leds not available
|
||||
# 2: enclosure leds administratively disabled
|
||||
# 3: ZED built without libdevmapper
|
||||
# 3: ZED didn't pass enclosure sysfs path
|
||||
# 4: Enclosure sysfs path doesn't exist
|
||||
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
# ZEVENT_VDEV_UPATH will not be present if ZFS is not built with libdevmapper
|
||||
[ -n "${ZEVENT_VDEV_UPATH}" ] || exit 3
|
||||
|
||||
if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then
|
||||
exit 2
|
||||
fi
|
||||
|
||||
if [ ! -d /sys/class/enclosure ] ; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "${ZED_USE_ENCLOSURE_LEDS}" != "1" ] ; then
|
||||
exit 2
|
||||
fi
|
||||
|
||||
[ -n "${ZEVENT_VDEV_ENC_SYSFS_PATH}" ] || exit 3
|
||||
|
||||
[ -e "${ZEVENT_VDEV_ENC_SYSFS_PATH}/fault" ] || exit 4
|
||||
|
||||
# Turn on/off enclosure LEDs
|
||||
function led
|
||||
{
|
||||
name=$1
|
||||
file="$1/fault"
|
||||
val=$2
|
||||
|
||||
# We want to check the current state first, since writing to the
|
||||
# 'fault' entry always causes a SES command, even if the
|
||||
# current state is already what you want.
|
||||
if [ -e /sys/block/$name/device/enclosure_device*/fault ] ; then
|
||||
# We have to do some monkey business to deal with spaces in
|
||||
# enclosure_device names. I've seen horrible things like this:
|
||||
#
|
||||
# '/sys/block/sdfw/device/enclosure_device:SLOT 43 41 /fault'
|
||||
#
|
||||
# ...so escape all spaces.
|
||||
file=`ls /sys/block/$name/device/enclosure_device*/fault | sed 's/\s/\\ /g'`
|
||||
current=$(cat "${file}")
|
||||
|
||||
current=`cat "$file"`
|
||||
# On some enclosures if you write 1 to fault, and read it back,
|
||||
# it will return 2. Treat all non-zero values as 1 for
|
||||
# simplicity.
|
||||
if [ "$current" != "0" ] ; then
|
||||
current=1
|
||||
fi
|
||||
|
||||
# On some enclosures if you write 1 to fault, and read it back,
|
||||
# it will return 2. Treat all non-zero values as 1 for
|
||||
# simplicity.
|
||||
if [ "$current" != "0" ] ; then
|
||||
current=1
|
||||
fi
|
||||
|
||||
if [ "$current" != "$val" ] ; then
|
||||
# Set the value twice. I've seen enclosures that were
|
||||
# flakey about setting it the first time.
|
||||
echo $val > "$file"
|
||||
echo $val > "$file"
|
||||
fi
|
||||
if [ "$current" != "$val" ] ; then
|
||||
# Set the value twice. I've seen enclosures that were
|
||||
# flakey about setting it the first time.
|
||||
echo "$val" > "$file"
|
||||
echo "$val" > "$file"
|
||||
fi
|
||||
}
|
||||
|
||||
# Decide whether to turn on/off an LED based on the state
|
||||
# Pass in path name and fault string ("ONLINE"/"FAULTED"/"DEGRADED"...etc)
|
||||
#
|
||||
# We only turn on LEDs when a drive becomes FAULTED, DEGRADED, or UNAVAIL and
|
||||
# only turn it off when it comes back ONLINE. All other states are ignored, and
|
||||
# keep the previous LED state.
|
||||
function process {
|
||||
# path=/dev/sda, fault=
|
||||
|
||||
path=$1
|
||||
path="$1"
|
||||
fault=$2
|
||||
name=`basename $path`
|
||||
|
||||
if [ -z "$name" ] ; then
|
||||
return
|
||||
fi
|
||||
|
||||
prev=$3
|
||||
if [ "$fault" == "FAULTED" ] || [ "$fault" == "DEGRADED" ] ; then
|
||||
led $name 1
|
||||
else
|
||||
led $name 0
|
||||
led "$path" 1
|
||||
elif [ "$fault" == "UNAVAIL" ] && [ "$prev" != "ONLINE" ] ; then
|
||||
# For the most part, UNAVAIL should turn on the LED. However,
|
||||
# during an autoreplace, we see our new drive go online,
|
||||
# followed by our "old" drive going ONLINE->UNAVAIL. Since the
|
||||
# "old" drive has the same slot information, we want to ignore
|
||||
# the ONLINE->UNAVAIL event.
|
||||
#
|
||||
# NAME STATE READ WRITE CKSUM
|
||||
# mypool3 DEGRADED 0 0 0
|
||||
# mirror-0 DEGRADED 0 0 0
|
||||
# A1 ONLINE 0 0 0
|
||||
# A2 ONLINE 0 880 0
|
||||
# replacing-3 UNAVAIL 0 0 0
|
||||
# old UNAVAIL 0 2.93K 0 corrupted data
|
||||
# A3 ONLINE 0 0 156 (resilvering)
|
||||
led "$path" 1
|
||||
elif [ "$fault" == "ONLINE" ] ; then
|
||||
led "$path" 0
|
||||
fi
|
||||
}
|
||||
|
||||
process "$ZEVENT_VDEV_UPATH" "$ZEVENT_VDEV_STATE_STR"
|
||||
process "$ZEVENT_VDEV_ENC_SYSFS_PATH" "$ZEVENT_VDEV_STATE_STR" \
|
||||
"$ZEVENT_VDEV_LASTSTATE_STR"
|
||||
|
||||
@@ -286,7 +286,7 @@ zed_udev_monitor(void *arg)
|
||||
udev_device_get_property_value(dev, "DM_UUID") &&
|
||||
udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) {
|
||||
tmp = (char *) udev_device_get_devnode(dev);
|
||||
tmp2 = get_underlying_path(NULL, tmp);
|
||||
tmp2 = zfs_get_underlying_path(tmp);
|
||||
if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) {
|
||||
/*
|
||||
* We have a real underlying device, which
|
||||
|
||||
@@ -843,23 +843,6 @@ _zed_internal_event(const char *class, nvlist_t *nvl)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_zed_event_add_upath(uint64_t eid, zed_strings_t *zsp, nvlist_t *nvl)
|
||||
{
|
||||
char *path = NULL;
|
||||
char *upath = NULL;
|
||||
if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
|
||||
&path) == 0) {
|
||||
upath = get_underlying_path(NULL, path);
|
||||
if (upath) {
|
||||
_zed_event_add_var(eid, zsp, ZEVENT_VAR_PREFIX,
|
||||
"VDEV_UPATH",
|
||||
"%s", upath);
|
||||
free(upath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Service the next zevent, blocking until one is available.
|
||||
*/
|
||||
@@ -932,16 +915,6 @@ zed_event_service(struct zed_conf *zcp)
|
||||
|
||||
_zed_event_add_time_strings(eid, zsp, etime);
|
||||
|
||||
/*
|
||||
* If a VDEV is included, resolve its path to the "underlying
|
||||
* device". This is useful for resolving device mapper and
|
||||
* multipath devices to their underlying /dev/sd* devices.
|
||||
* For example, if you have a DM or multipath VDEV
|
||||
* (/dev/mapper/mpatha) that points to one or more /dev/sd*
|
||||
* devices, this will return the first of its devices.
|
||||
*/
|
||||
_zed_event_add_upath(eid, zsp, nvl);
|
||||
|
||||
zed_exec_process(eid, class, subclass,
|
||||
zcp->zedlet_dir, zcp->zedlets, zsp, zcp->zevent_fd);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user