mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 02:14:28 +03:00
Add a statechange notify zedlet
Now that ZED has internal fault diagnosis and the statechange event is generated for faulted states, we can replace the io-notify and checksum-notify zedlets with one based on statechange. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@intel.com> Closes #5383
This commit is contained in:
parent
32dec7bd1a
commit
0df15db98f
@ -61,23 +61,21 @@ zedexecdir = $(libexecdir)/zfs/zed.d
|
||||
dist_zedexec_SCRIPTS = \
|
||||
zed.d/all-debug.sh \
|
||||
zed.d/all-syslog.sh \
|
||||
zed.d/checksum-notify.sh \
|
||||
zed.d/data-notify.sh \
|
||||
zed.d/generic-notify.sh \
|
||||
zed.d/io-notify.sh \
|
||||
zed.d/resilver_finish-notify.sh \
|
||||
zed.d/scrub_finish-notify.sh \
|
||||
zed.d/statechange-led.sh \
|
||||
zed.d/statechange-notify.sh \
|
||||
zed.d/vdev_clear-led.sh
|
||||
|
||||
zedconfdefaults = \
|
||||
all-syslog.sh \
|
||||
checksum-notify.sh \
|
||||
data-notify.sh \
|
||||
io-notify.sh \
|
||||
resilver_finish-notify.sh \
|
||||
scrub_finish-notify.sh \
|
||||
statechange-led.sh \
|
||||
statechange-notify.sh \
|
||||
vdev_clear-led.sh
|
||||
|
||||
install-data-hook:
|
||||
|
@ -1 +0,0 @@
|
||||
io-notify.sh
|
@ -1 +0,0 @@
|
||||
io-notify.sh
|
43
cmd/zed/zed.d/data-notify.sh
Executable file
43
cmd/zed/zed.d/data-notify.sh
Executable file
@ -0,0 +1,43 @@
|
||||
#!/bin/sh
#
# data-notify: tell the administrator that ZFS reported a DATA error.
#
# Notifications are throttled. For any one class/pool/[vdev] combination at
# most one notification is sent per ZED_NOTIFY_INTERVAL_SECS, so a burst of
# events hitting the same pool/[vdev] does not flood the recipient.
#
# The zedlet is opt-in: it does nothing unless ZED_NOTIFY_DATA is non-empty.
#
# Exit codes:
# 0: notification sent
# 1: notification failed
# 2: notification not configured
# 3: notification suppressed
# 9: internal error

# Load the optional site configuration, then the required helper library.
if [ -f "${ZED_ZEDLET_DIR}/zed.rc" ]; then
    . "${ZED_ZEDLET_DIR}/zed.rc"
fi
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# An event without a pool name or a subclass cannot be reported.
if [ -z "${ZEVENT_POOL}" ] || [ -z "${ZEVENT_SUBCLASS}" ]; then
    exit 9
fi

# Data-error notifications must be switched on explicitly.
if [ -z "${ZED_NOTIFY_DATA}" ]; then
    exit 3
fi

# Throttle per pool / vdev (0 when the event carries no vdev) / subclass.
throttle_key="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify"
if ! zed_rate_limit "${throttle_key}"; then
    exit 3
fi

# Emit the notification body on stdout.
data_notify_body()
{
    echo "ZFS has detected a data error:"
    echo
    echo " eid: ${ZEVENT_EID}"
    echo " class: ${ZEVENT_SUBCLASS}"
    echo " host: $(hostname)"
    echo " time: ${ZEVENT_TIME_STRING}"
    echo " error: ${ZEVENT_ZIO_ERR}"
    echo " objid: ${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}"
    echo " pool: ${ZEVENT_POOL}"
}

# The scratch file holding the body must not be readable by other users.
umask 077
notice_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
notice_file="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
data_notify_body > "${notice_file}"

# Remember the delivery status so the cleanup below does not overwrite it.
zed_notify "${notice_subject}" "${notice_file}"
rv=$?
rm -f "${notice_file}"
exit "${rv}"
|
@ -1,64 +0,0 @@
|
||||
#!/bin/sh
#
# io-notify: tell the administrator about a CHECKSUM, DATA, or IO error.
#
# Notifications are throttled. For any one class/pool/[vdev] combination at
# most one notification is sent per ZED_NOTIFY_INTERVAL_SECS, so a burst of
# events hitting the same pool/[vdev] does not flood the recipient.
#
# Exit codes:
# 0: notification sent
# 1: notification failed
# 2: notification not configured
# 3: notification suppressed
# 9: internal error

# Load the optional site configuration, then the required helper library.
if [ -f "${ZED_ZEDLET_DIR}/zed.rc" ]; then
    . "${ZED_ZEDLET_DIR}/zed.rc"
fi
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# An event without a pool name or a subclass cannot be reported.
if [ -z "${ZEVENT_POOL}" ] || [ -z "${ZEVENT_SUBCLASS}" ]; then
    exit 9
fi

# Only the three error subclasses named above are handled here.
case "${ZEVENT_SUBCLASS}" in
    checksum|data|io)
        ;;
    *)
        zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
        exit 9
        ;;
esac

# Throttle per pool / vdev (0 when the event carries no vdev) / subclass.
throttle_key="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify"
if ! zed_rate_limit "${throttle_key}"; then
    exit 3
fi

# Emit the notification body on stdout. Optional vdev fields are printed
# only when the event supplies them.
io_notify_body()
{
    # Pick the indefinite article that reads correctly before the subclass.
    if [ "${ZEVENT_SUBCLASS}" = "io" ]; then
        article="an"
    else
        article="a"
    fi

    echo "ZFS has detected ${article} ${ZEVENT_SUBCLASS} error:"
    echo
    echo " eid: ${ZEVENT_EID}"
    echo " class: ${ZEVENT_SUBCLASS}"
    echo " host: $(hostname)"
    echo " time: ${ZEVENT_TIME_STRING}"

    if [ -n "${ZEVENT_VDEV_TYPE}" ]; then
        echo " vtype: ${ZEVENT_VDEV_TYPE}"
    fi
    if [ -n "${ZEVENT_VDEV_PATH}" ]; then
        echo " vpath: ${ZEVENT_VDEV_PATH}"
    fi
    if [ -n "${ZEVENT_VDEV_GUID}" ]; then
        echo " vguid: ${ZEVENT_VDEV_GUID}"
    fi

    if [ -n "${ZEVENT_VDEV_CKSUM_ERRORS}" ]; then
        echo " cksum: ${ZEVENT_VDEV_CKSUM_ERRORS}"
    fi
    if [ -n "${ZEVENT_VDEV_READ_ERRORS}" ]; then
        echo " read: ${ZEVENT_VDEV_READ_ERRORS}"
    fi
    if [ -n "${ZEVENT_VDEV_WRITE_ERRORS}" ]; then
        echo " write: ${ZEVENT_VDEV_WRITE_ERRORS}"
    fi

    echo " pool: ${ZEVENT_POOL}"
}

# The scratch file holding the body must not be readable by other users.
umask 077
notice_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
notice_file="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
io_notify_body > "${notice_file}"

# Remember the delivery status so the cleanup below does not overwrite it.
zed_notify "${notice_subject}" "${notice_file}"
rv=$?
rm -f "${notice_file}"
exit "${rv}"
|
74
cmd/zed/zed.d/statechange-notify.sh
Executable file
74
cmd/zed/zed.d/statechange-notify.sh
Executable file
@ -0,0 +1,74 @@
|
||||
#!/bin/sh
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License Version 1.0 (CDDL-1.0).
# You can obtain a copy of the license from the top-level file
# "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
# You may not use this file except in compliance with the license.
#
# CDDL HEADER END
#

#
# Send notification in response to a fault-induced statechange
#
# ZEVENT_SUBCLASS: 'statechange'
# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED' or 'REMOVED'
#
# Any other vdev state is ignored (exit 3).
#
# Exit codes:
# 0: notification sent
# 1: notification failed
# 2: notification not configured
# 3: statechange not relevant
# 4: statechange string missing (unexpected)

# Load the optional site configuration, then the required helper library.
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

[ -n "${ZEVENT_VDEV_STATE_STR}" ] || exit 4

# Only the three fault-related states warrant a notification.
case "${ZEVENT_VDEV_STATE_STR}" in
    FAULTED|DEGRADED|REMOVED)
        ;;
    *)
        exit 3
        ;;
esac

# The scratch file holding the body must not be readable by other users.
umask 077
note_subject="ZFS device fault for pool ${ZEVENT_POOL_GUID} on $(hostname)"
note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
{
    # Select the explanatory text with `case` rather than `[ ... == ... ]`.
    # "==" is not a POSIX test operator: under a strict /bin/sh such as dash
    # the comparison errors out, so FAULTED and DEGRADED events would fall
    # through to the "removed" text.
    case "${ZEVENT_VDEV_STATE_STR}" in
        FAULTED)
            echo "The number of I/O errors associated with a ZFS device exceeded"
            echo "acceptable levels. ZFS has marked the device as faulted."
            ;;
        DEGRADED)
            echo "The number of checksum errors associated with a ZFS device"
            echo "exceeded acceptable levels. ZFS has marked the device as"
            echo "degraded."
            ;;
        *)
            # Only REMOVED can reach this arm; other states exited above.
            echo "ZFS has detected that a device was removed."
            ;;
    esac

    echo
    echo " impact: Fault tolerance of the pool may be compromised."
    echo " eid: ${ZEVENT_EID}"
    echo " class: ${ZEVENT_SUBCLASS}"
    echo " state: ${ZEVENT_VDEV_STATE_STR}"
    echo " host: $(hostname)"
    echo " time: ${ZEVENT_TIME_STRING}"

    # Optional vdev details: printed only when the event supplies them.
    [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}"
    [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}"
    [ -n "${ZEVENT_VDEV_PHYSPATH}" ] && echo " vphys: ${ZEVENT_VDEV_PHYSPATH}"
    [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}"
    [ -n "${ZEVENT_VDEV_DEVID}" ] && echo " devid: ${ZEVENT_VDEV_DEVID}"

    echo " pool: ${ZEVENT_POOL_GUID}"

} > "${note_pathname}"

# Capture the delivery status before cleanup so it becomes the exit code.
zed_notify "${note_subject}" "${note_pathname}"; rv=$?

rm -f "${note_pathname}"
exit "${rv}"
|
@ -50,6 +50,12 @@
|
||||
#
|
||||
#ZED_NOTIFY_VERBOSE=0
|
||||
|
||||
##
|
||||
# Send notifications for 'ereport.fs.zfs.data' events.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_NOTIFY_DATA=1
|
||||
|
||||
##
|
||||
# Pushbullet access token.
|
||||
# This grants full access to your account -- protect it accordingly!
|
||||
@ -73,18 +79,6 @@
|
||||
#
|
||||
#ZED_RUNDIR="/var/run"
|
||||
|
||||
##
|
||||
# Replace a device with a hot spare after N checksum errors are detected.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_SPARE_ON_CHECKSUM_ERRORS=10
|
||||
|
||||
##
|
||||
# Replace a device with a hot spare after N I/O errors are detected.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_SPARE_ON_IO_ERRORS=1
|
||||
|
||||
##
|
||||
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
|
||||
# device mapper and multipath devices as well. Your enclosure must be
|
||||
|
Loading…
Reference in New Issue
Block a user