mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
Merge branch 'zed-pushbullet'
This patch stack begins with cleaning up the existing ZEDLETs, refactoring common code blocks into zed-functions.sh, adopting a more consistent coding style, updating exit codes, etc. All scripts now run cleanly through ShellCheck. The old "email" ZEDLETs are replaced with new "notify" ZEDLETs. A notification can now be sent via email and/or Pushbullet. Additional notification methods will likely be added in the future. Pushbullet notifications are enabled by setting the ZED_PUSHBULLET_ACCESS_TOKEN and (optionally) ZED_PUSHBULLET_CHANNEL_TAG in zed.rc. The Pushbullet implementation requires awk, curl, and sed executables to be installed in the standard PATH. Signed-off-by: Chris Dunlap <cdunlap@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #3327
This commit is contained in:
commit
52d5a1cc57
@ -4,6 +4,8 @@ DEFAULT_INCLUDES += \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/lib/libspl/include
|
||||
|
||||
EXTRA_DIST = $(top_srcdir)/cmd/zed/zed.d/README
|
||||
|
||||
sbin_PROGRAMS = zed
|
||||
|
||||
zed_SOURCES = \
|
||||
@ -33,6 +35,7 @@ zed_LDADD = \
|
||||
zedconfdir = $(sysconfdir)/zfs/zed.d
|
||||
|
||||
dist_zedconf_DATA = \
|
||||
$(top_srcdir)/cmd/zed/zed.d/zed-functions.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/zed.rc
|
||||
|
||||
zedexecdir = $(libexecdir)/zfs/zed.d
|
||||
@ -40,29 +43,30 @@ zedexecdir = $(libexecdir)/zfs/zed.d
|
||||
dist_zedexec_SCRIPTS = \
|
||||
$(top_srcdir)/cmd/zed/zed.d/all-debug.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/all-syslog.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/checksum-email.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/checksum-notify.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/checksum-spare.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/data-email.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/generic-email.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/io-email.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/data-notify.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/generic-notify.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/io-notify.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/io-spare.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/resilver.finish-email.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/scrub.finish-email.sh
|
||||
$(top_srcdir)/cmd/zed/zed.d/resilver.finish-notify.sh \
|
||||
$(top_srcdir)/cmd/zed/zed.d/scrub.finish-notify.sh
|
||||
|
||||
zedconfdefaults = \
|
||||
all-syslog.sh \
|
||||
checksum-email.sh \
|
||||
checksum-notify.sh \
|
||||
checksum-spare.sh \
|
||||
data-email.sh \
|
||||
io-email.sh \
|
||||
data-notify.sh \
|
||||
io-notify.sh \
|
||||
io-spare.sh \
|
||||
resilver.finish-email.sh \
|
||||
scrub.finish-email.sh
|
||||
resilver.finish-notify.sh \
|
||||
scrub.finish-notify.sh
|
||||
|
||||
install-data-local:
|
||||
install-data-hook:
|
||||
$(MKDIR_P) "$(DESTDIR)$(zedconfdir)"
|
||||
for f in $(zedconfdefaults); do \
|
||||
test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \
|
||||
-L "$(DESTDIR)$(zedconfdir)/$${f}" || \
|
||||
ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \
|
||||
done
|
||||
chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc"
|
||||
|
30
cmd/zed/zed.d/README
Normal file
30
cmd/zed/zed.d/README
Normal file
@ -0,0 +1,30 @@
|
||||
Shell scripts are the recommended choice for ZEDLETs that mostly call
|
||||
other utilities and do relatively little data manipulation.
|
||||
|
||||
Shell scripts MUST work on both bash and dash.
|
||||
|
||||
Shell scripts MUST run cleanly through ShellCheck:
|
||||
http://www.shellcheck.net/
|
||||
|
||||
General functions reside in "zed-functions.sh". Use them where applicable.
|
||||
|
||||
Additional references that may be of use:
|
||||
|
||||
Google Shell Style Guide
|
||||
https://google.github.io/styleguide/shellguide.html
|
||||
|
||||
Dash as /bin/sh
|
||||
https://wiki.ubuntu.com/DashAsBinSh
|
||||
|
||||
Common shell script mistakes
|
||||
http://www.pixelbeat.org/programming/shell_script_mistakes.html
|
||||
|
||||
Filenames and Pathnames in Shell: How to do it Correctly
|
||||
http://www.dwheeler.com/essays/filenames-in-shell.html
|
||||
|
||||
Autoconf: Portable Shell Programming
|
||||
https://www.gnu.org/software/autoconf/manual/autoconf.html#Portable-Shell
|
||||
|
||||
Please BE CONSISTENT with the existing style, check for errors,
|
||||
minimize dependencies where possible, try to be portable,
|
||||
and comment anything non-obvious. Festina lente.
|
@ -2,16 +2,23 @@
|
||||
#
|
||||
# Log all environment variables to ZED_DEBUG_LOG.
|
||||
#
|
||||
test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
# This can be a useful aid when developing/debugging ZEDLETs since it shows the
|
||||
# environment variables defined for each zevent.
|
||||
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
: "${ZED_DEBUG_LOG:="${TMPDIR:="/tmp"}/zed.debug.log"}"
|
||||
|
||||
lockfile="$(basename -- "${ZED_DEBUG_LOG}").lock"
|
||||
|
||||
# Override the default umask to restrict access to a newly-created logfile.
|
||||
umask 077
|
||||
|
||||
# Append stdout to the logfile after obtaining an advisory lock.
|
||||
exec >> "${ZED_DEBUG_LOG:=/tmp/zed.debug.log}"
|
||||
flock -x 1
|
||||
zed_lock "${lockfile}"
|
||||
exec >> "${ZED_DEBUG_LOG}"
|
||||
|
||||
printenv | sort
|
||||
echo
|
||||
|
||||
exec >&-
|
||||
zed_unlock "${lockfile}"
|
||||
exit 0
|
||||
|
@ -1,11 +1,10 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Log the zevent via syslog.
|
||||
#
|
||||
test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" -p "${ZED_SYSLOG_PRIORITY:=daemon.notice}" \
|
||||
eid="${ZEVENT_EID}" class="${ZEVENT_SUBCLASS}" \
|
||||
"${ZEVENT_POOL:+pool=$ZEVENT_POOL}"
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \
|
||||
"${ZEVENT_POOL:+"pool=${ZEVENT_POOL}"}"
|
||||
exit 0
|
||||
|
@ -1 +0,0 @@
|
||||
io-email.sh
|
1
cmd/zed/zed.d/checksum-notify.sh
Symbolic link
1
cmd/zed/zed.d/checksum-notify.sh
Symbolic link
@ -0,0 +1 @@
|
||||
io-notify.sh
|
@ -1,81 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Send email to ZED_EMAIL in response to a DATA zevent.
|
||||
# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given
|
||||
# class/pool combination. This protects against spamming the recipient
|
||||
# should multiple events occur together in time for the same pool.
|
||||
# Exit codes:
|
||||
# 0: email sent
|
||||
# 1: email failed
|
||||
# 2: email suppressed
|
||||
# 3: missing executable
|
||||
# 4: unsupported event class
|
||||
# 5: internal error
|
||||
# State File Format:
|
||||
# POOL;TIME_OF_LAST_EMAIL
|
||||
#
|
||||
test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
|
||||
test -n "${ZEVENT_POOL}" || exit 5
|
||||
test -n "${ZEVENT_SUBCLASS}" || exit 5
|
||||
|
||||
if test "${ZEVENT_SUBCLASS}" != "data"; then \
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\"
|
||||
exit 4
|
||||
fi
|
||||
|
||||
# Only send email if ZED_EMAIL has been configured.
|
||||
test -n "${ZED_EMAIL}" || exit 2
|
||||
|
||||
# Ensure requisite executables are installed.
|
||||
if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" not installed
|
||||
exit 3
|
||||
fi
|
||||
|
||||
NAME="zed.${ZEVENT_SUBCLASS}.email"
|
||||
LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock"
|
||||
STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state"
|
||||
|
||||
# Obtain lock to ensure mutual exclusion for accessing state.
|
||||
exec 8> "${LOCKFILE}"
|
||||
flock -x 8
|
||||
|
||||
# Query state for last time email was sent for this pool.
|
||||
TIME_NOW=`date +%s`
|
||||
TIME_LAST=`egrep "^${ZEVENT_POOL};" "${STATEFILE}" 2>/dev/null | cut -d ";" -f2`
|
||||
if test -n "${TIME_LAST}"; then
|
||||
TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"`
|
||||
if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
|
||||
"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \
|
||||
"${ZED_EMAIL}" <<EOF
|
||||
A ZFS ${ZEVENT_SUBCLASS} error has been detected:
|
||||
|
||||
eid: ${ZEVENT_EID}
|
||||
host: `hostname`
|
||||
time: ${ZEVENT_TIME_STRING}
|
||||
pool: ${ZEVENT_POOL}
|
||||
EOF
|
||||
MAIL_STATUS=$?
|
||||
|
||||
# Update state.
|
||||
egrep -v "^${ZEVENT_POOL};" "${STATEFILE}" 2>/dev/null > "${STATEFILE}.$$"
|
||||
echo "${ZEVENT_POOL};${TIME_NOW}" >> "${STATEFILE}.$$"
|
||||
mv -f "${STATEFILE}.$$" "${STATEFILE}"
|
||||
|
||||
if test "${MAIL_STATUS}" -ne 0; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
1
cmd/zed/zed.d/data-notify.sh
Symbolic link
1
cmd/zed/zed.d/data-notify.sh
Symbolic link
@ -0,0 +1 @@
|
||||
io-notify.sh
|
@ -1,59 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Send email to ZED_EMAIL in response to a given zevent.
|
||||
# This is a generic script that can be symlinked to a file in the zed
|
||||
# enabled-scripts directory in order to have email sent when a particular
|
||||
# class of zevents occurs. The symlink filename must begin with the zevent
|
||||
# (sub)class string (eg, "probe_failure-email.sh" for the "probe_failure"
|
||||
# subclass). Refer to the zed(8) manpage for details.
|
||||
# Exit codes:
|
||||
# 0: email sent
|
||||
# 1: email failed
|
||||
# 2: email suppressed
|
||||
# 3: missing executable
|
||||
#
|
||||
test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
|
||||
# Only send email if ZED_EMAIL has been configured.
|
||||
test -n "${ZED_EMAIL}" || exit 2
|
||||
|
||||
# Ensure requisite executables are installed.
|
||||
if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" not installed
|
||||
exit 3
|
||||
fi
|
||||
|
||||
# Override the default umask to restrict access to the msgbody tmpfile.
|
||||
umask 077
|
||||
|
||||
SUBJECT="ZFS ${ZEVENT_SUBCLASS} event"
|
||||
test -n "${ZEVENT_POOL}" && SUBJECT="${SUBJECT} for ${ZEVENT_POOL}"
|
||||
SUBJECT="${SUBJECT} on `hostname`"
|
||||
|
||||
MSGBODY="${TMPDIR:=/tmp}/`basename \"$0\"`.$$"
|
||||
{
|
||||
echo "A ZFS ${ZEVENT_SUBCLASS} event has been posted:"
|
||||
echo
|
||||
echo " eid: ${ZEVENT_EID}"
|
||||
echo " host: `hostname`"
|
||||
echo " time: ${ZEVENT_TIME_STRING}"
|
||||
test -n "${ZEVENT_VDEV_TYPE}" -a -n "${ZEVENT_VDEV_PATH}" && \
|
||||
echo " vdev: ${ZEVENT_VDEV_TYPE}:${ZEVENT_VDEV_PATH}"
|
||||
test -n "${ZEVENT_POOL}" -a -x "${ZPOOL}" && \
|
||||
"${ZPOOL}" status "${ZEVENT_POOL}"
|
||||
} > "${MSGBODY}"
|
||||
|
||||
test -f "${MSGBODY}" && "${MAIL}" -s "${SUBJECT}" "${ZED_EMAIL}" < "${MSGBODY}"
|
||||
MAIL_STATUS=$?
|
||||
rm -f "${MSGBODY}"
|
||||
|
||||
if test "${MAIL_STATUS}" -ne 0; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
54
cmd/zed/zed.d/generic-notify.sh
Executable file
54
cmd/zed/zed.d/generic-notify.sh
Executable file
@ -0,0 +1,54 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Send notification in response to a given zevent.
|
||||
#
|
||||
# This is a generic script that can be symlinked to a file in the
|
||||
# enabled-zedlets directory to have a notification sent when a particular
|
||||
# class of zevents occurs. The symlink filename must begin with the zevent
|
||||
# (sub)class string (e.g., "probe_failure-notify.sh" for the "probe_failure"
|
||||
# subclass). Refer to the zed(8) manpage for details.
|
||||
#
|
||||
# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given
|
||||
# class/pool combination. This protects against spamming the recipient
|
||||
# should multiple events occur together in time for the same pool.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0: notification sent
|
||||
# 1: notification failed
|
||||
# 2: notification not configured
|
||||
# 3: notification suppressed
|
||||
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
# Rate-limit the notification based in part on the filename.
|
||||
#
|
||||
rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")"
|
||||
rate_limit_interval="${ZED_NOTIFY_INTERVAL_SECS}"
|
||||
zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3
|
||||
|
||||
umask 077
|
||||
pool_str="${ZEVENT_POOL:+" for ${ZEVENT_POOL}"}"
|
||||
host_str=" on $(hostname)"
|
||||
note_subject="ZFS ${ZEVENT_SUBCLASS} event${pool_str}${host_str}"
|
||||
note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
|
||||
{
|
||||
echo "ZFS has posted the following event:"
|
||||
echo
|
||||
echo " eid: ${ZEVENT_EID}"
|
||||
echo " class: ${ZEVENT_SUBCLASS}"
|
||||
echo " host: $(hostname)"
|
||||
echo " time: ${ZEVENT_TIME_STRING}"
|
||||
|
||||
[ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}"
|
||||
[ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}"
|
||||
[ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}"
|
||||
|
||||
[ -n "${ZEVENT_POOL}" ] && [ -x "${ZPOOL}" ] \
|
||||
&& "${ZPOOL}" status "${ZEVENT_POOL}"
|
||||
|
||||
} > "${note_pathname}"
|
||||
|
||||
zed_notify "${note_subject}" "${note_pathname}"; rv=$?
|
||||
rm -f "${note_pathname}"
|
||||
exit "${rv}"
|
@ -1,86 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Send email to ZED_EMAIL in response to a CHECKSUM or IO zevent.
|
||||
# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given
|
||||
# class/pool/vdev combination. This protects against spamming the recipient
|
||||
# should multiple events occur together in time for the same pool/device.
|
||||
# Exit codes:
|
||||
# 0: email sent
|
||||
# 1: email failed
|
||||
# 2: email suppressed
|
||||
# 3: missing executable
|
||||
# 4: unsupported event class
|
||||
# 5: internal error
|
||||
# State File Format:
|
||||
# POOL;VDEV_PATH;TIME_OF_LAST_EMAIL
|
||||
#
|
||||
test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
|
||||
test -n "${ZEVENT_POOL}" || exit 5
|
||||
test -n "${ZEVENT_SUBCLASS}" || exit 5
|
||||
test -n "${ZEVENT_VDEV_PATH}" || exit 5
|
||||
|
||||
if test "${ZEVENT_SUBCLASS}" != "checksum" \
|
||||
-a "${ZEVENT_SUBCLASS}" != "io"; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\"
|
||||
exit 4
|
||||
fi
|
||||
|
||||
# Only send email if ZED_EMAIL has been configured.
|
||||
test -n "${ZED_EMAIL}" || exit 2
|
||||
|
||||
# Ensure requisite executables are installed.
|
||||
if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" not installed
|
||||
exit 3
|
||||
fi
|
||||
|
||||
NAME="zed.${ZEVENT_SUBCLASS}.email"
|
||||
LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock"
|
||||
STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state"
|
||||
|
||||
# Obtain lock to ensure mutual exclusion for accessing state.
|
||||
exec 8> "${LOCKFILE}"
|
||||
flock -x 8
|
||||
|
||||
# Query state for last time email was sent for this pool/vdev.
|
||||
TIME_NOW=`date +%s`
|
||||
TIME_LAST=`egrep "^${ZEVENT_POOL};${ZEVENT_VDEV_PATH};" "${STATEFILE}" \
|
||||
2>/dev/null | cut -d ";" -f3`
|
||||
if test -n "${TIME_LAST}"; then
|
||||
TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"`
|
||||
if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
|
||||
"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \
|
||||
"${ZED_EMAIL}" <<EOF
|
||||
A ZFS ${ZEVENT_SUBCLASS} error has been detected:
|
||||
|
||||
eid: ${ZEVENT_EID}
|
||||
host: `hostname`
|
||||
time: ${ZEVENT_TIME_STRING}
|
||||
pool: ${ZEVENT_POOL}
|
||||
vdev: ${ZEVENT_VDEV_TYPE}:${ZEVENT_VDEV_PATH}
|
||||
EOF
|
||||
MAIL_STATUS=$?
|
||||
|
||||
# Update state.
|
||||
egrep -v "^${ZEVENT_POOL};${ZEVENT_VDEV_PATH};" "${STATEFILE}" \
|
||||
2>/dev/null > "${STATEFILE}.$$"
|
||||
echo "${ZEVENT_POOL};${ZEVENT_VDEV_PATH};${TIME_NOW}" >> "${STATEFILE}.$$"
|
||||
mv -f "${STATEFILE}.$$" "${STATEFILE}"
|
||||
|
||||
if test "${MAIL_STATUS}" -ne 0; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
64
cmd/zed/zed.d/io-notify.sh
Executable file
64
cmd/zed/zed.d/io-notify.sh
Executable file
@ -0,0 +1,64 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Send notification in response to a CHECKSUM, DATA, or IO error.
|
||||
#
|
||||
# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given
|
||||
# class/pool/[vdev] combination. This protects against spamming the recipient
|
||||
# should multiple events occur together in time for the same pool/[vdev].
|
||||
#
|
||||
# Exit codes:
|
||||
# 0: notification sent
|
||||
# 1: notification failed
|
||||
# 2: notification not configured
|
||||
# 3: notification suppressed
|
||||
# 9: internal error
|
||||
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
[ -n "${ZEVENT_POOL}" ] || exit 9
|
||||
[ -n "${ZEVENT_SUBCLASS}" ] || exit 9
|
||||
|
||||
if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \
|
||||
&& [ "${ZEVENT_SUBCLASS}" != "data" ] \
|
||||
&& [ "${ZEVENT_SUBCLASS}" != "io" ]; then
|
||||
zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
|
||||
exit 9
|
||||
fi
|
||||
|
||||
rate_limit_tag="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify"
|
||||
zed_rate_limit "${rate_limit_tag}" || exit 3
|
||||
|
||||
umask 077
|
||||
note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
|
||||
note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
|
||||
{
|
||||
[ "${ZEVENT_SUBCLASS}" = "io" ] && article="an" || article="a"
|
||||
|
||||
echo "ZFS has detected ${article} ${ZEVENT_SUBCLASS} error:"
|
||||
echo
|
||||
echo " eid: ${ZEVENT_EID}"
|
||||
echo " class: ${ZEVENT_SUBCLASS}"
|
||||
echo " host: $(hostname)"
|
||||
echo " time: ${ZEVENT_TIME_STRING}"
|
||||
|
||||
[ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}"
|
||||
[ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}"
|
||||
[ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}"
|
||||
|
||||
[ -n "${ZEVENT_VDEV_CKSUM_ERRORS}" ] \
|
||||
&& echo " cksum: ${ZEVENT_VDEV_CKSUM_ERRORS}"
|
||||
|
||||
[ -n "${ZEVENT_VDEV_READ_ERRORS}" ] \
|
||||
&& echo " read: ${ZEVENT_VDEV_READ_ERRORS}"
|
||||
|
||||
[ -n "${ZEVENT_VDEV_WRITE_ERRORS}" ] \
|
||||
&& echo " write: ${ZEVENT_VDEV_WRITE_ERRORS}"
|
||||
|
||||
echo " pool: ${ZEVENT_POOL}"
|
||||
|
||||
} > "${note_pathname}"
|
||||
|
||||
zed_notify "${note_subject}" "${note_pathname}"; rv=$?
|
||||
rm -f "${note_pathname}"
|
||||
exit "${rv}"
|
@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Replace a device with a hot spare in response to IO or checksum errors.
|
||||
# Replace a device with a hot spare in response to IO or CHECKSUM errors.
|
||||
# The following actions will be performed automatically when the number
|
||||
# of errors exceeds the limit set by ZED_SPARE_ON_IO_ERRORS or
|
||||
# ZED_SPARE_ON_CHECKSUM_ERRORS.
|
||||
@ -21,106 +21,219 @@
|
||||
# the majority of the expected hot spare functionality.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0: replaced by hot spare
|
||||
# 1: no hot spare device available
|
||||
# 2: hot sparing disabled
|
||||
# 3: already faulted or degraded
|
||||
# 4: unsupported event class
|
||||
# 5: internal error
|
||||
# 0: hot spare replacement successful
|
||||
# 1: hot spare device not available
|
||||
# 2: hot sparing disabled or threshold not reached
|
||||
# 3: device already faulted or degraded
|
||||
# 9: internal error
|
||||
|
||||
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
. "${ZED_ZEDLET_DIR}/zed-functions.sh"
|
||||
|
||||
# Disabled by default. Enable in the zed.rc file.
|
||||
: "${ZED_SPARE_ON_CHECKSUM_ERRORS:=0}"
|
||||
: "${ZED_SPARE_ON_IO_ERRORS:=0}"
|
||||
|
||||
|
||||
# query_vdev_status (pool, vdev)
|
||||
#
|
||||
test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
# Given a [pool] and [vdev], return the matching vdev path & status on stdout.
|
||||
#
|
||||
# Warning: This function does not handle the case of [pool] or [vdev]
|
||||
# containing whitespace. Beware of ShellCheck SC2046. Caveat emptor.
|
||||
#
|
||||
# Arguments
|
||||
# pool: pool name
|
||||
# vdev: virtual device name
|
||||
#
|
||||
# StdOut
|
||||
# arg1: vdev pathname
|
||||
# arg2: vdev status
|
||||
#
|
||||
query_vdev_status()
|
||||
{
|
||||
local pool="$1"
|
||||
local vdev="$2"
|
||||
local t
|
||||
|
||||
test -n "${ZEVENT_POOL}" || exit 5
|
||||
test -n "${ZEVENT_SUBCLASS}" || exit 5
|
||||
test -n "${ZEVENT_VDEV_PATH}" || exit 5
|
||||
test -n "${ZEVENT_VDEV_GUID}" || exit 5
|
||||
vdev="$(basename -- "${vdev}")"
|
||||
([ -n "${pool}" ] && [ -n "${vdev}" ]) || return
|
||||
t="$(printf '\t')"
|
||||
|
||||
# Defaults to disabled, enable in the zed.rc file.
|
||||
ZED_SPARE_ON_IO_ERRORS=${ZED_SPARE_ON_IO_ERRORS:-0}
|
||||
ZED_SPARE_ON_CHECKSUM_ERRORS=${ZED_SPARE_ON_CHECKSUM_ERRORS:-0}
|
||||
|
||||
if [ ${ZED_SPARE_ON_IO_ERRORS} -eq 0 -a \
|
||||
${ZED_SPARE_ON_CHECKSUM_ERRORS} -eq 0 ]; then
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# A lock file is used to serialize execution.
|
||||
ZED_LOCKDIR=${ZED_LOCKDIR:-/var/lock}
|
||||
LOCKFILE="${ZED_LOCKDIR}/zed.spare.lock"
|
||||
|
||||
exec 8> "${LOCKFILE}"
|
||||
flock -x 8
|
||||
|
||||
# Given a <pool> and <device> return the status, (ONLINE, FAULTED, etc...).
|
||||
vdev_status() {
|
||||
local POOL=$1
|
||||
local VDEV=`basename $2`
|
||||
local T=' ' # tab character since '\t' isn't portable
|
||||
|
||||
${ZPOOL} status ${POOL} | sed -n -e \
|
||||
"s,^[ $T]*\(.*$VDEV\(-part[0-9]\+\)\?\)[ $T]*\([A-Z]\+\).*,\1 \3,p"
|
||||
return 0
|
||||
"${ZPOOL}" status "${pool}" 2>/dev/null | sed -n -e \
|
||||
"s,^[ $t]*\(.*${vdev}\(-part[0-9]\+\)\?\)[ $t]*\([A-Z]\+\).*,\1 \3,p" \
|
||||
| tail -1
|
||||
}
|
||||
|
||||
# Fault devices after N I/O errors.
|
||||
if [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.io" ]; then
|
||||
ERRORS=`expr ${ZEVENT_VDEV_READ_ERRORS} + ${ZEVENT_VDEV_WRITE_ERRORS}`
|
||||
|
||||
if [ ${ZED_SPARE_ON_IO_ERRORS} -gt 0 -a \
|
||||
${ERRORS} -ge ${ZED_SPARE_ON_IO_ERRORS} ]; then
|
||||
ACTION="fault"
|
||||
fi
|
||||
# Degrade devices after N checksum errors.
|
||||
elif [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.checksum" ]; then
|
||||
ERRORS=${ZEVENT_VDEV_CKSUM_ERRORS}
|
||||
# notify (old_vdev, new_vdev, num_errors)
|
||||
#
|
||||
# Send a notification regarding the hot spare replacement.
|
||||
#
|
||||
# Arguments
|
||||
# old_vdev: path of old vdev that has failed
|
||||
# new_vdev: path of new vdev used as the hot spare replacement
|
||||
# num_errors: number of errors that triggered this replacement
|
||||
#
|
||||
notify()
|
||||
{
|
||||
local old_vdev="$1"
|
||||
local new_vdev="$2"
|
||||
local num_errors="$3"
|
||||
local note_subject
|
||||
local note_pathname
|
||||
local s
|
||||
local rv
|
||||
|
||||
if [ ${ZED_SPARE_ON_CHECKSUM_ERRORS} -gt 0 -a \
|
||||
${ERRORS} -ge ${ZED_SPARE_ON_CHECKSUM_ERRORS} ]; then
|
||||
ACTION="degrade"
|
||||
fi
|
||||
else
|
||||
ACTION=
|
||||
fi
|
||||
umask 077
|
||||
note_subject="ZFS hot spare replacement for ${ZEVENT_POOL} on $(hostname)"
|
||||
note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
|
||||
{
|
||||
[ "${num_errors}" -ne 1 ] 2>/dev/null && s="s"
|
||||
|
||||
if [ -n "${ACTION}" ]; then
|
||||
echo "ZFS has replaced a failing device with a hot spare after" \
|
||||
"${num_errors} ${ZEVENT_SUBCLASS} error${s}:"
|
||||
echo
|
||||
echo " eid: ${ZEVENT_EID}"
|
||||
echo " class: ${ZEVENT_SUBCLASS}"
|
||||
echo " host: $(hostname)"
|
||||
echo " time: ${ZEVENT_TIME_STRING}"
|
||||
echo " old: ${old_vdev}"
|
||||
echo " new: ${new_vdev}"
|
||||
|
||||
# Device is already FAULTED or DEGRADED
|
||||
set -- `vdev_status ${ZEVENT_POOL} ${ZEVENT_VDEV_PATH}`
|
||||
ZEVENT_VDEV_PATH_FOUND=$1
|
||||
STATUS=$2
|
||||
if [ "${STATUS}" = "FAULTED" -o "${STATUS}" = "DEGRADED" ]; then
|
||||
exit 3
|
||||
fi
|
||||
"${ZPOOL}" status "${ZEVENT_POOL}"
|
||||
|
||||
# Step 1) FAULT or DEGRADE the device
|
||||
#
|
||||
${ZINJECT} -d ${ZEVENT_VDEV_GUID} -A ${ACTION} ${ZEVENT_POOL}
|
||||
} > "${note_pathname}"
|
||||
|
||||
# Step 2) Set the SES fault beacon.
|
||||
#
|
||||
# XXX: Set the 'fault' or 'ident' beacon for the device. This can
|
||||
# be done through the sg_ses utility, the only hard part is to map
|
||||
# the sd device to its corresponding enclosure and slot. We may
|
||||
# be able to leverage the existing vdev_id scripts for this.
|
||||
#
|
||||
# $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3
|
||||
# $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3
|
||||
zed_notify "${note_subject}" "${note_pathname}"; rv=$?
|
||||
rm -f "${note_pathname}"
|
||||
return "${rv}"
|
||||
}
|
||||
|
||||
# Step 3) Replace the device with a hot spare.
|
||||
#
|
||||
# Round robin through the spares selecting those which are available.
|
||||
#
|
||||
for SPARE in ${ZEVENT_VDEV_SPARE_PATHS}; do
|
||||
set -- `vdev_status ${ZEVENT_POOL} ${SPARE}`
|
||||
SPARE_VDEV_FOUND=$1
|
||||
STATUS=$2
|
||||
if [ "${STATUS}" = "AVAIL" ]; then
|
||||
${ZPOOL} replace ${ZEVENT_POOL} \
|
||||
${ZEVENT_VDEV_GUID} ${SPARE_VDEV_FOUND} && exit 0
|
||||
fi
|
||||
done
|
||||
|
||||
exit 1
|
||||
fi
|
||||
# main
|
||||
#
|
||||
# Arguments
|
||||
# none
|
||||
#
|
||||
# Return
|
||||
# see above
|
||||
#
|
||||
main()
|
||||
{
|
||||
local num_errors
|
||||
local action
|
||||
local lockfile
|
||||
local vdev_path
|
||||
local vdev_status
|
||||
local spare
|
||||
local spare_path
|
||||
local spare_status
|
||||
local zpool_err
|
||||
local zpool_rv
|
||||
local rv
|
||||
|
||||
exit 4
|
||||
# Avoid hot-sparing a hot-spare.
|
||||
#
|
||||
# Note: ZEVENT_VDEV_PATH is not defined for ZEVENT_VDEV_TYPE=spare.
|
||||
#
|
||||
[ "${ZEVENT_VDEV_TYPE}" = "spare" ] && exit 2
|
||||
|
||||
[ -n "${ZEVENT_POOL}" ] || exit 9
|
||||
[ -n "${ZEVENT_VDEV_GUID}" ] || exit 9
|
||||
[ -n "${ZEVENT_VDEV_PATH}" ] || exit 9
|
||||
|
||||
zed_check_cmd "${ZPOOL}" "${ZINJECT}" || exit 9
|
||||
|
||||
# Fault the device after a given number of I/O errors.
|
||||
#
|
||||
if [ "${ZEVENT_SUBCLASS}" = "io" ]; then
|
||||
if [ "${ZED_SPARE_ON_IO_ERRORS}" -gt 0 ]; then
|
||||
num_errors=$((ZEVENT_VDEV_READ_ERRORS + ZEVENT_VDEV_WRITE_ERRORS))
|
||||
[ "${num_errors}" -ge "${ZED_SPARE_ON_IO_ERRORS}" ] \
|
||||
&& action="fault"
|
||||
fi 2>/dev/null
|
||||
|
||||
# Degrade the device after a given number of checksum errors.
|
||||
#
|
||||
elif [ "${ZEVENT_SUBCLASS}" = "checksum" ]; then
|
||||
if [ "${ZED_SPARE_ON_CHECKSUM_ERRORS}" -gt 0 ]; then
|
||||
num_errors="${ZEVENT_VDEV_CKSUM_ERRORS}"
|
||||
[ "${num_errors}" -ge "${ZED_SPARE_ON_CHECKSUM_ERRORS}" ] \
|
||||
&& action="degrade"
|
||||
fi 2>/dev/null
|
||||
|
||||
else
|
||||
zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
|
||||
exit 9
|
||||
fi
|
||||
|
||||
# Error threshold not reached.
|
||||
#
|
||||
if [ -z "${action}" ]; then
|
||||
exit 2
|
||||
fi
|
||||
|
||||
lockfile="zed.spare.lock"
|
||||
zed_lock "${lockfile}"
|
||||
|
||||
# shellcheck disable=SC2046
|
||||
set -- $(query_vdev_status "${ZEVENT_POOL}" "${ZEVENT_VDEV_PATH}")
|
||||
vdev_path="$1"
|
||||
vdev_status="$2"
|
||||
|
||||
# Device is already FAULTED or DEGRADED.
|
||||
#
|
||||
if [ "${vdev_status}" = "FAULTED" ] \
|
||||
|| [ "${vdev_status}" = "DEGRADED" ]; then
|
||||
rv=3
|
||||
|
||||
else
|
||||
rv=1
|
||||
|
||||
# 1) FAULT or DEGRADE the device.
|
||||
#
|
||||
"${ZINJECT}" -d "${ZEVENT_VDEV_GUID}" -A "${action}" "${ZEVENT_POOL}"
|
||||
|
||||
# 2) Set the SES fault beacon.
|
||||
#
|
||||
# TODO: Set the 'fault' or 'ident' beacon for the device. This can
|
||||
# be done through the sg_ses utility. The only hard part is to map
|
||||
# the sd device to its corresponding enclosure and slot. We may
|
||||
# be able to leverage the existing vdev_id scripts for this.
|
||||
#
|
||||
# $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3
|
||||
# $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3
|
||||
|
||||
# 3) Replace the device with a hot spare.
|
||||
#
|
||||
# Round-robin through the spares trying those that are available.
|
||||
#
|
||||
for spare in ${ZEVENT_VDEV_SPARE_PATHS}; do
|
||||
|
||||
# shellcheck disable=SC2046
|
||||
set -- $(query_vdev_status "${ZEVENT_POOL}" "${spare}")
|
||||
spare_path="$1"
|
||||
spare_status="$2"
|
||||
|
||||
[ "${spare_status}" = "AVAIL" ] || continue
|
||||
|
||||
zpool_err="$("${ZPOOL}" replace "${ZEVENT_POOL}" \
|
||||
"${ZEVENT_VDEV_GUID}" "${spare_path}" 2>&1)"; zpool_rv=$?
|
||||
|
||||
if [ "${zpool_rv}" -ne 0 ]; then
|
||||
[ -n "${zpool_err}" ] && zed_log_err "zpool ${zpool_err}"
|
||||
else
|
||||
notify "${vdev_path}" "${spare_path}" "${num_errors}"
|
||||
rv=0
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
zed_unlock "${lockfile}"
|
||||
exit "${rv}"
|
||||
}
|
||||
|
||||
|
||||
main "$@"
|
||||
|
@ -1 +0,0 @@
|
||||
scrub.finish-email.sh
|
1
cmd/zed/zed.d/resilver.finish-notify.sh
Symbolic link
1
cmd/zed/zed.d/resilver.finish-notify.sh
Symbolic link
@ -0,0 +1 @@
|
||||
scrub.finish-notify.sh
|
@ -1,73 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Send email to ZED_EMAIL in response to a RESILVER.FINISH or SCRUB.FINISH.
|
||||
# By default, "zpool status" output will only be included in the email for
|
||||
# a scrub.finish zevent if the pool is not healthy; to always include its
|
||||
# output, set ZED_EMAIL_VERBOSE=1.
|
||||
# Exit codes:
|
||||
# 0: email sent
|
||||
# 1: email failed
|
||||
# 2: email suppressed
|
||||
# 3: missing executable
|
||||
# 4: unsupported event class
|
||||
# 5: internal error
|
||||
#
|
||||
test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc"
|
||||
|
||||
test -n "${ZEVENT_POOL}" || exit 5
|
||||
test -n "${ZEVENT_SUBCLASS}" || exit 5
|
||||
|
||||
if test "${ZEVENT_SUBCLASS}" = "resilver.finish"; then
|
||||
ACTION="resilvering"
|
||||
elif test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then
|
||||
ACTION="scrubbing"
|
||||
else
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\"
|
||||
exit 4
|
||||
fi
|
||||
|
||||
# Only send email if ZED_EMAIL has been configured.
|
||||
test -n "${ZED_EMAIL}" || exit 2
|
||||
|
||||
# Ensure requisite executables are installed.
|
||||
if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" not installed
|
||||
exit 3
|
||||
fi
|
||||
if ! test -x "${ZPOOL}"; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${ZPOOL}" not installed
|
||||
exit 3
|
||||
fi
|
||||
|
||||
# For scrub, suppress email if pool is healthy and verbosity is not enabled.
|
||||
if test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then
|
||||
HEALTHY=`"${ZPOOL}" status -x "${ZEVENT_POOL}" | \
|
||||
grep "'${ZEVENT_POOL}' is healthy"`
|
||||
test -n "${HEALTHY}" -a "${ZED_EMAIL_VERBOSE:=0}" = 0 && exit 2
|
||||
fi
|
||||
|
||||
"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on `hostname`" \
|
||||
"${ZED_EMAIL}" <<EOF
|
||||
A ZFS pool has finished ${ACTION}:
|
||||
|
||||
eid: ${ZEVENT_EID}
|
||||
host: `hostname`
|
||||
time: ${ZEVENT_TIME_STRING}
|
||||
`"${ZPOOL}" status "${ZEVENT_POOL}"`
|
||||
EOF
|
||||
MAIL_STATUS=$?
|
||||
|
||||
if test "${MAIL_STATUS}" -ne 0; then
|
||||
logger -t "${ZED_SYSLOG_TAG:=zed}" \
|
||||
-p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \
|
||||
`basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
59
cmd/zed/zed.d/scrub.finish-notify.sh
Executable file
59
cmd/zed/zed.d/scrub.finish-notify.sh
Executable file
@ -0,0 +1,59 @@
|
||||
#!/bin/sh
#
# ZEDLET: send a notification when a resilver or a scrub has finished
# (RESILVER.FINISH or SCRUB.FINISH zevent).
#
# For a scrub.finish zevent the notification is suppressed (exit 3) when
# "zpool status -x" reports the pool as healthy and ZED_NOTIFY_VERBOSE is 0.
# A resilver.finish notification is always attempted.
#
# Exit codes:
#   0: notification sent
#   1: notification failed
#   2: notification not configured
#   3: notification suppressed
#   9: internal error

# Load the optional site configuration first, then the shared helpers.
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# The pool name and the event subclass are both mandatory.
[ -n "${ZEVENT_POOL}" ] || exit 9
[ -n "${ZEVENT_SUBCLASS}" ] || exit 9

# Map the event subclass onto the word used in the message text.
case "${ZEVENT_SUBCLASS}" in
    resilver.finish)
        action="resilver"
        ;;
    scrub.finish)
        action="scrub"
        ;;
    *)
        zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
        exit 9
        ;;
esac

zed_check_cmd "${ZPOOL}" || exit 9

# A finished scrub of a healthy pool is only reported when verbosity has
# been enabled.
#
if [ "${action}" = "scrub" ]; then
    pool_health="$("${ZPOOL}" status -x "${ZEVENT_POOL}" \
        | grep "'${ZEVENT_POOL}' is healthy")"
    if [ -n "${pool_health}" ] && [ "${ZED_NOTIFY_VERBOSE}" -eq 0 ]; then
        exit 3
    fi
fi

# The message body is staged in a temporary file that only the owner can
# read; it is removed again once the notification attempt has completed.
#
umask 077
subject="ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on $(hostname)"
: "${TMPDIR:="/tmp"}"
msg_file="${TMPDIR}/$(basename -- "$0").${ZEVENT_EID}.$$"
{
    echo "ZFS has finished a ${action}:"
    echo
    echo " eid: ${ZEVENT_EID}"
    echo " class: ${ZEVENT_SUBCLASS}"
    echo " host: $(hostname)"
    echo " time: ${ZEVENT_TIME_STRING}"

    "${ZPOOL}" status "${ZEVENT_POOL}"

} > "${msg_file}"

# The exit status of this ZEDLET is whatever zed_notify() reported.
zed_notify "${subject}" "${msg_file}"
status=$?
rm -f "${msg_file}"
exit "${status}"
|
392
cmd/zed/zed.d/zed-functions.sh
Normal file
392
cmd/zed/zed.d/zed-functions.sh
Normal file
@ -0,0 +1,392 @@
|
||||
# zed-functions.sh
#
# ZED helper functions for use in ZEDLETs


# Variable Defaults
#
# Every setting below keeps a non-empty value that is already present when
# this file is sourced; otherwise it is given the default shown here.
#
ZED_LOCKDIR="${ZED_LOCKDIR:-/var/lock}"
ZED_NOTIFY_INTERVAL_SECS="${ZED_NOTIFY_INTERVAL_SECS:-3600}"
ZED_NOTIFY_VERBOSE="${ZED_NOTIFY_VERBOSE:-0}"
ZED_RUNDIR="${ZED_RUNDIR:-/var/run}"
ZED_SYSLOG_PRIORITY="${ZED_SYSLOG_PRIORITY:-daemon.notice}"
ZED_SYSLOG_TAG="${ZED_SYSLOG_TAG:-zed}"

# File descriptor used by zed_lock() and zed_unlock() when none is given.
ZED_FLOCK_FD=8
|
||||
|
||||
|
||||
# zed_check_cmd (cmd, ...)
#
# Check that every command named in the argument list can be resolved by the
# shell ("command -v").  Each command that cannot be resolved is reported via
# zed_log_err().
#
# Arguments
#   cmd: name of an executable command to look up (may be repeated)
#
# Return
#   0 if every command was found
#   n, the number of commands that were not found, otherwise
#
zed_check_cmd()
{
    local name
    local missing=0

    for name in "$@"; do
        # Nothing to report for a command that resolves.
        command -v "${name}" >/dev/null 2>&1 && continue

        zed_log_err "\"${name}\" not installed"
        missing=$((missing + 1))
    done
    return "${missing}"
}
|
||||
|
||||
|
||||
# zed_log_msg (msg, ...)
#
# Hand all argument strings to logger(1) as a single system log entry, using
# the configured syslog priority and tag.
#
# Globals
#   ZED_SYSLOG_PRIORITY
#   ZED_SYSLOG_TAG
#
# Return
#   the exit status of logger
#
zed_log_msg()
{
    # "--" ends option parsing so that a message starting with "-" is not
    # mistaken for a logger option.
    logger \
        -p "${ZED_SYSLOG_PRIORITY}" \
        -t "${ZED_SYSLOG_TAG}" \
        -- "$@"
}
|
||||
|
||||
|
||||
# zed_log_err (msg, ...)
#
# Write an error entry to the system log.  The entry is made up of the word
# "error:", the basename of the running script, the event ID (as "eid=N:")
# and then all argument strings.
#
# Globals
#   ZED_SYSLOG_PRIORITY
#   ZED_SYSLOG_TAG
#   ZEVENT_EID
#
# Return
#   the exit status of logger
#
zed_log_err()
{
    local script
    local eid_field

    script="$(basename -- "$0")"

    # Expands to the empty string when ZEVENT_EID is unset or empty; the
    # (empty) argument is still passed on to logger in that case.
    eid_field="${ZEVENT_EID:+eid=${ZEVENT_EID}:}"

    logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- \
        "error:" "${script}:" "${eid_field}" "$@"
}
|
||||
|
||||
|
||||
# zed_lock (lockfile, [fd])
#
# Take an exclusive (write) lock on [lockfile], waiting for it if it is
# currently held elsewhere.
#
# Every zed_lock() must be paired with a corresponding zed_unlock().
#
# The lock is tied to a file descriptor, 8 by default.  The bash manpage
# warns that file descriptors >9 should be used with care as they may
# conflict with descriptors the shell uses internally, and descriptor 9 is
# reserved for zed_rate_limit().  A process holding several locks at once
# must give each its own descriptor (preferably counting down from 8):
# locking again on a descriptor already in use releases the lock previously
# associated with it.
#
# Arguments
#   lockfile: pathname of the lock file; placed in ZED_LOCKDIR unless the
#     pathname contains a "/".
#   fd: integer file descriptor handed to flock (OPTIONAL unless holding
#     concurrent locks)
#
# Globals
#   ZED_FLOCK_FD
#   ZED_LOCKDIR
#
# Return
#   nothing meaningful; a flock failure is logged, not returned
#
zed_lock()
{
    local lockfile="$1"
    local fd="${2:-${ZED_FLOCK_FD}}"
    local saved_umask
    local flock_msg

    [ -n "${lockfile}" ] || return

    # A name without any "/" is relative to the lock directory.
    expr "${lockfile}" : '.*/' >/dev/null 2>&1 \
        || lockfile="${ZED_LOCKDIR}/${lockfile}"

    # Create the lock file accessible to its owner only; the previous umask
    # is put back before returning.
    saved_umask="$(umask)"
    umask 077

    # Bind the lock file to the requested descriptor, then lock that
    # descriptor.  eval is needed because the descriptor number is variable.
    #
    eval "exec ${fd}> '${lockfile}'"
    if ! flock_msg="$(flock --exclusive "${fd}" 2>&1)"; then
        zed_log_err "failed to lock \"${lockfile}\": ${flock_msg}"
    fi

    umask "${saved_umask}"
}
|
||||
|
||||
|
||||
# zed_unlock (lockfile, [fd])
#
# Drop the lock held on [lockfile] and close its file descriptor.
#
# Arguments
#   lockfile: pathname of the lock file
#   fd: integer file descriptor handed to flock (must be the descriptor that
#     was given to the matching zed_lock call)
#
# Globals
#   ZED_FLOCK_FD
#   ZED_LOCKDIR
#
# Return
#   nothing meaningful; a flock failure is logged, not returned
#
zed_unlock()
{
    local lockfile="$1"
    local fd="${2:-${ZED_FLOCK_FD}}"
    local flock_msg

    [ -n "${lockfile}" ] || return

    # Resolve the name the same way zed_lock() does; the resulting path is
    # only used in the error message below.
    expr "${lockfile}" : '.*/' >/dev/null 2>&1 \
        || lockfile="${ZED_LOCKDIR}/${lockfile}"

    # Unlock first, then close the descriptor (eval because the descriptor
    # number is variable).
    #
    if ! flock_msg="$(flock --unlock "${fd}" 2>&1)"; then
        zed_log_err "failed to unlock \"${lockfile}\": ${flock_msg}"
    fi
    eval "exec ${fd}>&-"
}
|
||||
|
||||
|
||||
# zed_notify (subject, pathname)
#
# Send a notification via all available methods (currently email and
# Pushbullet).  Each method is attempted regardless of the outcome of the
# others.
#
# Arguments
#   subject: notification subject
#   pathname: pathname containing the notification message (OPTIONAL)
#
# Return
#   0: notification succeeded via at least one method
#   1: notification failed
#   2: no notification methods configured
#
zed_notify()
{
    local subject="$1"
    local pathname="$2"
    local num_success=0
    local num_failure=0
    local method
    local rv    # kept local so the caller's own "rv" is never overwritten

    for method in zed_notify_email zed_notify_pushbullet; do
        "${method}" "${subject}" "${pathname}"; rv=$?

        # A status of 2 ("not configured") counts as neither a success nor
        # a failure.
        case "${rv}" in
            0) num_success=$((num_success + 1)) ;;
            1) num_failure=$((num_failure + 1)) ;;
        esac
    done

    [ "${num_success}" -gt 0 ] && return 0
    [ "${num_failure}" -gt 0 ] && return 1
    return 2
}
|
||||
|
||||
|
||||
# zed_notify_email (subject, pathname)
#
# Send a notification via email to the address specified by ZED_EMAIL.
#
# Requires the mail executable to be installed in the standard PATH.
#
# Arguments
#   subject: notification subject
#   pathname: pathname containing the notification message (OPTIONAL);
#     /dev/null (an empty message) is used when it is omitted
#
# Globals
#   ZED_EMAIL
#
# Return
#   0: notification sent
#   1: notification failed
#   2: not configured
#
zed_notify_email()
{
    local subject="$1"
    local pathname="${2:-"/dev/null"}"
    local rv    # kept local so the caller's own "rv" is never overwritten

    # Email is only attempted once a recipient has been configured.
    [ -n "${ZED_EMAIL}" ] || return 2

    [ -n "${subject}" ] || return 1
    if [ ! -r "${pathname}" ]; then
        zed_log_err "mail cannot read \"${pathname}\""
        return 1
    fi

    zed_check_cmd "mail" || return 1

    # The message body is read from pathname; any output of mail itself is
    # discarded and only its exit status is examined.
    mail -s "${subject}" "${ZED_EMAIL}" < "${pathname}" >/dev/null 2>&1; rv=$?
    if [ "${rv}" -ne 0 ]; then
        zed_log_err "mail exit=${rv}"
        return 1
    fi
    return 0
}
|
||||
|
||||
|
||||
# zed_notify_pushbullet (subject, pathname)
#
# Send a notification via Pushbullet <https://www.pushbullet.com/>.
# The access token (ZED_PUSHBULLET_ACCESS_TOKEN) identifies this client to the
# Pushbullet server.  The optional channel tag (ZED_PUSHBULLET_CHANNEL_TAG) is
# for pushing to notification feeds that can be subscribed to; if a channel is
# not defined, push notifications will instead be sent to all devices
# associated with the account specified by the access token.
#
# Requires awk, curl, and sed executables to be installed in the standard PATH.
#
# References
#   https://docs.pushbullet.com/
#   https://www.pushbullet.com/security
#
# Arguments
#   subject: notification subject
#   pathname: pathname containing the notification message (OPTIONAL);
#     /dev/null (an empty message) is used when it is omitted
#
# Globals
#   ZED_PUSHBULLET_ACCESS_TOKEN
#   ZED_PUSHBULLET_CHANNEL_TAG
#
# Return
#   0: notification sent
#   1: notification failed
#   2: not configured
#
zed_notify_pushbullet()
{
    local subject="$1"
    local pathname="${2:-"/dev/null"}"
    local msg_title
    local msg_body
    local msg_tag
    local msg_json
    local msg_out
    local msg_err
    local rv    # kept local so the caller's own "rv" is never overwritten
    local url="https://api.pushbullet.com/v2/pushes"

    # Pushbullet is only attempted once an access token has been configured.
    [ -n "${ZED_PUSHBULLET_ACCESS_TOKEN}" ] || return 2

    [ -n "${subject}" ] || return 1
    if [ ! -r "${pathname}" ]; then
        zed_log_err "pushbullet cannot read \"${pathname}\""
        return 1
    fi

    zed_check_cmd "awk" "curl" "sed" || return 1

    # Escape backslash and double quote in the subject so that it cannot
    # terminate the JSON "title" string early.
    #
    msg_title="$(printf '%s' "${subject}" \
        | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')"

    # Escape the following characters in the message body for JSON:
    # newline, backslash, double quote, horizontal tab, form feed,
    # and carriage return.
    #
    msg_body="$(awk '{ ORS="\\n" } { gsub(/\\/, "\\\\"); gsub(/"/, "\\\"");
        gsub(/\t/, "\\t"); gsub(/\f/, "\\f"); gsub(/\r/, "\\r"); print }' \
        "${pathname}")"

    # Push to a channel if one is configured.
    #
    [ -n "${ZED_PUSHBULLET_CHANNEL_TAG}" ] && msg_tag="$(printf \
        '"channel_tag": "%s", ' "${ZED_PUSHBULLET_CHANNEL_TAG}")"

    # Construct the JSON message for pushing a note.
    #
    msg_json="$(printf '{%s"type": "note", "title": "%s", "body": "%s"}' \
        "${msg_tag}" "${msg_title}" "${msg_body}")"

    # Send the POST request and check for errors.
    # NOTE(review): the access token is handed to curl on its command line,
    # so it may be visible in the process list while curl is running.
    #
    msg_out="$(curl -u "${ZED_PUSHBULLET_ACCESS_TOKEN}:" -X POST "${url}" \
        --header "Content-Type: application/json" --data-binary "${msg_json}" \
        2>/dev/null)"; rv=$?
    if [ "${rv}" -ne 0 ]; then
        zed_log_err "curl exit=${rv}"
        return 1
    fi

    # A reply that carries an "error" object with a "message" member is
    # treated as a failure, and that message is logged.
    msg_err="$(echo "${msg_out}" \
        | sed -n -e 's/.*"error" *:.*"message" *: *"\([^"]*\)".*/\1/p')"
    if [ -n "${msg_err}" ]; then
        zed_log_err "pushbullet \"${msg_err}\""
        return 1
    fi
    return 0
}
|
||||
|
||||
|
||||
# zed_rate_limit (tag, [interval])
#
# Check whether an event of a given type [tag] has already occurred within the
# last [interval] seconds.  When it has not, the current time is recorded
# against [tag] in the state file, replacing any earlier entry for that tag.
#
# This function obtains a lock on the statefile using file descriptor 9.
#
# Arguments
#   tag: arbitrary string for grouping related events to rate-limit
#   interval: time interval in seconds (OPTIONAL); defaults to
#     ZED_NOTIFY_INTERVAL_SECS
#
# Globals
#   ZED_NOTIFY_INTERVAL_SECS
#   ZED_RUNDIR
#
# Return
#   0 if the event should be processed
#   1 if the event should be dropped
#
# State File Format
#   time;tag
#
# NOTE(review): [tag] is interpolated unescaped into an extended regular
# expression, so a tag containing regex metacharacters may match more or
# fewer state file entries than intended.
#
zed_rate_limit()
{
    local tag="$1"
    local interval="${2:-${ZED_NOTIFY_INTERVAL_SECS}}"
    local lockfile="zed.zedlet.state.lock"
    local lockfile_fd=9
    local statefile="${ZED_RUNDIR}/zed.zedlet.state"
    local time_now
    local time_prev
    local umask_bak
    local rv=0

    # Without a tag there is nothing to rate-limit against.
    [ -n "${tag}" ] || return 0

    zed_lock "${lockfile}" "${lockfile_fd}"
    time_now="$(date +%s)"

    # Timestamp of the most recent entry recorded for this tag, if any.
    # "grep -E" and "tail -n 1" replace the obsolescent "egrep" and
    # "tail -1" spellings.
    time_prev="$(grep -E "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \
        | tail -n 1 | cut -d\; -f1)"

    if [ -n "${time_prev}" ] \
            && [ "$((time_now - time_prev))" -lt "${interval}" ]; then
        rv=1
    else
        # Rewrite the state file: keep the entries of all other tags, append
        # a fresh entry for this tag, then move the new file into place.
        umask_bak="$(umask)"
        umask 077
        grep -E -v "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \
            > "${statefile}.$$"
        echo "${time_now};${tag}" >> "${statefile}.$$"
        mv -f "${statefile}.$$" "${statefile}"
        umask "${umask_bak}"
    fi

    zed_unlock "${lockfile}" "${lockfile_fd}"
    return "${rv}"
}
|
@ -1,34 +1,80 @@
|
||||
##
|
||||
# zed.rc
|
||||
#
|
||||
# This file should be owned by root and permissioned 0600.
|
||||
##
|
||||
|
||||
##
|
||||
# Absolute path to the debug output file.
|
||||
#
|
||||
#ZED_DEBUG_LOG="/tmp/zed.debug.log"
|
||||
|
||||
# Email address of the zpool administrator.
|
||||
##
|
||||
# Email address of the zpool administrator for receipt of notifications.
|
||||
# Email will only be sent if ZED_EMAIL is defined.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_EMAIL="root"
|
||||
|
||||
# Email verbosity.
|
||||
# If set to 0, suppress email if the pool is healthy.
|
||||
# If set to 1, send email regardless of pool health.
|
||||
#ZED_EMAIL_VERBOSE=0
|
||||
|
||||
# Minimum number of seconds between emails sent for a similar event.
|
||||
#ZED_EMAIL_INTERVAL_SECS="3600"
|
||||
|
||||
##
|
||||
# Default directory for zed lock files.
|
||||
#
|
||||
#ZED_LOCKDIR="/var/lock"
|
||||
|
||||
##
|
||||
# Minimum number of seconds between notifications for a similar event.
|
||||
#
|
||||
#ZED_NOTIFY_INTERVAL_SECS=3600
|
||||
|
||||
##
|
||||
# Notification verbosity.
|
||||
# If set to 0, suppress notification if the pool is healthy.
|
||||
# If set to 1, send notification regardless of pool health.
|
||||
#
|
||||
#ZED_NOTIFY_VERBOSE=0
|
||||
|
||||
##
|
||||
# Pushbullet access token.
|
||||
# This grants full access to your account -- protect it accordingly!
|
||||
# <https://www.pushbullet.com/get-started>
|
||||
# <https://www.pushbullet.com/account>
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_PUSHBULLET_ACCESS_TOKEN=""
|
||||
|
||||
##
|
||||
# Pushbullet channel tag for push notification feeds that can be subscribed to.
|
||||
# <https://www.pushbullet.com/my-channel>
|
||||
# If not defined, push notifications will instead be sent to all devices
|
||||
# associated with the account specified by the access token.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_PUSHBULLET_CHANNEL_TAG=""
|
||||
|
||||
##
|
||||
# Default directory for zed state files.
|
||||
#
|
||||
#ZED_RUNDIR="/var/run"
|
||||
|
||||
# The syslog priority (eg, specified as a "facility.level" pair).
|
||||
#ZED_SYSLOG_PRIORITY="daemon.notice"
|
||||
|
||||
# The syslog tag for marking zed events.
|
||||
#ZED_SYSLOG_TAG="zed"
|
||||
##
|
||||
# Replace a device with a hot spare after N checksum errors are detected.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_SPARE_ON_CHECKSUM_ERRORS=10
|
||||
|
||||
##
|
||||
# Replace a device with a hot spare after N I/O errors are detected.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
#ZED_SPARE_ON_IO_ERRORS=1
|
||||
|
||||
# Replace a device with a hot spare after N checksum errors are detected.
|
||||
#ZED_SPARE_ON_CHECKSUM_ERRORS=10
|
||||
##
|
||||
# The syslog priority (e.g., specified as a "facility.level" pair).
|
||||
#
|
||||
#ZED_SYSLOG_PRIORITY="daemon.notice"
|
||||
|
||||
##
|
||||
# The syslog tag for marking zed events.
|
||||
#
|
||||
#ZED_SYSLOG_TAG="zed"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user