From aded9a6814c9f6260437bc186ad08debc5d0b6c8 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Tue, 17 Feb 2015 17:23:54 -0800 Subject: [PATCH 1/5] Cleanup ZEDLETs This commit factors out several common ZEDLET code blocks into zed-functions.sh. This shortens the length of the scripts, thereby (hopefully) making them easier to understand and maintain. In addition, this commit revamps the coding style used by the scripts to be more consistent and (again, hopefully) maintainable. It now mostly follows the Google Shell Style Guide. I've tried to assimilate the following resources: Google Shell Style Guide https://google-styleguide.googlecode.com/svn/trunk/shell.xml Dash as /bin/sh https://wiki.ubuntu.com/DashAsBinSh Filenames and Pathnames in Shell: How to do it Correctly http://www.dwheeler.com/essays/filenames-in-shell.html Common shell script mistakes http://www.pixelbeat.org/programming/shell_script_mistakes.html Finally, this commit updates the exit codes used by the ZEDLETs to be more consistent with one another. All scripts run cleanly through ShellCheck . All scripts have been tested on bash and dash. Signed-off-by: Chris Dunlap --- cmd/zed/Makefile.am | 3 + cmd/zed/zed.d/README | 30 ++++ cmd/zed/zed.d/all-debug.sh | 19 +- cmd/zed/zed.d/all-syslog.sh | 9 +- cmd/zed/zed.d/data-email.sh | 88 ++++----- cmd/zed/zed.d/generic-email.sh | 90 +++++----- cmd/zed/zed.d/io-email.sh | 95 ++++------ cmd/zed/zed.d/io-spare.sh | 267 +++++++++++++++++----------- cmd/zed/zed.d/scrub.finish-email.sh | 92 +++++----- cmd/zed/zed.d/zed-functions.sh | 230 ++++++++++++++++++++++++ cmd/zed/zed.d/zed.rc | 44 ++++- 11 files changed, 630 insertions(+), 337 deletions(-) create mode 100644 cmd/zed/zed.d/README create mode 100644 cmd/zed/zed.d/zed-functions.sh diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index b907f6af9b45..8bdc097c77a3 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -4,6 +4,8 @@ DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include +EXTRA_DIST = $(top_srcdir)/cmd/zed/zed.d/README + sbin_PROGRAMS = zed zed_SOURCES = \ @@ -33,6 +35,7 @@ zed_LDADD = \ zedconfdir = $(sysconfdir)/zfs/zed.d dist_zedconf_DATA = \ + $(top_srcdir)/cmd/zed/zed.d/zed-functions.sh \ $(top_srcdir)/cmd/zed/zed.d/zed.rc zedexecdir = $(libexecdir)/zfs/zed.d diff --git a/cmd/zed/zed.d/README b/cmd/zed/zed.d/README new file mode 100644 index 000000000000..b4cb11514364 --- /dev/null +++ b/cmd/zed/zed.d/README @@ -0,0 +1,30 @@ +Shell scripts are the recommended choice for ZEDLETs that mostly call +other utilities and do relatively little data manipulation. + +Shell scripts MUST work on both bash and dash. + +Shell scripts MUST run cleanly through ShellCheck: + http://www.shellcheck.net/ + +General functions reside in "zed-functions.sh". Use them where applicable. + +Additional references that may be of use: + + Google Shell Style Guide + https://google-styleguide.googlecode.com/svn/trunk/shell.xml + + Dash as /bin/sh + https://wiki.ubuntu.com/DashAsBinSh + + Common shell script mistakes + http://www.pixelbeat.org/programming/shell_script_mistakes.html + + Filenames and Pathnames in Shell: How to do it Correctly + http://www.dwheeler.com/essays/filenames-in-shell.html + + Autoconf: Portable Shell Programming + https://www.gnu.org/software/autoconf/manual/autoconf.html#Portable-Shell + +Please BE CONSISTENT with the existing style, check for errors, +minimize dependencies where possible, try to be portable, +and comment anything non-obvious. Festina lente. diff --git a/cmd/zed/zed.d/all-debug.sh b/cmd/zed/zed.d/all-debug.sh index aa20ef268663..057e39b504b5 100755 --- a/cmd/zed/zed.d/all-debug.sh +++ b/cmd/zed/zed.d/all-debug.sh @@ -2,16 +2,23 @@ # # Log all environment variables to ZED_DEBUG_LOG. # -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" +# This can be a useful aid when developing/debugging ZEDLETs since it shows the +# environment variables defined for each zevent. -# Override the default umask to restrict access to a newly-created logfile. -umask 077 +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +: "${ZED_DEBUG_LOG:="${TMPDIR:="/tmp"}/zed.debug.log"}" -# Append stdout to the logfile after obtaining an advisory lock. -exec >> "${ZED_DEBUG_LOG:=/tmp/zed.debug.log}" -flock -x 1 +lockfile="$(basename -- "${ZED_DEBUG_LOG}").lock" + +umask 077 +zed_lock "${lockfile}" +exec >> "${ZED_DEBUG_LOG}" printenv | sort echo +exec >&- +zed_unlock "${lockfile}" exit 0 diff --git a/cmd/zed/zed.d/all-syslog.sh b/cmd/zed/zed.d/all-syslog.sh index acf9e83bde3a..b34d17cef15c 100755 --- a/cmd/zed/zed.d/all-syslog.sh +++ b/cmd/zed/zed.d/all-syslog.sh @@ -1,11 +1,10 @@ #!/bin/sh # # Log the zevent via syslog. -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" -logger -t "${ZED_SYSLOG_TAG:=zed}" -p "${ZED_SYSLOG_PRIORITY:=daemon.notice}" \ - eid="${ZEVENT_EID}" class="${ZEVENT_SUBCLASS}" \ - "${ZEVENT_POOL:+pool=$ZEVENT_POOL}" +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" +zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \ + "${ZEVENT_POOL:+"pool=${ZEVENT_POOL}"}" exit 0 diff --git a/cmd/zed/zed.d/data-email.sh b/cmd/zed/zed.d/data-email.sh index 543b8fe55c02..2dae8ff6b468 100755 --- a/cmd/zed/zed.d/data-email.sh +++ b/cmd/zed/zed.d/data-email.sh @@ -1,81 +1,53 @@ #!/bin/sh # -# Send email to ZED_EMAIL in response to a DATA zevent. -# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given -# class/pool combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool. +# Send email to ZED_EMAIL in response to a DATA error. +# +# Only one email per ZED_EMAIL_INTERVAL_SECS will be sent for a given +# class/pool combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool. +# # Exit codes: # 0: email sent # 1: email failed -# 2: email suppressed -# 3: missing executable -# 4: unsupported event class -# 5: internal error -# State File Format: -# POOL;TIME_OF_LAST_EMAIL -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" +# 2: email not configured +# 3: email suppressed +# 9: internal error -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" -if test "${ZEVENT_SUBCLASS}" != "data"; then \ - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" - exit 4 -fi +[ -n "${ZED_EMAIL}" ] || exit 2 -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 +[ -n "${ZEVENT_POOL}" ] || exit 9 +[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 +if [ "${ZEVENT_SUBCLASS}" != "data" ]; then \ + zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" + exit 9 fi -NAME="zed.${ZEVENT_SUBCLASS}.email" -LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock" -STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state" +zed_check_cmd "mail" || exit 9 -# Obtain lock to ensure mutual exclusion for accessing state. -exec 8> "${LOCKFILE}" -flock -x 8 +zed_rate_limit "${ZEVENT_POOL};${ZEVENT_SUBCLASS};email" || exit 3 -# Query state for last time email was sent for this pool. -TIME_NOW=`date +%s` -TIME_LAST=`egrep "^${ZEVENT_POOL};" "${STATEFILE}" 2>/dev/null | cut -d ";" -f2` -if test -n "${TIME_LAST}"; then - TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"` - if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then - exit 2 - fi -fi - -"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \ - "${ZED_EMAIL}" < "${email_pathname}" </dev/null > "${STATEFILE}.$$" -echo "${ZEVENT_POOL};${TIME_NOW}" >> "${STATEFILE}.$$" -mv -f "${STATEFILE}.$$" "${STATEFILE}" +mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}" +mail_status=$? -if test "${MAIL_STATUS}" -ne 0; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" - exit 1 +if [ "${mail_status}" -ne 0 ]; then + zed_log_msg "mail exit=${mail_status}" + exit 1 fi - +rm -f "${email_pathname}" exit 0 diff --git a/cmd/zed/zed.d/generic-email.sh b/cmd/zed/zed.d/generic-email.sh index 357aedee5fa7..ad022e034388 100755 --- a/cmd/zed/zed.d/generic-email.sh +++ b/cmd/zed/zed.d/generic-email.sh @@ -1,59 +1,59 @@ #!/bin/sh # # Send email to ZED_EMAIL in response to a given zevent. -# This is a generic script than can be symlinked to a file in the zed -# enabled-scripts directory in order to have email sent when a particular -# class of zevents occurs. The symlink filename must begin with the zevent -# (sub)class string (eg, "probe_failure-email.sh" for the "probe_failure" -# subclass). Refer to the zed(8) manpage for details. +# +# This is a generic script than can be symlinked to a file in the +# enabled-zedlets directory to have an email sent when a particular class of +# zevents occurs. The symlink filename must begin with the zevent (sub)class +# string (e.g., "probe_failure-email.sh" for the "probe_failure" subclass). +# Refer to the zed(8) manpage for details. +# # Exit codes: # 0: email sent # 1: email failed -# 2: email suppressed -# 3: missing executable -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" +# 2: email not configured +# 3: email suppressed -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 -fi +[ -n "${ZED_EMAIL}" ] || exit 2 + +# Rate-limit the message based in part on the filename. +# +rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")" +rate_limit_interval="${ZED_EMAIL_INTERVAL_SECS}" +zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3 -# Override the default umask to restrict access to the msgbody tmpfile. umask 077 +pool_str="${ZEVENT_POOL:+" for ${ZEVENT_POOL}"}" +host_str=" on $(hostname)" +email_subject="ZFS ${ZEVENT_SUBCLASS} event${pool_str}${host_str}" +email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" +{ + echo "ZFS has posted the following event:" + echo + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" -SUBJECT="ZFS ${ZEVENT_SUBCLASS} event" -test -n "${ZEVENT_POOL}" && SUBJECT="${SUBJECT} for ${ZEVENT_POOL}" -SUBJECT="${SUBJECT} on `hostname`" + if [ -n "${ZEVENT_VDEV_PATH}" ]; then + echo " vpath: ${ZEVENT_VDEV_PATH}" + [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}" + fi -MSGBODY="${TMPDIR:=/tmp}/`basename \"$0\"`.$$" -{ - echo "A ZFS ${ZEVENT_SUBCLASS} event has been posted:" - echo - echo " eid: ${ZEVENT_EID}" - echo " host: `hostname`" - echo " time: ${ZEVENT_TIME_STRING}" - test -n "${ZEVENT_VDEV_TYPE}" -a -n "${ZEVENT_VDEV_PATH}" && \ - echo " vdev: ${ZEVENT_VDEV_TYPE}:${ZEVENT_VDEV_PATH}" - test -n "${ZEVENT_POOL}" -a -x "${ZPOOL}" && \ - "${ZPOOL}" status "${ZEVENT_POOL}" -} > "${MSGBODY}" - -test -f "${MSGBODY}" && "${MAIL}" -s "${SUBJECT}" "${ZED_EMAIL}" < "${MSGBODY}" -MAIL_STATUS=$? -rm -f "${MSGBODY}" - -if test "${MAIL_STATUS}" -ne 0; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" - exit 1 -fi + [ -n "${ZEVENT_POOL}" ] && [ -x "${ZPOOL}" ] \ + && "${ZPOOL}" status "${ZEVENT_POOL}" +} > "${email_pathname}" + +mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}" +mail_status=$? + +if [ "${mail_status}" -ne 0 ]; then + zed_log_msg "mail exit=${mail_status}" + exit 1 +fi +rm -f "${email_pathname}" exit 0 diff --git a/cmd/zed/zed.d/io-email.sh b/cmd/zed/zed.d/io-email.sh index 9edbe6670d94..1854b15933ae 100755 --- a/cmd/zed/zed.d/io-email.sh +++ b/cmd/zed/zed.d/io-email.sh @@ -1,86 +1,57 @@ #!/bin/sh # -# Send email to ZED_EMAIL in response to a CHECKSUM or IO zevent. -# Only one message per ZED_EMAIL_INTERVAL_SECS will be sent for a given -# class/pool/vdev combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool/device. +# Send email to ZED_EMAIL in response to a CHECKSUM or IO error. +# +# Only one email per ZED_EMAIL_INTERVAL_SECS will be sent for a given +# class/pool/vdev combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool/device. +# # Exit codes: # 0: email sent # 1: email failed -# 2: email suppressed -# 3: missing executable -# 4: unsupported event class -# 5: internal error -# State File Format: -# POOL;VDEV_PATH;TIME_OF_LAST_EMAIL -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" +# 2: email not configured +# 3: email suppressed +# 9: internal error -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 -test -n "${ZEVENT_VDEV_PATH}" || exit 5 +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" -if test "${ZEVENT_SUBCLASS}" != "checksum" \ - -a "${ZEVENT_SUBCLASS}" != "io"; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" - exit 4 -fi +[ -n "${ZED_EMAIL}" ] || exit 2 -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 +[ -n "${ZEVENT_POOL}" ] || exit 9 +[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 +[ -n "${ZEVENT_VDEV_PATH}" ] || exit 9 -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 +if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \ + && [ "${ZEVENT_SUBCLASS}" != "io" ]; then + zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" + exit 9 fi -NAME="zed.${ZEVENT_SUBCLASS}.email" -LOCKFILE="${ZED_LOCKDIR:=/var/lock}/${NAME}.lock" -STATEFILE="${ZED_RUNDIR:=/var/run}/${NAME}.state" +zed_check_cmd "mail" || exit 9 -# Obtain lock to ensure mutual exclusion for accessing state. -exec 8> "${LOCKFILE}" -flock -x 8 +zed_rate_limit "${ZEVENT_POOL};${ZEVENT_VDEV_PATH};${ZEVENT_SUBCLASS};email" \ + || exit 3 -# Query state for last time email was sent for this pool/vdev. -TIME_NOW=`date +%s` -TIME_LAST=`egrep "^${ZEVENT_POOL};${ZEVENT_VDEV_PATH};" "${STATEFILE}" \ - 2>/dev/null | cut -d ";" -f3` -if test -n "${TIME_LAST}"; then - TIME_DELTA=`expr "${TIME_NOW}" - "${TIME_LAST}"` - if test "${TIME_DELTA}" -lt "${ZED_EMAIL_INTERVAL_SECS:=3600}"; then - exit 2 - fi -fi - -"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on `hostname`" \ - "${ZED_EMAIL}" < "${email_pathname}" </dev/null > "${STATEFILE}.$$" -echo "${ZEVENT_POOL};${ZEVENT_VDEV_PATH};${TIME_NOW}" >> "${STATEFILE}.$$" -mv -f "${STATEFILE}.$$" "${STATEFILE}" +mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}" +mail_status=$? -if test "${MAIL_STATUS}" -ne 0; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" exit="${MAIL_STATUS}" - exit 1 +if [ "${mail_status}" -ne 0 ]; then + zed_log_msg "mail exit=${mail_status}" + exit 1 fi - +rm -f "${email_pathname}" exit 0 diff --git a/cmd/zed/zed.d/io-spare.sh b/cmd/zed/zed.d/io-spare.sh index b64b2a9f1160..9667dedcb7ca 100755 --- a/cmd/zed/zed.d/io-spare.sh +++ b/cmd/zed/zed.d/io-spare.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Replace a device with a hot spare in response to IO or checksum errors. +# Replace a device with a hot spare in response to IO or CHECKSUM errors. # The following actions will be performed automatically when the number # of errors exceed the limit set by ZED_SPARE_ON_IO_ERRORS or # ZED_SPARE_ON_CHECKSUM_ERRORS. @@ -21,106 +21,171 @@ # the majority of the expected hot spare functionality. # # Exit codes: -# 0: replaced by hot spare -# 1: no hot spare device available -# 2: hot sparing disabled -# 3: already faulted or degraded -# 4: unsupported event class -# 5: internal error +# 0: hot spare replacement successful +# 1: hot spare device not available +# 2: hot sparing disabled or threshold not reached +# 3: device already faulted or degraded +# 9: internal error + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +# Disabled by default. Enable in the zed.rc file. +: "${ZED_SPARE_ON_CHECKSUM_ERRORS:=0}" +: "${ZED_SPARE_ON_IO_ERRORS:=0}" + + +# query_vdev_status (pool, vdev) +# +# Given a [pool] and [vdev], return the matching vdev path & status on stdout. +# +# Warning: This function does not handle the case of [pool] or [vdev] +# containing whitespace. Beware of ShellCheck SC2046. Caveat emptor. +# +# Arguments +# pool: pool name +# vdev: virtual device name +# +# StdOut +# arg1: vdev pathname +# arg2: vdev status # -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" - -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 -test -n "${ZEVENT_VDEV_PATH}" || exit 5 -test -n "${ZEVENT_VDEV_GUID}" || exit 5 - -# Defaults to disabled, enable in the zed.rc file. -ZED_SPARE_ON_IO_ERRORS=${ZED_SPARE_ON_IO_ERRORS:-0} -ZED_SPARE_ON_CHECKSUM_ERRORS=${ZED_SPARE_ON_CHECKSUM_ERRORS:-0} - -if [ ${ZED_SPARE_ON_IO_ERRORS} -eq 0 -a \ - ${ZED_SPARE_ON_CHECKSUM_ERRORS} -eq 0 ]; then - exit 2 -fi - -# A lock file is used to serialize execution. -ZED_LOCKDIR=${ZED_LOCKDIR:-/var/lock} -LOCKFILE="${ZED_LOCKDIR}/zed.spare.lock" - -exec 8> "${LOCKFILE}" -flock -x 8 - -# Given a and return the status, (ONLINE, FAULTED, etc...). -vdev_status() { - local POOL=$1 - local VDEV=`basename $2` - local T=' ' # tab character since '\t' isn't portable - - ${ZPOOL} status ${POOL} | sed -n -e \ - "s,^[ $T]*\(.*$VDEV\(-part[0-9]\+\)\?\)[ $T]*\([A-Z]\+\).*,\1 \3,p" - return 0 +query_vdev_status() +{ + local pool="$1" + local vdev="$2" + local t + + vdev="$(basename -- "${vdev}")" + ([ -n "${pool}" ] && [ -n "${vdev}" ]) || return + t="$(printf '\t')" + + "${ZPOOL}" status "${pool}" 2>/dev/null | sed -n -e \ + "s,^[ $t]*\(.*${vdev}\(-part[0-9]\+\)\?\)[ $t]*\([A-Z]\+\).*,\1 \3,p" \ + | tail -1 } -# Fault devices after N I/O errors. -if [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.io" ]; then - ERRORS=`expr ${ZEVENT_VDEV_READ_ERRORS} + ${ZEVENT_VDEV_WRITE_ERRORS}` - - if [ ${ZED_SPARE_ON_IO_ERRORS} -gt 0 -a \ - ${ERRORS} -ge ${ZED_SPARE_ON_IO_ERRORS} ]; then - ACTION="fault" - fi -# Degrade devices after N checksum errors. -elif [ "${ZEVENT_CLASS}" = "ereport.fs.zfs.checksum" ]; then - ERRORS=${ZEVENT_VDEV_CKSUM_ERRORS} - - if [ ${ZED_SPARE_ON_CHECKSUM_ERRORS} -gt 0 -a \ - ${ERRORS} -ge ${ZED_SPARE_ON_CHECKSUM_ERRORS} ]; then - ACTION="degrade" - fi -else - ACTION= -fi - -if [ -n "${ACTION}" ]; then - - # Device is already FAULTED or DEGRADED - set -- `vdev_status ${ZEVENT_POOL} ${ZEVENT_VDEV_PATH}` - ZEVENT_VDEV_PATH_FOUND=$1 - STATUS=$2 - if [ "${STATUS}" = "FAULTED" -o "${STATUS}" = "DEGRADED" ]; then - exit 3 - fi - - # Step 1) FAULT or DEGRADE the device - # - ${ZINJECT} -d ${ZEVENT_VDEV_GUID} -A ${ACTION} ${ZEVENT_POOL} - - # Step 2) Set the SES fault beacon. - # - # XXX: Set the 'fault' or 'ident' beacon for the device. This can - # be done through the sg_ses utility, the only hard part is to map - # the sd device to its corresponding enclosure and slot. We may - # be able to leverage the existing vdev_id scripts for this. - # - # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3 - # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3 - - # Step 3) Replace the device with a hot spare. - # - # Round robin through the spares selecting those which are available. - # - for SPARE in ${ZEVENT_VDEV_SPARE_PATHS}; do - set -- `vdev_status ${ZEVENT_POOL} ${SPARE}` - SPARE_VDEV_FOUND=$1 - STATUS=$2 - if [ "${STATUS}" = "AVAIL" ]; then - ${ZPOOL} replace ${ZEVENT_POOL} \ - ${ZEVENT_VDEV_GUID} ${SPARE_VDEV_FOUND} && exit 0 - fi - done - - exit 1 -fi - -exit 4 + +# main +# +# Arguments +# none +# +# Return +# see above +# +main() +{ + local num_errors + local action + local lockfile + local vdev_path + local vdev_status + local spare + local zpool_err + local zpool_rv + local rv + + # Avoid hot-sparing a hot-spare. + # + # Note: ZEVENT_VDEV_PATH is not defined for ZEVENT_VDEV_TYPE=spare. + # + [ "${ZEVENT_VDEV_TYPE}" = "spare" ] && exit 2 + + [ -n "${ZEVENT_POOL}" ] || exit 9 + [ -n "${ZEVENT_VDEV_GUID}" ] || exit 9 + [ -n "${ZEVENT_VDEV_PATH}" ] || exit 9 + + zed_check_cmd "${ZPOOL}" "${ZINJECT}" || exit 9 + + # Fault the device after a given number of I/O errors. + # + if [ "${ZEVENT_SUBCLASS}" = "io" ]; then + if [ "${ZED_SPARE_ON_IO_ERRORS}" -gt 0 ]; then + num_errors=$((ZEVENT_VDEV_READ_ERRORS + ZEVENT_VDEV_WRITE_ERRORS)) + [ "${num_errors}" -ge "${ZED_SPARE_ON_IO_ERRORS}" ] \ + && action="fault" + fi 2>/dev/null + + # Degrade the device after a given number of checksum errors. + # + elif [ "${ZEVENT_SUBCLASS}" = "checksum" ]; then + if [ "${ZED_SPARE_ON_CHECKSUM_ERRORS}" -gt 0 ]; then + num_errors="${ZEVENT_VDEV_CKSUM_ERRORS}" + [ "${num_errors}" -ge "${ZED_SPARE_ON_CHECKSUM_ERRORS}" ] \ + && action="degrade" + fi 2>/dev/null + + else + zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" + exit 9 + fi + + # Error threshold not reached. + # + if [ -z "${action}" ]; then + exit 2 + fi + + lockfile="zed.spare.lock" + zed_lock "${lockfile}" + + # shellcheck disable=SC2046 + set -- $(query_vdev_status "${ZEVENT_POOL}" "${ZEVENT_VDEV_PATH}") + vdev_path="$1" + vdev_status="$2" + + # Device is already FAULTED or DEGRADED. + # + if [ "${vdev_status}" = "FAULTED" ] \ + || [ "${vdev_status}" = "DEGRADED" ]; then + rv=3 + + else + rv=1 + + # 1) FAULT or DEGRADE the device. + # + "${ZINJECT}" -d "${ZEVENT_VDEV_GUID}" -A "${action}" "${ZEVENT_POOL}" + + # 2) Set the SES fault beacon. + # + # TODO: Set the 'fault' or 'ident' beacon for the device. This can + # be done through the sg_ses utility. The only hard part is to map + # the sd device to its corresponding enclosure and slot. We may + # be able to leverage the existing vdev_id scripts for this. + # + # $ sg_ses --dev-slot-num=0 --set=ident /dev/sg3 + # $ sg_ses --dev-slot-num=0 --clear=ident /dev/sg3 + + # 3) Replace the device with a hot spare. + # + # Round-robin through the spares trying those that are available. + # + for spare in ${ZEVENT_VDEV_SPARE_PATHS}; do + + # shellcheck disable=SC2046 + set -- $(query_vdev_status "${ZEVENT_POOL}" "${spare}") + vdev_path="$1" + vdev_status="$2" + + [ "${vdev_status}" = "AVAIL" ] || continue + + zpool_err="$("${ZPOOL}" replace "${ZEVENT_POOL}" \ + "${ZEVENT_VDEV_GUID}" "${vdev_path}" 2>&1)"; zpool_rv=$? + + if [ "${zpool_rv}" -ne 0 ]; then + [ -n "${zpool_err}" ] && zed_log_err "zpool ${zpool_err}" + else + rv=0 + break + fi + done + fi + + zed_unlock "${lockfile}" + exit "${rv}" +} + + +main "$@" diff --git a/cmd/zed/zed.d/scrub.finish-email.sh b/cmd/zed/zed.d/scrub.finish-email.sh index d92ccfea1231..4a8155caf082 100755 --- a/cmd/zed/zed.d/scrub.finish-email.sh +++ b/cmd/zed/zed.d/scrub.finish-email.sh @@ -1,73 +1,63 @@ #!/bin/sh # # Send email to ZED_EMAIL in response to a RESILVER.FINISH or SCRUB.FINISH. -# By default, "zpool status" output will only be included in the email for -# a scrub.finish zevent if the pool is not healthy; to always include its -# output, set ZED_EMAIL_VERBOSE=1. +# +# By default, "zpool status" output will only be included for a scrub.finish +# zevent if the pool is not healthy; to always include its output, set +# ZED_EMAIL_VERBOSE=1. +# # Exit codes: # 0: email sent # 1: email failed -# 2: email suppressed -# 3: missing executable -# 4: unsupported event class -# 5: internal error -# -test -f "${ZED_ZEDLET_DIR}/zed.rc" && . "${ZED_ZEDLET_DIR}/zed.rc" +# 2: email not configured +# 3: email suppressed +# 9: internal error -test -n "${ZEVENT_POOL}" || exit 5 -test -n "${ZEVENT_SUBCLASS}" || exit 5 +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" -if test "${ZEVENT_SUBCLASS}" = "resilver.finish"; then - ACTION="resilvering" -elif test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then - ACTION="scrubbing" -else - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: unsupported event class \"${ZEVENT_SUBCLASS}\" - exit 4 -fi +[ -n "${ZED_EMAIL}" ] || exit 2 -# Only send email if ZED_EMAIL has been configured. -test -n "${ZED_EMAIL}" || exit 2 +[ -n "${ZEVENT_POOL}" ] || exit 9 +[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 -# Ensure requisite executables are installed. -if ! command -v "${MAIL:=mail}" >/dev/null 2>&1; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${MAIL}" not installed - exit 3 -fi -if ! test -x "${ZPOOL}"; then - logger -t "${ZED_SYSLOG_TAG:=zed}" \ - -p "${ZED_SYSLOG_PRIORITY:=daemon.warning}" \ - `basename "$0"`: "${ZPOOL}" not installed - exit 3 +if [ "${ZEVENT_SUBCLASS}" = "resilver.finish" ]; then + action="resilver" +elif [ "${ZEVENT_SUBCLASS}" = "scrub.finish" ]; then + action="scrub" +else + zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" + exit 9 fi +zed_check_cmd "mail" "${ZPOOL}" || exit 9 + # For scrub, suppress email if pool is healthy and verbosity is not enabled. -if test "${ZEVENT_SUBCLASS}" = "scrub.finish"; then - HEALTHY=`"${ZPOOL}" status -x "${ZEVENT_POOL}" | \ - grep "'${ZEVENT_POOL}' is healthy"` - test -n "${HEALTHY}" -a "${ZED_EMAIL_VERBOSE:=0}" = 0 && exit 2 +# +if [ "${ZEVENT_SUBCLASS}" = "scrub.finish" ]; then + healthy="$("${ZPOOL}" status -x "${ZEVENT_POOL}" \ + | grep "'${ZEVENT_POOL}' is healthy")" + [ -n "${healthy}" ] && [ "${ZED_EMAIL_VERBOSE}" -eq 0 ] && exit 3 fi -"${MAIL}" -s "ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on `hostname`" \ - "${ZED_EMAIL}" < "${email_pathname}" </dev/null 2>&1; then + zed_log_err "\"${cmd}\" not installed" + rv=$((rv + 1)) + fi + done + return "${rv}" +} + + +# zed_log_msg (msg, ...) +# +# Write all argument strings to the system log. +# +# Globals +# ZED_SYSLOG_PRIORITY +# ZED_SYSLOG_TAG +# +# Return +# nothing +# +zed_log_msg() +{ + logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "$@" +} + + +# zed_log_err (msg, ...) +# +# Write an error message to the system log. This message will contain the +# script name, EID, and all argument strings. +# +# Globals +# ZED_SYSLOG_PRIORITY +# ZED_SYSLOG_TAG +# ZEVENT_EID +# +# Return +# nothing +# +zed_log_err() +{ + logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "error:" \ + "$(basename -- "$0"):" "${ZEVENT_EID:+"eid=${ZEVENT_EID}:"}" "$@" +} + + +# zed_lock (lockfile, [fd]) +# +# Obtain an exclusive (write) lock on [lockfile]. If the lock cannot be +# immediately acquired, wait until it becomes available. +# +# Every zed_lock() must be paired with a corresponding zed_unlock(). +# +# By default, flock-style locks associate the lockfile with file descriptor 8. +# The bash manpage warns that file descriptors >9 should be used with care as +# they may conflict with file descriptors used internally by the shell. File +# descriptor 9 is reserved for zed_rate_limit(). If concurrent locks are held +# within the same process, they must use different file descriptors (preferably +# decrementing from 8); otherwise, obtaining a new lock with a given file +# descriptor will release the previous lock associated with that descriptor. +# +# Arguments +# lockfile: pathname of the lock file; the lock will be stored in +# ZED_LOCKDIR unless the pathname contains a "/". +# fd: integer for the file descriptor used by flock (OPTIONAL unless holding +# concurrent locks) +# +# Globals +# ZED_FLOCK_FD +# ZED_LOCKDIR +# +# Return +# nothing +# +zed_lock() +{ + local lockfile="$1" + local fd="${2:-${ZED_FLOCK_FD}}" + local umask_bak + local err + + [ -n "${lockfile}" ] || return + if ! expr "${lockfile}" : '.*/' >/dev/null 2>&1; then + lockfile="${ZED_LOCKDIR}/${lockfile}" + fi + + umask_bak="$(umask)" + umask 077 + + # Obtain a lock on the file bound to the given file descriptor. + # + eval "exec ${fd}> '${lockfile}'" + err="$(flock --exclusive "${fd}" 2>&1)" + if [ $? -ne 0 ]; then + zed_log_err "failed to lock \"${lockfile}\": ${err}" + fi + + umask "${umask_bak}" +} + + +# zed_unlock (lockfile, [fd]) +# +# Release the lock on [lockfile]. +# +# Arguments +# lockfile: pathname of the lock file +# fd: integer for the file descriptor used by flock (must match the file +# descriptor passed to the zed_lock function call) +# +# Globals +# ZED_FLOCK_FD +# ZED_LOCKDIR +# +# Return +# nothing +# +zed_unlock() +{ + local lockfile="$1" + local fd="${2:-${ZED_FLOCK_FD}}" + local err + + [ -n "${lockfile}" ] || return + if ! expr "${lockfile}" : '.*/' >/dev/null 2>&1; then + lockfile="${ZED_LOCKDIR}/${lockfile}" + fi + + # Release the lock and close the file descriptor. + # + err="$(flock --unlock "${fd}" 2>&1)" + if [ $? -ne 0 ]; then + zed_log_err "failed to unlock \"${lockfile}\": ${err}" + fi + eval "exec ${fd}>&-" +} + + +# zed_rate_limit (tag, [interval]) +# +# Check whether an event of a given type [tag] has already occurred within the +# last [interval] seconds. +# +# This function obtains a lock on the statefile using file descriptor 9. +# +# Arguments +# tag: arbitrary string for grouping related events to rate-limit +# interval: time interval in seconds (OPTIONAL) +# +# Globals +# ZED_EMAIL_INTERVAL_SECS +# ZED_RUNDIR +# +# Return +# 0 if the event should be processed +# 1 if the event should be dropped +# +# State File Format +# time;tag +# +zed_rate_limit() +{ + local tag="$1" + local interval="${2:-${ZED_EMAIL_INTERVAL_SECS}}" + local lockfile="zed.zedlet.state.lock" + local lockfile_fd=9 + local statefile="${ZED_RUNDIR}/zed.zedlet.state" + local time_now + local time_prev + local umask_bak + local rv=0 + + [ -n "${tag}" ] || return 0 + + zed_lock "${lockfile}" "${lockfile_fd}" + time_now="$(date +%s)" + time_prev="$(egrep "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \ + | tail -1 | cut -d\; -f1)" + + if [ -n "${time_prev}" ] \ + && [ "$((time_now - time_prev))" -lt "${interval}" ]; then + rv=1 + else + umask_bak="$(umask)" + umask 077 + egrep -v "^[0-9]+;${tag}\$" "${statefile}" 2>/dev/null \ + > "${statefile}.$$" + echo "${time_now};${tag}" >> "${statefile}.$$" + mv -f "${statefile}.$$" "${statefile}" + umask "${umask_bak}" + fi + + zed_unlock "${lockfile}" "${lockfile_fd}" + return "${rv}" +} diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 69989f95315b..4c53207d74dc 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -1,34 +1,60 @@ +## # zed.rc +## +## # Absolute path to the debug output file. +# #ZED_DEBUG_LOG="/tmp/zed.debug.log" +## # Email address of the zpool administrator. # Email will only be sent if ZED_EMAIL is defined. +# Disabled by default; uncomment to enable. +# #ZED_EMAIL="root" +## +# Minimum number of seconds between emails for a similar event. +# +#ZED_EMAIL_INTERVAL_SECS=3600 + +## # Email verbosity. # If set to 0, suppress email if the pool is healthy. # If set to 1, send email regardless of pool health. +# #ZED_EMAIL_VERBOSE=0 -# Minimum number of seconds between emails sent for a similar event. -#ZED_EMAIL_INTERVAL_SECS="3600" - +## # Default directory for zed lock files. +# #ZED_LOCKDIR="/var/lock" +## # Default directory for zed state files. +# #ZED_RUNDIR="/var/run" -# The syslog priority (eg, specified as a "facility.level" pair). +## +# Replace a device with a hot spare after N checksum errors are detected. +# Disabled by default; uncomment to enable. +# +#ZED_SPARE_ON_CHECKSUM_ERRORS=10 + +## +# Replace a device with a hot spare after N I/O errors are detected. +# Disabled by default; uncomment to enable. +# +#ZED_SPARE_ON_IO_ERRORS=1 + +## +# The syslog priority (e.g., specified as a "facility.level" pair). +# #ZED_SYSLOG_PRIORITY="daemon.notice" +## # The syslog tag for marking zed events. +# #ZED_SYSLOG_TAG="zed" -# Replace a device with a hot spare after N I/O errors are detected. -#ZED_SPARE_ON_IO_ERRORS=1 - -# Replace a device with a hot spare after N checksum errors are detected. -#ZED_SPARE_ON_CHECKSUM_ERRORS=10 From 20967ff1a4c2ff3f706505222dd66b3b15645596 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Thu, 26 Feb 2015 16:26:25 -0800 Subject: [PATCH 2/5] Replace "email" ZEDLETs with "notify" ZEDLETs Several ZEDLETs already exist for sending email in reponse to a particular zevent. While email is ubiquitous, alternative methods may be better suited for some configurations. Instead of duplicating the "email" ZEDLETs for every future notification method, it is preferable to abstract the notification method into a function. This has the added benefit of reducing the amount of code duplicated between ZEDLETs, and allowing related bugs to be fixed in a single location. This commit replaces the existing "email" ZEDLETs with corresponding "notify" ZEDLETs. In addition, the ZEDLET code for sending an email message has been moved into the zed_notify_email() function. And this zed_notify_email() has been added to a generic zed_notify() function for sending notifications via all available methods that have been configured. This commit also changes a couple of related zed.rc variables. ZED_EMAIL_INTERVAL_SECS is changed to ZED_NOTIFY_INTERVAL_SECS, and ZED_EMAIL_VERBOSE is changed to ZED_NOTIFY_VERBOSE. Note that ZED_EMAIL remains unchanged as its use is solely for the email notification method. Signed-off-by: Chris Dunlap --- cmd/zed/Makefile.am | 22 +++---- cmd/zed/zed.d/checksum-email.sh | 1 - cmd/zed/zed.d/checksum-notify.sh | 1 + cmd/zed/zed.d/data-email.sh | 53 ---------------- cmd/zed/zed.d/data-notify.sh | 45 ++++++++++++++ cmd/zed/zed.d/generic-email.sh | 59 ------------------ cmd/zed/zed.d/generic-notify.sh | 54 +++++++++++++++++ cmd/zed/zed.d/io-email.sh | 57 ------------------ cmd/zed/zed.d/io-notify.sh | 55 +++++++++++++++++ cmd/zed/zed.d/resilver.finish-email.sh | 1 - cmd/zed/zed.d/resilver.finish-notify.sh | 1 + cmd/zed/zed.d/scrub.finish-email.sh | 63 ------------------- cmd/zed/zed.d/scrub.finish-notify.sh | 59 ++++++++++++++++++ cmd/zed/zed.d/zed-functions.sh | 80 +++++++++++++++++++++++-- cmd/zed/zed.d/zed.rc | 18 +++--- 15 files changed, 311 insertions(+), 258 deletions(-) delete mode 120000 cmd/zed/zed.d/checksum-email.sh create mode 120000 cmd/zed/zed.d/checksum-notify.sh delete mode 100755 cmd/zed/zed.d/data-email.sh create mode 100755 cmd/zed/zed.d/data-notify.sh delete mode 100755 cmd/zed/zed.d/generic-email.sh create mode 100755 cmd/zed/zed.d/generic-notify.sh delete mode 100755 cmd/zed/zed.d/io-email.sh create mode 100755 cmd/zed/zed.d/io-notify.sh delete mode 120000 cmd/zed/zed.d/resilver.finish-email.sh create mode 120000 cmd/zed/zed.d/resilver.finish-notify.sh delete mode 100755 cmd/zed/zed.d/scrub.finish-email.sh create mode 100755 cmd/zed/zed.d/scrub.finish-notify.sh diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 8bdc097c77a3..09be2ca6c3ea 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -43,24 +43,24 @@ zedexecdir = $(libexecdir)/zfs/zed.d dist_zedexec_SCRIPTS = \ $(top_srcdir)/cmd/zed/zed.d/all-debug.sh \ $(top_srcdir)/cmd/zed/zed.d/all-syslog.sh \ - $(top_srcdir)/cmd/zed/zed.d/checksum-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/checksum-notify.sh \ $(top_srcdir)/cmd/zed/zed.d/checksum-spare.sh \ - $(top_srcdir)/cmd/zed/zed.d/data-email.sh \ - $(top_srcdir)/cmd/zed/zed.d/generic-email.sh \ - $(top_srcdir)/cmd/zed/zed.d/io-email.sh \ + $(top_srcdir)/cmd/zed/zed.d/data-notify.sh \ + $(top_srcdir)/cmd/zed/zed.d/generic-notify.sh \ + $(top_srcdir)/cmd/zed/zed.d/io-notify.sh \ $(top_srcdir)/cmd/zed/zed.d/io-spare.sh \ - $(top_srcdir)/cmd/zed/zed.d/resilver.finish-email.sh \ - $(top_srcdir)/cmd/zed/zed.d/scrub.finish-email.sh + $(top_srcdir)/cmd/zed/zed.d/resilver.finish-notify.sh \ + $(top_srcdir)/cmd/zed/zed.d/scrub.finish-notify.sh zedconfdefaults = \ all-syslog.sh \ - checksum-email.sh \ + checksum-notify.sh \ checksum-spare.sh \ - data-email.sh \ - io-email.sh \ + data-notify.sh \ + io-notify.sh \ io-spare.sh \ - resilver.finish-email.sh \ - scrub.finish-email.sh + resilver.finish-notify.sh \ + scrub.finish-notify.sh install-data-local: $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" diff --git a/cmd/zed/zed.d/checksum-email.sh b/cmd/zed/zed.d/checksum-email.sh deleted file mode 120000 index f95bec21532a..000000000000 --- a/cmd/zed/zed.d/checksum-email.sh +++ /dev/null @@ -1 +0,0 @@ -io-email.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/checksum-notify.sh b/cmd/zed/zed.d/checksum-notify.sh new file mode 120000 index 000000000000..9008738073c9 --- /dev/null +++ b/cmd/zed/zed.d/checksum-notify.sh @@ -0,0 +1 @@ +io-notify.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/data-email.sh b/cmd/zed/zed.d/data-email.sh deleted file mode 100755 index 2dae8ff6b468..000000000000 --- a/cmd/zed/zed.d/data-email.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a DATA error. -# -# Only one email per ZED_EMAIL_INTERVAL_SECS will be sent for a given -# class/pool combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool. -# -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email not configured -# 3: email suppressed -# 9: internal error - -[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" -. "${ZED_ZEDLET_DIR}/zed-functions.sh" - -[ -n "${ZED_EMAIL}" ] || exit 2 - -[ -n "${ZEVENT_POOL}" ] || exit 9 -[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 - -if [ "${ZEVENT_SUBCLASS}" != "data" ]; then \ - zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" - exit 9 -fi - -zed_check_cmd "mail" || exit 9 - -zed_rate_limit "${ZEVENT_POOL};${ZEVENT_SUBCLASS};email" || exit 3 - -umask 077 -email_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)" -email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" -cat > "${email_pathname}" < "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/generic-email.sh b/cmd/zed/zed.d/generic-email.sh deleted file mode 100755 index ad022e034388..000000000000 --- a/cmd/zed/zed.d/generic-email.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a given zevent. -# -# This is a generic script than can be symlinked to a file in the -# enabled-zedlets directory to have an email sent when a particular class of -# zevents occurs. The symlink filename must begin with the zevent (sub)class -# string (e.g., "probe_failure-email.sh" for the "probe_failure" subclass). -# Refer to the zed(8) manpage for details. -# -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email not configured -# 3: email suppressed - -[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" -. "${ZED_ZEDLET_DIR}/zed-functions.sh" - -[ -n "${ZED_EMAIL}" ] || exit 2 - -# Rate-limit the message based in part on the filename. -# -rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")" -rate_limit_interval="${ZED_EMAIL_INTERVAL_SECS}" -zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3 - -umask 077 -pool_str="${ZEVENT_POOL:+" for ${ZEVENT_POOL}"}" -host_str=" on $(hostname)" -email_subject="ZFS ${ZEVENT_SUBCLASS} event${pool_str}${host_str}" -email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" -{ - echo "ZFS has posted the following event:" - echo - echo " eid: ${ZEVENT_EID}" - echo " class: ${ZEVENT_SUBCLASS}" - echo " host: $(hostname)" - echo " time: ${ZEVENT_TIME_STRING}" - - if [ -n "${ZEVENT_VDEV_PATH}" ]; then - echo " vpath: ${ZEVENT_VDEV_PATH}" - [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}" - fi - - [ -n "${ZEVENT_POOL}" ] && [ -x "${ZPOOL}" ] \ - && "${ZPOOL}" status "${ZEVENT_POOL}" - -} > "${email_pathname}" - -mail -s "${email_subject}" "${ZED_EMAIL}" < "${email_pathname}" -mail_status=$? - -if [ "${mail_status}" -ne 0 ]; then - zed_log_msg "mail exit=${mail_status}" - exit 1 -fi -rm -f "${email_pathname}" -exit 0 diff --git a/cmd/zed/zed.d/generic-notify.sh b/cmd/zed/zed.d/generic-notify.sh new file mode 100755 index 000000000000..e438031a088a --- /dev/null +++ b/cmd/zed/zed.d/generic-notify.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# +# Send notification in response to a given zevent. +# +# This is a generic script than can be symlinked to a file in the +# enabled-zedlets directory to have a notification sent when a particular +# class of zevents occurs. The symlink filename must begin with the zevent +# (sub)class string (e.g., "probe_failure-notify.sh" for the "probe_failure" +# subclass). Refer to the zed(8) manpage for details. +# +# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given +# class/pool combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool. +# +# Exit codes: +# 0: notification sent +# 1: notification failed +# 2: notification not configured +# 3: notification suppressed + +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" +. "${ZED_ZEDLET_DIR}/zed-functions.sh" + +# Rate-limit the notification based in part on the filename. +# +rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")" +rate_limit_interval="${ZED_NOTIFY_INTERVAL_SECS}" +zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3 + +umask 077 +pool_str="${ZEVENT_POOL:+" for ${ZEVENT_POOL}"}" +host_str=" on $(hostname)" +note_subject="ZFS ${ZEVENT_SUBCLASS} event${pool_str}${host_str}" +note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" +{ + echo "ZFS has posted the following event:" + echo + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" + + [ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}" + [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}" + [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}" + + [ -n "${ZEVENT_POOL}" ] && [ -x "${ZPOOL}" ] \ + && "${ZPOOL}" status "${ZEVENT_POOL}" + +} > "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/io-email.sh b/cmd/zed/zed.d/io-email.sh deleted file mode 100755 index 1854b15933ae..000000000000 --- a/cmd/zed/zed.d/io-email.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a CHECKSUM or IO error. -# -# Only one email per ZED_EMAIL_INTERVAL_SECS will be sent for a given -# class/pool/vdev combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool/device. -# -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email not configured -# 3: email suppressed -# 9: internal error - -[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" -. "${ZED_ZEDLET_DIR}/zed-functions.sh" - -[ -n "${ZED_EMAIL}" ] || exit 2 - -[ -n "${ZEVENT_POOL}" ] || exit 9 -[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 -[ -n "${ZEVENT_VDEV_PATH}" ] || exit 9 - -if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \ - && [ "${ZEVENT_SUBCLASS}" != "io" ]; then - zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" - exit 9 -fi - -zed_check_cmd "mail" || exit 9 - -zed_rate_limit "${ZEVENT_POOL};${ZEVENT_VDEV_PATH};${ZEVENT_SUBCLASS};email" \ - || exit 3 - -umask 077 -email_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)" -email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" -cat > "${email_pathname}" < "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/resilver.finish-email.sh b/cmd/zed/zed.d/resilver.finish-email.sh deleted file mode 120000 index 1afad3258d5e..000000000000 --- a/cmd/zed/zed.d/resilver.finish-email.sh +++ /dev/null @@ -1 +0,0 @@ -scrub.finish-email.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/resilver.finish-notify.sh b/cmd/zed/zed.d/resilver.finish-notify.sh new file mode 120000 index 000000000000..2635dcce118b --- /dev/null +++ b/cmd/zed/zed.d/resilver.finish-notify.sh @@ -0,0 +1 @@ +scrub.finish-notify.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/scrub.finish-email.sh b/cmd/zed/zed.d/scrub.finish-email.sh deleted file mode 100755 index 4a8155caf082..000000000000 --- a/cmd/zed/zed.d/scrub.finish-email.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/sh -# -# Send email to ZED_EMAIL in response to a RESILVER.FINISH or SCRUB.FINISH. -# -# By default, "zpool status" output will only be included for a scrub.finish -# zevent if the pool is not healthy; to always include its output, set -# ZED_EMAIL_VERBOSE=1. -# -# Exit codes: -# 0: email sent -# 1: email failed -# 2: email not configured -# 3: email suppressed -# 9: internal error - -[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" -. "${ZED_ZEDLET_DIR}/zed-functions.sh" - -[ -n "${ZED_EMAIL}" ] || exit 2 - -[ -n "${ZEVENT_POOL}" ] || exit 9 -[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 - -if [ "${ZEVENT_SUBCLASS}" = "resilver.finish" ]; then - action="resilver" -elif [ "${ZEVENT_SUBCLASS}" = "scrub.finish" ]; then - action="scrub" -else - zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" - exit 9 -fi - -zed_check_cmd "mail" "${ZPOOL}" || exit 9 - -# For scrub, suppress email if pool is healthy and verbosity is not enabled. -# -if [ "${ZEVENT_SUBCLASS}" = "scrub.finish" ]; then - healthy="$("${ZPOOL}" status -x "${ZEVENT_POOL}" \ - | grep "'${ZEVENT_POOL}' is healthy")" - [ -n "${healthy}" ] && [ "${ZED_EMAIL_VERBOSE}" -eq 0 ] && exit 3 -fi - -umask 077 -email_subject="ZFS ${ZEVENT_SUBCLASS} event for ${ZEVENT_POOL} on $(hostname)" -email_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" -cat > "${email_pathname}" < "${note_pathname}" + +zed_notify "${note_subject}" "${note_pathname}"; rv=$? +rm -f "${note_pathname}" +exit "${rv}" diff --git a/cmd/zed/zed.d/zed-functions.sh b/cmd/zed/zed.d/zed-functions.sh index b42911b2ee48..14909d38cb54 100644 --- a/cmd/zed/zed.d/zed-functions.sh +++ b/cmd/zed/zed.d/zed-functions.sh @@ -5,9 +5,9 @@ # Variable Defaults # -: "${ZED_EMAIL_INTERVAL_SECS:=3600}" -: "${ZED_EMAIL_VERBOSE:=0}" : "${ZED_LOCKDIR:="/var/lock"}" +: "${ZED_NOTIFY_INTERVAL_SECS:=3600}" +: "${ZED_NOTIFY_VERBOSE:=0}" : "${ZED_RUNDIR:="/var/run"}" : "${ZED_SYSLOG_PRIORITY:="daemon.notice"}" : "${ZED_SYSLOG_TAG:="zed"}" @@ -171,6 +171,78 @@ zed_unlock() } +# zed_notify (subject, pathname) +# +# Send a notification via all available methods. +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Return +# 0: notification succeeded via at least one method +# 1: notification failed +# 2: no notification methods configured +# +zed_notify() +{ + local subject="$1" + local pathname="$2" + local num_success=0 + local num_failure=0 + + zed_notify_email "${subject}" "${pathname}"; rv=$? + [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) + [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + + [ "${num_success}" -gt 0 ] && return 0 + [ "${num_failure}" -gt 0 ] && return 1 + return 2 +} + + +# zed_notify_email (subject, pathname) +# +# Send a notification via email to the address specified by ZED_EMAIL. +# +# Requires the mail executable to be installed in the standard PATH. +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Globals +# ZED_EMAIL +# +# Return +# 0: notification sent +# 1: notification failed +# 2: not configured +# +zed_notify_email() +{ + local subject="$1" + local pathname="${2:-"/dev/null"}" + + [ -n "${ZED_EMAIL}" ] || return 2 + + [ -n "${subject}" ] || return 1 + if [ ! -r "${pathname}" ]; then + zed_log_err "mail cannot read \"${pathname}\"" + return 1 + fi + + zed_check_cmd "mail" || return 1 + + mail -s "${subject}" "${ZED_EMAIL}" < "${pathname}" >/dev/null 2>&1; rv=$? + if [ "${rv}" -ne 0 ]; then + zed_log_err "mail exit=${rv}" + return 1 + fi + return 0 +} + + # zed_rate_limit (tag, [interval]) # # Check whether an event of a given type [tag] has already occurred within the @@ -183,7 +255,7 @@ zed_unlock() # interval: time interval in seconds (OPTIONAL) # # Globals -# ZED_EMAIL_INTERVAL_SECS +# ZED_NOTIFY_INTERVAL_SECS # ZED_RUNDIR # # Return @@ -196,7 +268,7 @@ zed_unlock() zed_rate_limit() { local tag="$1" - local interval="${2:-${ZED_EMAIL_INTERVAL_SECS}}" + local interval="${2:-${ZED_NOTIFY_INTERVAL_SECS}}" local lockfile="zed.zedlet.state.lock" local lockfile_fd=9 local statefile="${ZED_RUNDIR}/zed.zedlet.state" diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 4c53207d74dc..05f84c877304 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -8,28 +8,28 @@ #ZED_DEBUG_LOG="/tmp/zed.debug.log" ## -# Email address of the zpool administrator. +# Email address of the zpool administrator for receipt of notifications. # Email will only be sent if ZED_EMAIL is defined. # Disabled by default; uncomment to enable. # #ZED_EMAIL="root" ## -# Minimum number of seconds between emails for a similar event. +# Default directory for zed lock files. # -#ZED_EMAIL_INTERVAL_SECS=3600 +#ZED_LOCKDIR="/var/lock" ## -# Email verbosity. -# If set to 0, suppress email if the pool is healthy. -# If set to 1, send email regardless of pool health. +# Minimum number of seconds between notifications for a similar event. # -#ZED_EMAIL_VERBOSE=0 +#ZED_NOTIFY_INTERVAL_SECS=3600 ## -# Default directory for zed lock files. +# Notification verbosity. +# If set to 0, suppress notification if the pool is healthy. +# If set to 1, send notification regardless of pool health. # -#ZED_LOCKDIR="/var/lock" +#ZED_NOTIFY_VERBOSE=0 ## # Default directory for zed state files. From a0d065fa922d2ce2e07785eb3d35757763c644bf Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Thu, 26 Feb 2015 16:26:48 -0800 Subject: [PATCH 3/5] Add support for Pushbullet notifications This commit adds the zed_notify_pushbullet() function and hooks it into zed_notify(), thereby integrating it with the existing "notify" ZEDLETs. This enables ZED to push notifications to your desktop computer and/or mobile device(s). It is configured with the ZED_PUSHBULLET_ACCESS_TOKEN and ZED_PUSHBULLET_CHANNEL_TAG variables in zed.rc. https://www.pushbullet.com/ The Makefile install-data-local target has been replaced with install-data-hook. With the "-local" target, there is no particular guarantee of execution order. But with the zed.rc now potentially containing sensitive information (i.e., the Pushbullet access token), the recommended permissions have changed to 0600. The "-hook" target is always executed after the main rule's work is done; thus, the chmod will always take place after the zed.rc file has been installed. https://www.gnu.org/software/automake/manual/automake.html#Extending Signed-off-by: Chris Dunlap --- cmd/zed/Makefile.am | 3 +- cmd/zed/zed.d/zed-functions.sh | 90 ++++++++++++++++++++++++++++++++++ cmd/zed/zed.d/zed.rc | 20 ++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am index 09be2ca6c3ea..f0d22411d6f9 100644 --- a/cmd/zed/Makefile.am +++ b/cmd/zed/Makefile.am @@ -62,10 +62,11 @@ zedconfdefaults = \ resilver.finish-notify.sh \ scrub.finish-notify.sh -install-data-local: +install-data-hook: $(MKDIR_P) "$(DESTDIR)$(zedconfdir)" for f in $(zedconfdefaults); do \ test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ done + chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" diff --git a/cmd/zed/zed.d/zed-functions.sh b/cmd/zed/zed.d/zed-functions.sh index 14909d38cb54..cc4f69667ee7 100644 --- a/cmd/zed/zed.d/zed-functions.sh +++ b/cmd/zed/zed.d/zed-functions.sh @@ -195,6 +195,10 @@ zed_notify() [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + zed_notify_pushbullet "${subject}" "${pathname}"; rv=$? + [ "${rv}" -eq 0 ] && num_success=$((num_success + 1)) + [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1)) + [ "${num_success}" -gt 0 ] && return 0 [ "${num_failure}" -gt 0 ] && return 1 return 2 @@ -243,6 +247,92 @@ zed_notify_email() } +# zed_notify_pushbullet (subject, pathname) +# +# Send a notification via Pushbullet . +# The access token (ZED_PUSHBULLET_ACCESS_TOKEN) identifies this client to the +# Pushbullet server. The optional channel tag (ZED_PUSHBULLET_CHANNEL_TAG) is +# for pushing to notification feeds that can be subscribed to; if a channel is +# not defined, push notifications will instead be sent to all devices +# associated with the account specified by the access token. +# +# Requires awk, curl, and sed executables to be installed in the standard PATH. +# +# References +# https://docs.pushbullet.com/ +# https://www.pushbullet.com/security +# +# Arguments +# subject: notification subject +# pathname: pathname containing the notification message (OPTIONAL) +# +# Globals +# ZED_PUSHBULLET_ACCESS_TOKEN +# ZED_PUSHBULLET_CHANNEL_TAG +# +# Return +# 0: notification sent +# 1: notification failed +# 2: not configured +# +zed_notify_pushbullet() +{ + local subject="$1" + local pathname="${2:-"/dev/null"}" + local msg_body + local msg_tag + local msg_json + local msg_out + local msg_err + local url="https://api.pushbullet.com/v2/pushes" + + [ -n "${ZED_PUSHBULLET_ACCESS_TOKEN}" ] || return 2 + + [ -n "${subject}" ] || return 1 + if [ ! -r "${pathname}" ]; then + zed_log_err "pushbullet cannot read \"${pathname}\"" + return 1 + fi + + zed_check_cmd "awk" "curl" "sed" || return 1 + + # Escape the following characters in the message body for JSON: + # newline, backslash, double quote, horizontal tab, vertical tab, + # and carriage return. + # + msg_body="$(awk '{ ORS="\\n" } { gsub(/\\/, "\\\\"); gsub(/"/, "\\\""); + gsub(/\t/, "\\t"); gsub(/\f/, "\\f"); gsub(/\r/, "\\r"); print }' \ + "${pathname}")" + + # Push to a channel if one is configured. + # + [ -n "${ZED_PUSHBULLET_CHANNEL_TAG}" ] && msg_tag="$(printf \ + '"channel_tag": "%s", ' "${ZED_PUSHBULLET_CHANNEL_TAG}")" + + # Construct the JSON message for pushing a note. + # + msg_json="$(printf '{%s"type": "note", "title": "%s", "body": "%s"}' \ + "${msg_tag}" "${subject}" "${msg_body}")" + + # Send the POST request and check for errors. + # + msg_out="$(curl -u "${ZED_PUSHBULLET_ACCESS_TOKEN}:" -X POST "${url}" \ + --header "Content-Type: application/json" --data-binary "${msg_json}" \ + 2>/dev/null)"; rv=$? + if [ "${rv}" -ne 0 ]; then + zed_log_err "curl exit=${rv}" + return 1 + fi + msg_err="$(echo "${msg_out}" \ + | sed -n -e 's/.*"error" *:.*"message" *: *"\([^"]*\)".*/\1/p')" + if [ -n "${msg_err}" ]; then + zed_log_err "pushbullet \"${msg_err}"\" + return 1 + fi + return 0 +} + + # zed_rate_limit (tag, [interval]) # # Check whether an event of a given type [tag] has already occurred within the diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 05f84c877304..c2336287f201 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -1,5 +1,7 @@ ## # zed.rc +# +# This file should be owned by root and permissioned 0600. ## ## @@ -31,6 +33,24 @@ # #ZED_NOTIFY_VERBOSE=0 +## +# Pushbullet access token. +# This grants full access to your account -- protect it accordingly! +# +# +# Disabled by default; uncomment to enable. +# +#ZED_PUSHBULLET_ACCESS_TOKEN="" + +## +# Pushbullet channel tag for push notification feeds that can be subscribed to. +# +# If not defined, push notifications will instead be sent to all devices +# associated with the account specified by the access token. +# Disabled by default; uncomment to enable. +# +#ZED_PUSHBULLET_CHANNEL_TAG="" + ## # Default directory for zed state files. # From 090b19158a7f776586369a593b271392cabbf175 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Thu, 19 Mar 2015 17:40:54 -0700 Subject: [PATCH 4/5] Add notification to io-spare.sh The io-spare.sh ZEDLET does not generate a notification when a failing device is replaced with a hot spare. Maybe it should tell someone. This commit adds a notification message to the io-spare.sh ZEDLET. This notification is triggered when a failing device is successfully replaced with a hot spare after encountering a checksum or io error. Signed-off-by: Chris Dunlap --- cmd/zed/zed.d/io-spare.sh | 56 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/cmd/zed/zed.d/io-spare.sh b/cmd/zed/zed.d/io-spare.sh index 9667dedcb7ca..1835cb4a3d22 100755 --- a/cmd/zed/zed.d/io-spare.sh +++ b/cmd/zed/zed.d/io-spare.sh @@ -66,6 +66,51 @@ query_vdev_status() } +# notify (old_vdev, new_vdev, num_errors) +# +# Send a notification regarding the hot spare replacement. +# +# Arguments +# old_vdev: path of old vdev that has failed +# new_vdev: path of new vdev used as the hot spare replacement +# num_errors: number of errors that triggered this replacement +# +notify() +{ + local old_vdev="$1" + local new_vdev="$2" + local num_errors="$3" + local note_subject + local note_pathname + local s + local rv + + umask 077 + note_subject="ZFS hot spare replacement for ${ZEVENT_POOL} on $(hostname)" + note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" + { + [ "${num_errors}" -ne 1 ] 2>/dev/null && s="s" + + echo "ZFS has replaced a failing device with a hot spare after" \ + "${num_errors} ${ZEVENT_SUBCLASS} error${s}:" + echo + echo " eid: ${ZEVENT_EID}" + echo " class: ${ZEVENT_SUBCLASS}" + echo " host: $(hostname)" + echo " time: ${ZEVENT_TIME_STRING}" + echo " old: ${old_vdev}" + echo " new: ${new_vdev}" + + "${ZPOOL}" status "${ZEVENT_POOL}" + + } > "${note_pathname}" + + zed_notify "${note_subject}" "${note_pathname}"; rv=$? + rm -f "${note_pathname}" + return "${rv}" +} + + # main # # Arguments @@ -82,6 +127,8 @@ main() local vdev_path local vdev_status local spare + local spare_path + local spare_status local zpool_err local zpool_rv local rv @@ -166,17 +213,18 @@ main() # shellcheck disable=SC2046 set -- $(query_vdev_status "${ZEVENT_POOL}" "${spare}") - vdev_path="$1" - vdev_status="$2" + spare_path="$1" + spare_status="$2" - [ "${vdev_status}" = "AVAIL" ] || continue + [ "${spare_status}" = "AVAIL" ] || continue zpool_err="$("${ZPOOL}" replace "${ZEVENT_POOL}" \ - "${ZEVENT_VDEV_GUID}" "${vdev_path}" 2>&1)"; zpool_rv=$? + "${ZEVENT_VDEV_GUID}" "${spare_path}" 2>&1)"; zpool_rv=$? if [ "${zpool_rv}" -ne 0 ]; then [ -n "${zpool_err}" ] && zed_log_err "zpool ${zpool_err}" else + notify "${vdev_path}" "${spare_path}" "${num_errors}" rv=0 break fi From ce119da33daa9c51b55fb27ea30a1495a4e73885 Mon Sep 17 00:00:00 2001 From: Chris Dunlap Date: Wed, 25 Mar 2015 13:10:32 -0700 Subject: [PATCH 5/5] Combine data-notify.sh with io-notify.sh The data-notify.sh ZEDLET serves a very similar purpose to io-notify.sh, namely, to generate a notification in response to a particular error event. Initially, data-notify.sh was separated from io-notify.sh since the "data" zevent does not (as I understand it) pertain to a specific vdev device. This stands in contrast to the "checksum" and "io" zevents (both handled by io-notify.sh) that can be attributed to a specific vdev. At the time, it seemed simpler to handle these two cases in separate scripts. This commit adds support for the "data" zevent to io-notify.sh, and symlinks io-notify.sh to data-notify.sh. It also adds the counts for vdev_read_errors, vdev_write_errors, and vdev_cksum_errors to the notification message. Signed-off-by: Chris Dunlap --- cmd/zed/zed.d/data-notify.sh | 46 +----------------------------------- cmd/zed/zed.d/io-notify.sh | 21 +++++++++++----- 2 files changed, 16 insertions(+), 51 deletions(-) mode change 100755 => 120000 cmd/zed/zed.d/data-notify.sh diff --git a/cmd/zed/zed.d/data-notify.sh b/cmd/zed/zed.d/data-notify.sh deleted file mode 100755 index 6b65c3815f50..000000000000 --- a/cmd/zed/zed.d/data-notify.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/sh -# -# Send notification in response to a DATA error. -# -# Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given -# class/pool combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool. -# -# Exit codes: -# 0: notification sent -# 1: notification failed -# 2: notification not configured -# 3: notification suppressed -# 9: internal error - -[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" -. "${ZED_ZEDLET_DIR}/zed-functions.sh" - -[ -n "${ZEVENT_POOL}" ] || exit 9 -[ -n "${ZEVENT_SUBCLASS}" ] || exit 9 - -if [ "${ZEVENT_SUBCLASS}" != "data" ]; then \ - zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" - exit 9 -fi - -zed_rate_limit "${ZEVENT_POOL};${ZEVENT_SUBCLASS};notify" || exit 3 - -umask 077 -note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)" -note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" -{ - echo "ZFS has detected a ${ZEVENT_SUBCLASS} error:" - echo - echo " eid: ${ZEVENT_EID}" - echo " class: ${ZEVENT_SUBCLASS}" - echo " host: $(hostname)" - echo " time: ${ZEVENT_TIME_STRING}" - echo " pool: ${ZEVENT_POOL}" - -} > "${note_pathname}" - -zed_notify "${note_subject}" "${note_pathname}"; rv=$? -rm -f "${note_pathname}" -exit "${rv}" diff --git a/cmd/zed/zed.d/data-notify.sh b/cmd/zed/zed.d/data-notify.sh new file mode 120000 index 000000000000..9008738073c9 --- /dev/null +++ b/cmd/zed/zed.d/data-notify.sh @@ -0,0 +1 @@ +io-notify.sh \ No newline at end of file diff --git a/cmd/zed/zed.d/io-notify.sh b/cmd/zed/zed.d/io-notify.sh index 46b066945d0c..3ce918ad70a7 100755 --- a/cmd/zed/zed.d/io-notify.sh +++ b/cmd/zed/zed.d/io-notify.sh @@ -1,10 +1,10 @@ #!/bin/sh # -# Send notification in response to a CHECKSUM or IO error. +# Send notification in response to a CHECKSUM, DATA, or IO error. # # Only one notification per ZED_NOTIFY_INTERVAL_SECS will be sent for a given -# class/pool/vdev combination. This protects against spamming the recipient -# should multiple events occur together in time for the same pool/device. +# class/pool/[vdev] combination. This protects against spamming the recipient +# should multiple events occur together in time for the same pool/[vdev]. # # Exit codes: # 0: notification sent @@ -18,16 +18,16 @@ [ -n "${ZEVENT_POOL}" ] || exit 9 [ -n "${ZEVENT_SUBCLASS}" ] || exit 9 -[ -n "${ZEVENT_VDEV_GUID}" ] || exit 9 if [ "${ZEVENT_SUBCLASS}" != "checksum" ] \ + && [ "${ZEVENT_SUBCLASS}" != "data" ] \ && [ "${ZEVENT_SUBCLASS}" != "io" ]; then zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\"" exit 9 fi -zed_rate_limit "${ZEVENT_POOL};${ZEVENT_VDEV_GUID};${ZEVENT_SUBCLASS};notify" \ - || exit 3 +rate_limit_tag="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify" +zed_rate_limit "${rate_limit_tag}" || exit 3 umask 077 note_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)" @@ -46,6 +46,15 @@ note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$" [ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}" [ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}" + [ -n "${ZEVENT_VDEV_CKSUM_ERRORS}" ] \ + && echo " cksum: ${ZEVENT_VDEV_CKSUM_ERRORS}" + + [ -n "${ZEVENT_VDEV_READ_ERRORS}" ] \ + && echo " read: ${ZEVENT_VDEV_READ_ERRORS}" + + [ -n "${ZEVENT_VDEV_WRITE_ERRORS}" ] \ + && echo " write: ${ZEVENT_VDEV_WRITE_ERRORS}" + echo " pool: ${ZEVENT_POOL}" } > "${note_pathname}"