zpool iostat/status -c improvements

Users can now provide their own scripts to be run
with 'zpool iostat/status -c'. User scripts should be
placed in ~/.zpool.d to be included in zpool's
default search path.

Provide a script which can be used with
'zpool iostat/status -c' that will return the type of
device (hdd, ssd, file).

Provide a script to get various values from smartctl
when using 'zpool iostat/status -c'.

Allow users to define the ZPOOL_SCRIPTS_PATH
environment variable which can be used to override
the default 'zpool iostat/status -c' search path.

Allow the ZPOOL_SCRIPTS_ENABLED environment
variable to enable or disable 'zpool status/iostat -c'
functionality.

Use the new smart script to provide the serial command.

Install /etc/sudoers.d/zfs file which contains the sudoer
rule for smartctl as a sample.

Allow 'zpool iostat/status -c' tests to run in tree.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Closes #6121 
Closes #6153
This commit is contained in:
Giuseppe Di Natale
2017-06-05 13:52:15 -04:00
committed by Brian Behlendorf
parent 92aceb2a7e
commit 099700d9df
43 changed files with 812 additions and 61 deletions
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+3 -4
View File
@@ -38,16 +38,15 @@
# DISC-ZERO discard zeroes data
#
# If the script is run as just 'lsblk' then print out disk size, vendor,
# model number and serial number.
# and model number.
helpstr="
label: Show filesystem label.
model: Show disk model number.
serial: Show disk serial number.
size: Show the disk capacity.
vendor: Show the disk vendor.
lsblk: Show the disk size, vendor, model number, and serial number."
lsblk: Show the disk size, vendor, and model number."
script=$(basename "$0")
@@ -57,7 +56,7 @@ if [ "$1" = "-h" ] ; then
fi
if [ "$script" = "lsblk" ] ; then
list="size vendor model serial"
list="size vendor model"
else
list=$(echo "$script" | tr '[:upper:]' '[:lower:]')
fi
+27
View File
@@ -0,0 +1,27 @@
#!/bin/sh
#
# Print out the type of device
#
# Reads:   $VDEV_UPATH - path of the vdev to classify.
# Usage:   media [-h]    (-h prints a one-line description and exits)
# Output:  a single "media=<type>" line on stdout, where <type> is
#          "ssd" or "hdd" for a block device (taken from the
#          /sys/block/<name>/queue/rotational flag), "file" for a regular
#          file, and empty when the type cannot be determined.
#

if [ "$1" = "-h" ] ; then
	echo "Show whether a vdev is a file, hdd, or ssd."
	exit
fi

# Start from a known-empty value so that a MEDIA variable inherited from the
# caller's environment is never reported as the device type.
MEDIA=

if [ -b "$VDEV_UPATH" ]; then
	device=$(basename "$VDEV_UPATH")
	# A missing or unreadable attribute leaves val empty, which in turn
	# leaves MEDIA empty.
	val=$(cat "/sys/block/$device/queue/rotational" 2>/dev/null)
	case "$val" in
	0) MEDIA="ssd" ;;
	1) MEDIA="hdd" ;;
	esac
elif [ -f "$VDEV_UPATH" ]; then
	MEDIA="file"
fi

echo "media=$MEDIA"
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1 -1
View File
@@ -1 +1 @@
lsblk
smart
+4 -4
View File
@@ -6,10 +6,10 @@
helpstr="
enc: Show disk enclosure w:x:y:z value.
slot: Show disk slot number as reported by the enclosure.
encdev: Show the /dev/sg* device for the enclosure associated with the disk slot.
fault_led: Show the value of the disk enclosure slot fault LED.
locate_led: Show the value of the disk enclosure slot locate LED.
ses: Show disk's enclosure, enclosure dev, slot number, and fault/locate LED values."
encdev: Show /dev/sg* device associated with the enclosure disk slot.
fault_led: Show value of the disk enclosure slot fault LED.
locate_led: Show value of the disk enclosure slot locate LED.
ses: Show disk's enc, enc device, slot, and fault/locate LED values."
script=$(basename "$0")
if [ "$1" = "-h" ] ; then
+123
View File
@@ -0,0 +1,123 @@
#!/bin/sh
#
# Show SMART stats
#
# The name this script is invoked under (e.g. through a link named after one
# of the entries in the help text below) selects which statistic(s) to print.
# Every selected statistic is written to stdout as a key=value line; the
# value is left empty when smartctl did not report it, when $VDEV_UPATH is
# not a block device, or when smartctl is not installed.
#
# Reads:   $VDEV_UPATH - device handed to 'sudo smartctl -a'.
# Usage:   <name> [-h]   (-h prints the one-line description of <name>)
#

helpstr="
smart: Show SMART temperature and error stats (specific to drive type)
smartx: Show SMART extended drive stats (specific to drive type).
temp: Show SMART drive temperature in celsius (all drives).
health: Show reported SMART status (all drives).
r_proc: Show SMART read GBytes processed over drive lifetime (SAS).
w_proc: Show SMART write GBytes processed over drive lifetime (SAS).
r_ucor: Show SMART read uncorrectable errors (SAS).
w_ucor: Show SMART write uncorrectable errors (SAS).
nonmed: Show SMART non-medium errors (SAS).
defect: Show SMART grown defect list (SAS).
hours_on: Show number of hours drive powered on (all drives).
realloc: Show SMART reallocated sectors count (ATA).
rep_ucor: Show SMART reported uncorrectable count (ATA).
cmd_to: Show SMART command timeout count (ATA).
pend_sec: Show SMART current pending sector count (ATA).
off_ucor: Show SMART offline uncorrectable errors (ATA).
ata_err: Show SMART ATA errors (ATA).
pwr_cyc: Show SMART power cycle count (ATA).
serial: Show disk serial number.
"

# parse_smart
# Reads 'smartctl -a' text on stdin and writes one key=value line per
# recognised attribute, in input order, followed by one empty line.
parse_smart() {
	awk '
	# SAS specific
	/read:/{print "rrd="$4"\nr_cor="$5"\nr_proc="$7"\nr_ucor="$8}
	/write:/{print "rwr="$4"\nw_cor="$5"\nw_proc="$7"\nw_ucor="$8}
	/Non-medium error count/{print "nonmed="$4}
	/Elements in grown defect list/{print "defect="$6}

	# SAS common
	/Drive Temperature:/{print "temp="$4}
	# Status can be a long string, substitute spaces with underscores
	/SMART Health Status:/{printf "health="; for(i=4;i<=NF-1;i++){printf "%s_", $i}; printf "%s\n", $i}
	/number of hours powered up/{print "hours_on="$7}
	/Serial number:/{print "serial="$3}

	# SATA specific
	/Reallocated_Sector_Ct/{print "realloc="$10}
	/Reported_Uncorrect/{print "rep_ucor="$10}
	/Command_Timeout/{print "cmd_to="$10}
	/Current_Pending_Sector/{print "pend_sec="$10}
	/Offline_Uncorrectable/{print "off_ucor="$10}
	/ATA Error Count:/{print "ata_err="$4}
	/Power_Cycle_Count/{print "pwr_cyc="$10}

	# SATA common
	/Temperature_Celsius/{print "temp="$10}
	/SMART overall-health self-assessment test result:/{print "health="$6}
	/Power_On_Hours/{print "hours_on="$10}
	/Serial Number:/{print "serial="$3}

	END {ORS="\n"; print ""}
	'
}

# report_stats STATS KEYS
# STATS is the key=value text produced by parse_smart; KEYS is a '|'
# separated list of statistic names.  Prints every STATS line whose key is
# exactly one of KEYS, then a "key=" line for each requested key that had no
# value, so that every requested key always appears in the output.
report_stats() {
	# Anchor on the whole key so that a name can never match as a
	# substring of another key or of a value.
	rs_found=$(printf '%s\n' "$1" | grep -E "^($2)=")

	if [ -n "$rs_found" ]; then
		printf '%s\n' "$rs_found"
		# Drop the keys that were found; compare them as fixed,
		# whole-line strings rather than as regular expressions.
		rs_missing=$(printf '%s\n' "$2" | tr '|' '\n' |
			grep -v -x -F -e "$(printf '%s\n' "$rs_found" |
			cut -d '=' -f 1)" | sed 's/$/=/')
	else
		rs_missing=$(printf '%s\n' "$2" | tr '|' '\n' | sed 's/$/=/')
	fi

	if [ -n "$rs_missing" ]; then
		printf '%s\n' "$rs_missing"
	fi
}

script=$(basename "$0")

if [ "$1" = "-h" ] ; then
	# Pick the help line whose label is exactly "<script>:" and print only
	# the description.  This works whether label and text are separated
	# by tabs or by spaces.
	printf '%s\n' "$helpstr" |
		awk -v label="$script:" '$1 == label {
			sub(/^[ \t]*[^ \t]+[ \t]+/, "")
			print
		}'
	exit
fi

# Never trust values for these that were inherited from the environment.
type=
out=

smartctl_path=$(command -v smartctl)

if [ -b "$VDEV_UPATH" ] && [ -x "$smartctl_path" ]; then
	# Run smartctl directly (no eval) with quoted arguments so that a
	# path containing whitespace or shell metacharacters is passed
	# through verbatim instead of being re-parsed by the shell.
	raw_out=$(sudo "$smartctl_path" -a "$VDEV_UPATH")

	# Are we a SAS or ATA drive?  Look for the right line in smartctl:
	#
	# SAS:
	#	Transport protocol:   SAS
	#
	# SATA:
	#	ATA Version is:   8
	#
	type=$(printf '%s\n' "$raw_out" | grep -m 1 -Eo '^ATA|SAS$')
	out=$(printf '%s\n' "$raw_out" | parse_smart)
fi

# if type is not set by now, either we don't have a block device
# or smartctl failed. Either way, default to ATA and set out to
# nothing
if [ -z "$type" ]; then
	type="ATA"
	out=
fi

case $script in
smart)
	# Print temperature plus common predictors of drive failure
	if [ "$type" = "SAS" ] ; then
		scripts="temp|health|r_ucor|w_ucor"
	elif [ "$type" = "ATA" ] ; then
		scripts="temp|health|ata_err|realloc|rep_ucor|cmd_to|pend_sec|off_ucor"
	fi
	;;
smartx)
	# Print some other interesting stats
	if [ "$type" = "SAS" ] ; then
		scripts="hours_on|defect|nonmed|r_proc|w_proc"
	elif [ "$type" = "ATA" ] ; then
		scripts="hours_on|pwr_cyc"
	fi
	;;
*)
	# Any other name is treated as the single statistic to print.
	scripts="$script"
esac

report_stats "$out" "$scripts"
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart
+1
View File
@@ -0,0 +1 @@
smart