Add SMART attributes for SSD and NVMe

This adds the SMART attributes required to probe Samsung SSD and NVMe
disks (and possibly others) when using the "zpool status -c" command.

Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: bunder2015 <omfgbunder@gmail.com>
Closes #7183
Closes #7193
This commit is contained in:
bunder2015 2018-02-21 16:52:47 -05:00 committed by Tony Hutter
parent d5b10b3ef3
commit c705d8386b
3 changed files with 24 additions and 2 deletions

View File

@ -60,6 +60,7 @@ dist_zpoolexec_SCRIPTS = \
zpool.d/pend_sec \ zpool.d/pend_sec \
zpool.d/off_ucor \ zpool.d/off_ucor \
zpool.d/ata_err \ zpool.d/ata_err \
zpool.d/nvme_err \
zpool.d/pwr_cyc \ zpool.d/pwr_cyc \
zpool.d/upath \ zpool.d/upath \
zpool.d/vendor zpool.d/vendor
@ -98,6 +99,7 @@ zpoolconfdefaults = \
pend_sec \ pend_sec \
off_ucor \ off_ucor \
ata_err \ ata_err \
nvme_err \
pwr_cyc \ pwr_cyc \
upath \ upath \
vendor vendor

1
cmd/zpool/zpool.d/nvme_err Symbolic link
View File

@ -0,0 +1 @@
smart

View File

@ -23,6 +23,7 @@ off_ucor: Show SMART offline uncorrectable errors (ATA).
ata_err: Show SMART ATA errors (ATA). ata_err: Show SMART ATA errors (ATA).
pwr_cyc: Show SMART power cycle count (ATA). pwr_cyc: Show SMART power cycle count (ATA).
serial: Show disk serial number. serial: Show disk serial number.
nvme_err: Show SMART NVMe errors (NVMe).
" "
script=$(basename "$0") script=$(basename "$0")
@ -37,7 +38,7 @@ smartctl_path=$(which smartctl)
if [ -b "$VDEV_UPATH" ] && [ -x "$smartctl_path" ]; then if [ -b "$VDEV_UPATH" ] && [ -x "$smartctl_path" ]; then
raw_out=$(eval "sudo $smartctl_path -a $VDEV_UPATH") raw_out=$(eval "sudo $smartctl_path -a $VDEV_UPATH")
# Are we a SAS or ATA drive? Look for the right line in smartctl: # What kind of drive are we? Look for the right line in smartctl:
# #
# SAS: # SAS:
# Transport protocol: SAS # Transport protocol: SAS
@ -45,7 +46,10 @@ if [ -b "$VDEV_UPATH" ] && [ -x "$smartctl_path" ]; then
# SATA: # SATA:
# ATA Version is: 8 # ATA Version is: 8
# #
type=$(echo "$raw_out" | grep -m 1 -Eo '^ATA|SAS$') # NVMe:
# SMART/Health Information (NVMe Log 0xnn, NSID 0xnn)
#
type=$(echo "$raw_out" | grep -m 1 -Eo '^ATA|NVMe|SAS$')
out=$(echo "$raw_out" | awk ' out=$(echo "$raw_out" | awk '
# SAS specific # SAS specific
/read:/{print "rrd="$4"\nr_cor="$5"\nr_proc="$7"\nr_ucor="$8} /read:/{print "rrd="$4"\nr_cor="$5"\nr_proc="$7"\nr_ucor="$8}
@ -71,10 +75,21 @@ if [ -b "$VDEV_UPATH" ] && [ -x "$smartctl_path" ]; then
# SATA common # SATA common
/Temperature_Celsius/{print "temp="$10} /Temperature_Celsius/{print "temp="$10}
/Airflow_Temperature_Cel/{print "temp="$10}
/SMART overall-health self-assessment test result:/{print "health="$6} /SMART overall-health self-assessment test result:/{print "health="$6}
/Power_On_Hours/{print "hours_on="$10} /Power_On_Hours/{print "hours_on="$10}
/Serial Number:/{print "serial="$3} /Serial Number:/{print "serial="$3}
# NVMe common
/Temperature:/{print "temp="$2}
/SMART overall-health self-assessment test result:/{print "health="$6}
/Power On Hours:/{gsub("[^0-9]","",$4); print "hours_on="$4}
/Serial Number:/{print "serial="$3}
/Power Cycles:/{print "pwr_cyc="$3}
# NVMe specific
/Media and Data Integrity Errors:/{print "nvme_err="$6}
END {ORS="\n"; print ""} END {ORS="\n"; print ""}
'); ');
fi fi
@ -94,6 +109,8 @@ smart)
scripts="temp|health|r_ucor|w_ucor" scripts="temp|health|r_ucor|w_ucor"
elif [ "$type" = "ATA" ] ; then elif [ "$type" = "ATA" ] ; then
scripts="temp|health|ata_err|realloc|rep_ucor|cmd_to|pend_sec|off_ucor" scripts="temp|health|ata_err|realloc|rep_ucor|cmd_to|pend_sec|off_ucor"
elif [ "$type" = "NVMe" ] ; then
scripts="temp|health|nvme_err"
fi fi
;; ;;
smartx) smartx)
@ -102,6 +119,8 @@ smartx)
scripts="hours_on|defect|nonmed|r_proc|w_proc" scripts="hours_on|defect|nonmed|r_proc|w_proc"
elif [ "$type" = "ATA" ] ; then elif [ "$type" = "ATA" ] ; then
scripts="hours_on|pwr_cyc" scripts="hours_on|pwr_cyc"
elif [ "$type" = "NVMe" ] ; then
scripts="hours_on|pwr_cyc"
fi fi
;; ;;
*) *)