#!/bin/bash
###### ZPool & SMART status report with FreeNAS config backup
### Original script by joeschmuck, modified by Bidelu0hm, then by melp (me)

### At a minimum, enter email address in user-definable parameter section. Feel free to edit other user parameters as needed.
### If you find any errors, feel free to contact me on the FreeNAS forums (username melp) or email me at jason at jro dot io.

### Version: v1.3

### Changelog:
# v1.3:
#   - Added scrub duration column
#   - Fixed for FreeNAS 11.1 (thanks reven!)
#   - Fixed fields parsed out of zpool status
#   - Buffered zpool status to reduce calls to script
# v1.2:
#   - Added switch for power-on time format
#   - Slimmed down table columns
#   - Fixed some shellcheck errors & other misc stuff
#   - Added .tar.gz to backup file attached to email
#   - (Still coming) Better SSD SMART support
# v1.1:
#   - Config backup now attached to report email
#   - Added option to turn off config backup
#   - Added option to save backup configs in a specified directory
#   - Power-on hours in SMART summary table now listed as YY-MM-DD-HH
#   - Changed filename of config backup to exclude timestamp (just uses datestamp now)
#   - Config backup and checksum files now zipped (was just .tar before; now .tar.gz)
#   - Fixed degrees symbol in SMART table (rendered weird for a lot of people); replaced with a *
#   - Added switch to enable or disable SSDs in SMART table (SSD reporting still needs work)
#   - Added most recent Extended & Short SMART tests in drive details section (only listed one before, whichever was more recent)
#   - Reformatted user-definable parameters section
#   - Added more general comments to code
# v1.0:
#   - Initial release

### TODO:
# - Fix SSD SMART reporting
# - Add support for conveyance test

# NOTE: "set -e" / "set -o pipefail" are deliberately NOT enabled. smartctl
# reports drive problems through a non-zero exit status, and a failing drive
# must end up in the report instead of aborting it.

###### User-definable Parameters
### Email Address
email="morten@strandbo.org"

### Global table colors
okColor="#c9ffcc"       # Hex code for color to use in SMART Status column if drives pass (default is light green, #c9ffcc)
warnColor="#ffd6d6"     # Hex code for WARN color (default is light red, #ffd6d6)
critColor="#ff0000"     # Hex code for CRITICAL color (default is bright red, #ff0000)
altColor="#f4f4f4"      # Table background alternates row colors between white and this color (default is light gray, #f4f4f4)

### SMART status summary table settings
includeSSD="true"       # [NOTE: Currently this is pretty much useless] Change to "true" to include SSDs in SMART status summary table; "false" to disable
tempWarn=40             # Drive temp (in C) at which WARNING color will be used
tempCrit=45             # Drive temp (in C) at which CRITICAL color will be used
sectorsCrit=10          # Number of sectors per drive with errors before CRITICAL color will be used
testAgeWarn=7           # Maximum age (in days) of last SMART test before WARNING color will be used
powerTimeFormat="ymdh"  # Format for power-on hours string, valid options are "ymdh", "ymd", "ym", or "y" (year month day hour)

### Drive usage section settings
raidDevice="/dev/md0"   # Device passed to "df -h" for the RAID Usage section

###### Auto-generated Parameters
host=$(hostname -s)
subject="Status Report for ${host}"
boundary="gc0p4Jq0M2Yt08jU534c0p"

# The report is assembled in a private, unpredictably named file (this script
# normally runs as root) and removed again on every exit path.
logfile=$(mktemp "${TMPDIR:-/tmp}/smart_report.XXXXXX") || {
    echo "smart report: unable to create temporary file" >&2
    exit 1
}
trap 'rm -f -- "$logfile"' EXIT

# Collect every block device that has SMART enabled. Names are listed in
# descending order. When includeSSD is not "true", devices that smartctl
# identifies as "Solid State Device" are skipped.
drives=()
while IFS= read -r drive; do
    [ -n "$drive" ] || continue
    driveInfo=$(/usr/sbin/smartctl -i "/dev/${drive}")
    grep -q "SMART support is: Enabled" <<<"$driveInfo" || continue
    if [ "$includeSSD" != "true" ] && grep -q "Solid State Device" <<<"$driveInfo"; then
        continue
    fi
    drives+=("$drive")
done < <(/bin/lsblk -dn -o NAME | sort -r)

###### Email pre-formatting
### Set email headers
{
    echo "From: ${email}"
    echo "To: ${email}"
    echo "Subject: ${subject}"
    echo "MIME-Version: 1.0"
    echo "Content-Type: multipart/mixed; boundary=${boundary}"
    echo ""     # blank line: end of the message headers
} > "$logfile"
{
    echo "--${boundary}"
    echo "Content-Type: text/html"
    echo ""     # blank line: end of this MIME part's headers
} >> "$logfile"

###### Report Summary Section (html tables)
### SMART status summary table
summaryColumns=(
    "Device"
    "Serial<br>Number"
    "SMART<br>Status"
    "Temp"
    "Power-On<br>Time"
    "Start/Stop<br>Count"
    "Spin<br>Retry<br>Count"
    "Realloc'd<br>Sectors"
    "Realloc<br>Events"
    "Current<br>Pending<br>Sectors"
    "Offline<br>Uncorrectable<br>Sectors"
    "UltraDMA<br>CRC<br>Errors"
    "Seek<br>Error<br>Health"
    "Last Test<br>Age (days)"
    "Last Test<br>Type"
)
thStyle='text-align:center; height:60px; border:1px solid black; border-collapse:collapse; font-family:courier;'
{
    # Write HTML table headers to log file
    echo '<br><br>'
    echo '<table style="border: 1px solid black; border-collapse: collapse;">'
    printf '<tr><th colspan="%d" style="text-align:center; font-size:20px; height:40px; font-family:courier;">SMART Status Report Summary</th></tr>\n' "${#summaryColumns[@]}"
    echo '<tr>'
    for heading in "${summaryColumns[@]}"; do
        printf '  <th style="%s">%s</th>\n' "$thStyle" "$heading"
    done
    echo '</tr>'
} >> "$logfile"

rowIndex=0
for drive in "${drives[@]}"; do
    # Most recent self-test entry ("# 1" in the self-test log), fetched once
    # and then split into its lifetime-hours and test-type fields.
    selftestLatest=$(/usr/sbin/smartctl -l selftest "/dev/${drive}" | grep "# 1")
    lastTestHours=$(awk '{print $9}' <<<"$selftestLatest")
    lastTestType=$(awk '{print $3}' <<<"$selftestLatest")
    smartStatus=$(/usr/sbin/smartctl -H "/dev/${drive}" | awk '/SMART overall-health/ {print $6}')

    # Parse "smartctl -A -i" for this drive:
    #   1. the pattern rules pull the raw values (or the normalized value for
    #      Seek_Error_Rate) out of the attribute table;
    #   2. END derives the last test age and the power-on time string;
    #   3. END then picks the row background and per-cell colors;
    #   4. finally the HTML row is printed.
    /usr/sbin/smartctl -A -i "/dev/${drive}" |
        awk -v device="$drive" -v rowIndex="$rowIndex" \
            -v tempWarn="$tempWarn" -v tempCrit="$tempCrit" \
            -v sectorsCrit="$sectorsCrit" -v testAgeWarn="$testAgeWarn" \
            -v okColor="$okColor" -v warnColor="$warnColor" \
            -v critColor="$critColor" -v altColor="$altColor" \
            -v powerTimeFormat="$powerTimeFormat" \
            -v lastTestHours="$lastTestHours" \
            -v lastTestType="$lastTestType" \
            -v smartStatus="$smartStatus" '
        # Print one table cell; an empty bg leaves the row background visible.
        function td(bg, text,    style) {
            style = "text-align:center; height:25px; border:1px solid black; border-collapse:collapse; font-family:courier;"
            if (bg != "") style = style " background-color:" bg ";"
            printf "<td style=\"%s\">%s</td>\n", style, text
        }

        /Serial Number:/          { serial = $3 }
        /Temperature_Celsius/     { temp = ($10 + 0) }
        /Power_On_Hours/          { onHours = $10 }
        /Start_Stop_Count/        { startStop = $10 }
        /Spin_Retry_Count/        { spinRetry = $10 }
        /Reallocated_Sector/      { reAlloc = $10 }
        /Reallocated_Event_Count/ { reAllocEvent = $10 }
        /Current_Pending_Sector/  { pending = $10 }
        /Offline_Uncorrectable/   { offlineUnc = $10 }
        /UDMA_CRC_Error_Count/    { crcErrors = $10 }
        /Seek_Error_Rate/         { seekErrorHealth = $4 }

        END {
            testAge = int((onHours - lastTestHours) / 24)

            # Power-on time, using 8760 h per year and 730 h per month.
            yrs = int(onHours / 8760)
            mos = int((onHours % 8760) / 730)
            dys = int(((onHours % 8760) % 730) / 24)
            hrs = ((onHours % 8760) % 730) % 24
            if (powerTimeFormat == "ymdh")     onTime = yrs "y " mos "m " dys "d " hrs "h"
            else if (powerTimeFormat == "ymd") onTime = yrs "y " mos "m " dys "d"
            else if (powerTimeFormat == "ym")  onTime = yrs "y " mos "m"
            else if (powerTimeFormat == "y")   onTime = yrs "y"
            else                               onTime = yrs "y " mos "m " dys "d " hrs "h "

            # Stripe rows by position in the table; device names such as
            # "sda" carry no usable number to alternate on.
            if (rowIndex % 2 == 1) bgColor = "#ffffff"
            else                   bgColor = altColor

            if (smartStatus != "PASSED") smartStatusColor = critColor
            else                         smartStatusColor = okColor

            if (temp >= tempCrit)      tempColor = critColor
            else if (temp >= tempWarn) tempColor = warnColor
            else                       tempColor = bgColor

            # Counters are compared numerically so that an attribute the
            # drive does not report (common on SSDs) is not flagged.
            if ((spinRetry + 0) != 0) spinRetryColor = warnColor
            else                      spinRetryColor = bgColor

            if ((reAlloc + 0) > sectorsCrit) reAllocColor = critColor
            else if ((reAlloc + 0) != 0)     reAllocColor = warnColor
            else                             reAllocColor = bgColor

            if ((reAllocEvent + 0) != 0) reAllocEventColor = warnColor
            else                         reAllocEventColor = bgColor

            if ((pending + 0) > sectorsCrit) pendingColor = critColor
            else if ((pending + 0) != 0)     pendingColor = warnColor
            else                             pendingColor = bgColor

            if ((offlineUnc + 0) > sectorsCrit) offlineUncColor = critColor
            else if ((offlineUnc + 0) != 0)     offlineUncColor = warnColor
            else                                offlineUncColor = bgColor

            if ((crcErrors + 0) != 0) crcErrorsColor = warnColor
            else                      crcErrorsColor = bgColor

            if (seekErrorHealth != "" && (seekErrorHealth + 0) < 100) seekErrorHealthColor = warnColor
            else                                                      seekErrorHealthColor = bgColor

            if (testAge > testAgeWarn) testAgeColor = warnColor
            else                       testAgeColor = bgColor

            printf "<tr style=\"background-color:%s;\">\n", bgColor
            td("", "/dev/" device)
            td("", serial)
            td(smartStatusColor, smartStatus)
            td(tempColor, sprintf("%d*C", temp))
            td("", onTime)
            td("", startStop)
            td(spinRetryColor, spinRetry)
            td(reAllocColor, reAlloc)
            td(reAllocEventColor, reAllocEvent)
            td(pendingColor, pending)
            td(offlineUncColor, offlineUnc)
            td(crcErrorsColor, crcErrors)
            td(seekErrorHealthColor, seekErrorHealth "%")
            td(testAgeColor, sprintf("%d", testAge))
            td("", lastTestType)
            print "</tr>"
        }' >> "$logfile"

    rowIndex=$((rowIndex + 1))
done

# End SMART summary table and summary section
{
    echo '</table>'
    echo '<br><br>'
} >> "$logfile"

# Drive usage
{
    echo '<b>RAID Usage</b><br>'
    echo '<pre>'
    df -h "$raidDevice"
    echo '</pre>'
    echo '<br>'
} >> "$logfile"

###### Detailed Report Section (monospace text)
echo '<pre style="font-size:14px">' >> "$logfile"

### SMART status for each drive
for drive in "${drives[@]}"; do
    # Gather brand and serial number of each drive
    driveInfo=$(/usr/sbin/smartctl -i "/dev/${drive}")
    brand=$(awk '/Model Family/ {print $3, $4, $5}' <<<"$driveInfo")
    serial=$(awk '/Serial Number/ {print $3}' <<<"$driveInfo")
    selftestLog=$(/usr/sbin/smartctl -l selftest "/dev/${drive}")
    {
        # Create a simple header and drop the output of some basic smartctl commands
        echo '<br>'
        echo "<b>########## SMART status report for ${drive} drive (${brand}: ${serial}) ##########</b>"
        # Remove some un-needed junk (banner and section captions) from the
        # smartctl output as it streams by, so no in-place edit is needed.
        /usr/sbin/smartctl -H -A -l error "/dev/${drive}" |
            sed -e '/^smartctl [0-9]/d' \
                -e '/Copyright/d' \
                -e '/=== START OF READ/d' \
                -e '/SMART Attributes Data/d' \
                -e '/Vendor Specific SMART/d' \
                -e '/SMART Error Log Version/d'
        # Column header plus the most recent Extended test ...
        grep -E 'Extended |Num' <<<"$selftestLog" | cut -c6- | head -2
        # ... followed by only the most recent Short test (header dropped).
        grep -E 'Short |Num' <<<"$selftestLog" | cut -c6- | head -2 | tail -n 1
        echo '<br><br>'
    } >> "$logfile"
done

### End details section, close MIME section
{
    echo '</pre>'
    echo "--${boundary}--"
} >> "$logfile"

### Send report
/usr/sbin/sendmail -t -oi < "$logfile"