mirror of
https://github.com/munin-monitoring/contrib.git
synced 2018-11-08 00:59:34 +01:00
586 lines
22 KiB
Plaintext
586 lines
22 KiB
Plaintext
|
#!/usr/bin/env python
|
||
|
# -*- encoding: iso-8859-1 -*-
|
||
|
#
|
||
|
# Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl,
|
||
|
# which is part of smartmontools package:
|
||
|
# http://smartmontools.sourceforge.net/
|
||
|
#
|
||
|
# To monitor a S.M.A.R.T device, link smart_<device> to this file.
|
||
|
# E.g.
|
||
|
# ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
|
||
|
# ...will monitor /dev/hda.
|
||
|
#
|
||
|
# Needs following minimal configuration in plugin-conf.d/munin-node:
|
||
|
# [smart_*]
|
||
|
# user root
|
||
|
# group disk
|
||
|
#
|
||
|
# Parameters
|
||
|
# smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
|
||
|
# smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
|
||
|
# ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
|
||
|
#
|
||
|
# Parameters can be specified on a per-drive basis, eg:
|
||
|
# [smart_hda]
|
||
|
# user root
|
||
|
# group disk
|
||
|
# env.smartargs -H -c -l error -l selftest -l selective -d ata
|
||
|
# env.smartpath /usr/local/sbin/smartctl
|
||
|
#
|
||
|
# [smart_twa0-1]
|
||
|
# user root
|
||
|
# group disk
|
||
|
# env.smartargs -H -l error -d 3ware,1
|
||
|
# env.ignorestandby True
|
||
|
#
|
||
|
# [smart_twa0-2]
|
||
|
# user root
|
||
|
# group disk
|
||
|
# env.smartargs -H -l error -d 3ware,2
|
||
|
#
|
||
|
# Author: Nicolas Stransky <Nico@neo-lan.net>
|
||
|
#
|
||
|
# v1.0 22/08/2004 - First draft
|
||
|
# v1.2 28/08/2004 - Clean up the code, add a verbose option
|
||
|
# v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code
|
||
|
# v1.4 17/11/2004 - Deal with non zero exit codes of smartctl
|
||
|
# - config now prints the critical thresholds, as reported by smartctl
|
||
|
# v1.5 18/11/2004 - Plot smartctl_exit_code bitmask
|
||
|
# v1.6 21/11/2004 - Add autoconf and suggest capabilities
|
||
|
# - smartctl path can be passed through "smartpath" environment variable
|
||
|
# - Additional smartctl args can be passed through "smartargs" environment variable
|
||
|
# v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS.
|
||
|
# - Allow to override completely the smartctl arguments with "smartargs"
|
||
|
# v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals.
|
||
|
# v1.9 07/07/2005 - Allow to query several drives on the same 3ware card.
|
||
|
# - Correct a bug when '-i' was not listed in smartargs
|
||
|
# - Don't fail if no value was obtained for hard drive model
|
||
|
# v1.10 19/08/2005 - smartctl_exit_code is now a numerical value
|
||
|
# v2.0 08/05/2009 - Correct bug in the interpretation of smartctl_exit_code
|
||
|
# - New option to suppress SMART warnings in munin
|
||
|
# - Temporary lack of output for previously existing drive now reports U
|
||
|
# - The plugin now contains its own documentation for use with munindoc
|
||
|
# - Removed python<2.2 compatibility comments
|
||
|
# - Better autodetection of drives
|
||
|
# - Don't spin up devices in a low-power mode.
|
||
|
#
|
||
|
# Copyright (c) 2004-2009 Nicolas Stransky.
|
||
|
#
|
||
|
# Permission to use, copy, and modify this software with or without fee
|
||
|
# is hereby granted, provided that this entire notice is included in
|
||
|
# all source code copies of any software which is or includes a copy or
|
||
|
# modification of this software.
|
||
|
#
|
||
|
# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
|
||
|
# IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
|
||
|
# REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
|
||
|
# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
|
||
|
# PURPOSE.
|
||
|
#
|
||
|
#
|
||
|
# Magic markers
|
||
|
#%# capabilities=autoconf suggest
|
||
|
#%# family=auto
|
||
|
|
||
|
## User-tunable settings: these three values are the only ones meant to be edited.

# Set to True to have diagnostic messages written to stderr.
verbose = False

# Set to False to stop publishing warning/critical thresholds to munin.
report_warnings = True

# Directory in which the per-drive state files are kept.
statefiledir = '/var/lib/munin/plugin-state/'
|
||
|
# You may not modify anything below this line
|
||
|
|
||
|
import os, sys, string, pickle
|
||
|
from math import log
|
||
|
plugin_version="2.0"
|
||
|
|
||
|
def verboselog(s):
    """Write the diagnostic message *s* to stderr, prefixed with the plugin name."""
    sys.stderr.write(''.join((plugin_name, ': ', s, '\n')))
|
||
|
|
||
|
# With verbosity switched off, replace the logger by a no-op so that call
# sites never have to test the flag themselves.
if not verbose:
    def verboselog(s):
        return None
|
||
|
|
||
|
def read_values(hard_drive):
    """Run smartctl on /dev/<hard_drive> and load the result into ``smart_values``.

    The global ``smart_values`` dict is emptied first and then refilled with
    one ``{"value": ..., "threshold": ...}`` entry per row of the smartctl
    attribute table (dashes in attribute names become underscores), plus the
    two synthetic keys ``"smartctl_exit_status"`` and ``"model"``.  The global
    ``emptyoutput`` is set to True when no attribute row was found.

    Environment variables read:
      smartpath      path of the smartctl binary (default /usr/sbin/smartctl)
      smartargs      arguments replacing the default '-a'
      ignorestandby  query the drive even when it is in standby; any value
                     other than empty, 0, false, no or off enables it

    Returns the raw status obtained from closing the smartctl pipe (None when
    smartctl exited with 0).  Terminates the process with status 1 when
    smartctl cannot be run or its output/status cannot be processed.
    """
    global smart_values, emptyoutput

    # Start from a clean slate: without this, a second call (as done while
    # probing several drives) would inherit the previous drive's attributes
    # and could never be reported as empty.
    smart_values.clear()
    model = None
    try:
        verboselog('Reading S.M.A.R.T values')
        os.putenv('LC_ALL', 'C')
        # The flag arrives as text, so "False"/"0"/"no" must not count as set.
        standby_flag = os.getenv('ignorestandby', '') or ''
        ignore_standby = standby_flag.strip().lower() not in ('', '0', 'false', 'no', 'off')
        # NOTE(review): the command line goes through a shell; the device name
        # and the environment values come from the munin node configuration.
        command = (os.getenv('smartpath', '/usr/sbin/smartctl') + ' '
                   + os.getenv('smartargs', '-a')
                   + (ignore_standby and ' ' or ' -n standby ')
                   + '-A -i /dev/' + hard_drive)
        smart_output = os.popen(command)
        in_attribute_table = False
        for line in smart_output:
            if not line.strip():
                # A blank line terminates the attribute table.
                in_attribute_table = False
            elif line[:13] == 'Device Model:' or line[:7] == 'Device:':
                model_words = line.split(':')[1].split()
                if 'Version' in model_words:
                    model_words.remove('Version')
                model = ' '.join(model_words)
            if in_attribute_table:
                fields = line.split()
                if len(fields) < 6:
                    # Not an attribute row; skip it instead of aborting.
                    verboselog('Skipping unparsable attribute line: ' + line.strip())
                    continue
                smart_values[fields[1].replace('-', '_')] = {"value": fields[3], "threshold": fields[5]}
            elif line[:18] == "ID# ATTRIBUTE_NAME":
                # Start reading the Attributes block
                in_attribute_table = True
        exit_status = smart_output.close()
        if exit_status is not None:
            # smartctl exit code is a bitmask, check man page.
            num_exit_status = int(exit_status / 256)  # Python convention
            # Raises ValueError when the status carries no exit code (for
            # example a signal); that is handled below like any other failure.
            highest_bit = int(log(num_exit_status, 2))
            if highest_bit <= 2:  # bit code
                verboselog('smartctl cannot access S.M.A.R.T values on drive '+hard_drive+'. Command exited with code '+str(num_exit_status)+' (bit '+str(highest_bit)+')')
            else:
                verboselog('smartctl exited with code '+str(num_exit_status)+' (bit '+str(highest_bit)+'). '+hard_drive+' may be FAILING RIGHT NOW!')
        else:
            num_exit_status = 0
    except Exception:
        verboselog('Cannot access S.M.A.R.T values! Check user rights or propper smartmontools installation/arguments.')
        sys.exit(1)

    emptyoutput = not smart_values
    if emptyoutput:
        verboselog('Can\'t find any S.M.A.R.T values in smartctl output!')

    smart_values["smartctl_exit_status"] = {"value": str(num_exit_status), "threshold": "1"}
    # For some reason we may have no value for "model"
    if model is None:
        smart_values["model"] = "unknown"
    else:
        smart_values["model"] = model
    return exit_status
|
||
|
|
||
|
def open_state_file(hard_drive,mode) :
    """Open the pickle state file of *hard_drive* and return the file object.

    hard_drive -- list of name parts as returned by get_hard_drive_name();
                  the parts are joined with '-' to build the file name.
    mode       -- mode string for open(), e.g. "r" or "w".

    The file is always opened in binary mode because it holds pickle data;
    a 'b' is appended to *mode* when the caller did not supply one.
    """
    if 'b' not in mode:
        mode = mode + 'b'
    return open(statefiledir + '/smart-' + '-'.join(hard_drive) + '.state', mode)
|
||
|
|
||
|
def update_state_file(hard_drive) :
    """Pickle the global ``smart_values`` into the state file of *hard_drive*.

    Saving is best effort: any error is logged through verboselog() and
    otherwise ignored, so a read-only state directory does not stop the
    plugin from reporting values.
    """
    try:
        verboselog('Saving statefile')
        state_file = open_state_file(hard_drive, "wb")
        try:
            pickle.dump(smart_values, state_file)
        finally:
            # Close explicitly so the data is flushed before the plugin exits.
            state_file.close()
    except Exception:
        verboselog('Error trying to save state file! Check access rights')
|
||
|
|
||
|
def print_plugin_values(hard_drive) :
    """Print one ``<attribute>.value <value>`` line per collected attribute.

    When the last smartctl run produced no attributes (``emptyoutput``), the
    work is handed to print_unknown_from_statefile() instead.
    """
    if emptyoutput:
        print_unknown_from_statefile(hard_drive, smart_values)
        return

    verboselog('Printing S.M.A.R.T values')
    for attribute, reading in smart_values.items():
        # "model" is descriptive text, not a data field.
        if attribute != "model":
            print(attribute + ".value " + reading["value"])
|
||
|
|
||
|
def print_config(hard_drive) :
    """Print the munin graph configuration for *hard_drive*.

    hard_drive -- list of name parts as returned by get_hard_drive_name().

    The attribute list comes from the drive's state file when one exists;
    otherwise smartctl is queried via read_values() and the result is saved
    (best effort) through update_state_file().  Exits with status 1 when an
    existing state file cannot be loaded.
    """
    drive_label = ','.join(hard_drive)
    state_path = statefiledir + '/smart-' + '-'.join(hard_drive) + '.state'
    if os.path.exists(state_path):
        try:
            verboselog('Try to recall previous S.M.A.R.T attributes for ' + drive_label)
            # NOTE(review): pickle.load executes whatever the state file
            # contains; the state directory must only be writable by trusted users.
            state_file = open_state_file(hard_drive, "rb")
            try:
                smart_values_state = pickle.load(state_file)
            finally:
                state_file.close()
        except Exception:
            verboselog('Error opening existing state file!')
            sys.exit(1)
    else:
        verboselog('No state file, reading S.M.A.R.T values for the first time')
        read_values(hard_drive[0])
        # A failure to save must not prevent the configuration from printing.
        update_state_file(hard_drive)
        smart_values_state = smart_values

    verboselog('Printing configuration')
    print('graph_title S.M.A.R.T values for drive ' + drive_label)
    print('graph_vlabel Attribute S.M.A.R.T value')
    print('graph_args --base 1000 --lower-limit 0')
    print('graph_category disk')
    print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive '+drive_label+' ('+smart_values_state['model']+'). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
    # sorted() works on both a list and a dict view of the keys.
    for key in sorted(smart_values_state.keys()):
        if key in ('smartctl_exit_status', 'model'):
            continue
        print(key + '.label ' + key)
        print(key + '.draw LINE2')
        if report_warnings:
            print(key + '.critical ' + smart_values_state[key]["threshold"] + ':')
    # The exit status is always printed last and only ever raises a warning.
    print('smartctl_exit_status.label smartctl_exit_status')
    print('smartctl_exit_status.draw LINE2')
    if report_warnings:
        print('smartctl_exit_status.warning ' + smart_values_state['smartctl_exit_status']["threshold"])
|
||
|
|
||
|
def print_unknown_from_statefile(hard_drive,smart_values) :
    """Print ``<attribute>.value U`` for every attribute stored in the state file.

    hard_drive   -- list of name parts as returned by get_hard_drive_name().
    smart_values -- accepted for call compatibility; not used here.

    Exits with status 1 when no state file exists or it cannot be loaded.
    """
    state_path = statefiledir + '/smart-' + '-'.join(hard_drive) + '.state'
    if not os.path.exists(state_path):
        verboselog('No state file found, cannot print unknown values for ' + ','.join(hard_drive))
        sys.exit(1)

    try:
        verboselog('Failed to get S.M.A.R.T values from drive. Try to recall previous S.M.A.R.T attributes for ' + ','.join(hard_drive))
        # NOTE(review): pickle.load executes whatever the state file
        # contains; the state directory must only be writable by trusted users.
        state_file = open_state_file(hard_drive, "rb")
        try:
            smart_values_state = pickle.load(state_file)
        finally:
            state_file.close()
    except Exception:
        verboselog('Error opening existing state file!')
        sys.exit(1)

    verboselog('Printing unknown values for all attributes in state file')
    # sorted() works on both a list and a dict view of the keys.
    for key in sorted(smart_values_state.keys()):
        if key == 'model':
            continue
        print(key + '.value U')
|
||
|
|
||
|
def get_hard_drive_name() :
    """Derive the monitored device from the plugin's own file name.

    The device is whatever follows the last '_' in the global ``plugin_name``.
    Returns a one-element list ``[device]``, or ``[device, number]`` when the
    name contains a '-' (used to address several drives behind one 3ware
    controller).  Exits with status 1 when the name carries no device or
    when /dev/<device> does not exist.
    """
    try:
        device = plugin_name[plugin_name.rindex('_') + 1:]
    except ValueError:
        device = ''
    if not device:
        verboselog('No S.M.A.R.T device name found in plugin\'s symlink!')
        sys.exit(1)

    if os.uname()[0] == "SunOS":
        # A link name cannot contain '/', so names starting with "rdsk" or
        # "rmt" are turned back into the matching /dev sub-directory path.
        if device[0:4] == "rdsk":
            device = os.path.join("rdsk", device[4:])
        elif device[0:3] == "rmt":
            device = os.path.join("rmt", device[3:])

    name = [device]
    # For 3ware cards, several plugins share the same device name and are
    # told apart by a trailing "-<number>".
    if '-' in device:
        split_at = device.rindex('-')
        name = [device[:split_at], device[split_at + 1:]]

    # Check that the drive exists in /dev
    if not os.path.exists('/dev/' + name[0]):
        verboselog('/dev/' + name[0] + ' not found!')
        sys.exit(1)
    return name
|
||
|
|
||
|
def _first_output_line(command):
    """Return the first line printed by shell *command*, stripped of whitespace."""
    pipe = os.popen(command)
    try:
        return pipe.readline().strip()
    finally:
        pipe.close()


def _drive_has_smart(device):
    """Return True when read_values() obtains S.M.A.R.T attributes for /dev/<device>."""
    verboselog('Trying ' + device + '...')
    # Results are collected in a shared dict; empty it so that attributes of
    # a previously probed drive cannot make this one look populated.
    smart_values.clear()
    try:
        exit_status = read_values(device)
        if exit_status is not None and int(log(int(exit_status / 256), 2)) <= 2:
            return False
        return not emptyoutput
    except (Exception, SystemExit):
        # read_values() leaves through sys.exit() on failure; while probing
        # that only means "this device is not usable".
        return False


def find_smart_drives() :
    """Return the list of device names for which smartctl reports attributes.

    Candidates are enumerated per operating system (Linux, OpenBSD, FreeBSD,
    NetBSD, SunOS) and each one is probed through read_values().  Drives
    behind a 3Ware card are not autodetected.  Enumeration errors are logged
    and yield an empty or partial list rather than an exception.
    """
    drives = []
    system = os.uname()[0]
    if system == "Linux":
        if os.path.exists('/sys/block/'):
            # Running 2.6
            try:
                for drive in os.listdir('/sys/block/'):
                    # Ignore MD, Floppy, loop, RAM and LVM devices.
                    if drive[:2] in ('md', 'fd', 'lo', 'ra', 'dm'):
                        continue
                    if _drive_has_smart(drive):
                        drives.append(drive)
            except Exception:
                verboselog('Failed to list devices in /sys/block')
        else:
            verboselog('Not running linux2.6, failing back to /proc/partitions')
            try:
                partitions = open('/proc/partitions', 'r')
                try:
                    lines = partitions.readlines()
                finally:
                    partitions.close()
                for line in lines:
                    words = line.split()
                    if len(words) == 0 or not words[0][0].isdigit():
                        continue
                    # Ignore RAM, md, LVM and LVM2 devices
                    if words[0] in ('1', '9', '58', '254'):
                        continue
                    # Only names that do not end in a digit are probed.
                    if not words[-1][-1].isdigit():
                        if _drive_has_smart(words[-1]):
                            drives.append(words[-1])
                verboselog('Found drives in /proc/partitions ! ' + str(drives))
            except Exception:
                verboselog('Failed to list devices in /proc/partitions')
    elif system == "OpenBSD":
        try:
            kerndisks = _first_output_line('sysctl hw.disknames')
            for drive in kerndisks[kerndisks.rindex('=') + 1:].split(','):
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ('md', 'cd', 'fd'):
                    continue
                if _drive_has_smart(drive + 'c'):
                    drives.append(drive + 'c')
        except Exception:
            verboselog('Failed to list OpenBSD disks')
    elif system == "FreeBSD":
        try:
            kerndisks = _first_output_line('sysctl kern.disks')
            for drive in kerndisks.split()[1:]:
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ('md', 'cd', 'fd'):
                    continue
                if _drive_has_smart(drive):
                    drives.append(drive)
        except Exception:
            verboselog('Failed to list FreeBSD disks')
    elif system == "NetBSD":
        try:
            kerndisks = _first_output_line('sysctl hw.disknames')
            for drive in kerndisks.split()[2:]:
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ('md', 'cd', 'fd'):
                    continue
                if _drive_has_smart(drive + 'c'):
                    drives.append(drive + 'c')
        except Exception:
            verboselog('Failed to list NetBSD disks')
    elif system == "SunOS":
        try:
            from glob import glob
            # The device is probed through its real /dev sub-directory path,
            # while the suggested name drops the '/' so that it can be used
            # in a link name (get_hard_drive_name() restores the path).
            for subdir, pattern in (('rdsk', '/dev/rdsk/*s2'), ('rmt', '/dev/rmt/*')):
                for drivepath in glob(pattern):
                    drive = os.path.basename(drivepath)
                    if _drive_has_smart(os.path.join(subdir, drive)):
                        drives.append(subdir + drive)
        except Exception:
            verboselog('Failed to list SunOS disks')
    return drives
|
||
|
|
||
|
### Main part ###
|
||
|
|
||
|
# Shared state used by the functions above.
smart_values = {}
emptyoutput = False
# The plugin is invoked through a link named smart_<device>; keep only the
# file name part of the invocation path.
plugin_name = os.path.basename(sys.argv[0])
verboselog('plugins\' UID: '+str(os.geteuid())+' / plugins\' GID: '+str(os.getegid()))

# Parse arguments
if len(sys.argv) > 1:
    if sys.argv[1] == "config":
        hard_drive = get_hard_drive_name()
        print_config(hard_drive)
        sys.exit(0)
    elif sys.argv[1] == "autoconf":
        # The answer is carried by the printed word; "no" is a valid answer,
        # not a plugin failure, so the exit status is 0 in both cases.
        if os.path.exists(os.getenv('smartpath', '/usr/sbin/smartctl')):
            print('yes')
        else:
            print('no (smartmontools not found)')
        sys.exit(0)
    elif sys.argv[1] == "suggest":
        for drive in find_smart_drives():
            print(drive)
        sys.exit(0)
    elif sys.argv[1] == "version":
        print('smart_ Munin plugin, version ' + plugin_version)
        sys.exit(0)
    elif sys.argv[1] != "":
        verboselog('unknown argument "' + sys.argv[1] + '"')
        sys.exit(1)

# No argument given, doing the real job:
hard_drive = get_hard_drive_name()
read_values(hard_drive[0])
# Only overwrite the saved attribute list when the drive actually answered.
if not emptyoutput:
    update_state_file(hard_drive)
print_plugin_values(hard_drive)
# sys.exit rather than the site-provided exit(), which is not always defined.
sys.exit(0)
|
||
|
|
||
|
|
||
|
### The following is the smart_ plugin documentation, intended to be used with munindoc
|
||
|
"""
|
||
|
=head1 NAME
|
||
|
|
||
|
smart_ - Munin wildcard-plugin to monitor S.M.A.R.T. attribute values through smartctl
|
||
|
|
||
|
=head1 APPLICABLE SYSTEMS
|
||
|
|
||
|
Node with B<Python> interpreter and B<smartmontools> (http://smartmontools.sourceforge.net/)
|
||
|
installed and in function.
|
||
|
|
||
|
=head1 CONFIGURATION
|
||
|
|
||
|
=head2 Create link in service directory
|
||
|
|
||
|
To monitor a S.M.A.R.T device, create a link in the service directory
|
||
|
of the munin-node named smart_<device>, which is pointing to this file.
|
||
|
|
||
|
E.g.
|
||
|
|
||
|
ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
|
||
|
|
||
|
...will monitor /dev/hda.
|
||
|
|
||
|
=head2 Grant privileges in munin-node
|
||
|
|
||
|
The plugin must be run under high privileged user B<root>, to get access to the raw device.
|
||
|
|
||
|
So following minimal configuration in plugin-conf.d/munin-node is needed.
|
||
|
|
||
|
=over 2
|
||
|
|
||
|
[smart_*]
|
||
|
user root
|
||
|
group disk
|
||
|
|
||
|
=back
|
||
|
|
||
|
=head2 Set Parameter if needed
|
||
|
|
||
|
smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
|
||
|
smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
|
||
|
ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
|
||
|
|
||
|
Parameters can be specified on a per-drive basis, eg:
|
||
|
|
||
|
=over 2
|
||
|
|
||
|
[smart_hda]
|
||
|
user root
|
||
|
env.smartargs -H -c -l error -l selftest -l selective -d ata
|
||
|
env.smartpath /usr/local/sbin/smartctl
|
||
|
|
||
|
=back
|
||
|
|
||
|
In particular, for SATA drives, with older versions of smartctl:
|
||
|
|
||
|
=over 2
|
||
|
|
||
|
[smart_sda]
|
||
|
user root
|
||
|
env.smartargs -d ata -a
|
||
|
|
||
|
[smart_twa0-1]
|
||
|
user root
|
||
|
env.smartargs -H -l error -d 3ware,1
|
||
|
env.ignorestandby True
|
||
|
|
||
|
[smart_twa0-2]
|
||
|
user root
|
||
|
env.smartargs -H -l error -d 3ware,2
|
||
|
|
||
|
=back
|
||
|
|
||
|
=head1 INTERPRETATION
|
||
|
|
||
|
If a device supports the B<Self-Monitoring, Analysis
|
||
|
and Reporting Technology (S.M.A.R.T.)> it offers readable
|
||
|
access to the attribute table. There you find the B<raw value>,
|
||
|
a B<normalised value> and a B<threshold> (set by the vendor)
|
||
|
for each attribute, that is supported by that device.
|
||
|
|
||
|
The meaning and handling of the raw value is a secret of the
|
||
|
vendors embedded S.M.A.R.T.-Software on the disk. The only
|
||
|
relevant info from our external view is the B<normalised value>
|
||
|
in comparison with the B<threshold>. If the attributes value is
|
||
|
equal or below the threshold, it signals its failure and
|
||
|
the B<health status> of the device will switch from B<passed> to B<failed>.
|
||
|
|
||
|
This plugin fetches the B<normalised values of all SMART-Attributes>
|
||
|
and draw a curve for each of them.
|
||
|
It takes the vendors threshold as critical limit for the munin datafield.
|
||
|
So you will see an alarm, if the value reaches the vendors threshold.
|
||
|
|
||
|
Looking at the graph: It is a bad sign, if the curve starts
|
||
|
to curl or to meander. The more horizontal it runs,
|
||
|
the better. Of course it is normal, that the temperatures
|
||
|
curve swings a bit. But the others should stay steady on
|
||
|
their level if everything is ok.
|
||
|
|
||
|
S.M.A.R.T. distinguishes between B<Pre-fail> and B<Old-age>
|
||
|
Attributes. An old disk will have more curling curves
|
||
|
because of degradation, especially for the B<Old-age> Attributes.
|
||
|
You should then backup more often, run more selftests[1] and prepare
|
||
|
the disks replacement.
|
||
|
|
||
|
B<Act directly>, if a B<Pre-fail> Attribute goes below threshold.
|
||
|
Immediately back-up your data and replace your hard disk drive.
|
||
|
A failure may be imminent..
|
||
|
|
||
|
[1] Consult the smartmontools manpages to learn about
|
||
|
offline tests and automated selftests with smartd.
|
||
|
Only with both activated, the values of the SMART-Attributes
|
||
|
reflect the all over state of the device.
|
||
|
|
||
|
Tutorials and articles about S.M.A.R.T. and smartmontools:
|
||
|
http://smartmontools.sourceforge.net/doc.html#tutorials
|
||
|
|
||
|
=head1 MAGIC MARKERS
|
||
|
|
||
|
#%# family=auto
|
||
|
#%# capabilities=autoconf suggest
|
||
|
|
||
|
=head1 CALL OPTIONS
|
||
|
|
||
|
B<none>
|
||
|
|
||
|
=over 2
|
||
|
|
||
|
Fetches values if called without arguments:
|
||
|
|
||
|
E.g.: munin-run smart_hda
|
||
|
|
||
|
=back
|
||
|
|
||
|
B<config>
|
||
|
|
||
|
=over 2
|
||
|
|
||
|
Prints plugins configuration.
|
||
|
|
||
|
E.g.: munin-run smart_hda config
|
||
|
|
||
|
=back
|
||
|
|
||
|
B<autoconf>
|
||
|
|
||
|
=over 2
|
||
|
|
||
|
Tries to find smartctl and outputs value 'yes' for success, 'no' if not.
|
||
|
|
||
|
It's used by B<munin-node-configure> to see whether autoconfiguration is possible.
|
||
|
|
||
|
=back
|
||
|
|
||
|
B<suggest>
|
||
|
|
||
|
=over 2
|
||
|
|
||
|
Outputs the list of device names, that it found plugged to the system.
|
||
|
|
||
|
B<munin-node-configure> uses this to build the service links for this wildcard-plugin.
|
||
|
|
||
|
=back
|
||
|
|
||
|
=head1 VERSION
|
||
|
|
||
|
Version 2.0
|
||
|
|
||
|
=head1 BUGS
|
||
|
|
||
|
None known
|
||
|
|
||
|
=head1 AUTHOR
|
||
|
|
||
|
(C) 2004-2009 Nicolas Stransky <Nico@stransky.cx>
|
||
|
|
||
|
(C) 2008 Gabriele Pohl <contact@dipohl.de>
|
||
|
Reformatted the existing documentation to POD style and added the Interpretation section to the documentation.
|
||
|
|
||
|
=head1 LICENSE
|
||
|
|
||
|
GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt)
|
||
|
|
||
|
=cut
|
||
|
|
||
|
|
||
|
"""
|