#!/usr/bin/perl -w
#
# Wildcard plugin to monitor sensor by using ipmitool sensor program.
#
# Contributed by Jun Futagawa
# This script is based on sensors_ plugin.
#
# Usage:
# ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_fan
# ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_temp
# ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_volt
#
# Requirements:
# - OpenIPMI tool (ipmitool command)
#
# Note:
# - Sensor names are read from the output of the ipmitool sensor program.
#
# Add the following to your /etc/munin/plugin-conf.d/munin-node:
#
# [ipmitool_sensor*]
# user root
# timeout 20
#
# If you want to use "ipmitool sdr", add the following:
# Note: When you use this, the threshold provided by the sensor board is not used.
#
# [ipmitool_sensor*]
# user root
# timeout 20
# env.ipmitool_options sdr
#
# Parameters supported:
#
# config
# autoconf
# suggest
#
# Configurable variables
#
# ipmitool - ipmitool command (default: ipmitool)
# ipmitool_options - ipmitool command options (default: sensor)
# sdr: you can use 'sdr' instead of sensor.
# cache_file - cache file
# (default: /var/lib/munin/plugin-state/plugin-ipmitool_sensor.cache)
# cache_expires - cache expires (default: 275)
#
# fan_type_regex - Regular expression for unit of fan (default: RPM)
# temp_type_regex - Regular expression for unit of temp (default: degrees C)
# volt_type_regex - Regular expression for unit of volt (default: (Volts|Watts|Amps))
#
# fan_warn_percent - Percentage over minimum for warning (default: 5)
# fan_lower_critical - Preferred lower critical value for fan
# fan_upper_critical - Preferred upper critical value for fan
# temp_lower_critical - Preferred lower critical value for temp
# temp_lower_warning - Preferred lower warning value for temp
# temp_upper_warning - Preferred upper warning value for temp
# temp_upper_critical - Preferred upper critical value for temp
# volt_warn_percent - Percentage over minimum/under maximum for warning
# Narrow the voltage bracket by this. (default: 20)
#
# $Log$
# Revision 1.6 2011/02/07 12:50:00 jfut
# Bug fix: Check temp_upper_warning and temp_upper_critical was not working again.
#
# Revision 1.5 2011/01/28 00:39:00 jfut
# Bug fix: Check temp_upper_warning and temp_upper_critical was not working.
#
# Revision 1.4 2009/02/08 23:51:00 jfut
# Support "ipmitool sdr".
# Add Watts and Amp as voltage unit.
# Add fan_type_regex/temp_type_regex/volt_type_regex as option of sensor type.
#
# Revision 1.3 2008/11/11 13:55:00 jfut
# Add infinity value check for HP ProLiant DL160.
# Add preferred value option for fan and temp.
#
# Revision 1.2 2008/10/28 19:21:22 jfut
# Add file check.
#
# Revision 1.1 2008/10/27 18:52:31 jfut
# Add cache mechanism.
#
# Revision 1.0 2008/10/27 14:25:12 jfut
# Initial release.
#
# Magic markers:
#%# family=manual
#%# capabilities=autoconf suggest
use strict;

# The output of ipmitool is parsed as text, so pin the locale to keep it stable.
$ENV{'LANG'}   = "C";
$ENV{'LC_ALL'} = "C";

# Command to run and its arguments; both can be overridden from the plugin environment.
my $IPMITOOL      = $ENV{'ipmitool'} || 'ipmitool';
my @IPMITOOL_OPTS = ('sensor');
@IPMITOOL_OPTS = split(/\s+/, $ENV{'ipmitool_options'}) if exists $ENV{'ipmitool_options'};

# Where the ipmitool output is cached and for how many seconds it stays valid.
my $CACHE_DIR     = "/var/lib/munin/plugin-state";
my $CACHE_FILE    = $ENV{'cache_file'} || "$CACHE_DIR/plugin-ipmitool_sensor.cache";
my $CACHE_EXPIRES = $ENV{'cache_expires'} || 275;

# Per sensor type: the pattern that recognises its unit in the ipmitool output,
# the graph texts, and the routine that prints its warning/critical limits.
my %config = do {
    # Compile the unit pattern from the named environment variable, or from
    # the built-in default when that variable is not set.
    my $unit_regex = sub {
        my ($env_key, $default) = @_;
        my $pattern = exists $ENV{$env_key} ? $ENV{$env_key} : $default;
        return qr/$pattern/im;
    };
    (
        fan => {
            regex           => $unit_regex->('fan_type_regex', 'RPM'),
            title           => 'IPMITool Sensor: Fans',
            vtitle          => 'RPM',
            print_threshold => \&fan_threshold,
            graph_args      => '--base 1000 -l 0'
        },
        temp => {
            regex           => $unit_regex->('temp_type_regex', 'degrees C'),
            title           => 'IPMITool Sensor: Temperatures',
            vtitle          => 'Celsius',
            print_threshold => \&temp_threshold,
            graph_args      => '--base 1000 -l 0'
        },
        volt => {
            regex           => $unit_regex->('volt_type_regex', '(Volts|Watts|Amps)'),
            title           => 'IPMITool Sensor: Voltages',
            vtitle          => '_AUTO_DETECT_FAILED_',
            print_threshold => \&volt_threshold,
            graph_args      => '--base 1000'
        },
    );
};
# "autoconf": report whether the ipmitool program can be executed.
if (defined $ARGV[0] and $ARGV[0] eq 'autoconf') {
    # Keep whatever the probe writes to STDERR out of the answer, then
    # point STDERR back at STDOUT once the probe is done.
    close(STDERR);
    my $status = system($IPMITOOL);
    open (STDERR, ">&STDOUT");

    # A wait status of 0 or 256 (exit code 0 or 1) proves the program ran.
    unless ($status == 0 || $status == 256) {
        print "no (program $IPMITOOL not found)\n";
        exit 1;
    }
    print "yes\n";
    exit 0;
}
# "suggest": print the name of every sensor type (one per line) whose unit
# pattern occurs somewhere in the ipmitool output.
if (defined $ARGV[0] and $ARGV[0] eq 'suggest') {
    my $text = get_sensor_data();
    # Every line still carries its trailing newline, so the lines are simply
    # concatenated.  Any separator would end up at the start of each line and
    # defeat user patterns anchored with ^.  An absent result is treated as
    # "no lines" instead of dereferencing undef.
    my $alltext = join('', @{ $text || [] });
    # Sorted so that the suggestions come out in a stable order.
    foreach my $func (sort keys %config) {
        print $func, "\n" if $alltext =~ $config{$func}->{regex};
    }
    exit;
}
# The sensor type is the part of the invocation name that follows
# "ipmitool_sensor_", e.g. ipmitool_sensor_temp -> "temp".
my ($func) = $0 =~ /ipmitool_sensor_(.+)$/;
exit 2 unless defined $func;
# Reject suffixes that have no entry in %config: without one there is neither
# a unit pattern nor a threshold printer, and the plugin would only fail later
# with an obscure error.
unless (exists $config{$func}) {
    print STDERR "Unknown sensor type \"$func\" (expected one of: ",
        join(', ', sort keys %config), ")\n";
    exit 2;
}
my $text = get_sensor_data();
# Running number appended to the sensor type to build the field names (fan1, fan2, ...).
my $sensor = 1;
# "config": print the graph header followed by label and limits of every
# sensor of the selected type that currently reports a usable value.
if (defined $ARGV[0] and $ARGV[0] eq 'config') {
    my $spec    = $config{$func};
    my $unit_re = $spec->{regex};

    # The vertical title of the volt graph is the unit found on the first
    # matching line of the ipmitool output.
    if ($func eq 'volt') {
        foreach my $row (@{$text}) {
            next unless $row =~ $unit_re;
            my $unit = (get_sensor_items($row, $unit_re))[2];
            $spec->{vtitle} = $unit;
            last;
        }
        $text = get_sensor_data();
    }

    # graph header
    print "graph_title $spec->{title}\n";
    print "graph_vtitle $spec->{vtitle}\n";
    print "graph_args $spec->{graph_args}\n";
    print "graph_category sensors\n";

    # one field per sensor with a valid reading
    foreach my $row (@{$text}) {
        next unless $row =~ $unit_re;
        my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr) = get_sensor_items($row, $unit_re);
        next unless &is_valid_value($value);

        my $field = $func . $sensor;
        print "$field.label $label\n";
        $spec->{print_threshold}->($field, $lcr, $lnc, $unc, $ucr);
        # An ignore_<field> environment variable hides the field from the graph.
        print "$field.graph no\n" if exists $ENV{"ignore_$field"};
        $sensor++;
    }
    exit 0;
}
# Default action: print the current reading of every sensor of the selected
# type.  Sensors without a usable value are skipped and do not consume a
# field number.
foreach my $row (@{$text}) {
    next unless $row =~ /$config{$func}->{regex}/;
    my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr) = get_sensor_items($row, $config{$func}->{regex});
    # for debug
    # print "$func$sensor.value [$label] [$value] [$lcr] [$lnc] [$unc] [$ucr]\n";
    next unless &is_valid_value($value);
    print "$func$sensor.value $value\n";
    $sensor++;
}
# Return the output lines of ipmitool as an array reference.
#
# The lines are read from the cache file when it exists and is young enough
# (a cache_expires of -1 accepts a cache of any age).  Otherwise ipmitool is
# run and, if it produced output and the cache directory is writable, the
# output is stored in the cache file for later invocations.
#
# Returns a reference to an array of lines (each still ending in its
# newline); the array is empty when no output could be obtained.
# Dies when the cache file cannot be opened or ipmitool cannot be started.
sub get_sensor_data {
    my @lines;

    if (-f $CACHE_FILE) {
        my $cache_timestamp = (stat($CACHE_FILE))[9];
        if ($CACHE_EXPIRES == -1 || time - $cache_timestamp <= $CACHE_EXPIRES) {
            open(my $in, "<", $CACHE_FILE) or die "Could not open \"$CACHE_FILE\" for reading\n";
            @lines = <$in>;
            close($in);
        }
    }

    # No usable cache (missing, expired or empty): ask ipmitool itself.
    if (! @lines) {
        # The list form of a pipe open reports a failure to fork or to start
        # the command through its return value, so this process can never
        # continue as a half-started child.
        open(my $exe, '-|', $IPMITOOL, @IPMITOOL_OPTS)
            or die "Could not run \"$IPMITOOL\": $!\n";
        @lines = <$exe>;
        close($exe);

        # Only cache real output; an empty cache would be re-read as "no data" anyway.
        if (@lines && -w $CACHE_DIR) {
            open(my $out, ">", $CACHE_FILE) or die "Could not open \"$CACHE_FILE\" for writing\n";
            print {$out} @lines;
            close($out) or warn "Could not finish writing \"$CACHE_FILE\": $!\n";
        }
    }

    return \@lines;
}
# Split one line of ipmitool output into the fields used by this plugin.
#
# Parameters:
#   $line  - one output line of "ipmitool sensor" (ten '|'-separated columns)
#            or of "ipmitool sdr" (three columns)
#   $regex - unit pattern of the sensor type; in sdr output it separates the
#            reading from the unit that shares its column
#
# Returns the list ($label, $value, $unit, $lcr, $lnc, $unc, $ucr).
# Columns that are missing from the line are returned as 'na'.
sub get_sensor_items {
    my ($line, $regex) = @_;
    my @items = split(/\s*\|\s*/, $line);
    my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr)
        = (trim($items[0]), trim($items[1]), trim($items[2]), trim($items[5]), trim($items[6]), trim($items[7]), trim($items[8]));

    # "ipmitool sensor" lines ($#items == 9) already have value and unit in
    # separate columns.  "ipmitool sdr" lines ($#items == 2) carry both in the
    # second column, e.g. "1434.31 RPM".
    if ($#items == 2 && $value =~ /$regex/) {
        my $match_start = $-[0];
        # Prefer the first capture group for the unit, but fall back to the
        # whole match: patterns without a group (such as the default fan and
        # temp patterns) would otherwise leave the unit undefined.
        my $matched = defined $1 ? $1 : substr($value, $match_start, $+[0] - $match_start);
        # The reading is everything in front of the unit.
        $value = trim(substr($value, 0, $match_start));
        $unit  = trim($matched);
    }

    # some boards show data in incorrect order.
    # - HP ProLiant ML110 G5
    # CPU FAN | 1434.309 | RPM | ok | 5537.099 | 4960.317 | 4859.086 | na | 937.383 | na
    # SYSTEM FAN | 1506.932 | RPM | ok | 5952.381 | 5668.934 | 5411.255 | na | 937.383 | na
    # - HP ProLiant DL160
    # FAN1 ROTOR1 | 7680.492 | RPM | ok | na | inf | na | na | 1000.400 | na
    # Mirror all four limits when the lower one is above the upper one or is infinite.
    if ((&is_valid_value($lcr) && &is_valid_value($ucr) && $lcr > $ucr) || $lcr eq 'inf') {
        ($lcr, $lnc, $unc, $ucr) = ($ucr, $unc, $lnc, $lcr);
    }
    if ((&is_valid_value($lnc) && &is_valid_value($unc) && $lnc > $unc) || $lnc eq 'inf') {
        ($lcr, $lnc, $unc, $ucr) = ($ucr, $unc, $lnc, $lcr);
    }
    return ($label, $value, $unit, $lcr, $lnc, $unc, $ucr);
}
# Print the warning and critical ranges of one fan field.
#
# Parameters:
#   $name - field name, e.g. "fan1"
#   $lcr  - lower critical limit reported by the sensor
#   $lnc  - lower warning (non-critical) limit reported by the sensor
#   $unc  - upper warning (non-critical) limit reported by the sensor
#   $ucr  - upper critical limit reported by the sensor
#
# The critical limits can be overridden with fan_lower_critical and
# fan_upper_critical.  A limit the sensor does not provide falls back to a
# default (50 / 6000 for the critical limits, the critical limit narrowed by
# fan_warn_percent for the warning limits); a limit of 'inf' becomes empty.
# Nothing is printed when all four limits end up empty.
sub fan_threshold {
    my ($name, $lcr, $lnc, $unc, $ucr) = @_;
    my $warn_percent = exists $ENV{fan_warn_percent} ? $ENV{fan_warn_percent} : 5;

    # lcr: lower critical
    if (exists $ENV{fan_lower_critical}) {
        $lcr = $ENV{fan_lower_critical};
    } elsif (! &is_valid_value($lcr)) {
        $lcr = ($lcr eq 'inf') ? '' : '50';
    }

    # lnc: lower warning
    if (! &is_valid_value($lnc)) {
        if ($lnc eq 'inf') { $lnc = ''; }
        else { $lnc = ($lcr eq '') ? '' : $lcr * (100 + $warn_percent) / 100; }
    }

    # ucr: upper critical
    if (exists $ENV{fan_upper_critical}) {
        $ucr = $ENV{fan_upper_critical};
    } elsif (! &is_valid_value($ucr)) {
        $ucr = ($ucr eq 'inf') ? '' : '6000';
    }

    # unc: upper warning
    if (! &is_valid_value($unc)) {
        if ($unc eq 'inf') { $unc = ''; }
        else { $unc = ($ucr eq '') ? '' : $ucr * (100 - $warn_percent) / 100; }
    }

    return unless ($lcr ne '' || $lnc ne '' || $unc ne '' || $ucr ne '');

    # Plain print: the values (some of them taken from the environment) must
    # not be interpreted as a printf format string.
    print "$name.warning $lnc:$unc\n";
    print "$name.critical $lcr:$ucr\n";
}
# Print the warning and critical ranges of one temperature field.
#
# Parameters:
#   $name - field name, e.g. "temp1"
#   $lcr  - lower critical limit reported by the sensor
#   $lnc  - lower warning (non-critical) limit reported by the sensor
#   $unc  - upper warning (non-critical) limit reported by the sensor
#   $ucr  - upper critical limit reported by the sensor
#
# Each limit can be overridden with temp_lower_critical, temp_lower_warning,
# temp_upper_warning and temp_upper_critical.  A limit the sensor does not
# provide falls back to a default (5 / 10 / 65 / 70); a limit of 'inf'
# becomes empty.  Nothing is printed when all four limits end up empty.
sub temp_threshold {
    my ($name, $lcr, $lnc, $unc, $ucr) = @_;

    # lcr: lower critical
    if (exists $ENV{temp_lower_critical}) {
        $lcr = $ENV{temp_lower_critical};
    } elsif (! &is_valid_value($lcr)) {
        $lcr = ($lcr eq 'inf') ? '' : 5;
    }

    # lnc: lower warning
    if (exists $ENV{temp_lower_warning}) {
        $lnc = $ENV{temp_lower_warning};
    } elsif (! &is_valid_value($lnc)) {
        $lnc = ($lnc eq 'inf') ? '' : 10;
    }

    # unc: upper warning
    if (exists $ENV{temp_upper_warning}) {
        $unc = $ENV{temp_upper_warning};
    } elsif (! &is_valid_value($unc)) {
        $unc = ($unc eq 'inf') ? '' : '65';
    }

    # ucr: upper critical
    if (exists $ENV{temp_upper_critical}) {
        $ucr = $ENV{temp_upper_critical};
    } elsif (! &is_valid_value($ucr)) {
        $ucr = ($ucr eq 'inf') ? '' : '70';
    }

    return unless ($lcr ne '' || $lnc ne '' || $unc ne '' || $ucr ne '');

    # Plain print: the values (some of them taken from the environment) must
    # not be interpreted as a printf format string.
    print "$name.warning $lnc:$unc\n";
    print "$name.critical $lcr:$ucr\n";
}
# Print the warning and critical ranges of one voltage field.
#
# Parameters:
#   $name - field name, e.g. "volt1"
#   $lcr  - lower critical limit reported by the sensor
#   $lnc  - lower warning (non-critical) limit reported by the sensor
#   $unc  - upper warning (non-critical) limit reported by the sensor
#   $ucr  - upper critical limit reported by the sensor
#
# Critical limits the sensor does not provide are left empty.  Missing
# warning limits are derived from the critical limits by narrowing the
# bracket by volt_warn_percent (default 20).  Nothing is printed when all
# four limits end up empty.
sub volt_threshold {
    my ($name, $lcr, $lnc, $unc, $ucr) = @_;
    my $warn_percent = exists $ENV{volt_warn_percent} ? $ENV{volt_warn_percent} : 20;

    if (! &is_valid_value($lcr)) { $lcr = ''; }
    if (! &is_valid_value($lnc)) { $lnc = ($lcr eq '') ? '' : $lcr * (100 + $warn_percent) / 100; }
    if (! &is_valid_value($ucr)) { $ucr = ''; }
    if (! &is_valid_value($unc)) { $unc = ($ucr eq '') ? '' : $ucr * (100 - $warn_percent) / 100; }

    return unless ($lcr ne '' || $lnc ne '' || $unc ne '' || $ucr ne '');

    # Plain print: the values must not be interpreted as a printf format string.
    print "$name.warning $lnc:$unc\n";
    print "$name.critical $lcr:$ucr\n";
}
# Strip leading and trailing whitespace from a field.
# An undefined field (a column missing from the line) is returned as 'na'.
sub trim {
    my $field = shift;
    return 'na' unless defined $field;
    $field =~ s/^\s*(.*?)\s*$/$1/;
    return $field;
}
# Tell whether a field read from ipmitool holds a usable reading.
#
# Returns 0 for the placeholders 'na' and 'inf', for the empty string and for
# an undefined value; returns 1 for everything else.
#
# Declared without a prototype: the sub takes one argument, so an empty
# prototype would reject every call that is not written with a leading '&'.
sub is_valid_value {
    my $value = shift;
    return 0 unless defined $value;
    return ($value eq 'na' || $value eq 'inf' || $value eq '') ? 0 : 1;
}
########################################
=head1 How to test
cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt
cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt config
cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt suggest
cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt autoconf
fan_warn_percent=50 fan_lower_critical=100 fan_upper_critical=1000 cache_file=ipmitool_sensor_ \
cache_expires=-1 ./ipmitool_sensor_fan config
temp_lower_warning=1 temp_lower_critical=2 temp_upper_critical=71 temp_upper_warning=72 \
cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_temp config
volt_warn_percent=50 \
cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt config
=head1 Test Data
unr Upper Non-Recoverable
ucr Upper Critical
unc Upper Non-Critical
lnc Lower Non-Critical
lcr Lower Critical
lnr Lower Non-Recoverable
=head2 ipmitool sensor
# HP ProLiant ML110 G5
CPU FAN | 1434.309 | RPM | ok | 5537.099 | 4960.317 | 4859.086 | na | 937.383 | na
SYSTEM FAN | 1497.454 | RPM | ok | 5952.381 | 5668.934 | 5411.255 | na | 937.383 | na
System 12V | 12.152 | Volts | ok | na | na | na | na | na | na
System 5V | 5.078 | Volts | ok | na | na | na | na | na | na
System 3.3V | 3.271 | Volts | ok | na | na | na | na | na | na
CPU0 Vcore | 1.127 | Volts | ok | na | na | na | na | na | na
System 1.25V | 1.254 | Volts | ok | na | na | na | na | na | na
System 1.8V | 1.842 | Volts | ok | na | na | na | na | na | na
System 1.2V | 1.107 | Volts | ok | na | na | na | na | na | na
CPU0 Diode | na | degrees C | na | na | 20.000 | 25.000 | 85.000 | 90.000 | 95.000
CPU0 Dmn 0 Temp | 24.500 | degrees C | ok | na | 0.000 | 0.000 | 97.000 | 100.000 | 100.500
CPU0 Dmn 1 Temp | 29.000 | degrees C | ok | na | 0.000 | 0.000 | 97.000 | 100.000 | 100.500
# HP ProLiant DL160
FAN1 ROTOR1 | 7680.492 | RPM | ok | na | inf | na | na | 1000.400 | na
# HP ProLiant DL360 G5
Fan Block 1 | 34.888 | unspecified | nc | na | na | 75.264 | na | na | na
Fan Block 2 | 29.792 | unspecified | nc | na | na | 75.264 | na | na | na
Fan Block 3 | 37.240 | unspecified | nc | na | na | 75.264 | na | na | na
Fan Blocks | 0.000 | unspecified | nc | na | na | 0.000 | na | na | na
Temp 1 | 40.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 2 | 21.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 3 | 30.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 4 | 30.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 5 | 28.000 | degrees C | ok | na | na | -64.000 | na | na | na
Temp 6 | na | degrees C | na | na | na | 32.000 | na | na | na
Temp 7 | na | degrees C | na | na | na | 32.000 | na | na | na
Power Meter | 214.000 | Watts | cr | na | na | 384.000 | na | na | na
Power Meter 2 | 220.000 | watts | cr | na | na | 384.000 | na | na | na
=head2 ipmitool sdr
# HP ProLiant ML110 G5
CPU FAN | 1434.31 RPM | ok
SYSTEM FAN | 1497.45 RPM | ok
System 12V | 12.10 Volts | ok
System 5V | 5.08 Volts | ok
System 3.3V | 3.27 Volts | ok
CPU0 Vcore | 1.14 Volts | ok
System 1.25V | 1.25 Volts | ok
System 1.8V | 1.84 Volts | ok
System 1.2V | 1.11 Volts | ok
CPU0 Diode | disabled | ns
CPU0 Dmn 0 Temp | 23.50 degrees C | ok
CPU0 Dmn 1 Temp | 29 degrees C | ok
# HP ProLiant DL360 G5
Fan Block 1 | 34.89 unspecifi | nc
Fan Block 2 | 29.79 unspecifi | nc
Fan Block 3 | 37.24 unspecifi | nc
Fan Blocks | 0 unspecified | nc
Temp 1 | 41 degrees C | ok
Temp 2 | 19 degrees C | ok
Temp 3 | 30 degrees C | ok
Temp 4 | 30 degrees C | ok
Temp 5 | 26 degrees C | ok
Temp 6 | disabled | ns
Temp 7 | disabled | ns
Power Meter | 208 Watts | cr
Power Meter 2 | 210 watts | cr
=cut
# vim:syntax=perl