mirror of
https://github.com/munin-monitoring/contrib.git
synced 2018-11-08 00:59:34 +01:00
2670e4dc26
* matching a single-line regular expression against multiline text is a bit hard to picture * apply the regular expressions to each line instead * first line should look like "active raid1 sda1[0] sdc1[2] sdb1[1]" * second line should look like "123456 blocks super 1.2 [2/2] [UU]" * third line will appear only when the array is in action
116 lines
3.4 KiB
Perl
Executable File
116 lines
3.4 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
#
|
|
# (c) 2007 Nathan Rutman nathan@clusterfs.com
|
|
#
|
|
# Plugin to monitor RAID status
|
|
#
|
|
# Results are % of healthy drives in a raid device
|
|
# and % rebuilt of devices that are resyncing.
|
|
#
|
|
#%# family=contrib
|
|
#%# capabilities=autoconf
|
|
|
|
# "autoconf" mode: tell the caller whether this plugin is usable on this host.
# Answers "yes" (exit status 0) when /proc/mdstat is readable and grep finds
# "md" in it; otherwise answers "no RAID devices" (exit status 1).
if (defined $ARGV[0] and $ARGV[0] eq "autoconf") {
    # grep is only run when the file is readable; its output is used purely
    # as a true/false value.
    my $md_lines = (-r "/proc/mdstat") ? `grep md /proc/mdstat` : "";

    unless ($md_lines) {
        print "no RAID devices\n";
        exit 1;
    }

    print "yes\n";
    exit 0;
}
|
|
|
|
# "config" mode, graph-wide part: print the attributes that describe the
# graph as a whole. The script does not exit here; the per-array field
# definitions are printed further down while /proc/mdstat is parsed.
if (defined $ARGV[0] and $ARGV[0] eq "config") {
    my @graph_header = (
        "graph_title RAID status\n",
        "graph_category disk\n",
        "graph_info This graph monitors RAID disk health. Values are percentage of healthy drives in each raid group. Degraded devices are marked Critical.\n",
        "graph_args --base 1000 -l 0\n",
        "graph_vlabel % healthy/rebuilt\n",
        "graph_scale no\n",
    );
    print @graph_header;
}
|
|
|
|
# Read /proc/mdstat into @text in a single pass. The file's contents may
# change between reads, so the parser below works on this snapshot rather
# than on the live handle.
#
# The three-argument form of open is used with an explicit read mode, and a
# failure to open is reported instead of being silently ignored.
open(my $mdstat, '<', '/proc/mdstat')
    or die "cannot open /proc/mdstat: $!\n";
my @text = <$mdstat>;
close($mdstat);
|
|
|
|
# Patterns applied line by line to the /proc/mdstat snapshot.

# Array header line, e.g. "md0 : active raid1 sda1[0] sdc1[2] sdb1[1]".
# Captures: $1 device name, $2 optional "(auto-read-only) " marker,
# $3 RAID type, $4 member list.
my $devinfo_re = qr/(md\d+)\s+:\s+active\s+(\(auto-read-only\)\s+|)(\w+)\s+(.*)/;

# Array status line, e.g. "123456 blocks super 1.2 [2/2] [UU]".
# Captures: $1 and $2 are the two numbers inside "[n/m]", $3 the flag string.
my $devstat_re = qr/.*\[(\d+)\/(\d+)]\s+\[(\w+)]/;

# Progress line, e.g. " [==>......]  check = 10.0% (12345/123456) ...".
# Captures: $1 the line up to and including "%", $2 "check" or "resync",
# $3 the percentage.
my $action_re = qr/(.*(check|resync)\s=\s+(\d+\.\d+)%)/;
|
|
|
|
# Walk the buffered /proc/mdstat lines (@text) and, for every active array
# that has a "[n/m] [UU]" status line, print either its Munin field
# definitions ("config" mode) or its current values (default mode).
#
# Fields per array mdN:
#   mdN          - 100 * second / first number of the "[n/m]" status
#   mdN_rebuild  - rebuild percentage reported by "mdadm -D" (100 if healthy)
#   mdN_check    - check/resync percentage from mdstat (0 if none running)
#
# All per-array state is declared inside the loop so that nothing parsed for
# one array (in particular the check/resync percentage) can leak into the
# output of the next one.
my $config_mode = (defined $ARGV[0] and $ARGV[0] eq "config");

while (@text) {
    my $line = shift @text;

    # Skip lines until an array header is found; it should look like
    # "md0 : active raid1 sda1[0] sdc1[2] sdb1[1]".
    next unless $line =~ /$devinfo_re/;
    my ($dev, $type, $members) = ($1, $3, $4);

    # The second line should look like "123456 blocks super 1.2 [2/2] [UU]".
    # Arrays without such a line (or a header at end of input) are skipped.
    $line = shift @text;
    next unless defined $line and $line =~ /$devstat_re/;
    my ($nmem, $nact) = ($1, $2);

    # The third line should look like
    # " [==>..................] check = 10.0% (12345/123456) finish=123min speed=12345/sec"
    # and appears only while the array is in action. Any other line is put
    # back so the next iteration can examine it.
    my $proc;
    $line = shift @text;
    if (defined $line) {
        if ($line =~ /$action_re/) {
            $proc = $3;
        }
        else {
            unshift(@text, $line);
        }
    }

    if ($config_mode) {
        print "$dev.label $dev\n";
        print "$dev.info $type $members\n";
        # "98:" gives only a lower bound: the field is critical below 98.
        # 98 is used instead of 100 because of an unfound bug: the value is
        # sometimes reported as 99.XX even when the OS reports 100.
        print "$dev.critical 98:\n";
        print $dev, "_rebuild.label $dev rebuilt\n";
        print $dev, "_rebuild.info $type\n";
        # Same 98 lower bound, for the same reason as above.
        print $dev, "_rebuild.critical 98:\n";
        print $dev, "_check.label $dev check/resync \n";
        print $dev, "_check.info $type\n";
    }
    else {
        my $pct  = 100 * $nact / $nmem;
        my $rpct = 100;

        if ($pct < 100) {
            # Degraded array: ask mdadm for the rebuild progress. $dev comes
            # from the (md\d+) capture, so it is safe to interpolate into the
            # shell command. No "Rebuild ... % complete" line means 0.
            my @output = `/sbin/mdadm -D /dev/$dev | grep Rebuild`;
            if ($output[0] and $output[0] =~ /([0-9]+)% complete/) {
                $rpct = $1;
            }
            else {
                $rpct = 0;
            }
        }

        # No progress line for this array means no check/resync is running.
        my $cpct = $proc ? $proc : 0;

        print "$dev.value $pct\n";
        print $dev, "_rebuild.value $rpct\n";
        print $dev, "_check.value $cpct\n";
    }
}

exit 0;
|
|
|