#!/usr/bin/perl -w
#
# (c) 2007 Nathan Rutman nathan@clusterfs.com
#
# Plugin to monitor RAID status
#
# Results are % of healthy drives in a raid device
# and % rebuilt of devices that are resyncing.
#
# Usage:
#   raid autoconf   print "yes" (exit 0) if /proc/mdstat lists md devices,
#                   otherwise "no RAID devices" (exit 1)
#   raid config     print the graph definition and one pair of fields
#                   (<dev> and <dev>_rebuild) per active md device
#   raid            print the current values for those fields
#
#%# family=contrib
#%# capabilities=autoconf

use strict;
use warnings;

# Kernel status file for software RAID (md) arrays.
# For testing, point this at a saved sample (e.g. a capture of a failed
# array) instead of the live file.
my $MDSTAT_PATH = '/proc/mdstat';

# Matches one active array: the "mdN : active <level> <members>" line
# followed by the line carrying the "[total/active] [UU_]" counters.
# Arrays without those counters (e.g. raid0/linear) do not match and are
# therefore not reported.
my $ARRAY_RE = qr{
    (md\d+) \s+ : \s+ active \s+   # device name (any number of digits)
    (\w+) \s+                      # raid level / personality
    (.*) \n                        # member list, up to end of line
    .* \[ (\d+) / (\d+) \]         # [configured members / active members]
    \s+ \[ \w+ \]                  # per-member status flags, e.g. [UU_]
}x;

# Return the whole content of $MDSTAT_PATH as one string, or undef when
# the file cannot be opened.
sub slurp_mdstat {
    open(my $fh, '<', $MDSTAT_PATH) or return;
    local $/;    # slurp mode: read everything in a single readline
    my $content = <$fh>;
    close $fh;
    return $content;
}

my $mode = defined $ARGV[0] ? $ARGV[0] : '';

if ($mode eq 'autoconf') {
    my $mdstat = slurp_mdstat();
    if (defined $mdstat and $mdstat =~ /md/) {
        print "yes\n";
        exit 0;
    }
    else {
        print "no RAID devices\n";
        exit 1;
    }
}

my $is_config = ($mode eq 'config');

if ($is_config) {
    print "graph_title RAID status\n";
    print "graph_category disk\n";
    print "graph_info This graph monitors RAID disk health. Values are percentage of healthy drives in each raid group. Degraded devices are marked Critical.\n";
    print "graph_args --base 1000 -l 0\n";
    print "graph_vlabel % healthy/rebuilt\n";
    print "graph_scale no\n";
}

my $text = slurp_mdstat();
exit 1 unless defined $text;

# A scalar-context /g match resumes after the previous match, so every
# array in the file is visited once without copying the remaining text.
while ($text =~ /$ARRAY_RE/g) {
    my ($dev, $type, $members, $nmem, $nact) = ($1, $2, $3, $4, $5);

    if ($is_config) {
        print "$dev.label $dev\n";
        print "$dev.info $type $members\n";
        # 100: means less than 100
        print "$dev.critical 100:\n";
        print "${dev}_rebuild.label $dev rebuilt\n";
        print "${dev}_rebuild.info $type\n";
        print "${dev}_rebuild.critical 100:\n";
    }
    else {
        # Percentage of configured members that are currently active.
        # A zero member count is reported as 0 rather than dividing by it.
        my $pct  = $nmem ? 100 * $nact / $nmem : 0;
        my $rpct = 100;

        if ($pct < 100) {
            # Degraded: ask mdadm how far the rebuild has progressed.
            # $dev is safe to interpolate: it can only be "md" plus digits.
            my $output = `/sbin/mdadm -D /dev/$dev | grep Rebuild`;
            if ($output =~ /([0-9]+)% complete/) {
                $rpct = $1;
            }
            else {
                # Degraded and no rebuild in progress.
                $rpct = 0;
            }
        }

        print "$dev.value $pct\n";
        print "${dev}_rebuild.value $rpct\n";
    }
}

exit 0;