#! /usr/bin/perl
########################################################################
# Copyright (c) 2012, Adrien Urban
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the
#    distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
########################################################################
#                                                                      #
#           WARNING WARNING WARNING WARNING WARNING WARNING            #
#                                                                      #
#       This plugin does not work properly with multiple masters       #
#                                                                      #
########################################################################
#
# multigraph, supersampling, extended cpu information
#
# require: mpstat (to actually collect the data)
# require linux /proc
#   (sorry, quick/dirty retrieve the number of cpu from /proc/cpuinfo)
#
#
# ENV (default):
#   MUNIN_PLUGSTATE - pid and cache files go there
#
# ENV (user defined):
#   MUNIN_UPDATERATE      - rate at which to update (default: 1s)
#   MUNIN_CACHEFLUSH_RATE - flush data every N batch (default: 1)
#   MUNIN_MPSTAT          - binary to use as mpstat
#
# increase cache flush rate if you have i/o performance issues
# warning: increasing flushrate too much might cause partial write, and
#   loss of data. 0 to disable flush
#
#
# Parent graph: cpu usage per core/thread
#   child graph(1): detailed cpu usage overall
#   child graph(n): detailed cpu usage per thread
#
# Known bugs:
#
#   Multi-Master
#     If there are many masters, the data is only sent once. Each master
#     will only have part of the data.
#
#   Everlasting
#     The daemon is launched on first config/fetch. A touch of the
#     pidfile is done on every following config/fetch. The daemon should
#     check if the pidfile is recent (configurable) enough, and stop
#     itself if not.
#
#   Graph Order
#     There is currently (2.0.6) no way to order child graphs.
#
#   RRD file
#     The master currently (2.0.6) generates rrd files for aggregate
#     values, and complains that no data is provided for them (but the
#     graph still works fine)
#
#%# family=auto
#%# capabilities=autoconf

use strict;
use warnings;

# Plugin name = basename of the invoked script; it is used to build the
# pid/cache file names and the daemon's process title.
my $plugin = $0;
$plugin =~ s{.*/}{};

# quick failsafe: the state directory is provided by munin-node/munin-run
if (!defined $ENV{MUNIN_PLUGSTATE}) {
    die "This plugin should be run via munin. Try munin-run $plugin\n";
}

########################################################################
# If you want to change something, it's probably doable here
#

# order in which the fields are displayed (stacked) on the graphs
my $fields_order = [
    'sys', 'usr', 'nice', 'idle', 'iowait', 'irq', 'soft', 'steal', 'guest',
];

# Field descriptions. The order of this list is the column order given by
# mpstat; the %s in each info string is filled in with sprintf() when the
# graph configuration is printed.
my $fields_info = [
    {
        name  => 'usr',
        label => 'usr',
        info  => "%s time spent in normal programs and daemons",
    },
    {
        name  => 'nice',
        label => 'nice',
        info  => "%s time spent in nice(1)d programs and daemons",
    },
    {
        name  => 'sys',
        label => 'sys',
        info  => "%s time spent in kernel system activity",
    },
    {
        name  => 'iowait',
        label => 'iowait',
        info  => "%s time spent waiting for blocking I/O operations",
    },
    {
        name  => 'irq',
        label => 'irq',
        info  => "%s time spent handling interrupts",
    },
    {
        name  => 'soft',
        label => 'soft',
        info  => "%s time spent handling software interrupts",
    },
    {
        name  => 'steal',
        label => 'steal',
        info  => "%s time spent elsewhere (stolen from us)",
    },
    {
        name  => 'guest',
        label => 'guest',
        info  => "%s time spent in a guest operating system",
    },
    {
        name  => 'idle',
        label => 'idle',
        info  => "%s time spent idling (waiting to get something to do)",
    },
];

# Path of the file holding the collector daemon's pid.
sub pidfile() { "$ENV{MUNIN_PLUGSTATE}/munin.$plugin.pid" }

# Path of the file in which collected samples wait for the next fetch.
sub cachefile() { "$ENV{MUNIN_PLUGSTATE}/munin.$plugin.cache" }

# Section/category string used for the graphs.
sub graph_section() { "system:cpu" }

# Base multigraph name of the parent graph.
sub graph_name() { "cpu_extended_multi_1s" }

# Title of the parent graph.
sub graph_title() { "CPU usage" }

# Title of the child graph covering all CPUs together.
sub graph_title_all() { "Overall CPU usage" }

# Title of the child graph for CPU number N (single argument).
sub graph_title_n($) { "CPU#" . (shift) . " usage" }

# Process title ($0) taken by the collector daemon.
sub acquire_name() { "<$plugin> collecting information" }

########################################################################
# if you need to change something after that line, It should probably be
# changed to be configurable above it.
#
use IO::Handle;    # makes ->flush() available on file handles for perl < 5.14

# mpstat sampling interval in seconds (MUNIN_UPDATERATE, default 1).
my $update_rate = 1;
if (defined $ENV{MUNIN_UPDATERATE}) {
    if ($ENV{MUNIN_UPDATERATE} =~ /\A[1-9][0-9]*\z/) {
        $update_rate = int($ENV{MUNIN_UPDATERATE});
    } else {
        print STDERR "Invalid update_rate: $ENV{MUNIN_UPDATERATE}\n";
    }
}

# Flush the cache file every N written lines (MUNIN_CACHEFLUSH_RATE,
# default 1); 0 leaves flushing to the I/O layer.
my $flush_interval = 1;
if (defined $ENV{MUNIN_CACHEFLUSH_RATE}) {
    if ($ENV{MUNIN_CACHEFLUSH_RATE} =~ /\A[0-9]+\z/) {
        # This value used to be stored in $update_rate by mistake, which
        # changed the sampling interval and left the flush rate untouched.
        $flush_interval = int($ENV{MUNIN_CACHEFLUSH_RATE});
    } else {
        print STDERR "Invalid flush rate: $ENV{MUNIN_CACHEFLUSH_RATE}\n";
    }
}

# Command used as mpstat (MUNIN_MPSTAT). Arguments may follow the binary,
# so only the first word is checked for existence.
my $mpstat = "mpstat";
if (defined $ENV{MUNIN_MPSTAT}) {
    my ($binary) = split ' ', $ENV{MUNIN_MPSTAT};
    if (defined $binary and -f $binary) {
        $mpstat = $ENV{MUNIN_MPSTAT};
    } else {
        print STDERR "MUNIN_MPSTAT: file not found: $ENV{MUNIN_MPSTAT}\n";
    }
}

my $cpu_count_cache = undef;

# Number of "processor" entries in /proc/cpuinfo, computed once and then
# served from $cpu_count_cache. Returns 0 when the file cannot be read.
sub cpu_count()
{
    if (not defined $cpu_count_cache) {
        $cpu_count_cache = 0;
        if (open my $cpuinfo, '<', '/proc/cpuinfo') {
            while (defined(my $entry = readline($cpuinfo))) {
                ++$cpu_count_cache if $entry =~ /^processor/;
            }
            close $cpuinfo;
        }
    }
    return $cpu_count_cache;
}

# Returns 1 when the pid stored in the pidfile answers to signal 0, else 0.
# A pidfile that is empty, unreadable or names a process that does not
# answer is removed.
sub is_running()
{
    if (-f pidfile()) {
        my $pid = undef;
        if (open my $fh, '<', pidfile()) {
            $pid = readline($fh);
            close $fh;
            chomp $pid if defined $pid;    # empty pidfile yields undef
        }
        if ($pid) {
            # signal 0 only probes for the process, nothing is delivered
            return 1 if kill 0, $pid;
        }
        # process does not exist: drop the stale pidfile
        unlink(pidfile());
    }
    return 0;
}

# FIXME: should also trap kill sigint and sigterm
# FIXME: check pidfile got touched recently
#
# Collector loop (runs in the daemon process): records its pid, starts
# "$mpstat -P ALL $update_rate" and appends one line per CPU sample to the
# cache file as "<cpu> <epoch> <mpstat values...>". When mpstat ends, the
# pid and cache files are removed. Dies if the pidfile, the cache file or
# the mpstat pipe cannot be opened.
sub acquire()
{
    $0 = acquire_name();

    open my $pidfh, '>', pidfile() or die "open: @{[ pidfile() ]}: $!\n";
    print {$pidfh} $$, "\n";
    close $pidfh;

    open my $cache, '>>', cachefile() or die "open: @{[ cachefile() ]}: $!\n";
    open my $samples, '-|', "$mpstat -P ALL $update_rate"
        or die "open mpstat|: $!\n";

    my $flush_count = 0;
    while (defined(my $row = readline($samples))) {
        chomp $row;
        my @field = split ' ', $row;
        # keep only data rows: second column is "all" or a cpu number
        # (blank lines and headers have no such column)
        next unless defined $field[1] and $field[1] =~ /^(all|[0-9]+)$/;
        # replace mpstat's wall-clock column by the cpu id, and the cpu
        # column by the current epoch
        $field[0] = $field[1];
        $field[1] = time();
        print {$cache} join(" ", @field), "\n";
        if ($flush_interval and ++$flush_count >= $flush_interval) {
            $cache->flush();
            $flush_count = 0;
        }
    }
    close $samples;
    close $cache;
    unlink(pidfile());
    unlink(cachefile());
}

# Makes sure a collector is running: touches the pidfile of a live
# collector, otherwise forks one with stdio redirected to /dev/null.
sub run_daemon()
{
    if (is_running()) {
        my $now = time;
        utime $now, $now, pidfile();
        return;
    }
    my $child = fork();
    if (not defined $child) {
        # fork() returns undef on failure; comparing that to 0 would make
        # the calling process itself turn into the collector
        print STDERR "fork: $!\n";
        return;
    }
    if (0 == $child) {
        close(STDIN);
        close(STDOUT);
        close(STDERR);
        open STDIN,  "<", "/dev/null";
        open STDOUT, ">", "/dev/null";
        open STDERR, ">", "/dev/null";
        acquire();
        exit(0);
    }
}

# autoconf: prints "yes" when the mpstat binary is found by which(1),
# "no" otherwise.
sub run_autoconf()
{
    # in case we have specified args, check the file before that
    my $file = $mpstat;
    $file =~ s/ .*//;
    my $path = `which "$file" 2>/dev/null`;
    print $path ? "yes\n" : "no\n";
}

# Prints the configuration of one child graph.
#   $i     - graph suffix: "all" or a cpu number
#   $name  - text substituted for %s in each field's info string
#   $title - graph title
sub show_config($$$)
{
    my ($i, $name, $title) = @_;
    my $graph_order = join(" ", "graph_order", @$fields_order);

    # NOTE(review): the original graph header text was lost in extraction;
    # the lines below are a reconstruction from the values this sub builds
    # and the graph names used elsewhere in this file. Confirm against a
    # pristine copy of the plugin.
    print map { "$_\n" } (
        "multigraph @{[ graph_name() ]}.cpu$i",
        "graph_title $title",
        "graph_category @{[ graph_section() ]}",
        "graph_vlabel cpu use %",
        "graph_scale no",
        "graph_args --base 1000 -r --lower-limit 0 --upper-limit 100",
        "update_rate $update_rate",
        $graph_order,
    );

    for my $field (@$fields_info) {
        # first displayed field is the base of the stack
        # NOTE(review): the default style was lost in extraction; "STACK"
        # is presumed from the "AREA" special case - confirm.
        my $style = "STACK";
        if ($field->{name} eq $fields_order->[0]) {
            $style = "AREA";
        }
        print map { "$_\n" } (
            "$field->{name}.label $field->{label}",
            "$field->{name}.draw $style",
            "$field->{name}.info @{[ sprintf($field->{info}, $name) ]}",
            "$field->{name}.min 0",
            "$field->{name}.cdef $field->{name}",
        );
    }
}

# config: starts the collector if needed, then prints the parent graph
# followed by one child graph for all CPUs and one per CPU.
sub run_config()
{
    run_daemon();
    my $cpus = cpu_count();
    my $graph_order = "graph_order";
    my $sub_order = "order cpuall";
    for my $i (0 .. $cpus - 1) {
        # parent field use<i> borrows the idle value of child graph cpu<i>
        $graph_order .= " use$i=@{[ graph_name() ]}.cpu$i.idle";
        $sub_order .= " cpu$i";
    }
    # none of those seems to have any effect
    #domain_$sub_order
    #node_$sub_order
    #graph_$sub_order
    #service_$sub_order
    #category_$sub_order
    #group_$sub_order

    # NOTE(review): everything from here to the end of this sub was lost
    # in extraction and is reconstructed (parent header, use<i> fields
    # shown as 100 - idle, child graph calls and their name strings).
    # Confirm against a pristine copy of the plugin.
    print map { "$_\n" } (
        "multigraph @{[ graph_name() ]}",
        "graph_title @{[ graph_title() ]}",
        "graph_category @{[ graph_section() ]}",
        "graph_vlabel cpu use %",
        "graph_scale no",
        "graph_args --base 1000 -r --lower-limit 0 --upper-limit 100",
        "update_rate $update_rate",
        $graph_order,
    );
    for my $i (0 .. $cpus - 1) {
        print map { "$_\n" } (
            "use$i.label CPU#$i",
            "use$i.min 0",
            "use$i.cdef 100,use$i,-",
        );
    }

    show_config("all", "all CPU", graph_title_all());
    for my $i (0 .. $cpus - 1) {
        show_config($i, "CPU#$i", graph_title_n($i));
    }
}

# Prints one "<field>.value <epoch>:<value>" line per field for a cached
# sample. $line is an array ref: [ cpu, epoch, values in mpstat order ].
# NOTE(review): the head of this sub was lost in extraction; the starting
# index 2 follows from the row layout written by acquire() - confirm.
sub fetch_showline($)
{
    my $line = shift;
    my $n = 2;
    for my $field (@$fields_info) {
        print "$field->{name}.value $line->[1]:$line->[$n]\n";
        ++$n;
    }
}

# fetch: starts the collector if needed, then drains the cache file and
# prints the cached samples grouped by cpu, one multigraph section each.
sub run_fetch()
{
    run_daemon();
    if (open my $cache, '+<', cachefile()) {
        my $cpus = {};
        while (defined(my $row = readline($cache))) {
            chomp $row;
            my $field = [ split(/ /, $row) ];
            # a partially written row would print empty values: skip it
            next if @$field < 2 + @$fields_info;
            push @{ $cpus->{ $field->[0] } }, $field;
        }
        # finished reading ? truncate it right away
        truncate $cache, 0;
        close $cache;
        foreach my $cpu (sort keys %$cpus) {
            # NOTE(review): section line reconstructed from the child
            # graph names referenced in run_config() - confirm.
            print "multigraph @{[ graph_name() ]}.cpu$cpu\n";
            foreach my $line (@{ $cpus->{$cpu} }) {
                fetch_showline($line);
            }
        }
    }
}

# Command dispatch: the first argument selects the action, default fetch.
my $cmd = 'fetch';
if (defined $ARGV[0]) {
    $cmd = $ARGV[0];
}
if ('fetch' eq $cmd) {
    run_fetch();
} elsif ('config' eq $cmd) {
    run_config();
} elsif ('autoconf' eq $cmd) {
    run_autoconf();
} elsif ('daemon' eq $cmd) {
    run_daemon();
} else {
    # NOTE(review): the original usage text was lost in extraction; this
    # message and the exit status are a reconstruction.
    print STDERR "$plugin: unknown command: $cmd\n",
        "usage: $plugin [autoconf|config|fetch|daemon]\n";
    exit(1);
}