mirror of
https://github.com/munin-monitoring/contrib.git
synced 2018-11-08 00:59:34 +01:00
cpu_linux_multi: detailed cpu usage per core (multigraph, supersampling)
This commit is contained in:
parent
580f586dbb
commit
44e6672036
387
plugins/system/cpu_linux_multi
Executable file
387
plugins/system/cpu_linux_multi
Executable file
@ -0,0 +1,387 @@
|
|||||||
|
#! /usr/bin/perl
|
||||||
|
########################################################################
|
||||||
|
# #
|
||||||
|
# WARNING WARNING WARNING WARNING WARNING WARNING #
|
||||||
|
# #
|
||||||
|
# This plugin does not work properly with multiple master #
|
||||||
|
# #
|
||||||
|
########################################################################
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# multigraph, supersampling, extended cpu informations
|
||||||
|
#
|
||||||
|
# require: mpstat (to actually collect the data)
|
||||||
|
# require linux /proc
|
||||||
|
# (sorry, quick and dirty retrieve the number of cpu from /proc/cpuinfo)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# ENV (default):
|
||||||
|
# MUNIN_PLUGSTATE - pid and cache files gets there
|
||||||
|
#
|
||||||
|
# ENV (user defined):
|
||||||
|
# MUNIN_UPDATERATE - rate at which to update (default: 1s)
|
||||||
|
# MUNIN_CACHEFLUSH_RATE - flush data every N batch (default: 1)
|
||||||
|
# MUNIN_MPSTAT - binary to use as mpstat
|
||||||
|
#
|
||||||
|
# increase cache flush rate if you have i/o performance issues
|
||||||
|
# warning: increasing flushrate too much might cause partial write, and loss
|
||||||
|
# of data. 0 to disable flush
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Parent graph: cpu usage per core/thread
|
||||||
|
# child graph(1): detailed cpu usage overall
|
||||||
|
# child graph(n): detailed cpu usage per thread
|
||||||
|
#
|
||||||
|
# Known bugs:
|
||||||
|
#
|
||||||
|
# Multi-Master
|
||||||
|
# If there are many masters, the data is only sent once. Each master will
|
||||||
|
# only have part of the data.
|
||||||
|
#
|
||||||
|
# Everlasting
|
||||||
|
# The daemon is launched on first config/fetch. A touch of the pidfile is
|
||||||
|
# done on every following config/fetch. The daemon should check if the
|
||||||
|
# pidfile is recent (configurable) enough, and stop itself if not.
|
||||||
|
#
|
||||||
|
# Graph Order
|
||||||
|
# There is currently (2.0.6) noway to order childgraphs.
|
||||||
|
#
|
||||||
|
# RRD file
|
||||||
|
# The master currently (2.0.6) generate rrd file for aggregate values, and
|
||||||
|
# complains that no data is provided for them (but the graph still works
|
||||||
|
# fine)
|
||||||
|
|
||||||
|
#%# family=auto
|
||||||
|
#%# capabilities=autoconf
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
my $plugin = $0;
|
||||||
|
$plugin =~ s/.*\///;
|
||||||
|
|
||||||
|
# order to display
|
||||||
|
my $fields_order = [
|
||||||
|
'sys',
|
||||||
|
'usr',
|
||||||
|
'nice',
|
||||||
|
'idle',
|
||||||
|
'iowait',
|
||||||
|
'irq',
|
||||||
|
'soft',
|
||||||
|
'steal',
|
||||||
|
'guest',
|
||||||
|
];
|
||||||
|
# order is the order given by mpstat
|
||||||
|
my $fields_info = [
|
||||||
|
{
|
||||||
|
name => 'usr',
|
||||||
|
label => 'usr',
|
||||||
|
info => "%s time spent in normal programs and daemons",
|
||||||
|
}, {
|
||||||
|
name => 'nice',
|
||||||
|
label => 'nice',
|
||||||
|
info => "%s time spent in nice(1)d programs and daemons",
|
||||||
|
}, {
|
||||||
|
name => 'sys',
|
||||||
|
label => 'sys',
|
||||||
|
info => "%s time spent in kernel system activity",
|
||||||
|
}, {
|
||||||
|
name => 'iowait',
|
||||||
|
label => 'iowait',
|
||||||
|
info => "%s time spent waiting for blocking I/O operations",
|
||||||
|
}, {
|
||||||
|
name => 'irq',
|
||||||
|
label => 'irq',
|
||||||
|
info => "%s time spent handling interrupts",
|
||||||
|
}, {
|
||||||
|
name => 'soft',
|
||||||
|
label => 'soft',
|
||||||
|
info => "%s time spent handling software interrupts",
|
||||||
|
}, {
|
||||||
|
name => 'steal',
|
||||||
|
label => 'steal',
|
||||||
|
info => "%s time spent elsewhere (stolen from us)",
|
||||||
|
}, {
|
||||||
|
name => 'guest',
|
||||||
|
label => 'guest',
|
||||||
|
info => "%s time spent in a guest operating system",
|
||||||
|
}, {
|
||||||
|
name => 'idle',
|
||||||
|
label => 'idle',
|
||||||
|
info => "%s time spent idling (waiting to get something to do)",
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
# mpstat sampling interval
|
||||||
|
my $update_rate = 1;
|
||||||
|
if (defined $ENV{MUNIN_UPDATERATE}) {
|
||||||
|
if ($ENV{MUNIN_UPDATERATE} =~ /^[1-9][0-9]*$/) {
|
||||||
|
$update_rate = int($ENV{MUNIN_UPDATERATE});
|
||||||
|
} else {
|
||||||
|
print STDERR "Invalid update_rate: $ENV{MUNIN_UPDATERATE}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
my $flush_interval = 1;
|
||||||
|
if (defined $ENV{MUNIN_CACHEFLUSH_RATE}) {
|
||||||
|
if ($ENV{MUNIN_CACHEFLUSH_RATE} =~ /^[0-9]+$/) {
|
||||||
|
$update_rate = int($ENV{MUNIN_CACHEFLUSH_RATE});
|
||||||
|
} else {
|
||||||
|
print STDERR "Invalid flush rate: $ENV{MUNIN_CACHEFLUSH_RATE}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
my $mpstat = "mpstat";
|
||||||
|
if (defined $ENV{MUNIN_MPSTAT}) {
|
||||||
|
if (-f $ENV{MUNIN_MPSTAT}) {
|
||||||
|
print STDERR "MUNIN_STAT: file not found: $ENV{MUNIN_MPSTAT}";
|
||||||
|
} else {
|
||||||
|
$mpstat = defined $ENV{MUNIN_MPSTAT};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub pidfile() { "$ENV{MUNIN_PLUGSTATE}/munin.$plugin.pid" }
|
||||||
|
sub cachefile() { "$ENV{MUNIN_PLUGSTATE}/munin.$plugin.cache" }
|
||||||
|
|
||||||
|
sub graph_section() { "system:cpu" };
|
||||||
|
sub graph_name() { "cpu_extended_multi_1s" };
|
||||||
|
sub graph_title() { "CPU usage" };
|
||||||
|
sub graph_title_all() { "Overall CPU usage" };
|
||||||
|
sub graph_title_n($) { "CPU#" . shift . " usage" };
|
||||||
|
sub acquire_name() { "<$plugin> collecting information" }
|
||||||
|
|
||||||
|
my $cpu_count_cache = undef;
|
||||||
|
sub cpu_count() {
|
||||||
|
# XXX: is there any way to do that cleanly ?
|
||||||
|
if (not defined $cpu_count_cache) {
|
||||||
|
$cpu_count_cache = `grep -c ^processor /proc/cpuinfo`;
|
||||||
|
chomp $cpu_count_cache;
|
||||||
|
}
|
||||||
|
return $cpu_count_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub is_running() {
|
||||||
|
if (-f pidfile()) {
|
||||||
|
my $pid = undef;
|
||||||
|
if (open FILE, "<", pidfile()) {
|
||||||
|
$pid = <FILE>;
|
||||||
|
close FILE;
|
||||||
|
chomp $pid;
|
||||||
|
}
|
||||||
|
if ($pid) {
|
||||||
|
# does not exist ? kill it
|
||||||
|
if (kill 0, $pid) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unlink(pidfile());
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# FIXME: should also trap kill sigint and sigterm
|
||||||
|
# FIXME: check pidfile got touched recently
|
||||||
|
sub acquire() {
|
||||||
|
$0 = acquire_name();
|
||||||
|
$ARGV = [ '<daemon>' ];
|
||||||
|
$0 = "<$plugin> collecting information";
|
||||||
|
open PIDFILE, '>', pidfile() or die "open: @{[ pidfile() ]}: $!\n";
|
||||||
|
print PIDFILE $$, "\n";
|
||||||
|
close PIDFILE;
|
||||||
|
open CACHE, ">>", cachefile() or die "open: @{[ cachefile() ]}: $!\n";
|
||||||
|
open MPSTAT, "-|", "$mpstat -P ALL $update_rate" or
|
||||||
|
die "open mpstat|: $!\n";
|
||||||
|
my $flush_count = 0;
|
||||||
|
while (<MPSTAT>) {
|
||||||
|
chomp;
|
||||||
|
my @field = split();
|
||||||
|
if (!($field[1] =~ /^(all|[0-9]+)$/)) {
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
$field[0] = $field[1];
|
||||||
|
$field[1] = time();
|
||||||
|
print CACHE join(" ", @field), "\n";
|
||||||
|
if ($flush_interval) {
|
||||||
|
if ($flush_interval == ++$flush_count) {
|
||||||
|
CACHE->flush();
|
||||||
|
$flush_count = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unlink(pidfile());
|
||||||
|
unlink(cachefile());
|
||||||
|
}
|
||||||
|
|
||||||
|
sub run_daemon() {
|
||||||
|
if (is_running()) {
|
||||||
|
my $atime;
|
||||||
|
my $mtime;
|
||||||
|
$atime = $mtime = time;
|
||||||
|
utime $atime, $mtime, pidfile();
|
||||||
|
} else {
|
||||||
|
if (0 == fork()) {
|
||||||
|
close(STDIN);
|
||||||
|
close(STDOUT);
|
||||||
|
close(STDERR);
|
||||||
|
open STDIN, "<", "/dev/null";
|
||||||
|
open STDOUT, ">", "/dev/null";
|
||||||
|
open STDERR, ">", "/dev/null";
|
||||||
|
acquire();
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub run_autoconf() {
|
||||||
|
# in case we have specified args, check the file before that
|
||||||
|
my $file = $mpstat;
|
||||||
|
$file =~ s/ .*//;
|
||||||
|
my $path = `which "$file"`;
|
||||||
|
if ($path) {
|
||||||
|
print "yes\n";
|
||||||
|
} else {
|
||||||
|
print "no\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub show_config($$$) {
|
||||||
|
my $i = shift;
|
||||||
|
my $name = shift;
|
||||||
|
my $title = shift;
|
||||||
|
my $graph_order = "graph_order";
|
||||||
|
for my $field (@$fields_order) {
|
||||||
|
$graph_order .= " $field";
|
||||||
|
}
|
||||||
|
print <<EOF;
|
||||||
|
multigraph @{[ graph_name() ]}.cpu$i
|
||||||
|
graph_title $title
|
||||||
|
graph_vlabel cpu use %
|
||||||
|
graph_scale no
|
||||||
|
update_rate 1
|
||||||
|
graph_data_size custom 1d, 10s for 1w, 1m for 1t, 5m for 1y
|
||||||
|
$graph_order
|
||||||
|
EOF
|
||||||
|
for my $field (@$fields_info) {
|
||||||
|
my $style = "STACK";
|
||||||
|
if ($field->{name} eq $fields_order->[0]) {
|
||||||
|
$style = "AREA";
|
||||||
|
}
|
||||||
|
print <<EOF;
|
||||||
|
$field->{name}.label $field->{label}
|
||||||
|
$field->{name}.draw $style
|
||||||
|
$field->{name}.info @{[ sprintf($field->{info}, $name) ]}
|
||||||
|
$field->{name}.min 0
|
||||||
|
$field->{name}.cdef $field->{name}
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub run_config() {
|
||||||
|
run_daemon();
|
||||||
|
my $cpus = cpu_count();
|
||||||
|
my $graph_order = "graph_order";
|
||||||
|
my $sub_order = "order cpuall";
|
||||||
|
for (my $i = 0; $i < $cpus; ++$i) {
|
||||||
|
$graph_order .= " use$i=@{[ graph_name() ]}.cpu$i.idle";
|
||||||
|
$sub_order .= " cpu$i";
|
||||||
|
}
|
||||||
|
# none of those seems to have any effect
|
||||||
|
#domain_$sub_order
|
||||||
|
#node_$sub_order
|
||||||
|
#graph_$sub_order
|
||||||
|
#service_$sub_order
|
||||||
|
#category_$sub_order
|
||||||
|
#group_$sub_order
|
||||||
|
|
||||||
|
print <<EOF;
|
||||||
|
multigraph @{[ graph_name() ]}
|
||||||
|
graph_category @{[ graph_section() ]}
|
||||||
|
graph_title @{[ graph_title() ]}
|
||||||
|
graph_vlabel cpu use %
|
||||||
|
graph_scale no
|
||||||
|
graph_total All CPUs
|
||||||
|
update_rate 1
|
||||||
|
graph_data_size custom 1d, 10s for 1w, 1m for 1t, 5m for 1y
|
||||||
|
$graph_order
|
||||||
|
EOF
|
||||||
|
my $style="AREA";
|
||||||
|
for (my $i = 0; $i < $cpus; ++$i) {
|
||||||
|
print <<EOF;
|
||||||
|
use$i.label CPU#$i
|
||||||
|
use$i.draw $style
|
||||||
|
use$i.cdef 100,use$i,-,${cpus},/
|
||||||
|
EOF
|
||||||
|
$style = 'STACK';
|
||||||
|
}
|
||||||
|
# detailed sub graphs - 1 for all, and 1 per cpu
|
||||||
|
show_config("all", "all CPU", graph_title_all());
|
||||||
|
for (my $i = 0; $i < $cpus; ++$i) {
|
||||||
|
show_config($i, "CPU$i", graph_title_n($i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub fetch_showline($) {
|
||||||
|
my $line = shift;
|
||||||
|
my $n = 2;
|
||||||
|
for my $field (@$fields_info) {
|
||||||
|
print <<EOF;
|
||||||
|
$field->{name}.value $line->[1]:$line->[$n]
|
||||||
|
EOF
|
||||||
|
++$n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sub run_fetch() {
|
||||||
|
run_daemon();
|
||||||
|
if (open CACHE, "+<", cachefile()) {
|
||||||
|
my $cpus = {};
|
||||||
|
while (<CACHE>) {
|
||||||
|
chomp;
|
||||||
|
my $field = [];
|
||||||
|
@$field = split(/ /);
|
||||||
|
if (not defined $cpus->{$field->[0]}) {
|
||||||
|
$cpus->{$field->[0]} = [];
|
||||||
|
}
|
||||||
|
push @{$cpus->{$field->[0]}}, $field;
|
||||||
|
}
|
||||||
|
# finished reading ? trucate it right away
|
||||||
|
truncate CACHE, 0;
|
||||||
|
close CACHE;
|
||||||
|
foreach my $cpu (keys %$cpus) {
|
||||||
|
print <<EOF;
|
||||||
|
multigraph @{[ graph_name() ]}.cpu$cpu
|
||||||
|
EOF
|
||||||
|
foreach my $line (@{$cpus->{$cpu}}) {
|
||||||
|
fetch_showline($line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
my $cmd = 'fetch';
|
||||||
|
if (defined $ARGV[0]) {
|
||||||
|
$cmd = $ARGV[0];
|
||||||
|
}
|
||||||
|
if ('fetch' eq $cmd) {
|
||||||
|
run_fetch();
|
||||||
|
} elsif ('config' eq $cmd) {
|
||||||
|
run_config();
|
||||||
|
} elsif ('autoconf' eq $cmd) {
|
||||||
|
run_autoconf();
|
||||||
|
} elsif ('daemon' eq $cmd) {
|
||||||
|
run_daemon();
|
||||||
|
} else {
|
||||||
|
print STDERR <<EOF;
|
||||||
|
$0: unrecognized command
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
$0 autoconf - check if we have everything we need
|
||||||
|
$0 config - show plugin configuration
|
||||||
|
$0 fetch - fetch latest data
|
||||||
|
$0 daemon - launch daemon
|
||||||
|
EOF
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
exit(0);
|
Loading…
Reference in New Issue
Block a user