apache_pipelogger: to be used in an apache CustomLog directive
apache_logparser: daemon to watch logs and store stats in shared mem
plugins/apache_vhosts: munin plugin to deliver stats

=head1 README
This is a logfile watcher for Apache: it monitors a log directory for access logs and saves some stats to shared memory.
Munin can then fetch and reset the stats periodically.
Just start it once, it runs as a daemon and polls logs every n sec keeping track of changes to the logs.
Filelist is read on startup and on defined scan_intervals. File position is recorded and logs are checked for truncate/delete (for performance reasons).
Requires perl modules File::Tail::Multi Storable IPC::ShareLite Munin::Plugin (optional Data::Dumper)
You can use it in parallel to the pipelogger if that suits you better, the stats are merged in shared mem.
Both ways should show decent performance, the pipelogger works in RAM only, but writes no logs.
Install to /usr/share/munin and run it as root
configure the variables below:
$dir path to your logfiles
$files file-glob to find access logs
$site regexp to find sitename from logfile name
$statefile file to save last log position for tail
$nsec tail and write to shared mem every n seconds
$debug dump tallied data every n seconds, print every log line parsed
$scan_interval rescan for new log files every n minutes
$type log file type:
common: CLF + vhost + time + (other fields)
combined: combined + time + (other fields)
# config
my $dir = "/logs/apache_logs";
my $files = "*access_log";
my $site = "(.*)-access_log";
my $statefile = "/tmp/logstate";
# Make sure the state file exists before the tailer is set up. The file is
# created in-process (append mode never truncates) instead of spawning a
# shell for `touch` with an interpolated path. Failure is only reported,
# matching the best-effort behaviour of the previous shell call.
unless (-f $statefile) {
    if (open(my $state_fh, '>>', $statefile)) {
        close($state_fh);
    }
    else {
        warn "cannot create state file $statefile: $!\n";
    }
}
local $type="combined";
local $nsec=7;
local $debug=0;
my $scan_interval=5; # minutes
# perl modules
use File::Tail::Multi;
use Storable qw(freeze thaw);
use List::Util qw(min max);
use IPC::ShareLite ':lock';
require Data::Dumper if $debug;
use Munin::Plugin;
# shared mem
# Obtain a handle on the shared memory segment identified by the key 'mapl'.
# The constructor is asked to create the segment (-create => 1) without
# requiring exclusive ownership (-exclusive => 0); the script aborts with
# the system error if no handle is returned.
# NOTE(review): -destroy => 1 presumably removes the segment when this
# handle goes away, which would also drop data of other loggers sharing
# the key - confirm against the IPC::ShareLite documentation.
# NOTE(review): the mode is passed as the string '0666'; verify that
# IPC::ShareLite interprets it as octal and not as decimal 666.
local $share = IPC::ShareLite->new(
-key => 'mapl',
-create => 1,
-destroy => 1,
-exclusive => 0,
-mode => '0666'
) or die $!;
# drop stored data on reload
$share->store( freeze {} );
# tail log files
my $tail=File::Tail::Multi->new (
LastRun_File => $statefile,
# read to current position
# register counting function
local $temp;
my ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,$status,$bytes,$referer,$useragent,$time);
sub count {
foreach $_ (@{shift()}) {
if ((()=/"/g)==2) {
# common with filename prefix, optionally add time and vhost at the end
elsif ((()=/"/g)==6) {
# combined with filename prefix, optionally add time and vhost at the end
#find sitename
$file=$vhost if $vhost;
# skip broken lines
next unless $file;
# sitename to munin fieldname
my $vpm=clean_fieldname("$file");
# count all requests
if ($bytes) {
# bytes transmitted
# max bytes
$temp{$vpm}{'max_bytes'}=max($temp{$vpm}{'max_bytes'},$bytes) || 0;
# average bytes
$temp{$vpm}{'avg_bytes'}=$temp{$vpm}{'bytes'}/$temp{$vpm}{'requests'} || 0;
# count by status / error code
$temp{$vpm}{"status"}{$status}++ if $status;
if ($time) {
# microsec to millisec
# min/max execution time
$temp{$vpm}{'max_time'}=max($temp{$vpm}{'max_time'},$time) || 0;
# cumulative execution time
# average time
$temp{$vpm}{'avg_time'}=$temp{$vpm}{'time'}/$temp{$vpm}{'requests'} || 0;
while (1) {
# tail files, calls &count with linearray
# begin transaction
# get data (may be updated by other loggers too)
my %old=%{thaw $share->fetch};
foreach my $vpm (keys %temp){
# merge values
$old{$vpm}{'bytes'}+=$temp{$vpm}{'bytes'} if $temp{$vpm}{'bytes'};
$old{$vpm}{'requests'}+=$temp{$vpm}{'requests'} if $temp{$vpm}{'requests'};
$old{$vpm}{'time'}+=$temp{$vpm}{'time'} if $temp{$vpm}{'time'};
# avoid div by zero
my $div=($old{$vpm}{'requests'} <1)?1:$old{$vpm}{'requests'};
# recalc average on merged data for multiple datasources, use local average after purge/restart
$old{$vpm}{'max_time'}=max($old{$vpm}{'max_time'},$temp{$vpm}{'max_time'}) || 0;
$old{$vpm}{'max_bytes'}=max($old{$vpm}{'max_bytes'},$temp{$vpm}{'max_bytes'}) || 0;
# reset local counters
# The qw() list needs its own parentheses: a bare "foreach my $x qw(...)"
# is a syntax error on perl 5.18 and later.
foreach my $check (qw(requests bytes time max_bytes avg_bytes max_time avg_time)) {
# reset status counts
foreach my $val (keys %{$temp{$vpm}{'status'}}) {
# save to shm
# Dump the merged stats as one structure; passing a reference keeps Dumper
# from flattening the hash into a list of separate $VARn entries.
print Data::Dumper::Dumper(\%old) if $debug;
$share->store( freeze \%old );
# end transaction
# parse/write every n seconds (plus processing time)
sleep $nsec;

# Log vhost port method response_bytes response_time status
<IfModule mod_log_config.c>
CustomLog "|/usr/share/munin/apache_pipelogger" "%v %p %m %B %D %s"
# write every n seconds to shared memory
local $nsec=7;
local $debug=undef;
use Storable qw(freeze thaw);
use List::Util qw(min max);
use IPC::ShareLite ':lock';
require Data::Dumper if $debug;
use Munin::Plugin;
local $share = IPC::ShareLite->new(
-key => 'mapl',
-create => 1,
-destroy => 1,
-exclusive => 0,
-mode => '0666'
) or die $!;
local $SIG{'ALRM'}=\&periodic_write;
alarm $nsec;
# drop stored data on reload
local %temp=();
while (<STDIN>) {
my ($vhost,$port,$method,$bytes,$time,$status)=split(/\s/,$_);
# sanity check
# Accept only lines made of six whitespace separated fields consisting of
# word characters, dots and dashes. The dash is placed last in the class so
# it is a literal; written as ".-_" it formed a range from "." to "_" that
# did not contain "-" and therefore rejected every hyphenated vhost name.
next unless m/^([\w.-]+\s){5}([\w.-]+$)/;
$time=sprintf("%d",$time/1000); # microsec to millisec
# sitename to munin fieldname
my $vpm=clean_fieldname($vhost);
# count all requests
if ($bytes) {
# bytes transmitted
# max bytes
# average bytes
$temp{$vpm}{'avg_bytes'}=$temp{$vpm}{'bytes'}/$temp{$vpm}{'requests'} || 0 if ($bytes);
# count by status / error code
$temp{$vpm}{"status"}{$status}++ if $status;
if ($time) {
# microsec to millisec
# min/max execution time
# cumulative and average execution time
# average time
$temp{$vpm}{'avg_time'}=$temp{$vpm}{'cml_time'}/$temp{$vpm}{'requests'} || 0 if ($time);
sub periodic_write {
# begin transaction
# get data (may be updated by other loggers too)
my %old=%{thaw $share->fetch};
foreach my $vpm (keys %temp){
# merge values
$old{$vpm}{'bytes'}+=$temp{$vpm}{'bytes'} if $temp{$vpm}{'bytes'};
$old{$vpm}{'requests'}+=$temp{$vpm}{'requests'} if $temp{$vpm}{'requests'};
$old{$vpm}{'time'}+=$temp{$vpm}{'time'} if $temp{$vpm}{'time'};
# reset local counters
# The qw() list needs its own parentheses: a bare "foreach my $x qw(...)"
# is a syntax error on perl 5.18 and later.
foreach my $check (qw(requests bytes time cml_time max_bytes avg_bytes max_time avg_time)) {
# reset status counts
foreach my $val (keys %{$temp{$vpm}{'status'}}) {
# save to shm
# print Data::Dumper::Dumper(%old) if $debug;
$share->store( freeze \%old );
# end transaction
# parse/write every n seconds
alarm $nsec;

This plugin requires data from apache. You can get at the data in two ways:
1) Install the pipelogger (logs without using disk space, ram only, highly performant)
- Install /usr/share/munin/apache_pipelogger as executable for apache/wwwrun
- Install logger to httpd.conf
# Log vhost port method response_bytes response_time_microseconds httpd_status
<IfModule mod_log_config.c>
CustomLog "|/usr/share/munin/apache_pipelogger" "%v %p %m %B %D %s"
2) Install the log parser as daemon (watches multiple access logs in a single folder for changes)
- the log parser should run as root (can simply be run in background)
- slightly less performant, but easier to apply to existing installations
- If you want response time stats, you have to log them in apache:
<IfModule mod_log_config.c>
LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %D" combined-time
- Configure the log parser to match your installation regarding naming and log folders
You can use both solutions simultaneously, the data will be merged.
Be aware that an Apache CustomLog directive in the master config will only log those vhosts that have no CustomLog directive of their own.
Install plugin conf (after [apache_*])
user root
env.subgraphs requests bytes time
env.checks requests bytes time
# user - probably necessary for shared memory IPC
# subgraphs - create multigraph subgraphs (watch your graphing performance...), default 0
# checks - enable stats on bytes and response times per request, you have to log these in apache
A word on performance:
Requests/sec should not be much of a problem. Pipelogger and Logparser should not have many performance problems, as they apply one regex per line and add some stats.
Stats are saved every n seconds (default: 7) to shared mem in serialized format. That should be ok on the most loaded servers (unless you watch cache logs).
I would estimate that > 10k log lines/sec could start becoming a problem, you might have to start tuning there or use a dedicated system.
You might think about splitting the logs over multiple Logparser scripts to parallelize and merge in larger intervals.
Graphing is another matter, the more vhosts you have.
With subgraphs off, you do 3 main graphs * 4 timescales (day, week, month, year).
With subgraphs on, you get 2 checks * (1 + 6 * #vhosts) + 1 check * (1 + #vhosts * #statuscodes * 4)
With hundreds of vhosts that becomes a problem, as munin-update and munin-html do not scale well.
Timeouts are another matter: munin-update calls for the plugin data and works on the received lines while the network timeout is running.
So expect to set your timeouts to 120s with a hundred vhosts.
#%# family=auto
#%# capabilities=autoconf
=head1 LICENSE
# Checks to report, taken from env.checks; defaults to all three when unset.
# split(' ', ...) splits on any run of whitespace and ignores leading
# whitespace, so stray or repeated blanks in the plugin configuration no
# longer create an empty-string check.
my %checks = map { $_ => 1 } ( $ENV{'checks'} ? split(' ', $ENV{'checks'}) : qw(requests bytes time) );
# Checks that additionally get per-vhost subgraphs, taken from
# env.subgraphs; none when unset.
my %subgraphs = map { $_ => 1 } ( $ENV{'subgraphs'} ? split(' ', $ENV{'subgraphs'}) : () );
use strict;
#use warnings;
use Munin::Plugin;
use IPC::ShareLite ':lock';
use Storable qw(freeze thaw);
# Obtain a handle on the shared memory segment keyed 'mapl' without asking
# for it to be created (-create => 0) or destroyed (-destroy => 0); the
# plugin aborts with the system error if no handle is returned.
# NOTE(review): if the constructor fails when no logger has created the
# segment yet, the plugin dies here and the autoconf branch further down is
# never reached in that situation - confirm this is intended.
# NOTE(review): the mode is passed as the string '0744'; verify that
# IPC::ShareLite interprets it as octal and not as decimal 744.
my $share = IPC::ShareLite->new(
-key => 'mapl',
-create => 0,
-destroy => 0,
-exclusive => 0,
-mode => '0744'
) or die $!;
# Load the stats written by the loggers. If the segment holds nothing that
# thaw() can turn into a hash reference, continue with an empty data set
# instead of dying on an undefined dereference under "use strict".
my %data = %{ thaw($share->fetch) || {} };
if ( defined $ARGV[0] and $ARGV[0] eq "autoconf" ) {
if (scalar(keys %data)>0) {
print "yes\n";
exit 0;
} else {
# munin expects autoconf to answer "yes" or "no (reason)"
print "no (no data available, apache_pipelogger not installed)\n";
exit 0;
my ($config,$values);
# config
if ( defined $ARGV[0] and $ARGV[0] eq "config" ) {
foreach my $check (keys %checks) {
next if ($check eq 'requests'); # requests are special
my $order=join("_$check ",sort keys %data)."_$check";
# config: bytes / time + subgraphs
print <<END;
multigraph apache_vhosts_$check
graph_title average $check on all active vhosts
graph_args --base 1000
graph_vlabel average $check per response
graph_category apache_vhosts
graph_period minute
graph_order $order
foreach my $site (keys %data) {
print <<END;
${site}_$check.label $data{$site}{'label'}
${site}_$check.info average $check per response on $data{$site}{'label'}
${site}_$check.draw LINE1
${site}_$check.type GAUGE
} # end sites
# Look the current check up by its value; the single-quoted '$check' was
# the literal key "$check" and never matched, so the subgraph config was
# never emitted although the values section prints subgraph values.
if ($subgraphs{$check}) {
foreach my $site (keys %data) {
print <<END;
multigraph apache_vhosts_$check.$site
graph_title average $check on $data{$site}{'label'}
graph_args --base 1000
graph_vlabel average response in $check
graph_category apache_vhosts
graph_period minute
foreach my $graph ("avg","max") {
print <<END;
${site}_${graph}_$check.label $graph$check
${site}_${graph}_$check.info $graph$check per response on $data{$site}{'label'}
${site}_${graph}_$check.draw LINE1
${site}_${graph}_$check.type GAUGE
} # end graph
} # end sites
} # end subgraph
} # end checks
# config: requests + subgraphs
my $order=join("_requests ",sort keys %data)."_requests";
print <<END;
multigraph apache_vhosts_requests
graph_title requests by vhost
graph_args --base 1000
graph_vlabel requests / \${graph_period}
graph_category apache_vhosts
graph_period minute
graph_order $order
foreach my $site (keys %data) {
print <<END;
${site}_requests.label $data{$site}{'label'}
${site}_requests.info $site
${site}_requests.draw LINE1
${site}_requests.type GAUGE
} # end site
if ($subgraphs{'requests'}) {
# multigraphs multivalue (status codes)
foreach my $site (keys %data) {
print <<END;
multigraph apache_vhosts_requests.$site
graph_title status codes on $data{$site}{'label'}
graph_args --base 1000
graph_vlabel status codes / \${graph_period}
graph_category apache_vhosts
graph_period minute
my $draw='AREA';
foreach my $status (sort keys %{$data{$site}{'status'}}) {
print <<END;
${site}_s${status}.label status $status
${site}_s${status}.info status $status
${site}_s${status}.draw $draw
${site}_s${status}.type GAUGE
} # end status
} # end sites
} # end multigraph
exit 0;
} # end if config
# values: bytes / time + subgraphs
foreach my $check (keys %checks) {
next if ($check eq 'requests'); # requests are special
# main graphs values
print "\nmultigraph apache_vhosts_$check\n";
foreach my $site (keys %data) {
print "${site}_$check.value $data{$site}{'avg_'.$check}\n";
} # end sites
if ($subgraphs{$check}) {
# subgraph values
foreach my $site (keys %data) {
print "\nmultigraph apache_vhosts_$check.$site\n";
foreach my $graph ("avg","max") {
print "${site}_${graph}_$check.value ".$data{$site}{$graph."_".$check}."\n";
} # end graph
} # end sites
} # end subgraph
} # end checks
# values: requests + subgraphs
print "\nmultigraph apache_vhosts_requests\n";
foreach my $site (keys %data) {
print "${site}_requests.value $data{$site}{'requests'}\n";
} # end sites
if ($subgraphs{'requests'}) {
# multigraphs multivalue (status codes)
foreach my $site (keys %data) {
print "\nmultigraph apache_vhosts_requests.$site\n";
# Emit one value per status code. The field name carries the "s" prefix so
# it matches the "${site}_s${status}" fields announced in the config
# section; a status without a count is reported as 0.
foreach my $status (sort keys %{$data{$site}{'status'}}) {
    print "${site}_s${status}.value ".($data{$site}{'status'}{$status}||0)."\n";
}# end status
} # end sites
} # end subgraph
# clear data after poll
foreach my $site (keys %data) {
foreach my $check ( qw(requests bytes time max_bytes avg_bytes max_time avg_time) ) {
foreach my $val (keys %{$data{$site}{'status'}}) {
$share->store( freeze \%data );
exit 0;
# vim:syntax=perl

