diff --git a/plugins/apache/apache_vhosts/README.txt b/plugins/apache/apache_vhosts/README.txt
new file mode 100644
index 00000000..340b7484
--- /dev/null
+++ b/plugins/apache/apache_vhosts/README.txt
@@ -0,0 +1,3 @@
+apache_pipelogger: to be used in an apache CustomLog directive
+apache_logparser: daemon to watch logs and store stats in shared mem
+plugins/apache_vhosts: munin plugin to deliver stats
diff --git a/plugins/apache/apache_vhosts/apache_logparser b/plugins/apache/apache_vhosts/apache_logparser
new file mode 100644
index 00000000..269a6eb5
--- /dev/null
+++ b/plugins/apache/apache_vhosts/apache_logparser
@@ -0,0 +1,190 @@
+#!/usr/bin/perl
+
+=head1 README
+
+This is a logfile watcher for apache: it monitors a log directory for access logs and saves some stats to shared memory.
+Munin can then fetch and reset the stats periodically.
+
+Just start it once; it runs as a daemon, polls the logs every n seconds and keeps track of changes to them.
+The file list is read on startup and re-read at the configured scan interval. File positions are recorded, and logs are checked for truncation/deletion (for performance reasons).
+
+Requires the perl modules File::Tail::Multi, Storable, IPC::ShareLite and Munin::Plugin (optionally Data::Dumper).
+
+You can use it in parallel with the pipelogger if that suits you better; the stats are merged in shared memory.
+Both ways should show decent performance; the pipelogger works in RAM only, but writes no logs.
+
+
+=head1 INSTALLATION
+
+Install to /usr/share/munin and run it as root.
+
+Configure the variables below:
+
+$dir            path to your logfiles
+$files          file-glob to find access logs
+$site           regexp to find the sitename in the logfile name
+$statefile      file to save the last log position for tail
+$nsec           tail and write to shared mem every n seconds
+$debug          dump tallied data every n seconds, print every log line parsed
+$scan_interval  rescan for new log files every n minutes
+$type           log file type:
+ common:   CLF + vhost + time + (other fields)
+ combined: combined + time + (other fields)
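+
+For debugging, you can peek at what the loggers have accumulated so far by dumping the shared
+memory segment (key 'mapl') with a small standalone script; this is only an illustrative sketch,
+not part of the plugin:
+
+ #!/usr/bin/perl
+ # dump the current per-vhost stats from shared memory
+ use IPC::ShareLite;
+ use Storable qw(thaw);
+ use Data::Dumper;
+ my $share = IPC::ShareLite->new( -key => 'mapl', -create => 0, -destroy => 0 ) or die $!;
+ print Dumper( thaw $share->fetch );
+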
+=cut
+
+# config
+my $dir = "/logs/apache_logs";
+my $files = "*access_log";
+my $site = "(.*)-access_log";
+my $statefile = "/tmp/logstate";
+`touch $statefile` unless (-f $statefile);
+local $type="combined";
+local $nsec=7;
+local $debug=0;
+
+my $scan_interval=5; # minutes
+
+# perl modules
+use File::Tail::Multi;
+use Storable qw(freeze thaw);
+use List::Util qw(min max);
+use IPC::ShareLite ':lock';
+require Data::Dumper if $debug;
+use Munin::Plugin;
+
+# shared mem
+local $share = IPC::ShareLite->new(
+    -key       => 'mapl',
+    -create    => 1,
+    -destroy   => 1,
+    -exclusive => 0,
+    -mode      => '0666'
+) or die $!;
+
+# drop stored data on reload
+$share->store( freeze {} );
+
+# tail log files
+my $tail=File::Tail::Multi->new (
+    Files           => ["$dir/$files"],
+    ScanForFiles    => $scan_interval,
+    Debug           => 0,
+    LastRun_File    => $statefile,
+    RemoveDuplicate => 0,
+    NumLines        => 0,
+    OutputPrefix    => "f"
+);
+
+# read to current position
+$tail->read;
+
+# register counting function
+$tail->Function(\&count);
+
+# per-interval counters
+local %temp;
+my ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,$status,$bytes,$referer,$useragent,$time,$vhost);
+sub count {
+    foreach $_ (@{shift()}) {
+        if ((()=/"/g)==2) {
+            # common with filename prefix, optionally time and vhost appended at the end
+            ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,$status,$bytes,$time,$vhost)=/^(.*?)\s:\s(.*?)\s(.*?)\s(.*?)\s\[(.*?)\]\s"(.*)\s(.*?)\s(.*?)"\s(\d*)\s(\S*)\s?(\S*)\s?(\S*?)$/o;
+        }
+        elsif ((()=/"/g)==6) {
+            # combined with filename prefix, optionally time and vhost appended at the end
+            ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,$status,$bytes,$referer,$useragent,$time,$vhost)=/^(.*?)\s:\s(.*?)\s(.*?)\s(.*?)\s\[(.*?)\]\s"(.*)\s(.*?)\s(.*?)"\s(\d*?)\s(.*?)\s"(.*?)"\s"(.*?)"\s?(\S*)\s?(\S*)$/o;
+        };
+
+        # find sitename
+        $file=~s/$site/$1/;
+        $file=$vhost if $vhost;
+
+        # skip broken lines
+        next unless $file;
+
+        # sitename to munin fieldname
+        my $vpm=clean_fieldname("$file");
+        $temp{$vpm}{'label'}="$file";
+        $temp{$vpm}{'label'}=~s/www\.//;
+
+        # count all requests
+        $temp{$vpm}{'requests'}++;
+
+        if ($bytes) {
+            $bytes=~s/-/0/;
+            # bytes transmitted
+            $temp{$vpm}{'bytes'}+=$bytes;
+
+            # max bytes
+            $temp{$vpm}{'max_bytes'}=max($temp{$vpm}{'max_bytes'},$bytes) || 0;
+
+            # average bytes
+            $temp{$vpm}{'avg_bytes'}=$temp{$vpm}{'bytes'}/$temp{$vpm}{'requests'} || 0;
+        }
+
+        # count by status / error code
+        $temp{$vpm}{"status"}{$status}++ if $status;
+
+        if ($time) {
+            # microsec to millisec
+            $time=sprintf("%d",$time/1000);
+
+            # max execution time
+            $temp{$vpm}{'max_time'}=max($temp{$vpm}{'max_time'},$time) || 0;
+
+            # cumulative execution time
+            $temp{$vpm}{'time'}+=$time;
+
+            # average time
+            $temp{$vpm}{'avg_time'}=$temp{$vpm}{'time'}/$temp{$vpm}{'requests'} || 0;
+        }
+
+    };
+};
+
+
+while (1) {
+    # tail files, calls &count with the array of new lines
+    $tail->read;
+
+    # begin transaction
+    $share->lock(LOCK_EX);
+
+    # get data (may be updated by other loggers too)
+    my %old=%{thaw $share->fetch};
+
+    foreach my $vpm (keys %temp){
+        # merge values
+        $old{$vpm}{'label'}=$temp{$vpm}{'label'};
+        $old{$vpm}{'bytes'}+=$temp{$vpm}{'bytes'} if $temp{$vpm}{'bytes'};
+        $old{$vpm}{'requests'}+=$temp{$vpm}{'requests'} if $temp{$vpm}{'requests'};
+        $old{$vpm}{'time'}+=$temp{$vpm}{'time'} if $temp{$vpm}{'time'};
+        # avoid div by zero
+        my $div=($old{$vpm}{'requests'} <1)?1:$old{$vpm}{'requests'};
+        # recalc average on merged data for multiple datasources, use local average after purge/restart
+        $old{$vpm}{'avg_time'}=($old{$vpm}{'avg_time'}>0)?sprintf("%d",($old{$vpm}{'time'}+$temp{$vpm}{'time'})/$div):sprintf("%d",$temp{$vpm}{'avg_time'});
+        $old{$vpm}{'avg_bytes'}=($old{$vpm}{'avg_bytes'}>0)?sprintf("%d",($old{$vpm}{'bytes'}+$temp{$vpm}{'bytes'})/$div):sprintf("%d",$temp{$vpm}{'avg_bytes'});
+        $old{$vpm}{'max_time'}=max($old{$vpm}{'max_time'},$temp{$vpm}{'max_time'}) || 0;
+        $old{$vpm}{'max_bytes'}=max($old{$vpm}{'max_bytes'},$temp{$vpm}{'max_bytes'}) || 0;
+
+        # reset local counters
+        foreach my $check (qw(requests bytes time max_bytes avg_bytes max_time avg_time)) {
+            $temp{$vpm}{$check}=0;
+        }
+
+        # reset status counts
+        foreach my $val (keys %{$temp{$vpm}{'status'}}) {
+            $old{$vpm}{'status'}{$val}+=$temp{$vpm}{'status'}{$val};
+            $temp{$vpm}{'status'}{$val}=0;
+        }
+
+    };
+
+    # save to shm
+    print Data::Dumper::Dumper(%old) if $debug;
+    $share->store( freeze \%old );
+    # end transaction
+    $share->unlock;
+
+    # parse/write every n seconds (plus processing time)
+    sleep $nsec;
+}
diff --git a/plugins/apache/apache_vhosts/apache_pipelogger b/plugins/apache/apache_vhosts/apache_pipelogger
new file mode 100644
index 00000000..469d3baf
--- /dev/null
+++ b/plugins/apache/apache_vhosts/apache_pipelogger
@@ -0,0 +1,123 @@
+#!/usr/bin/perl
+
+=head1 README
+
+Logger to be fed by an apache CustomLog pipe; it tallies per-vhost stats and writes them to shared memory every $nsec seconds.
+
+# Log vhost port method response_bytes response_time status
+
+ CustomLog "|/usr/share/munin/apache_pipelogger" "%v %p %m %B %D %s"
+
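+To test the logger outside of apache, you can pipe a few fake request lines into it in the same
+format (vhost port method bytes response-time-in-microseconds status); the hostname and numbers
+below are just an example:
+
+ perl -e '$|=1; for (1..15) { print "www.example.com 80 GET 10240 15000 200\n"; sleep 1 }' \
+   | /usr/share/munin/apache_pipelogger
+
+Let it run a little longer than $nsec seconds, since the stats are only flushed to shared memory
+on the alarm timer.
+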
+=cut
+
+# write every n seconds to shared memory
+local $nsec=7;
+local $debug=undef;
+
+use Storable qw(freeze thaw);
+use List::Util qw(min max);
+use IPC::ShareLite ':lock';
+require Data::Dumper if $debug;
+use Munin::Plugin;
+
+
+local $share = IPC::ShareLite->new(
+    -key       => 'mapl',
+    -create    => 1,
+    -destroy   => 1,
+    -exclusive => 0,
+    -mode      => '0666'
+) or die $!;
+
+
+local $SIG{'ALRM'}=\&periodic_write;
+alarm $nsec;
+
+
+# local per-interval counters
+local %temp=();
+
+# read log lines from the apache pipe
+while (<STDIN>) {
+    my ($vhost,$port,$method,$bytes,$time,$status)=split(/\s/,$_);
+
+    # sanity check
+    next unless m/^([\w._-]+\s){5}([\w._-]+$)/;
+
+    # sitename to munin fieldname
+    my $vpm=clean_fieldname($vhost);
+    $temp{$vpm}{'label'}=$vhost;
+    $temp{$vpm}{'label'}=~s/www\.//;
+
+    # count all requests
+    $temp{$vpm}{'requests'}++;
+
+    if ($bytes) {
+        $bytes=~s/-/0/;
+        # bytes transmitted
+        $temp{$vpm}{'bytes'}+=$bytes;
+
+        # max bytes
+        $temp{$vpm}{'max_bytes'}=max($temp{$vpm}{'max_bytes'},$bytes);
+
+        # average bytes
+        $temp{$vpm}{'avg_bytes'}=$temp{$vpm}{'bytes'}/$temp{$vpm}{'requests'} || 0;
+    }
+
+    # count by status / error code
+    $temp{$vpm}{"status"}{$status}++ if $status;
+
+    if ($time) {
+        # microsec to millisec
+        $time=sprintf("%d",$time/1000);
+
+        # max execution time
+        $temp{$vpm}{'max_time'}=max($temp{$vpm}{'max_time'},$time);
+
+        # cumulative execution time
+        $temp{$vpm}{'cml_time'}+=$time;
+
+        # average time
+        $temp{$vpm}{'avg_time'}=$temp{$vpm}{'cml_time'}/$temp{$vpm}{'requests'} || 0;
+    }
+};
+
+sub periodic_write {
+    # begin transaction
+    $share->lock(LOCK_EX);
+
+    # get data (may be updated by other loggers too)
+    my %old=%{thaw $share->fetch};
+
+    foreach my $vpm (keys %temp){
+        # merge values
+        $old{$vpm}{'bytes'}+=$temp{$vpm}{'bytes'} if $temp{$vpm}{'bytes'};
+        $old{$vpm}{'requests'}+=$temp{$vpm}{'requests'} if $temp{$vpm}{'requests'};
+        $old{$vpm}{'time'}+=$temp{$vpm}{'cml_time'} if $temp{$vpm}{'cml_time'};
+        $old{$vpm}{'label'}=$temp{$vpm}{'label'};
+        $old{$vpm}{'avg_time'}=sprintf("%d",($old{$vpm}{'avg_time'}+$temp{$vpm}{'avg_time'})/2);
+        $old{$vpm}{'max_time'}=max($old{$vpm}{'max_time'},$temp{$vpm}{'max_time'});
+        $old{$vpm}{'max_bytes'}=max($old{$vpm}{'max_bytes'},$temp{$vpm}{'max_bytes'});
+        $old{$vpm}{'avg_bytes'}=sprintf("%d",($old{$vpm}{'avg_bytes'}+$temp{$vpm}{'avg_bytes'})/2);
+
+        # reset local counters
+        foreach my $check (qw(requests bytes time cml_time max_bytes avg_bytes max_time avg_time)) {
+            $temp{$vpm}{$check}=0;
+        }
+
+        # reset status counts
+        foreach my $val (keys %{$temp{$vpm}{'status'}}) {
+            $old{$vpm}{'status'}{$val}+=$temp{$vpm}{'status'}{$val};
+            $temp{$vpm}{'status'}{$val}=0;
+        }
+
+    };
+
+    # save to shm
+#    print Data::Dumper::Dumper(%old) if $debug;
+    $share->store( freeze \%old );
+
+    # end transaction
+    $share->unlock;
+
+    # parse/write every n seconds
+    alarm $nsec;
+}
\ No newline at end of file
diff --git a/plugins/apache/apache_vhosts/apache_vhosts b/plugins/apache/apache_vhosts/apache_vhosts
new file mode 100644
index 00000000..7ea68c51
--- /dev/null
+++ b/plugins/apache/apache_vhosts/apache_vhosts
@@ -0,0 +1,289 @@
+#!/usr/bin/perl
+
+=head1 INSTALLATION
+
+This plugin requires data from apache. You can get at the data in two ways:
+
+1) Install the pipelogger (logs without using disk space, RAM only, highly performant)
+   - Install /usr/share/munin/apache_pipelogger as executable for apache/wwwrun
+   - Install the logger in httpd.conf:
+
+     # Log vhost port method response_bytes response_time httpd_status
+
+     CustomLog "|/usr/share/munin/apache_pipelogger" "%v %p %m %B %D %s"
+
+
+2) Install the log parser as a daemon (watches multiple access logs in a single folder for changes)
+   - the log parser should run as root (it can simply be run in the background)
+   - slightly less performant, but easier to apply to existing installations
+   - if you want response time stats, you have to log them in apache:
+
+     LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %D" combined-time
+
+   - configure the log parser to match your installation regarding naming and log folders, for example:
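+
+     # illustrative settings in /usr/share/munin/apache_logparser -- adjust paths and
+     # naming to your own layout; these are not the shipped defaults
+     my $dir   = "/var/log/apache2";     # folder containing all vhost access logs
+     my $files = "*-access_log";         # glob matching the per-vhost logs
+     my $site  = "(.*)-access_log";      # captures the vhost name from the file name
+     local $type = "combined";           # matches the combined-time LogFormat above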
+
+You can use both solutions simultaneously; the data will be merged.
+Be aware that an apache CustomLog directive in the master config will only log those vhosts that have no directive of their own.
+
+Install the plugin conf (after [apache_*]):
+
+[apache_vhosts]
+user root
+env.subgraphs requests bytes time
+env.checks requests bytes time
+
+# user      - probably necessary for shared memory IPC
+# subgraphs - create multigraph subgraphs (watch your graphing performance...), default 0
+# checks    - enable stats on bytes and response times per request; you have to log these in apache
+
+A word on performance:
+Requests/sec should not be much of a problem. Pipelogger and logparser should not have many performance problems, as they apply one regex per line and add up some stats.
+Stats are saved every n seconds (default: 7) to shared mem in serialized form. That should be fine even on heavily loaded servers (unless you watch cache logs).
+I would estimate that > 10k log lines/sec could start becoming a problem; you might have to start tuning there or use a dedicated system.
+You might think about splitting the logs over multiple logparser instances to parallelize, and merge at larger intervals.
+
+Graphing is another matter, especially the more vhosts you have.
+With subgraphs off, you get 3 main graphs * 4 timescales (day, week, month, year).
+With subgraphs on, you get 2 checks * (1 + 6 * #vhosts) + 1 check * (1 + #vhosts * #statuscodes * 4) graphs.
+With hundreds of vhosts that becomes a problem, as munin-update and munin-html do not scale well.
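+For example, with 100 vhosts, about 6 distinct status codes and both optional checks enabled,
+that works out to roughly 2 * (1 + 6 * 100) + 1 * (1 + 100 * 6 * 4) = 3603 graphs per update run.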
+
+Timeouts are another matter: munin-update fetches the plugin data and works on the received lines while the network timeout is running.
+So expect to set your timeouts to 120s with a hundred vhosts.
+
+=head1 MAGIC MARKERS
+
+ #%# family=auto
+ #%# capabilities=autoconf
+
+=head1 LICENSE
+
+GPLv2
+
+=cut
+
+
+my %checks = map {$_=>1} ( ($ENV{'checks'}) ? split(/ /,$ENV{'checks'}) : qw(requests bytes time) );
+my %subgraphs= map {$_=>1} ( ($ENV{'subgraphs'}) ? split(/ /,$ENV{'subgraphs'}) : () );
+
+use strict;
+#use warnings;
+use Munin::Plugin;
+use IPC::ShareLite ':lock';
+use Storable qw(freeze thaw);
+
+my $share = IPC::ShareLite->new(
+    -key       => 'mapl',
+    -create    => 0,
+    -destroy   => 0,
+    -exclusive => 0,
+    -mode      => '0744'
+) or die $!;
+
+
+my %data=%{thaw $share->fetch};
+
+if ( defined $ARGV[0] and $ARGV[0] eq "autoconf" ) {
+    if (scalar(keys %data)>0) {
+        print "yes\n";
+        exit 0;
+    } else {
+        print "no (no data available, apache_pipelogger or apache_logparser not running)\n";
+        exit 0;
+    }
+}
+
+need_multigraph();
+
+
+my ($config,$values);
+
+
+#
+# config
+#
+
+if ( defined $ARGV[0] and $ARGV[0] eq "config" ) {
+    foreach my $check (keys %checks) {
+        next if ($check eq 'requests'); # requests are special
+        my $order=join("_$check ",sort keys %data)."_$check";
+
+#
+# config: bytes / time + subgraphs
+#
+
+        print <lock(LOCK_EX);
+$share->store( freeze \%data );
+$share->unlock();
+
+exit 0;
+# vim:syntax=perl
diff --git a/plugins/other/apache_vhosts b/plugins/other/apache_vhosts
deleted file mode 100755
index 59b5f7e8..00000000
Binary files a/plugins/other/apache_vhosts and /dev/null differ
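
A sketch of the data layout the three pieces exchange through the IPC::ShareLite segment (key
'mapl', Storable-frozen): the field names are taken from the scripts above, while the vhost and
all values are invented for illustration:

    %data = (
        'example_com' => {                 # munin fieldname from clean_fieldname()
            label     => 'example.com',    # graph label (leading "www." stripped)
            requests  => 1234,             # requests since the plugin last fetched
            bytes     => 5678901,          # bytes transmitted
            time      => 4321,             # cumulative response time (ms)
            avg_time  => 3,                # average response time (ms)
            max_time  => 87,               # slowest response (ms)
            avg_bytes => 4602,             # average response size (bytes)
            max_bytes => 1048576,          # largest response (bytes)
            status    => { 200 => 1200, 404 => 30, 500 => 4 },   # hits per status code
        },
    );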