#!/usr/bin/perl # -*- perl -*- # # Plugin to graph http performance # Version: 0.8.7 # # The purpose of this plugin is to monitor several properties of a web page. # All measurements are done for the complete web page, including images, css # and other content a standard browser would download automatically. # # This version supports monitoring: # * The total time to download a complete web page (using serial GET requests) # * The total size of a web page # * The different response codes (200, 404, 500, etc) # * The different tags (img src, a href, etc) # * The the different content types (image/png, text/css/, etc) # * The number of elements the web page consists of # # Author: Espen Braastad / Linpro AS # espen@linpro.no # ##### Short usage guide: ##### # # Requirements: # * The server running this plugin must be allowed to connect to the web # server(s) you are going to monitor. # * Some perl modules: # Time::HiRes, LWP::UserAgent, HTML::LinkExtor, LWP::ConnCache # # Initial configuration: # 1. Copy this file to /usr/share/munin/plugins/ # # 2. Create a file (/etc/munin/http_load_urls.txt) with one # full url per line, as many as you want, i.e.: # $ echo "http://www.dn.no/" >> /etc/munin/urls.txt # $ echo "http://www.intrafish.no/" >> /etc/munin/urls.txt # # 3. Add a cron job running the plugin with cron as the argument: # */15 * * * * /usr/sbin/munin-run http_load__loadtime cron # should be the user that has write permission to the $cachedir # directory set below. should be any of the configured sites (all # sites will get updated), likewise, you should replace loadtime by any # metric that is enabled for that site (all metrics will get updated). # Set the intervals to whatever you want. # # For verbose output (for debugging) you can do: # sudo -u /usr/share/munin/plugins/http_load_ cron verbose # # 4. Run munin-node-configure --suggest --shell and run the symlink # commands manually to update the munin-node plugin list. # # (5. If you want to change the filter which the plugin uses to select which # tags to follow in a web page, edit the subroutine called "filter" below.) # # Add a new url to monitor: # 1. Add a new line in /etc/munin/urls.txt with the full URL, i.e.: # $ echo "http://www.linpro.no/" >> /etc/munin/http_load_urls.txt # # 2. Run munin-node-configure --suggest --shell and manually # add the new symlink(s) # # 3. /etc/init.d/munin-node restart # # Remove a url from monitoring: # 1. Remove it from /etc/munin/http_load_urls.txt # # 2. Remove ${cachedir}/http_load_* # # 3. Remove /etc/munin/plugins/http_load_* # # 4. /etc/init.d/munin-node restart # ##### # # Todo: # * Add support for forking to simulate real browsers # * Use checksums as fieldnames # # $Id: $ # # Magic markers: #%# family=auto #%# capabilities=autoconf suggest use strict; use Time::HiRes qw( gettimeofday tv_interval ); use LWP::UserAgent; use HTML::LinkExtor; use LWP::ConnCache; my $url_file="/etc/munin/http_load_urls.txt"; my $cachedir=$ENV{MUNIN_PLUGSTATE}; my $debug=$ENV{MUNIN_DEBUG}; my $timeout=10; my $max_redirects=10; my $scriptname="http_load_"; my $useragent="Mozilla/5.0 (Munin; $scriptname)"; # Function to read the $url_file and return the contents in a hash sub read_urls{ my $file=$_[0]; my %urls=(); if(-r $file){ open(FILE,'<'.$file); while () { my $url=$_; chomp($url); my $id=get_id($url); if(length($id)>0){ $urls{$id}=$url; } } close (FILE); } return %urls; } # Function to read cache, return a hash sub read_cache{ my $file=$_[0]; my %cache=(); if(-r $file){ open(FILE,'<'.$file); while () { m/^(\S*)\s+(.*)$/; $cache{ $1 } = $2; } close (FILE); } return %cache; } # Function to filter the html tags, which files do we want to download sub filter{ my $tag=$_[0]; my $status=1; # Some example data: # link href http://www.intrafish.no/template/include/css/intrafish.css # script src http://www.intrafish.no/template/include/js/intrafish.js # a href http://adserver.adtech.de/?adlink%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;loc=300; # img src http://adserver.adtech.de/?adserv%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491; # area href http://go.vg.no/cgi-bin/go.cgi/sol/http://www.sol.no/sgo/vg/http://www.sol.no/underholdning/humor/?partnerid=vg # status=1 => do download (default) # status=0 => do not download if("$tag" eq "form action"){ $status=0; } if("$tag" eq "a href"){ $status=0; } if("$tag" eq "area href"){ $status=0; } return $status; } # Return the cache file name for this plugin sub get_cache_file_name{ my $scriptname=$_[0]; my $id=$_[1]; my $file=""; $file = $scriptname . $id . ".cache"; $debug && print "Cache file: " . $file . "\n"; return $file; } # Get fieldname (making sure it is munin-1.0 "compatible" as a fieldname) # 1. Remove all non-word characters from a string) # 2. Make sure it has maximum 19 characters # 2.1 If not, truncate the host part, while keeping anything after an underscore (e.g., HTTP response status) sub get_fieldname{ my $url=$_[0]; $url =~ s/\W//g; if(length($url) > 19){ $url =~ s/(\S+)_(\S+)/ /g; my $host = $1; my $info = $2; my $suffixlength = length($info) + 1; if ($suffixlength > 1) { $url = substr($host, 0, 19 - $suffixlength) . '_' . $info; } else { $url = substr($url, 0, 19); } } return $url; } # Same as get_fieldname except it doesn't substr sub get_id{ my $url=$_[0]; $url =~ s/[\W_]//g; return $url; } $debug && print "Scriptname: " . $scriptname . "\n"; # Get the url id and the type of the graph # # The filename format is http_load_X_Y where # X: The line number in urls.txt # Y: The type of graph (elements, size, loadtime, ..) my ($id,$type); $0 =~ /http_load(?:_([^_]+)|)_(.+)\s*$/; $id = $1; $type = $2; $debug && print "Id: $id, Type: $type\n"; if($ARGV[0] and $ARGV[0] eq "autoconf") { my %urls=&read_urls($url_file); if(keys(%urls) gt 0){ print "yes\n"; exit(0); } else { print "no\n"; exit(1); } } elsif($ARGV[0] and $ARGV[0] eq "suggest") { # get the url list, print suggestions for usage my %urls=&read_urls($url_file); while ( my ($id, $url) = each(%urls) ) { $debug && print "id: $id => url: $url\n"; print $id . "_size\n"; print $id . "_loadtime\n"; print $id . "_response\n"; print $id . "_tags\n"; print $id . "_type\n"; print $id . "_elements\n"; } exit(0); } elsif($ARGV[0] and $ARGV[0] eq "cron") { # This thing is run by cron and should write a cache file for munin-node to # read from my $verbose=0; if( $ENV{MUNIN_DEBUG} eq "1" or $ARGV[1] and $ARGV[1] eq "verbose" ) { $verbose=1; print "Verbose output\n"; } my %urls=&read_urls($url_file); my %output; my %res; my $t0; my ($request,$response,$status,$link,$contents,$page_parser,$cachefile); while ( my ($id, $url) = each(%urls) ) { $verbose && print "Fetching $url (id: $id)... \n"; $t0=0; $status=0; %output=(); my $host=""; if($url =~ m/\w+\:\/\/([^\/]+).*/){ $host=$1; $verbose && print " Host: $host\n"; } $output{"url"}=$url; $output{"timestamp"}=time(); $verbose && print " Timestamp: " . $output{"timestamp"} . "\n"; my $browser = LWP::UserAgent->new(); $browser->agent($useragent); $browser->timeout(${timeout}); $browser->max_redirect( $max_redirects ); $browser->conn_cache(LWP::ConnCache->new()); $response = $browser->get($url); # Calculating time from now: $t0 = [gettimeofday]; if ($response->is_success()) { $status=1; $output{"elements_" . $host}+=1; } $contents = $response->content(); $output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday])); $output{"size_" . $host}+=length($contents); $output{"response_" . $host . "_" . $response->code}+=1; $output{"type_" . $response->content_type}+=1; $page_parser = HTML::LinkExtor->new(undef, $url); $page_parser->parse($contents)->eof; my @links = $page_parser->links; $verbose && print " Processing links:\n"; %res=(); foreach $link (@links){ my $tag=$$link[0] . " " . $$link[1]; $output{"tags_" . $$link[0] . "-" . $$link[1]}+=1; if(filter($tag)){ $verbose && print " Processing: " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n"; # Extract the hostname and add it to the hash if($$link[2] =~ m/https?\:\/\/([^\/]+).*/){ $host=$1; $output{"elements_" . $host}+=1; } my $suburl=$$link[2]; $t0 = [gettimeofday]; $response = $browser->get($suburl); $output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday])); $contents = $response->content(); $output{"size_" . $host}+=length($contents); $output{"response_" . $host . "_" . $response->code}+=1; $output{"type_" . $response->content_type}+=1; $verbose && print " Response: " . $response->code . " Size: " . length($contents) . "\n"; } else { $verbose && print " Skipping: " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n"; } } $cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id); $debug && print "Reading cache file: " . $cachefile . "... "; my %input=read_cache($cachefile); $debug && print "done\n"; # Resetting all values to 0 before adding new values while ( my ($id, $value) = each(%input) ) { $input{$id}="U"; } # Adding new values while ( my ($id, $value) = each(%output) ) { $input{$id}=$value; $verbose && print " Result: " . $id . " -> " . $value . "\n"; } # Writing the cache $verbose && print "Writing cache file: " . $cachefile . "... "; open(FILE,">".$cachefile); while ( my ($id, $value) = each(%input) ) { print FILE $id . " " . $value . "\n"; } close(FILE); $verbose && print "done\n"; } exit(0); }elsif($ARGV[0] and $ARGV[0] eq "config") { my %urls=&read_urls($url_file); print "graph_title $urls{$id} ${type}\n"; print "graph_args -l 0 --base 1000\n"; print "graph_category webserver\n"; $debug && print "Reading cache file\n"; my $cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id); my %cache=read_cache($cachefile); my $count=0; $debug && print "The cache file contains " . keys(%cache) . " lines\n"; if($type eq "size"){ print "graph_vlabel Bytes\n"; print "graph_total Total\n"; print "graph_info This graph is generated by a set of serial GETs to calculate the total size of $urls{$id}.\n"; if(keys(%cache)>0){ for my $key ( sort reverse keys %cache ){ my $value=$cache{$key}; if($key =~ m/^size_(\S+)$/){ my $host=$1; my $value=$value; my $name=$1; $name=get_fieldname($name); print "$name.label from $host\n"; print "$name.min 0\n"; print "$name.max 20000000\n"; if($count eq 0){ print "$name.draw AREA\n"; } else { print "$name.draw STACK\n"; } $count+=1; } } } }elsif($type eq "loadtime"){ print "graph_vlabel Seconds\n"; print "graph_total Total\n"; print "graph_info This graph is generated by a set of serial GETs to calculate the total time to load $urls{$id}. "; print "Note that browsers usually fork() the GET requests, resulting in a shorter total loading time.\n"; if(keys(%cache)>0){ for my $key ( sort reverse keys %cache ){ my $value=$cache{$key}; if($key =~ m/^loadtime_(\S+)$/){ my $host=$1; my $value=$value; my $name=$1; $name=get_fieldname($name); print "$name.label from $host\n"; print "$name.min 0\n"; print "$name.max 400\n"; if($count eq 0){ print "$name.draw AREA\n"; } else { print "$name.draw STACK\n"; } $count+=1; } } } }elsif($type eq "elements"){ print "graph_vlabel Number of elements\n"; print "graph_total Total\n"; print "graph_info This graph is generated by a set of serial GETs to count the number of elements (images, CSS files, etc) from $urls{$id}.\n"; if(keys(%cache)>0){ for my $key ( sort reverse keys %cache ){ my $value=$cache{$key}; if($key =~ m/^elements_(\S+)$/){ my $host=$1; my $value=$value; my $name=$1; $name=get_fieldname($name); print "$name.label from $host\n"; print "$name.min 0\n"; print "$name.max 10000\n"; if($count eq 0){ print "$name.draw AREA\n"; } else { print "$name.draw STACK\n"; } $count+=1; } } } }elsif($type eq "response"){ print "graph_vlabel Server response code count\n"; print "graph_total Total\n"; print "graph_info This graph is generated by a set of serial GETs to visualize the server response codes received while loading $urls{$id}.\n"; if(keys(%cache)>0){ for my $key ( sort reverse keys %cache ){ my $value=$cache{$key}; if($key =~ m/^response_(\S+)$/){ my $host=$1; my $value=$value; my $name=$1; $name=get_fieldname($name); $host =~ s/\_/ /g; $host =~ s/(\S+)\s(\d+)/ /g; $host=$1; my $code=$2; print "$name.label $host ($code)\n"; print "$name.min 0\n"; print "$name.max 10000\n"; if($count eq 0){ print "$name.draw AREA\n"; } else { print "$name.draw STACK\n"; } $count+=1; } } } }elsif($type eq "type"){ print "graph_vlabel Content type count\n"; print "graph_total Total\n"; print "graph_info This graph is generated by a set of serial GETs to visualize the different content types $urls{$id} consists of.\n"; if(keys(%cache)>0){ for my $key ( sort reverse keys %cache ){ my $value=$cache{$key}; if($key =~ m/^type_(\S+)$/){ my $type=$1; my $value=$value; my $name=$1; $name=get_fieldname($name); #$host =~ s/\_/ /g; #$host =~ s/(\S+)\s(\S+)/ /g; #$host=$1; #my $type=$2; print "$name.label $type\n"; print "$name.min 0\n"; print "$name.max 100000\n"; if($count eq 0){ print "$name.draw AREA\n"; } else { print "$name.draw STACK\n"; } $count+=1; } } } }elsif($type eq "tags"){ print "graph_vlabel HTML tag count\n"; print "graph_total Total\n"; print "graph_info This graph is generated by a set of serial GETs to visualize the different tags $urls{$id} consists of.\n"; if(keys(%cache)>0){ for my $key ( sort reverse keys %cache ){ my $value=$cache{$key}; if($key =~ m/^tags_(\S+)$/){ my $host=$1; my $value=$value; my $name=$1; $name=get_fieldname($name); $host =~ s/\W/ /g; print "$name.label $host\n"; print "$name.min 0\n"; print "$name.max 100000\n"; if($count eq 0){ print "$name.draw AREA\n"; } else { print "$name.draw STACK\n"; } $count+=1; } } } } exit(0); } else { my $cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id); $debug && print "Reading cache file: " . $cachefile . "\n"; my %cache=read_cache($cachefile); $debug && print "Number of lines in cache file: " . keys(%cache) . "\n"; if(keys(%cache)>0){ for my $key ( sort keys %cache ){ my $value=$cache{$key}; if($key =~ m/^([A-Za-z]+)\_(\S+)$/){ my $name=$2; if ($1 eq $type){ $name=get_fieldname($name); print $name . ".value " . $value . "\n"; } } elsif(m/^(\S+)\s+(\S+)$/){ if ($1 eq $type){ print $1 . ".value " . $2 . "\n"; } } } } } # vim:syntax=perl