# Simple munin plugin to find the google rank for a URL/WORD combination
#
# THIS SCRIPT BREAKS THE TOS OF GOOGLE SO USE WITH CARE AND DON'T BLAME ME IF THINGS GO WRONG
#
# (c) 2009 i.dobson@planet-ian.com
#
# For each url/words that you want to watch you need to create a variable/word pair in your
# munin-node configuration file for example
#
#[google_rank]
#user root
#timeout 60
#env.URL1 http://www.planet-ian.com
#env.WORD1 avr webcam
#env.URL2 http://www.planet-ian.com
#env.WORD2 bascom
#
# Version 0.5 24.1.2009
# Added loop to check the first 500 pages. Note the script sleeps 5 seconds between each page grab so
# if the word/url you're looking for is in the higher positions then you need to increase the timeout
#
# Version 0.5 21.1.2009
# Dump each page grabbed from google into separate files (helps with debugging)
#
# Version 0.4 19.1.2009
# Fixed corrupt then empty cache file bug
#
# Version 0.3 19.1.2009
# The script now grabs the google page based on the LASTHIT counter.
# The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data
#
# Version 0.2 18.01.2009
# Cache added, the script only grabs the pages from google every 10 calls
# The script still only checks to first 100 pages returned by google
#
# Version 0.1 17.01.2009 Initial release
# The script only checks to first 100 pages returned by google
#
# Auto-configure: answer "yes" (exit 0) only when the first URL/WORD pair
# is present in the environment, otherwise "no" (exit 1), per munin protocol.
if [ "$1" = "autoconf" ]; then
if [ -n "$URL1" ] && [ -n "$WORD1" ]; then
echo yes
exit 0
fi
echo no
exit 1
fi
#Configure, loop through each variable defined WORDx URLx dumping it to munin
#Clean up URL/WORD pair into a munin-safe field name: remove "http://",
#replace " " with "_", "." with "_", and "-" with "_".
#Quoted expansions and a single sed invocation replace the original
#unquoted backtick pipeline of four sed processes.
VAR=$(echo "$URL.$WORD" | sed -e 's|http://||g' -e 's/ /_/g' -e 's/\./_/g' -e 's/-/_/g')
#URL-encode spaces in the search term for use in the google query string.
SEARCHWORD=$(echo "$WORD" | sed -e 's/ /%20/g')
until [ "$FOUND" -ne "0" ]; do
#Grab page from google for the WORD/PAGE combination.Pipe it into awk to pull out the url's only, one per line. Then dump only the lines containing the URL defined