# The script now grabs the google page based on the LASTHIT counter.
# The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data
#
# Version 0.2 18.01.2009
# Cache added, the script only grabs the pages from google every 10 calls
# The script still only checks the first 100 pages returned by google
#
# Version 0.1 17.01.2009 Initial release
# The script only checks the first 100 pages returned by google
#
# Auto Configure: check if word 1 is defined
# Munin "autoconf" handler: answer "yes" (exit 0) only when both URL1 and
# WORD1 are configured in the environment; otherwise answer "no" (exit 1).
if [ "$1" = "autoconf" ]; then
  if [ -n "$URL1" ] && [ -n "$WORD1" ]; then
    echo yes
    exit 0
  fi
  echo no
  exit 1
fi
#Configure, loop through each variable defined WORDx URLx dumping it to munin
#Clean up URL/WORD pair, removing http:// replacing " " with "_", "." with "_", "-" with "_"
# Build the munin field name from the URL/WORD pair (set by the surrounding
# per-pair logic — presumably URLx/WORDx expanded earlier; TODO confirm):
# strip "http://", then map spaces, dots and dashes to underscores.
# printf+quoting preserves the value verbatim (no word-splitting, no globbing,
# safe for values starting with "-"); a single sed with multiple -e expressions
# replaces the original four-process pipeline.
VAR=$(printf '%s' "$URL.$WORD" | sed -e 's|http://||g' -e 's/ /_/g' -e 's/\./_/g' -e 's/-/_/g')
# URL-encode spaces in the search term for use in the Google query string.
SEARCHWORD=$(printf '%s' "$WORD" | sed -e 's/ /%20/g')
until [ "$FOUND" -ne "0" ]; do
#Grab page from google for the WORD/PAGE combination.Pipe it into awk to pull out the url's only, one per line. Then dump only the lines containing the URL defined