# supergoogle.tcl 1.2fix by brother (luc@rocketmail.com)
#
# This tcl script is an extension to the google.tcl script by 
# aNa|0Gue - analogue@glop.org - http://www.glop.org
# It is completely written from scratch to provide more options
# and features than the original one. 
#
# Features
# * select the channels to listen on for the request
# * proxy support
# * request multiple hits for the search
# * details like sitename and size per hit
# * online help included
# * debug mode
# * crude flood control (for now)
# * partyline google (suggested by Holieman)
# * every option can be set by some simple variables
# * ...
# 
# I have included in the zip a file called http.tcl for users who
# cannot get the http package installed (mainly win32 users).
# This file emulates all of the http package's commands and is usable
# by other scripts too. You also need alltools.tcl loaded before this
# one.
# 
# Just load the tcl scripts as follows in your config
#   source scripts/alltools.tcl
#   source scripts/http.tcl          #only for win32 or if it complains
#   source scripts/supergoogle.tcl
#
# Revisions:
#  1.0 Initial version
#  1.1 Fixed a bug in detailed section
#      Added a handler variable and partyline searching
#  1.2 The parse procedure was slow and buggy, so I have
#      completely rewritten it. It should now be both
#      faster and (hopefully) bug-free.
#  1.2fix
#      Made this version after seeing multiple reports
#      of a "permission denied" message. It seems www.google.com
#      does not accept anything other than real browsers, so
#      Supergoogle now identifies itself as a Mozilla 4.75
#      browser. That seems to be good enough...
#
#
# Below you can set some important variables

# Proxy to send the HTTP requests through, written as "host:port" (the value
# is split on ":" into the proxy host and proxy port). Set to "" if you don't
# want to use a proxy.
# This one is for pandora.be members
# set sgoogle_proxy "proxy.pandora.be:8080"
set sgoogle_proxy ""

# Only react to the public triggers on the following channels (space
# separated, compared case-insensitively)
# set sgoogle_channels "#chan1 #chan2 #chanX"
# or for all channels
set sgoogle_channels ""

# maximum number of hits a user may ask for in one search; requests for
# more are refused with a message
set sgoogle_hits 10

# number of hits returned when the request does not name a number
set sgoogle_hits_default 3

# allow use of detailed mode? (1 = the "detail" keyword is honoured,
# anything else = the user is told that detailed mode is disabled)
set sgoogle_detail 1

# use detailed mode only when requested?
# 1 = plain requests are answered in simple mode
# anything else = plain requests are answered in detailed mode
set sgoogle_detail_request 0

# reply method for a public request (on the channel)
# set to 0 for a notice to the user
# set to 1 for a msg to the user
# set to 2 for a public msg on the channel (a "help" request is still
#          answered with a msg to the user)
set sgoogle_pubmethod 2

# who should i allow a google search in the partyline (use the eggdrop flags)
# ie. only allow botmasters and botowners
# (this value is used as the flags argument of the dcc binds)
set sgoogle_pline_flags "mn"

# log requests? (1 = write a line per request with putcmdlog)
set sgoogle_log 0

# debug the script (1 = write DEBUG progress lines with putlog)
set sgoogle_debug 1


#
# CODE STARTS HERE - DON'T CHANGE BELOW
#
# Flood-control state shared by the public and partyline handlers:
# the text of the last accepted request and the request counter.
set sgoogle_antiflood 0
set sgoogle_lastreq ""

# HTTP client setup. Google refuses unknown clients, so present a
# browser-like user agent; use the configured "host:port" proxy if any.
package require http
http::config -useragent "Mozilla/4.75 \[en\] (Win95; U)"
if {$sgoogle_proxy != ""} {
    http::config \
        -proxyhost [lindex [split $sgoogle_proxy {:}] 0] \
        -proxyport [lindex [split $sgoogle_proxy {:}] 1]
}

# Public (channel) triggers, open to everybody.
bind pub - .google pub:sgoogle
bind pub - .suche pub:sgoogle

# Partyline commands, restricted to the configured flags.
bind dcc $sgoogle_pline_flags google dcc:sgoogle
bind dcc $sgoogle_pline_flags suche dcc:sgoogle
# sgoogle_antiflood_reset --
#
#   Timer callback that clears the flood-control state: the request counter
#   goes back to 0 and the remembered last request is emptied, so the same
#   query may be asked again. Every other pending timer that would call this
#   proc is killed, because each accepted request schedules its own timer.
#
# Arguments: none
# Results:   none of interest; modifies the globals sgoogle_antiflood and
#            sgoogle_lastreq.
proc sgoogle_antiflood_reset {} {
  # Both variables must be declared global. Without the declaration the
  # "set sgoogle_lastreq" below would only create a local variable and the
  # remembered request would never be cleared.
  global sgoogle_antiflood sgoogle_lastreq
  set sgoogle_antiflood 0
  set sgoogle_lastreq ""
  # Each entry of [timers] is read as: index 1 = command, index 2 = timer id.
  foreach th [timers] {
    if {[string equal [lindex $th 1] "sgoogle_antiflood_reset"]} {
      killtimer [lindex $th 2]
    }
  }
}
# pub:sgoogle --
#
#   Handler for the public (channel) search triggers. Applies the channel
#   filter and the flood control, then hands the request to sgoogle_start
#   with the reply method selected by sgoogle_pubmethod.
#
# Arguments:
#   nick     nick of the requester (target for notice/msg replies)
#   uhost    user@host of the requester (unused)
#   handle   bot handle of the requester (unused)
#   channel  channel the trigger was typed on
#   arg      text following the trigger
#
# Results:
#   Always returns 0.
#
# Note: a request equal to sgoogle_lastreq is dropped. That variable starts
# out as "", so an empty request is dropped while no other request has been
# accepted yet.
proc pub:sgoogle { nick uhost handle channel arg } {
   global sgoogle_channels sgoogle_pubmethod
   global sgoogle_antiflood sgoogle_lastreq sgoogle_log

   # Filter on the channel first, so that triggers typed on channels we do
   # not serve neither use up the flood budget nor replace the remembered
   # last request.
   if {![string equal $sgoogle_channels ""] && ([lsearch -exact [string tolower $sgoogle_channels] [string tolower $channel]] < 0)} {
      return 0
   }

   # Crude flood control: at most 5 requests until the reset timer fires,
   # and no immediate repeat of the previous request. The comparison is a
   # string comparison on purpose: "==" would treat "1" and "1.0" as equal.
   if {$sgoogle_antiflood > 4} {return 0}
   if {[string equal $arg $sgoogle_lastreq]} {return 0}
   set sgoogle_lastreq $arg
   incr sgoogle_antiflood
   timer 1 sgoogle_antiflood_reset

   # Reply method: 0 = notice to the user, 2 = message on the channel
   # (except for "help", which is sent to the user), otherwise a message
   # to the user.
   if {$sgoogle_pubmethod == 0} {
      set logger [sgoogle_start 0 $nick $arg]
   } elseif {($sgoogle_pubmethod == 2) && ![string equal [string tolower $arg] "help"]} {
      set logger [sgoogle_start 2 $channel $arg]
   } else {
      set logger [sgoogle_start 1 $nick $arg]
   }
   if {$sgoogle_log == 1} {putcmdlog "\[SG\] ($channel) $nick $logger"}
   return 0
}
# dcc:sgoogle --
#
#   Handler for the partyline search commands. Applies the flood control
#   and hands the request to sgoogle_start with reply method 3, which makes
#   the reply go to the given partyline idx.
#
# Arguments:
#   handle  bot handle of the requester (only used for logging)
#   idx     partyline idx that receives the reply
#   arg     text following the command
#
# Results:
#   Always returns 0.
#
# Note: a request equal to sgoogle_lastreq is dropped. That variable starts
# out as "", so an empty request is dropped while no other request has been
# accepted yet.
proc dcc:sgoogle { handle idx arg } {
   global sgoogle_antiflood sgoogle_lastreq sgoogle_log

   # The partyline gets a larger flood budget than the channels (9 requests
   # until the reset timer fires). The duplicate check is a string
   # comparison on purpose: "==" would treat "1" and "1.0" as equal.
   if {$sgoogle_antiflood > 8} {return 0}
   if {[string equal $arg $sgoogle_lastreq]} {return 0}
   set sgoogle_lastreq $arg
   incr sgoogle_antiflood
   timer 1 sgoogle_antiflood_reset

   set logger [sgoogle_start 3 $idx $arg]
   if {$sgoogle_log == 1} {putcmdlog "\[SG\] (Pline) $handle $logger"}
   return 0
}
# sgoogle_start --
#
#   Interprets one request, runs the search and sends the answer.
#
#   Request syntax (case-insensitive):
#       help | version | ?detail|simple? ?num? keyword ?keyword ...?
#   An empty request is treated like "help".
#
# Arguments:
#   public       reply method passed on to sgoogle_sendtext
#                (0 notice, 1 msg, 2 channel, anything else partyline)
#   destination  nick, channel or idx that receives the reply
#   arg          raw request text as typed by the user
#
# Results:
#   A short sentence describing what happened; the callers put it in the
#   request log.
proc sgoogle_start { public destination arg } {
   global sgoogle_version version sgoogle_hits sgoogle_hits_default sgoogle_debug sgoogle_detail_request sgoogle_detail lastbind

   # Remove the characters $ { } [ ] from the request. They have to be in a
   # bracket expression: written as a plain sequence the pattern would only
   # match the literal text "${}[]".
   regsub -all {[\$\{\}\[\]]} $arg {} arg
   set arg [string tolower $arg]

   # The request is free text, not a Tcl list, so split it into words
   # explicitly. List commands on the raw text raise an error on e.g. an
   # unbalanced double quote.
   set words {}
   foreach word [split $arg] {
      if {[string length $word] > 0} {lappend words $word}
   }
   set first [lindex $words 0]

   if {([llength $words] == 0) || (([llength $words] == 1) && [string equal $first "help"])} {
      set temp ""
      lappend temp "Help: $sgoogle_version"
      lappend temp "|-$lastbind \[detail|simple\] <num> <keyword(s)>"
      lappend temp "|  Search google for keywords. You can specify"
      lappend temp "|  the number of hits you want generated and"
      lappend temp "|  you want the results in detailed or simple"
      lappend temp "|  mode. Maximum number of hits is set to $sgoogle_hits"
      lappend temp "|-$lastbind version"
      lappend temp "|  check this scripts version"
      lappend temp "`-$lastbind help"
      sgoogle_sendtext $temp $public $destination
      return "asked for help."
   }
   if {([llength $words] == 1) && [string equal $first "version"]} {
      sgoogle_sendtext [list "$sgoogle_version running on eggdrop [lindex $version 0]."] $public $destination
      return "requested the version."
   }

   # Output mode. "detailed" is set on every path; when detailed mode was
   # asked for but is disabled the user is told so and gets simple output.
   if {[string equal $first "detail"]} {
      set terms [lrange $words 1 end]
      if {$sgoogle_detail == 1} {
         set detailed 1
      } else {
         sgoogle_sendtext [list "Detailed mode is DISABLED by the admin"] $public $destination
         set detailed 0
      }
   } elseif {[string equal $first "simple"]} {
      set terms [lrange $words 1 end]
      set detailed 0
   } else {
      set terms $words
      # Detailed output is the default only if it is allowed at all and not
      # restricted to explicit requests.
      if {($sgoogle_detail == 1) && ($sgoogle_detail_request != 1)} {
         set detailed 1
      } else {
         set detailed 0
      }
   }

   # Optional number of hits: one or two digits in front of the keywords.
   set hits $sgoogle_hits_default
   if {[regexp {^[0-9][0-9]?$} [lindex $terms 0]]} {
      # scan %d reads the digits as decimal; handing "08" to expr directly
      # fails where a leading zero means octal.
      scan [lindex $terms 0] %d hits
      set terms [lrange $terms 1 end]
      if {$hits > $sgoogle_hits} {
         sgoogle_sendtext [list "Too many hits requested (try $lastbind help)"] $public $destination
         return "tried to retrieve too many hits."
      }
      if {$hits < 1} {
         sgoogle_sendtext [list "Number of hits must be at least 1"] $public $destination
         return "gave an invalid hits value."
      }
   }
   if {[llength $terms] == 0} {
      sgoogle_sendtext [list "No keywords given (try $lastbind help)"] $public $destination
      return "gave no keywords."
   }
   # Plain text form of the keywords for messages and the log.
   set zoekstring [join $terms " "]

   # formatQuery URL-encodes the keywords (spaces become "+"), so characters
   # like & # % + in a keyword cannot corrupt the query string.
   set query "http://www.google.com/search?[http::formatQuery q $zoekstring num $hits]"
   if {$sgoogle_debug == 1} {
      putlog "DEBUG: Query built ($query)"
      putlog "DEBUG: Contacting website..."
   }

   # A connection failure makes geturl raise an error; without the timeout
   # a silent server would stall the request forever.
   if {[catch {http::geturl $query -timeout 30000} token]} {
      sgoogle_sendtext [list "Could not find results for $zoekstring (Error: $token)"] $public $destination
      return "requested $zoekstring but got an error: $token."
   }
   upvar #0 $token state
   set httpline ""
   if {[info exists state(http)]} {set httpline $state(http)}
   if {$sgoogle_debug == 1} {putlog "DEBUG: Got state ($httpline)"}

   # Status line is "HTTP/x.y code reason"; only code 200 is a usable answer.
   set httpwords [split $httpline]
   if {![string equal [lindex $httpwords 1] "200"]} {
      set reason [join [lrange $httpwords 2 end] " "]
      # No status line at all (e.g. timeout): report the transfer status.
      if {[string equal $reason ""]} {set reason [http::status $token]}
      http::cleanup $token
      sgoogle_sendtext [list "Could not find results for $zoekstring (Error: $reason)"] $public $destination
      return "requested $zoekstring but got an error: $reason."
   }
   set htmldata [http::data $token]
   # Release the per-request state kept by the http package.
   http::cleanup $token
   if {$sgoogle_debug == 1} {putlog "DEBUG: Got data ([string length $htmldata] bytes)"}

   set parsed [sgoogle_parse $htmldata]
   # sgoogle_parse returns 0 when it found no result section; a list whose
   # first element (the hit count) is 0 carries no hits either.
   if {[lindex $parsed 0] == 0} {
     if {$sgoogle_debug == 1} {putlog "DEBUG: Could not find items to parse."}
     sgoogle_sendtext [list "Could not find results for $zoekstring"] $public $destination
     return "requested $zoekstring but couldn't find any results."
   }
   if {$sgoogle_debug == 1} {putlog "DEBUG: Data parsed (got [lindex $parsed 0] items)"}
   set sendtext [sgoogle_template $parsed $detailed $zoekstring]
   if {$sgoogle_debug == 1} {putlog "DEBUG: Template applied ([llength $sendtext] lines of text)"}
   sgoogle_sendtext $sendtext $public $destination
   if {$sgoogle_debug == 1} {putlog "DEBUG: Sent text to output. Done..."}
   return "requested $zoekstring and got [lindex $parsed 0] items ([string length $htmldata] bytes)"
}
# sgoogle_parse --
#
#   Extracts the hits and the statistics line from a result page.
#
#   A hit starts on a line beginning with "<p><a " and runs until the next
#   such line, until a line containing "<div class=n>" or "<br clear=all>"
#   (which ends the scan), or until the end of the page. Each hit is
#   converted with sgoogle_parsehelper. The statistics text is taken from a
#   line containing "<div>".
#
# Arguments:
#   data  HTML text of the result page
#
# Results:
#   0 if the page has no hit section, otherwise the list
#       count statistics hit ?hit ...?
#   The statistics element is always present (empty if the page had none),
#   so the hits always start at index 2.
proc sgoogle_parse {data} {
   set count 0
   set starthits 0
   set hitrawdata ""
   set hits {}
   set info ""
   foreach line [split $data \n] {
      if {[string equal [string range $line 0 5] "<p><a "]} {
         # A new hit begins: finish the previous one first.
         set starthits 1
         if {![string equal $hitrawdata ""]} {
            lappend hits [sgoogle_parsehelper $hitrawdata]
            incr count
         }
         set hitrawdata $line
      } elseif {$starthits} {
         append hitrawdata $line
      }
      if {[string match -nocase "*<div class=n>*" $line] || [string match -nocase "*<br clear=all>*" $line]} {
         # End of the hit section: finish the hit in progress and stop.
         if {$starthits} {
            lappend hits [sgoogle_parsehelper $hitrawdata]
            incr count
         }
         set hitrawdata ""
         break
      }
      if {[string match -nocase "*<div>*" $line]} {
         # Cut out the table cell holding the statistics and strip its tags.
         regsub -all {<td bgcolor=#3366cc align=right nowrap>} $line {|} stats
         regsub -all {</table>} $stats {|} stats
         regsub -all {<([^<])*>} [lindex [split $stats {|}] 2] {} stats
         # Keep the first statistics text found. It is stored separately so
         # that its position in the result does not depend on where the
         # line appears in the page.
         if {[string equal $info ""]} {set info $stats}
      }
   }
   if {$starthits == 0} {return 0}
   # Page ended without a terminator line: the last hit is still pending.
   if {![string equal $hitrawdata ""]} {
      lappend hits [sgoogle_parsehelper $hitrawdata]
      incr count
   }
   return [linsert $hits 0 $count $info]
}
# sgoogle_parsehelper --
#
#   Turns the raw HTML of a single hit into its title, URL and page size.
#
# Arguments:
#   data  HTML text of one hit (the hit's lines glued together)
#
# Results:
#   The list: title url size. url is "" if no link was found; size is
#   "N/A" if the hit does not mention one.
proc sgoogle_parsehelper {data} {
   set url ""
   set size ""
   # "|" is used as the field separator below, so drop any that are part of
   # the text, then turn the first <br> into the separator. In front of it
   # is the linked title, behind it the description.
   regsub -all {\|} $data {} data
   regsub {<br>} $data {|} data
   set parts [split $data {|}]
   set head [lindex $parts 0]
   set tail [lindex $parts 1]

   # Link target: http or https, up to the end of the tag or a closing
   # quote. The "&e=<digits>" parameter is removed.
   regexp {https?://[^">]+} $head url
   regsub {\&e=[0-9 ]+} $url {} url

   # Title: strip the tags, then decode the entities. The non-ASCII
   # replacements are written as \u escapes so that they do not depend on
   # the encoding this file is read with.
   regsub -all {<([^<])*>} $head {} titel
   regsub -all {&quot;} $titel {'} titel
   regsub -all {&#176;} $titel "\u00b0" titel
   regsub -all {&iuml;} $titel "\u00ef" titel
   regsub -all {&euml;} $titel "\u00eb" titel
   regsub -all {&lt;} $titel {<} titel
   regsub -all {&gt;} $titel {>} titel
   # &amp; has to be decoded last: doing it first turns "&amp;lt;" into
   # "&lt;", which the line above would then decode a second time.
   regsub -all {&amp;} $titel {\&} titel
   regsub -all {[\[\]]} $titel {} titel
   regsub -all { -  Translate this page} $titel {} titel

   # Size: the " - 12k - " fragment of the description, without the
   # blanks and dashes.
   regexp { - [0-9]+k - } $tail size
   regsub -all {[ -]} $size {} size
   if {[string equal $size ""]} {set size "N/A"}
   return [list $titel $url $size]
}
# sgoogle_template --
#
#   Formats parsed search results as the list of text lines to send.
#
# Arguments:
#   data    parsed results: element 0 is the hit count, element 1 the text
#           of the closing line, elements 2.. are the hits, each a list
#           of title, url and size
#   detail  0 for simple output (one line per hit), anything else for
#           detailed output (header, two lines per hit, closing line)
#   terms   search terms shown in the header of the detailed output
#
# Results:
#   The list of lines. Hits are numbered unless the hit count is 1.
proc sgoogle_template {data detail terms} {
  global sgoogle_version
  set hitlist [lrange $data 2 end]
  set single [expr {[lindex $data 0] == 1}]
  set lines ""
  set n 0

  if {$detail == 0} {
    # Simple mode: url and size only.
    foreach hit $hitlist {
      incr n
      set url [lindex $hit 1]
      set size [lindex $hit 2]
      if {$single} {
        lappend lines "$url ($size)"
      } else {
        lappend lines "\[$n\] $url ($size)"
      }
    }
    return $lines
  }

  # Detailed mode: tree-like layout with the title below each url.
  lappend lines "Search for $terms ($sgoogle_version)"
  foreach hit $hitlist {
    incr n
    set name [lindex $hit 0]
    set url [lindex $hit 1]
    set size [lindex $hit 2]
    if {$single} {
      lappend lines "|- $url ($size)"
    } else {
      lappend lines "|-\[$n\] $url ($size)"
    }
    lappend lines "|  $name"
  }
  lappend lines "`-[lindex $data 1]"
  return $lines
}
# sgoogle_sendtext --
#
#   Sends a list of text lines to the requester.
#
# Arguments:
#   data         list of lines to send
#   public       0 = putnotc, 1 = putmsg, 2 = putchan, anything else =
#                putdcc
#   destination  target handed to the chosen command
#
# A single line bound for a channel (public 2) is sent as a PRIVMSG with
# putserv directly.
proc sgoogle_sendtext {data public destination} {
  if {[llength $data] == 1 && $public == 2} {
    return [putserv "PRIVMSG $destination :[lindex $data 0]"]
  }

  # Choose the output command once, then use it for every line.
  if {$public == 0} {
    set sender putnotc
  } elseif {$public == 1} {
    set sender putmsg
  } elseif {$public == 2} {
    set sender putchan
  } else {
    set sender putdcc
  }
  foreach text $data {
    $sender $destination $text
  }
}
# Version string shown in the help text, the version reply and the header
# of detailed results. It is set here at the end of the file; the procs
# above only read it when they are called, which is after loading.
set sgoogle_version "SuperGoogle 1.2fix by brother"
# Announce in the bot's log that the script has been loaded.
putlog "$sgoogle_version Loaded..."


