Rewrite most of Google to use the new AJAX API.

Add a translate command as well.
This commit is contained in:
James Vega 2008-08-14 23:20:06 +00:00
parent de69f9da17
commit 62a8306fbb
6 changed files with 171 additions and 997 deletions

View File

@ -1,83 +0,0 @@
"""
Facade that hides the differences between the SOAPpy and SOAP.py
libraries, so that google.py doesn't have to deal with them.
@author: Brian Landers <brian@bluecoat93.org>
@license: Python
@version: 0.5.4
"""
import warnings
from distutils.version import LooseVersion
__author__ = "Brian Landers <brian@bluecoat93.org>"
__version__ = "0.6"
__license__ = "Python"
#
# Wrapper around the python 'warnings' facility
#
def warn(message, level=RuntimeWarning):
    """
    Emit C{message} through the standard C{warnings} module.

    @param message: text of the warning
    @param level: warning category class (defaults to RuntimeWarning)
    """
    # stacklevel=3 is forwarded unchanged to warnings.warn.
    warnings.warn(message, category=level, stacklevel=3)
# We can't use older version of SOAPpy, due to bugs that break the Google API
minSOAPpyVersion = "0.11.3"
#
# Try loading SOAPpy first. If that fails, fall back to the old SOAP.py
#
# SOAPpy stays None unless a usable SOAPpy is imported; the rest of the
# module tests it to decide which SOAP library to talk to.
SOAPpy = None
try:
    import SOAPpy
    from SOAPpy import SOAPProxy, Types
    if LooseVersion(minSOAPpyVersion) > \
       LooseVersion(SOAPpy.version.__version__):
        # The two literals are joined *before* formatting.  Written as
        # "a" + "b" % x, the % operator binds to "b" alone, which has no
        # placeholder and raises TypeError instead of warning.
        warn(("Versions of SOAPpy before %s have known bugs that prevent " +
              "PyGoogle from functioning.") % minSOAPpyVersion)
        raise ImportError
except ImportError:
    # Drop any partially imported or too-old SOAPpy so that the later
    # `if SOAPpy:` checks really select the SOAP.py code paths.
    SOAPpy = None
    try:
        import SOAP
    except ImportError:
        raise RuntimeError("Unable to find SOAPpy or SOAP. Can't continue.\n")
#
# Constants that differ between the modules
#
# Bind the boolean constants and type classes from whichever SOAP library
# was selected above.
if SOAPpy:
    false, true = Types.booleanType(0), Types.booleanType(1)
    structType, faultType = Types.structType, Types.faultType
else:
    false, true = SOAP.booleanType(0), SOAP.booleanType(1)
    structType, faultType = SOAP.structType, SOAP.faultType
#
# Get a SOAP Proxy object in the correct way for the module we're using
#
def getProxy(url, namespace, http_proxy):
    """
    Build a SOAP proxy object with the library that is in use.

    @param url: endpoint URL handed to the proxy constructor
    @param namespace: SOAP namespace handed to the proxy constructor
    @param http_proxy: HTTP proxy setting handed to the proxy constructor
    @return: a SOAPProxy instance from SOAPpy, or from SOAP when SOAPpy is
        not set
    """
    if SOAPpy:
        proxyClass = SOAPProxy
    else:
        proxyClass = SOAP.SOAPProxy
    return proxyClass(url, namespace=namespace, http_proxy=http_proxy)
#
# Convert an object to a dictionary in the proper way for the module
# we're using for SOAP
#
def toDict(obj):
    """
    Return the dictionary form of a SOAP object.

    @param obj: object exposing C{_asdict}
    @return: C{obj._asdict()} when SOAPpy is in use, otherwise the
        C{obj._asdict} attribute itself
    """
    if not SOAPpy:
        # On the SOAP.py path _asdict is read, not called.
        return obj._asdict
    return obj._asdict()

View File

@ -1 +1,5 @@
Insert a description of your plugin here, with any notes, etc. about using it. This is a simple plugin to provide access to the Google services we all know
and love from our favorite IRC bot.
In order to use this plugin you must have the following modules installed:
- simplejson: http://undefined.org/python/#simplejson

View File

@ -1,5 +1,6 @@
### ###
# Copyright (c) 2005, Jeremiah Fincher # Copyright (c) 2005, Jeremiah Fincher
# Copyright (c) 2008, James Vega
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -30,74 +31,50 @@
import supybot.conf as conf import supybot.conf as conf
import supybot.registry as registry import supybot.registry as registry
import google
def configure(advanced): def configure(advanced):
from supybot.questions import output, expect, anything, something, yn from supybot.questions import output, yn
output('To use Google\'t Web Services, you must have a license key.') conf.registerPlugin('Google', True)
if yn('Do you have a license key?'): output("""The Google plugin has the functionality to watch for URLs
key = something('What is it?') that match a specific pattern. (We call this a snarfer)
while len(key) != 32: When supybot sees such a URL, it will parse the web page
output('That\'s not a valid Google license key.') for information and reply with the results.
if yn('Are you sure you have a valid Google license key?'):
key = something('What is it?')
else:
key = ''
break
if key:
conf.registerPlugin('Google', True)
conf.supybot.plugins.Google.licenseKey.setValue(key)
output("""The Google plugin has the functionality to watch for URLs
that match a specific pattern. (We call this a snarfer)
When supybot sees such a URL, it will parse the web page
for information and reply with the results.
Google has two available snarfers: Google Groups link Google has two available snarfers: Google Groups link
snarfing and a google search snarfer.""") snarfing and a google search snarfer.""")
if yn('Do you want the Google Groups link snarfer enabled by ' if yn('Do you want the Google Groups link snarfer enabled by '
'default?'): 'default?'):
conf.supybot.plugins.Google.groupsSnarfer.setValue(True) conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
if yn('Do you want the Google search snarfer enabled by default?'): if yn('Do you want the Google search snarfer enabled by default?'):
conf.supybot.plugins.Google.searchSnarfer.setValue(True) conf.supybot.plugins.Google.searchSnarfer.setValue(True)
else:
output("""You'll need to get a key before you can use this plugin.
You can apply for a key at
http://code.google.com/apis/soapsearch/""")
# Registry string for the Google license key: a non-empty value must be
# exactly 32 characters and is pushed into the google module when stored.
class LicenseKey(registry.String):
    def setValue(self, s):
        # Empty values are accepted; anything else must be 32 characters.
        if s and len(s) != 32:
            raise registry.InvalidRegistryValue('Invalid Google license key.')
        try:
            s = s or ''
            registry.String.setValue(self, s)
            if s:
                google.setLicense(self.value)
        except AttributeError:
            # At shutdown world can be None.
            if not world or world.dying:
                return
            raise callbacks.Error(
                  'It appears that the initial import of '
                  'our underlying google.py module has '
                  'failed. Once the cause of that problem '
                  'has been diagnosed and fixed, the bot '
                  'will need to be restarted in order to '
                  'load this plugin.')
class Language(registry.OnlySomeStrings): class Language(registry.OnlySomeStrings):
validStrings = ['lang_' + s for s in 'ar zh-CN zh-TW cs da nl en et fi fr ' validStrings = ['lang_' + s for s in 'ar bg ca zh-CN zh-TW hr cs da nl en '
'de el iw hu is it ja ko lv lt no pt ' 'et fi fr de el iw hu is id it ja ko '
'pl ro ru es sv tr'.split()] 'lv lt no pl pt ro ru sr sk sl es sv '
'tr'.split()]
validStrings.append('') validStrings.append('')
def normalize(self, s): def normalize(self, s):
if not s.startswith('lang_'): if not s.startswith('lang_'):
s = 'lang_' + s s = 'lang_' + s
if not s.endswith('CN') or s.endswith('TW'): s = s[:-2].lower() + s[-2:]
s = s.lower()
else:
s = s.lower()[:-2] + s[-2:]
return s return s
# Positive-integer registry value capped at 8.
class NumSearchResults(registry.PositiveInteger):
    """Value must be 1 <= n <= 8"""
    def setValue(self, v):
        # Only the upper bound is checked here; the value is then handed to
        # the PositiveInteger implementation.
        # NOTE(review): presumably self.error() raises and the class
        # docstring above is used as its message -- confirm in registry.
        if v > 8:
            self.error()
        super(NumSearchResults, self).setValue(v)
# Registry value restricted to the three strings listed in validStrings.
class SafeSearch(registry.OnlySomeStrings):
    validStrings = ['active', 'moderate', 'off']
Google = conf.registerPlugin('Google') Google = conf.registerPlugin('Google')
conf.registerGlobalValue(Google, 'referer',
registry.String('', """Determines the URL that will be sent to Google for
the Referer field of the search requests. If this value is empty, a
Referer will be generated in the following format:
http://$server/$botName"""))
conf.registerChannelValue(Google, 'groupsSnarfer', conf.registerChannelValue(Google, 'groupsSnarfer',
registry.Boolean(False, """Determines whether the groups snarfer is registry.Boolean(False, """Determines whether the groups snarfer is
enabled. If so, URLs at groups.google.com will be snarfed and their enabled. If so, URLs at groups.google.com will be snarfed and their
@ -113,25 +90,14 @@ conf.registerChannelValue(Google, 'colorfulFilter',
conf.registerChannelValue(Google, 'bold', conf.registerChannelValue(Google, 'bold',
registry.Boolean(True, """Determines whether results are bolded.""")) registry.Boolean(True, """Determines whether results are bolded."""))
conf.registerChannelValue(Google, 'maximumResults', conf.registerChannelValue(Google, 'maximumResults',
registry.PositiveInteger(10, """Determines the maximum number of results NumSearchResults(8, """Determines the maximum number of results returned
returned from the google command.""")) from the google command."""))
conf.registerChannelValue(Google, 'defaultLanguage', conf.registerChannelValue(Google, 'defaultLanguage',
Language('lang_en', """Determines what default language is used in Language('lang_en', """Determines what default language is used in
searches. If left empty, no specific language will be requested.""")) searches. If left empty, no specific language will be requested."""))
conf.registerChannelValue(Google, 'safeSearch', conf.registerChannelValue(Google, 'safeSearch',
registry.Boolean(True, "Determines whether safeSearch is on by default.")) SafeSearch('moderate', """Determines what level of safeSearch to use by
conf.registerGlobalValue(Google, 'licenseKey', default. 'active' - most filtering, 'moderate' - default filtering, 'off'
LicenseKey('', """Sets the Google license key for using Google's Web - no filtering"""))
Services API. This is necessary before you can do any searching with this
module.""", private=True))
conf.registerGroup(Google, 'state')
conf.registerGlobalValue(Google.state, 'searches',
registry.Integer(0, """Used to keep the total number of searches Google has
done for this bot. You shouldn't modify this."""))
conf.registerGlobalValue(Google.state, 'time',
registry.Float(0.0, """Used to keep the total amount of time Google has
spent searching for this bot. You shouldn't modify this."""))
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -1,641 +0,0 @@
"""
Python wrapper for Google web APIs
This module allows you to access Google's web APIs through SOAP,
to do things like search Google and get the results programmatically.
Described U{here <http://code.google.com/apis/soapsearch/>}
You need a Google-provided license key to use these services.
Follow the link above to get one. These functions will look in
several places (in this order) for the license key:
- the "license_key" argument of each function
- the module-level LICENSE_KEY variable (call setLicense once to set it)
- an environment variable called GOOGLE_LICENSE_KEY
- a file called ".googlekey" in the current directory
- a file called "googlekey.txt" in the current directory
- a file called ".googlekey" in your home directory
- a file called "googlekey.txt" in your home directory
- a file called ".googlekey" in the same directory as google.py
- a file called "googlekey.txt" in the same directory as google.py
Sample usage::
>>> import google
>>> google.setLicense('...') # must get your own key!
>>> data = google.doGoogleSearch('python')
>>> data.meta.searchTime
0.043221000000000002
>>> data.results[0].URL
'http://www.python.org/'
>>> data.results[0].title
'<b>Python</b> Language Website'
@newfield contrib: Contributors
@author: Mark Pilgrim <f8dy@diveintomark.org>
@author: Brian Landers <brian@bluecoat93.org>
@license: Python
@version: 0.6
@contrib: David Ascher, for the install script
@contrib: Erik Max Francis, for the command line interface
@contrib: Michael Twomey, for HTTP proxy support
@contrib: Mark Recht, for patches to support SOAPpy
"""
__author__ = "Mark Pilgrim (f8dy@diveintomark.org)"
__version__ = "0.6"
__cvsversion__ = "$Revision: 1.6 $"[11:-2]
__date__ = "$Date: 2004/09/30 08:09:09 $"[7:-2]
__copyright__ = "Copyright (c) 2002 Mark Pilgrim"
__license__ = "Python"
__credits__ = """David Ascher, for the install script
Erik Max Francis, for the command line interface
Michael Twomey, for HTTP proxy support"""
import os, sys, getopt
import GoogleSOAPFacade
# Module-wide settings; rebound by setLicense() and setProxy().
LICENSE_KEY = None
HTTP_PROXY = None
#
# Constants
#
# Endpoint and namespace passed to GoogleSOAPFacade.getProxy().
_url = 'http://api.google.com/search/beta2'
_namespace = 'urn:GoogleSearch'
# File names probed for a license key.
_googlefile1 = ".googlekey"
_googlefile2 = "googlekey.txt"
# Boolean values taken from the facade; returned by _marshalBoolean().
_false = GoogleSOAPFacade.false
_true = GoogleSOAPFacade.true
# Ordered (getter, description) pairs.  getLicense() calls each getter with
# the license_key argument it was given and returns the first true result;
# _usage() prints the descriptions from index 2 onward.  The lambdas look up
# LICENSE_KEY, the environment and the files at call time, not at import.
_licenseLocations = (
    ( lambda key: key,
      'passed to the function in license_key variable' ),
    ( lambda key: LICENSE_KEY,
      'module-level LICENSE_KEY variable (call setLicense to set it)' ),
    ( lambda key: os.environ.get( 'GOOGLE_LICENSE_KEY', None ),
      'an environment variable called GOOGLE_LICENSE_KEY' ),
    ( lambda key: _contentsOf( os.getcwd(), _googlefile1 ),
      '%s in the current directory' % _googlefile1),
    ( lambda key: _contentsOf( os.getcwd(), _googlefile2 ),
      '%s in the current directory' % _googlefile2),
    ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile1 ),
      '%s in your home directory' % _googlefile1),
    ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile2 ),
      '%s in your home directory' % _googlefile2 ),
    ( lambda key: _contentsOf( _getScriptDir(), _googlefile1 ),
      '%s in the google.py directory' % _googlefile1 ),
    ( lambda key: _contentsOf( _getScriptDir(), _googlefile2 ),
      '%s in the google.py directory' % _googlefile2 )
)
## ----------------------------------------------------------------------
## Exceptions
## ----------------------------------------------------------------------
class NoLicenseKey(Exception):
    """
    Raised by getLicense() when none of the known locations yields a
    license key.
    """
## ----------------------------------------------------------------------
## administrative functions (non-API)
## ----------------------------------------------------------------------
def _version():
    """
    Display a formatted version string for the module.

    The template is filled from the module globals (__version__,
    __copyright__, __date__ and __credits__).
    """
    print """PyGoogle %(__version__)s
%(__copyright__)s
released %(__date__)s
Thanks to:
%(__credits__)s""" % globals()
def _usage():
    """
    Display usage information for the command-line interface.

    Prints the option summary, then one bullet for every entry of
    _licenseLocations from index 2 onward (the first two entries are not
    listed).
    """
    program = os.path.basename(sys.argv[0])
    # vars() supplies the local `program` to the %(program)s placeholders.
    print """Usage: %(program)s [options] [querytype] query
options:
-k, --key= <license key> Google license key (see important note below)
-1, -l, --lucky show only first hit
-m, --meta show meta information
-r, --reverse show results in reverse order
-x, --proxy= <url> use HTTP proxy
-h, --help print this help
-v, --version print version and copyright information
-t, --test run test queries
querytype:
-s, --search= <query> search (default)
-c, --cache= <url> retrieve cached page
-p, --spelling= <word> check spelling
IMPORTANT NOTE: all Google functions require a valid license key;
visit http://code.google.com/apis/soapsearch/ to get one. %(program)s will
look in these places (in order) and use the first license key it finds:
* the key specified on the command line""" % vars()
    for get, location in _licenseLocations[2:]:
        print " *", location
## ----------------------------------------------------------------------
## utility functions (API)
## ----------------------------------------------------------------------
def setLicense(license_key):
    """
    Store the U{Google APIs <http://code.google.com/apis/soapsearch/>}
    license key in the module-level LICENSE_KEY variable.

    No validation is performed on the value.

    @param license_key: the key to remember
    @type license_key: String
    """
    global LICENSE_KEY
    LICENSE_KEY = license_key
def getLicense(license_key = None):
    """
    Look up the U{Google APIs <http://code.google.com/apis/soapsearch/>}
    license key.

    Every getter in _licenseLocations is tried in order with
    C{license_key}; the first true result wins.  See the module-level
    documentation for the search order.

    @return: the license key
    @rtype: String
    @raise NoLicenseKey: if no location produced a key (the usage text is
        printed first)
    """
    for lookup, _description in _licenseLocations:
        found = lookup(license_key)
        if found:
            return found
    _usage()
    raise NoLicenseKey('get a license key at '
                       'http://code.google.com/apis/soapsearch/')
def setProxy(http_proxy):
    """
    Store the HTTP proxy to use for Google requests in the module-level
    HTTP_PROXY variable.

    No validation is performed on the value.

    @param http_proxy: the proxy to remember
    @type http_proxy: String
    """
    global HTTP_PROXY
    HTTP_PROXY = http_proxy
def getProxy(http_proxy = None):
    """
    Return the HTTP proxy to use for accessing Google.

    @param http_proxy: (optional) proxy that takes precedence when true
    @return: C{http_proxy} if it is true, otherwise the module-level
        HTTP_PROXY value
    @rtype: String
    """
    if http_proxy:
        return http_proxy
    return HTTP_PROXY
def _contentsOf(dirname, filename):
    """
    Return the full contents of C{filename} inside C{dirname}.

    @param dirname: directory to look in
    @param filename: name of the file within that directory
    @return: the file contents exactly as read, or None when the file
        does not exist
    """
    filename = os.path.join(dirname, filename)
    if not os.path.exists(filename): return None
    fsock = open(filename)
    # Close the handle even when read() fails, so an I/O error cannot leak
    # the descriptor.
    try:
        return fsock.read()
    finally:
        fsock.close()
def _getScriptDir():
    """
    Return the absolute directory containing this code: the directory of
    sys.argv[0] when run as a script, otherwise the directory of this
    module's __file__.
    """
    if __name__ == '__main__':
        location = sys.argv[0]
    else:
        location = sys.modules[__name__].__file__
    return os.path.abspath(os.path.dirname(location))
def _marshalBoolean(value):
    """
    Map a Python truth value onto the module's SOAP boolean constants.

    @return: _true when C{value} is true, _false otherwise
    """
    if not value:
        return _false
    return _true
def _getRemoteServer( http_proxy ):
    """
    Return a facade SOAP proxy for the module's _url and _namespace,
    created with the given HTTP proxy setting.
    """
    remote = GoogleSOAPFacade.getProxy( _url, _namespace, http_proxy )
    return remote
## ----------------------------------------------------------------------
## search results classes
## ----------------------------------------------------------------------
class _SearchBase:
    """
    Base class for result containers: copies every entry of a parameter
    dictionary onto the instance as an attribute.

    Values that are facade structs are converted to dictionaries, and a
    sequence whose first element is a struct is converted to a list of
    dictionaries.
    """
    def __init__(self, params):
        """
        @param params: mapping of attribute name to value
        """
        for k, v in params.items():
            if isinstance(v, GoogleSOAPFacade.structType):
                v = GoogleSOAPFacade.toDict( v )
            # Probe the first element on a best-effort basis: many values
            # are not indexable (numbers), are empty, or are dictionaries
            # without a 0 key.
            try:
                first = v[0]
            except Exception:
                first = None
            if isinstance(first, GoogleSOAPFacade.structType):
                # Convert through the facade.  The previous code called an
                # undefined SOAPProxy.toDict and a bare except hid the
                # NameError, so these lists were never converted.
                v = [ GoogleSOAPFacade.toDict( node ) for node in v ]
            self.__dict__[str(k)] = v
## ----------------------------------------------------------------------
class SearchResultsMetaData(_SearchBase):
    """
    Container class for metadata about a given search query's results.

    Attributes are populated by L{_SearchBase} from the dictionary passed
    to the constructor.

    @ivar documentFiltering: is duplicate page filtering active?
    @ivar searchComments: human-readable informational message
        example::
            "'the' is a very common word and was not included in your search"
    @ivar estimatedTotalResultsCount: estimated total number of results
        for this query.
    @ivar estimateIsExact: is estimatedTotalResultsCount an exact value?
    @ivar searchQuery: search string that initiated this search
    @ivar startIndex: index of the first result returned (zero-based)
    @ivar endIndex: index of the last result returned (zero-based)
    @ivar searchTips: human-readable informational message on how to better
        use Google.
    @ivar directoryCategories: list of categories for the search results
        This field is a list of dictionaries, like so::
            { 'fullViewableName': 'the Open Directory category',
              'specialEncoding': 'encoding scheme of this directory category'
            }
    @ivar searchTime: total search time, in seconds
    """
    pass
## ----------------------------------------------------------------------
class SearchResult(_SearchBase):
    """
    Encapsulates a single result from a search.

    Attributes are populated by L{_SearchBase} from the dictionary passed
    to the constructor.

    @ivar URL: URL
    @ivar title: title (HTML)
    @ivar snippet: snippet showing query context (HTML)
    @ivar cachedSize: size of cached version of this result, (KB)
    @ivar relatedInformationPresent: is the "related:" keyword supported?
        Flag indicates that the "related:" keyword is supported for this URL
    @ivar hostName: used when filtering occurs
        When filtering occurs, a maximum of two results from any given
        host is returned. When this occurs, the second resultElement
        that comes from that host contains the host name in this parameter.
    @ivar directoryCategory: Open Directory category information
        This field is a dictionary with the following values::
            { 'fullViewableName': 'the Open Directory category',
              'specialEncoding' : 'encoding scheme of this directory category'
            }
    @ivar directoryTitle: Open Directory title of this result (or blank)
    @ivar summary: Open Directory summary for this result (or blank)
    """
    pass
## ----------------------------------------------------------------------
class SearchReturnValue:
    """
    Everything returned for one search query.

    @ivar meta: L{SearchResultsMetaData} instance for this query
    @ivar results: list of L{SearchResult} objects for this query
    """
    def __init__( self, metadata, results ):
        # Both arguments are stored as given; nothing is copied.
        self.meta, self.results = metadata, results
## ----------------------------------------------------------------------
## main functions
## ----------------------------------------------------------------------
def doGoogleSearch( q, start = 0, maxResults = 10, filter = 1,
                    restrict='', safeSearch = 0, language = '',
                    inputencoding = '', outputencoding = '',
                    license_key = None, http_proxy = None ):
    """
    Run a query through the Google SOAP API and wrap the response.

    A license key is required; obtain one from the U{Google APIs
    <http://code.google.com/apis/soapsearch/>} site and either pass it on
    every call or set it globally (see the L{google} module-level docs).

    Any query string that works on the Google web site works here; see
    U{http://www.google.com/help/features.html} for advanced features.
    C{start} and C{maxResults} page through results; Google currently
    limits C{maxResults} to 10.  Legal values for C{restrict},
    C{language}, C{inputencoding} and C{outputencoding} are listed in the
    API documentation, which can be downloaded
    U{here <http://code.google.com/apis/soapsearch/download.html>}.

    @param q: search string.
    @type q: String
    @param start: (optional) zero-based index of first desired result.
    @type start: int
    @param maxResults: (optional) maximum number of results to return.
    @type maxResults: int
    @param filter: (optional) flag to request filtering of similar results
    @type filter: int
    @param restrict: (optional) restrict results by country or topic.
    @type restrict: String
    @param safeSearch: (optional)
    @type safeSearch: int
    @param language: (optional)
    @type language: String
    @param inputencoding: (optional)
    @type inputencoding: String
    @param outputencoding: (optional)
    @type outputencoding: String
    @param license_key: (optional) the Google API license key to use
    @type license_key: String
    @param http_proxy: (optional) the HTTP proxy to use for talking to Google
    @type http_proxy: String
    @return: the search results encapsulated in an object
    @rtype: L{SearchReturnValue}
    """
    # Resolve the key first so a missing key is reported before any proxy
    # object is built.
    key = getLicense( license_key )
    server = _getRemoteServer( getProxy( http_proxy ) )
    # The two flags travel as SOAP booleans.
    filterFlag = _marshalBoolean( filter )
    safeFlag = _marshalBoolean( safeSearch )
    response = server.doGoogleSearch( key, q, start, maxResults,
                                      filterFlag, restrict, safeFlag,
                                      language, inputencoding,
                                      outputencoding )
    # Everything except the result elements is query metadata.
    summary = GoogleSOAPFacade.toDict( response )
    del summary["resultElements"]
    meta = SearchResultsMetaData( summary )
    hits = []
    for element in response.resultElements:
        hits.append( SearchResult( GoogleSOAPFacade.toDict( element ) ) )
    return SearchReturnValue( meta, hits )
## ----------------------------------------------------------------------
def doGetCachedPage( url, license_key = None, http_proxy = None ):
    """
    Fetch a page from the Google cache.

    A license key is required; obtain one from the
    U{Google APIs <http://code.google.com/apis/soapsearch/>} site and
    either pass it on every call or set it globally (see the L{google}
    module-level docs).

    @param url: full URL to the page to retrieve
    @type url: String
    @param license_key: (optional) the Google API key to use
    @type license_key: String
    @param http_proxy: (optional) the HTTP proxy server to use
    @type http_proxy: String
    @return: full text of the cached page
    @rtype: String
    """
    key = getLicense( license_key )
    server = _getRemoteServer( getProxy( http_proxy ) )
    return server.doGetCachedPage( key, url )
## ----------------------------------------------------------------------
def doSpellingSuggestion( phrase, license_key = None, http_proxy = None ):
    """
    Ask Google for a spelling suggestion.

    A license key is required; obtain one from the
    U{Google APIs <http://code.google.com/apis/soapsearch/>} site and
    either pass it on every call or set it globally (see the L{google}
    module-level docs).

    @param phrase: word or phrase to spell-check
    @type phrase: String
    @param license_key: (optional) the Google API key to use
    @type license_key: String
    @param http_proxy: (optional) the HTTP proxy to use
    @type http_proxy: String
    @return: text of any suggested replacement, or None
    """
    key = getLicense( license_key )
    server = _getRemoteServer( getProxy( http_proxy ) )
    return server.doSpellingSuggestion( key, phrase )
## ----------------------------------------------------------------------
## functional test suite (see googletest.py for unit test suite)
## ----------------------------------------------------------------------
def _test():
    """
    Run functional test suite.

    Returns silently when no license key can be found; otherwise runs two
    searches and one spelling query against the live service and prints
    the results through _output().
    """
    try:
        getLicense(None)
    except NoLicenseKey:
        # Nothing can be tested without a key.
        return
    print "Searching for Python at google.com..."
    data = doGoogleSearch( "Python" )
    _output( data, { "func": "doGoogleSearch"} )
    print "\nSearching for 5 _French_ pages about Python, "
    print "encoded in ISO-8859-1..."
    data = doGoogleSearch( "Python", language = 'lang_fr',
                           outputencoding = 'ISO-8859-1',
                           maxResults = 5 )
    _output( data, { "func": "doGoogleSearch" } )
    # Deliberately misspelled input for the spelling service.
    phrase = "Pyhton programming languager"
    print "\nTesting spelling suggestions for '%s'..." % phrase
    data = doSpellingSuggestion( phrase )
    _output( data, { "func": "doSpellingSuggestion" } )
## ----------------------------------------------------------------------
## Command-line interface
## ----------------------------------------------------------------------
class _OutputFormatter:
    """Base class for the command-line output formatters."""
    def boil(self, data):
        """
        Encode unicode values for printing.

        @return: C{data} encoded as ISO-8859-1 (unencodable characters
            replaced) when its type is exactly the unicode type,
            otherwise C{data} unchanged
        """
        if type(data) != type(u""):
            return data
        return data.encode("ISO-8859-1", "replace")
class _TextOutputFormatter(_OutputFormatter):
    """Plain-text formatter; one method per API function name."""
    def common(self, data, params):
        """Print the search metadata when params["showMeta"] is true."""
        if params.get("showMeta", 0):
            meta = data.meta
            for category in meta.directoryCategories:
                print "directoryCategory: %s" % \
                      self.boil(category["fullViewableName"])
            # Every other attribute, skipping names that start with '__'.
            for attr in [node for node in dir(meta) if \
                         node <> "directoryCategories" and node[:2] <> '__']:
                print "%s:" % attr, self.boil(getattr(meta, attr))
    def doGoogleSearch(self, data, params):
        """Print every attribute of each search result, then the metadata."""
        results = data.results
        if params.get("feelingLucky", 0):
            results = results[:1]
        if params.get("reverseOrder", 0):
            # In-place: without feelingLucky this reverses data.results
            # itself.
            results.reverse()
        for result in results:
            for attr in dir(result):
                if attr == "directoryCategory":
                    print "directoryCategory:", \
                          self.boil(result.directoryCategory["fullViewableName"])
                elif attr[:2] <> '__':
                    print "%s:" % attr, self.boil(getattr(result, attr))
            # Blank line between results.
            print
        self.common(data, params)
    def doGetCachedPage(self, data, params):
        """Print the returned value as is, then the metadata."""
        print data
        # NOTE(review): common() reads data.meta when showMeta is set;
        # confirm the value returned for cache/spelling queries has one.
        self.common(data, params)
    # Spelling suggestions are printed the same way as cached pages.
    doSpellingSuggestion = doGetCachedPage
def _makeFormatter(outputFormat):
    """
    Instantiate the formatter class named after C{outputFormat}.

    The class is looked up in the module globals as
    "_<Capitalized format>OutputFormatter".
    """
    formatterClass = globals()["_%sOutputFormatter" % outputFormat.capitalize()]
    return formatterClass()
def _output(results, params):
    """
    Print C{results} with the formatter method named by params["func"].

    The formatter is chosen by params["outputFormat"] (default "text").
    """
    outputFormat = params.get("outputFormat", "text")
    render = getattr(_makeFormatter(outputFormat), params["func"])
    render(results, params)
def main(argv):
    """
    Command-line interface.

    @param argv: argument list without the program name

    Prints the usage text when argv is empty, and prints it and exits with
    status 2 on an option parsing error.
    """
    if not argv:
        _usage()
        return
    # These local names are part of the contract with _output(): the
    # locals() dictionary is passed to it at the end of this function.
    q = None
    func = None
    http_proxy = None
    license_key = None
    feelingLucky = 0
    showMeta = 0
    reverseOrder = 0
    runTest = 0
    outputFormat = "text"
    try:
        opts, args = getopt.getopt(argv, "s:c:p:k:lmrx:hvt1",
            ["search=", "cache=", "spelling=", "key=", "lucky", "meta",
             "reverse", "proxy=", "help", "version", "test"])
    except getopt.GetoptError:
        _usage()
        sys.exit(2)
    for opt, arg in opts:
        # The three query-type options also select the API function; the
        # last one given wins.
        if opt in ("-s", "--search"):
            q = arg
            func = "doGoogleSearch"
        elif opt in ("-c", "--cache"):
            q = arg
            func = "doGetCachedPage"
        elif opt in ("-p", "--spelling"):
            q = arg
            func = "doSpellingSuggestion"
        elif opt in ("-k", "--key"):
            license_key = arg
        elif opt in ("-l", "-1", "--lucky"):
            feelingLucky = 1
        elif opt in ("-m", "--meta"):
            showMeta = 1
        elif opt in ("-r", "--reverse"):
            reverseOrder = 1
        elif opt in ("-x", "--proxy"):
            http_proxy = arg
        elif opt in ("-h", "--help"):
            # Processing continues after the help text is printed.
            _usage()
        elif opt in ("-v", "--version"):
            _version()
        elif opt in ("-t", "--test"):
            runTest = 1
    if runTest:
        # The test queries use the module-level key and proxy.
        setLicense(license_key)
        setProxy(http_proxy)
        _test()
    # A bare positional argument is treated as a search query.
    if args and not q:
        q = args[0]
        func = "doGoogleSearch"
    if func:
        results = globals()[func]( q, http_proxy=http_proxy,
                                   license_key=license_key )
        _output(results, locals())
if __name__ == '__main__':
main(sys.argv[1:])

View File

@ -1,5 +1,6 @@
### ###
# Copyright (c) 2002-2004, Jeremiah Fincher # Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2008, James Vega
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -32,10 +33,8 @@ import cgi
import time import time
import socket import socket
import urllib import urllib
import xml.sax
import SOAP import simplejson
import google
import supybot.conf as conf import supybot.conf as conf
import supybot.utils as utils import supybot.utils as utils
@ -45,74 +44,10 @@ import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils import supybot.ircutils as ircutils
import supybot.callbacks as callbacks import supybot.callbacks as callbacks
def search(log, queries, **kwargs):
    """Run a Google SOAP search and update the plugin's search statistics.

    `log` is used to record errors, `queries` must be a list of query
    strings (not a single string) and `kwargs` are passed through to
    google.doGoogleSearch.  Returns the object doGoogleSearch returns.
    Socket, SOAP and SAX failures are re-raised as callbacks.Error with a
    user-facing message.
    """
    # We have to keep stats here, rather than in formatData or elsewhere,
    # because not all searching functions use formatData -- fight, lucky, etc.
    assert not isinstance(queries, basestring), 'Old code: queries is a list.'
    try:
        # Queries containing whitespace are replaced by their repr(); this
        # modifies the caller's list in place.
        for (i, query) in enumerate(queries):
            if len(query.split(None, 1)) > 1:
                queries[i] = repr(query)
        proxy = conf.supybot.protocols.http.proxy()
        if proxy:
            kwargs['http_proxy'] = proxy
        query = ' '.join(queries).decode('utf-8')
        data = google.doGoogleSearch(query, **kwargs)
        # Persist the running totals in the registry.
        searches = conf.supybot.plugins.Google.state.searches() + 1
        conf.supybot.plugins.Google.state.searches.setValue(searches)
        time = conf.supybot.plugins.Google.state.time() + data.meta.searchTime
        conf.supybot.plugins.Google.state.time.setValue(time)
        last24hours.enqueue(None)
        return data
    except socket.error, e:
        # NOTE(review): 110 is presumably the platform's ETIMEDOUT errno --
        # confirm.
        if e.args[0] == 110:
            raise callbacks.Error, 'Connection timed out to Google.com.'
        else:
            raise callbacks.Error, 'Error connecting to Google.com.'
    except SOAP.HTTPError, e:
        log.info('HTTP Error accessing Google: %s', e)
        raise callbacks.Error, 'Error connecting to Google.com.'
    except SOAP.faultType, e:
        if 'Invalid authorization key' in e.faultstring:
            raise callbacks.Error, 'Invalid Google license key.'
        elif 'Problem looking up user record' in e.faultstring:
            raise callbacks.Error, \
                  'Google seems to be having trouble looking up the user for '\
                  'your license key. This probably isn\'t a problem on your '\
                  'side; it\'s probably a bug on Google\'s side. It seems '\
                  'to happen intermittently.'
        else:
            log.exception('Unexpected SOAPpy error:')
            raise callbacks.Error, \
                  'Unexpected error from Google; please report this to the ' \
                  'Supybot developers.'
    except xml.sax.SAXException, e:
        log.exception('Uncaught SAX error:')
        raise callbacks.Error, 'Google returned an unparsable response. ' \
                               'The full traceback has been logged.'
    # We don't use SOAPpy anymore, apparently.
    ## except SOAPpy.Error, e:
    ## log.exception('Uncaught SOAP exception in Google.search:')
    ## raise callbacks.Error, 'Error connecting to Google.com.'
# search() enqueues one entry here per completed search; the queue is
# created with a timeout of 86400 (presumably seconds, i.e. 24 hours).
last24hours = utils.structures.TimeoutQueue(86400)
# Snapshots of the persisted totals taken at import time.
totalTime = conf.supybot.plugins.Google.state.time()
searches = conf.supybot.plugins.Google.state.searches()
class Google(callbacks.PluginRegexp): class Google(callbacks.PluginRegexp):
threaded = True threaded = True
callBefore = ['Web'] callBefore = ['Web']
regexps = ['googleSnarfer', 'googleGroups'] regexps = ['googleSnarfer', 'googleGroups']
def __init__(self, irc):
    """Initialise the plugin and hand the configured license key to the
    google module."""
    # The super proxy is kept so callCommand can delegate to the parent.
    self.__parent = super(Google, self)
    self.__parent.__init__(irc)
    google.setLicense(self.registryValue('licenseKey'))
def callCommand(self, command, irc, msg, *args, **kwargs):
    """Delegate to the parent callCommand, replying with an error when no
    XML parser is available."""
    try:
        # The parent's return value is not passed back to the caller.
        self.__parent.callCommand(command, irc, msg, *args, **kwargs)
    except xml.sax.SAXReaderNotAvailable, e:
        irc.error('No XML parser available.')
_colorGoogles = {} _colorGoogles = {}
def _getColorGoogle(self, m): def _getColorGoogle(self, m):
@ -139,16 +74,61 @@ class Google(callbacks.PluginRegexp):
msg = ircmsgs.privmsg(msg.args[0], s, msg=msg) msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
return msg return msg
_gsearchUrl = 'http://ajax.googleapis.com/ajax/services/search/web'
def search(self, query, channel, options={}):
    """Perform a search using Google's AJAX API.
    search("search phrase", channel, options={})

    query is the search phrase, channel is the channel whose registry
    values supply the defaults, and options is only read, never modified.

    Valid options are:
    smallsearch - True/False (Default: False)
    safesearch - {active,moderate,off} (Default: the channel's
    safeSearch registry value)
    language - Restrict search to documents in the given language
    (Default: the channel's defaultLanguage registry value)

    Returns the decoded JSON response.  Raises callbacks.Error when the
    response's responseStatus is not 200.
    """
    ref = self.registryValue('referer')
    if not ref:
        ref = 'http://%s/%s' % (dynamic.irc.server,
                                dynamic.irc.nick)
    # Work on a copy: writing the Referer into utils.web.defaultHeaders
    # itself would attach it to every other request made through utils.web.
    headers = dict(utils.web.defaultHeaders)
    headers['Referer'] = ref
    opts = {'q': query, 'v': '1.0'}
    for (k, v) in options.items():
        if k == 'smallsearch':
            if v:
                opts['rsz'] = 'small'
            else:
                opts['rsz'] = 'large'
        elif k == 'safesearch':
            opts['safe'] = v
        elif k == 'language':
            opts['lr'] = v
    defLang = self.registryValue('defaultLanguage', channel)
    if 'lr' not in opts and defLang:
        opts['lr'] = defLang
    if 'safe' not in opts:
        # Use the channel we were given, as for defaultLanguage above.
        opts['safe'] = self.registryValue('safeSearch', channel)
    if 'rsz' not in opts:
        # The default result size belongs in 'rsz'; storing it under 'safe'
        # overwrote the safe-search level and left the size unset.
        opts['rsz'] = 'large'
    fd = utils.web.getUrlFd('%s?%s' % (self._gsearchUrl,
                                       urllib.urlencode(opts)),
                            headers)
    # Close the response even when decoding fails.
    try:
        json = simplejson.load(fd)
    finally:
        fd.close()
    if json['responseStatus'] != 200:
        raise callbacks.Error('We broke The Google!')
    return json
def formatData(self, data, bold=True, max=0): def formatData(self, data, bold=True, max=0):
if isinstance(data, basestring): if isinstance(data, basestring):
return data return data
t = format('Search took %.2f seconds', data.meta.searchTime)
results = [] results = []
if max: if max:
data.results = data.results[:max] data = data[:max]
for result in data.results: for result in data:
title = utils.web.htmlToText(result.title.encode('utf-8')) title = utils.web.htmlToText(result['titleNoFormatting']\
url = result.URL .encode('utf-8'))
url = result['unescapedUrl']
if title: if title:
if bold: if bold:
title = ircutils.bold(title) title = ircutils.bold(title)
@ -156,107 +136,58 @@ class Google(callbacks.PluginRegexp):
else: else:
results.append(url) results.append(url)
if not results: if not results:
return format('No matches found. (%s)', t) return format('No matches found.')
else: else:
return format('%s: %s', t, '; '.join(results)) return format('; '.join(results))
def lucky(self, irc, msg, args, text):
    """<search>

    Does a google search, but only returns the first result.
    """
    # A 'small' result set is enough: only the top hit is used.
    response = self.search(text, msg.args[0], {'smallsearch': True})
    hits = response['responseData']['results']
    if not hits:
        irc.reply('Google found nothing.')
        return
    irc.reply(hits[0]['unescapedUrl'].encode('utf-8'))
lucky = wrap(lucky, ['text'])
def google(self, irc, msg, args, optlist, text):
    """<search> [--{safesearch,language} <value>]

    Searches google.com for the given string. As many results as can fit
    are included. --language accepts a language abbreviation; --safesearch
    accepts a filtering level ('active', 'moderate', 'off').
    """
    # optlist is a sequence of (option, value) pairs.  The original tested
    # "'language' in optlist" against that sequence, so the validation
    # never ran (and optlist['language'] would have raised TypeError).
    options = dict(optlist)
    if 'language' in options:
        lang = options['language']
        # search() forwards this value as the 'lr' parameter, which uses
        # the 'lang_xx' form; accept a bare abbreviation as the help
        # text promises.
        if not lang.lower().startswith('lang_'):
            lang = 'lang_%s' % lang
        # NOTE(review): validated against the language list (as the
        # translate command does) instead of safesearch.validStrings,
        # which is not a list of languages -- confirm against config.py.
        if lang.lower() not in \
           conf.supybot.plugins.Google.defaultLanguage.validStrings:
            irc.errorInvalid('language')
        options['language'] = lang
    if 'safesearch' in options:
        level = options['safesearch'].lower()
        if level not in ('active', 'moderate', 'off'):
            irc.errorInvalid('safesearch')
        options['safesearch'] = level
    data = self.search(text, msg.args[0], options)
    if data['responseStatus'] != 200:
        irc.reply('We broke The Google!')
        return
    bold = self.registryValue('bold', msg.args[0])
    maxResults = self.registryValue('maximumResults', msg.args[0])
    irc.reply(self.formatData(data['responseData']['results'],
                              bold=bold, max=maxResults))
# --safesearch takes a value (see the help text above); the original
# declared it with '' which parses it as a bare flag.
google = wrap(google, [getopts({'language':'something',
                                'safesearch':'something'}),
                       'text'])
def meta(self, irc, msg, args, optlist, text):
"""<search> [--{language,restrict} <value>] [--{similar,notsafe}]
Searches google and gives all the interesting meta information about
the search. See the help for the google command for a detailed
description of the parameters.
"""
kwargs = {'language': 'lang_en', 'safeSearch': 1}
for option, argument in optlist:
if option == 'notsafe':
kwargs['safeSearch'] = False
elif option == 'similar':
kwargs['filter'] = False
else:
kwargs[option[2:]] = argument
data = search(self.log, text, **kwargs)
meta = data.meta
categories = [d['fullViewableName'] for d in meta.directoryCategories]
categories = [format('%q', s.replace('_', ' ')) for s in categories]
s = format('Search for %q returned %s %i results in %.2f seconds.%s',
meta.searchQuery,
meta.estimateIsExact and 'exactly' or 'approximately',
meta.estimatedTotalResultsCount,
meta.searchTime,
categories and format(' Categories include %L.',categories))
irc.reply(s)
meta= wrap(meta, [getopts({'language':'something',
'restrict':'something',
'notsafe':'', 'similar':''}),
many('something')])
_cacheUrlRe = re.compile('<code>([^<]+)</code>')
def cache(self, irc, msg, args, url):
    """<url>

    Returns a link to the cached version of <url> if it is available.
    """
    response = self.search(url, msg.args[0], {'smallsearch': True})
    hits = response['responseData']['results']
    # Only the first hit is consulted; an empty result list or an empty
    # cacheUrl both mean there is nothing to report.
    cached = hits and hits[0]['cacheUrl']
    if cached:
        irc.reply(cached.encode('utf-8'))
    else:
        irc.error('Google seems to have no cache for that site.')
cache = wrap(cache, ['url'])
def fight(self, irc, msg, args): def fight(self, irc, msg, args):
@ -265,11 +196,12 @@ class Google(callbacks.PluginRegexp):
Returns the results of each search, in order, from greatest number Returns the results of each search, in order, from greatest number
of results to least. of results to least.
""" """
channel = msg.args[0]
results = [] results = []
for arg in args: for arg in args:
data = search(self.log, [arg]) data = self.search(arg, channel, {'smallsearch': True})
results.append((data.meta.estimatedTotalResultsCount, arg)) count = data['responseData']['cursor']['estimatedResultCount']
results.append((int(count), arg))
results.sort() results.sort()
results.reverse() results.reverse()
if self.registryValue('bold', msg.args[0]): if self.registryValue('bold', msg.args[0]):
@ -279,35 +211,38 @@ class Google(callbacks.PluginRegexp):
s = ', '.join([format('%s: %i', bold(s), i) for (i, s) in results]) s = ', '.join([format('%s: %i', bold(s), i) for (i, s) in results])
irc.reply(s) irc.reply(s)
_gtranslateUrl='http://ajax.googleapis.com/ajax/services/language/translate'
def translate(self, irc, msg, args, fromLang, toLang, text):
    """<from-language> [to] <to-language> <text>

    Returns <text> translated from <from-language> into <to-language>.
    Beware that translating to or from languages that use multi-byte
    characters may result in some very odd results.
    """
    # Validate both language codes before doing any network work.
    validLangs = conf.supybot.plugins.Google.defaultLanguage.validStrings
    if 'lang_%s' % fromLang.lower() not in validLangs:
        irc.errorInvalid('from language')
    if 'lang_%s' % toLang.lower() not in validLangs:
        irc.errorInvalid('to language')
    ref = self.registryValue('referer')
    if not ref:
        ref = 'http://%s/%s' % (dynamic.irc.server,
                                dynamic.irc.nick)
    # Copy the shared default headers instead of mutating them in place,
    # so the Referer does not leak into unrelated utils.web requests.
    headers = dict(utils.web.defaultHeaders)
    headers['Referer'] = ref
    opts = {'q': text, 'v': '1.0',
            'langpair': '%s|%s' % (fromLang, toLang)}
    fd = utils.web.getUrlFd('%s?%s' % (self._gtranslateUrl,
                                       urllib.urlencode(opts)),
                            headers)
    try:
        json = simplejson.load(fd)
    finally:
        # Close the connection even if the body is not valid JSON.
        fd.close()
    if json['responseStatus'] != 200:
        raise callbacks.Error('We broke The Google!')
    irc.reply(json['responseData']['translatedText'].encode('utf-8'))
translate = wrap(translate, ['something', 'to', 'something', 'text'])
def googleSnarfer(self, irc, msg, match): def googleSnarfer(self, irc, msg, match):
r"^google\s+(.*)$" r"^google\s+(.*)$"
@ -315,8 +250,9 @@ class Google(callbacks.PluginRegexp):
return return
searchString = match.group(1) searchString = match.group(1)
try: try:
data = search(self.log, [searchString], safeSearch=1) data = self.search(searchString, msg.args[0],
except google.NoLicenseKey: {'smallsearch': True})
except callbacks.Error:
return return
if data.results: if data.results:
url = data.results[0].URL url = data.results[0].URL

View File

@ -1,5 +1,6 @@
### ###
# Copyright (c) 2002-2004, Jeremiah Fincher # Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2008, James Vega
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -46,17 +47,18 @@ class GoogleTestCase(ChannelPluginTestCase):
'the speed of light ' 'the speed of light '
'in microns / fortnight', '&times;') 'in microns / fortnight', '&times;')
def testSearch(self):
    # Smoke test: a plain web search must not produce an error reply.
    command = 'google foo'
    self.assertNotError(command)
def testFight(self):
    # The reply should list 'supybot' followed by its numeric hit count.
    command = 'fight supybot moobot'
    expected = r'.*supybot.*: \d+'
    self.assertRegexp(command, expected)
def testTranslate(self):
    # English -> Spanish of "hello world" should contain 'mundo'.
    command = 'translate en es hello world'
    self.assertRegexp(command, 'mundo')
def testCalcDoesNotHaveExtraSpaces(self):
    # The calculator reply must not contain a comma padded by whitespace
    # on both sides.
    command = 'google calc 1000^2'
    unwanted = r'\s+,\s+'
    self.assertNotRegexp(command, unwanted)
def testNoNoLicenseKeyError(self):
orig = conf.supybot.plugins.Google.searchSnarfer()
try:
conf.supybot.plugins.Google.searchSnarfer.setValue(True)
self.assertSnarfNoResponse('google blah')
finally:
conf.supybot.plugins.Google.searchSnarfer.setValue(orig)
def testGroupsSnarfer(self): def testGroupsSnarfer(self):
orig = conf.supybot.plugins.Google.groupsSnarfer() orig = conf.supybot.plugins.Google.groupsSnarfer()
try: try:
@ -113,14 +115,4 @@ class GoogleTestCase(ChannelPluginTestCase):
finally: finally:
conf.supybot.plugins.Google.groupsSnarfer.setValue(orig) conf.supybot.plugins.Google.groupsSnarfer.setValue(orig)
def testInvalidKeyCaught(self):
conf.supybot.plugins.Google.licenseKey.set(
'abcdefghijklmnopqrstuvwxyz123456')
self.assertNotRegexp('google foobar', 'faultType')
self.assertNotRegexp('google foobar', 'SOAP')
def testStats(self):
self.assertNotError('google stats')
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: