"""
Python wrapper for Google web APIs
This module allows you to access Google's web APIs through SOAP,
to do things like search Google and get the results programmatically.
Described U{here <http://www.google.com/apis/>}
  
You need a Google-provided license key to use these services.
Follow the link above to get one.  These functions will look in
several places (in this order) for the license key:
    - the "license_key" argument of each function
    - the module-level LICENSE_KEY variable (call setLicense once to set it)
    - an environment variable called GOOGLE_LICENSE_KEY
    - a file called ".googlekey" in the current directory
    - a file called "googlekey.txt" in the current directory
    - a file called ".googlekey" in your home directory
    - a file called "googlekey.txt" in your home directory
    - a file called ".googlekey" in the same directory as google.py
    - a file called "googlekey.txt" in the same directory as google.py
Sample usage::
    
    >>> import google
    >>> google.setLicense('...') # must get your own key!
    >>> data = google.doGoogleSearch('python')
    >>> data.meta.searchTime
    0.043221000000000002
    
    >>> data.results[0].URL
    'http://www.python.org/'
    
    >>> data.results[0].title
    'Python Language Website'
@newfield contrib: Contributors
@author:   Mark Pilgrim 
@author:   Brian Landers 
@license:  Python
@version:  0.6
@contrib:  David Ascher, for the install script
@contrib:  Erik Max Francis, for the command line interface
@contrib:  Michael Twomey, for HTTP proxy support
@contrib:  Mark Recht, for patches to support SOAPpy
"""
# Module metadata.  __version__, __copyright__, __date__ and __credits__ are
# interpolated into the banner printed by _version().
__author__ = "Mark Pilgrim (f8dy@diveintomark.org)"
__version__ = "0.6"
# The slices strip the CVS keyword wrappers ("$Revision: " ... " $" and
# "$Date: " ... " $"), leaving only the bare revision number and timestamp.
__cvsversion__ = "$Revision: 1.6 $"[11:-2]
__date__ = "$Date: 2004/09/30 08:09:09 $"[7:-2]
__copyright__ = "Copyright (c) 2002 Mark Pilgrim"
__license__ = "Python"
__credits__ = """David Ascher, for the install script
Erik Max Francis, for the command line interface
Michael Twomey, for HTTP proxy support"""
import os, sys, getopt
import GoogleSOAPFacade
# Module-wide defaults; changed through setLicense() and setProxy().
LICENSE_KEY = None
HTTP_PROXY  = None
#
# Constants
#
# SOAP endpoint and namespace handed to GoogleSOAPFacade.getProxy().
_url         = 'http://api.google.com/search/beta2'
_namespace   = 'urn:GoogleSearch'
# File names that are searched for a stored license key.
_googlefile1 = ".googlekey"
_googlefile2 = "googlekey.txt"
# Boolean values as supplied by GoogleSOAPFacade; see _marshalBoolean().
_false = GoogleSOAPFacade.false
_true  = GoogleSOAPFacade.true
# Ordered table of places to look for a license key.  Each entry is a pair:
#   - a getter that receives the key passed by the caller and returns a
#     candidate key (or a false value when nothing is found there)
#   - a human-readable description of that location
# getLicense() tries the getters in order and returns the first true result;
# _usage() prints the descriptions from the third entry onwards.
# The lambdas refer to _contentsOf() and _getScriptDir(), which are defined
# further down; that is fine because the names are only resolved when a
# getter is actually called.
_licenseLocations = (
    ( lambda key: key,
      'passed to the function in license_key variable' ),
    ( lambda key: LICENSE_KEY, 
      'module-level LICENSE_KEY variable (call setLicense to set it)' ),
    ( lambda key: os.environ.get( 'GOOGLE_LICENSE_KEY', None ),
      'an environment variable called GOOGLE_LICENSE_KEY' ),
    ( lambda key: _contentsOf( os.getcwd(), _googlefile1 ), 
      '%s in the current directory' % _googlefile1),
    ( lambda key: _contentsOf( os.getcwd(), _googlefile2 ),
      '%s in the current directory' % _googlefile2),
    # HOME may be unset; the empty-string default is passed to _contentsOf().
    ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile1 ),
      '%s in your home directory' % _googlefile1),
    ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile2 ),
      '%s in your home directory' % _googlefile2 ),
    ( lambda key: _contentsOf( _getScriptDir(), _googlefile1 ),
      '%s in the google.py directory' % _googlefile1 ),
    ( lambda key: _contentsOf( _getScriptDir(), _googlefile2 ),
      '%s in the google.py directory' % _googlefile2 )
)
## ----------------------------------------------------------------------
## Exceptions
## ----------------------------------------------------------------------
class NoLicenseKey(Exception):
    """
    Raised when no license key could be found in any of the places
    the module searches.
    """
## ----------------------------------------------------------------------
## administrative functions (non-API)
## ----------------------------------------------------------------------
def _version():
    """
    Print the version banner: module version, copyright, release date
    and credits, all taken from the module-level metadata variables.
    """
    banner = """PyGoogle %(__version__)s
%(__copyright__)s
released %(__date__)s
Thanks to:
%(__credits__)s"""
    print(banner % globals())
    
def _usage():
    """
    Print the command-line help text, followed by the list of places
    that are searched for a license key.
    """
    helpText = """Usage: %(program)s [options] [querytype] query
options:
  -k, --key=  Google license key (see important note below)
  -1, -l, --lucky          show only first hit
  -m, --meta               show meta information
  -r, --reverse            show results in reverse order
  -x, --proxy=        use HTTP proxy
  -h, --help               print this help
  -v, --version            print version and copyright information
  -t, --test               run test queries
querytype:
  -s, --search=     search (default)
  -c, --cache=        retrieve cached page
  -p, --spelling=    check spelling
IMPORTANT NOTE: all Google functions require a valid license key;
visit http://www.google.com/apis/ to get one.  %(program)s will look in
these places (in order) and use the first license key it finds:
  * the key specified on the command line"""
    print(helpText % { "program": os.path.basename(sys.argv[0]) })
    # The first two entries (function argument, module variable) are not
    # meaningful to a command-line user, so the listing starts at the third.
    for entry in _licenseLocations[2:]:
        print("  * %s" % entry[1])
## ----------------------------------------------------------------------
## utility functions (API)
## ----------------------------------------------------------------------
def setLicense(license_key):
    """
    Store a U{Google APIs <http://www.google.com/apis/>} license key in
    the module-level LICENSE_KEY variable.

    The value is stored as given; no validation is performed.

    @param license_key: the key to remember
    @type  license_key: String
    @todo: validate the key?
    """
    global LICENSE_KEY
    LICENSE_KEY = license_key
    
    
def getLicense(license_key = None):
    """
    Find the U{Google APIs <http://www.google.com/apis/>} license key.

    Every location in the module's search order is tried in turn and the
    first non-empty value wins.  See the module-level documentation for
    the search order.  If nothing is found, the usage text is printed
    before the exception is raised.

    @param license_key: (optional) key supplied by the caller; it is the
        first location tried
    @type  license_key: String
    @return: the license key
    @rtype:  String
    @raise NoLicenseKey: if no valid key could be found
    """
    for getter, _description in _licenseLocations:
        candidate = getter(license_key)
        if candidate:
            return candidate
    _usage()
    raise NoLicenseKey('get a license key at http://www.google.com/apis/')
def setProxy(http_proxy):
    """
    Store the HTTP proxy in the module-level HTTP_PROXY variable, from
    which getProxy() reads its fallback value.

    The value is stored as given; no validation is performed.

    @param http_proxy: the proxy to remember
    @type  http_proxy: String
    @todo: validate the input?
    """
    global HTTP_PROXY
    HTTP_PROXY = http_proxy
def getProxy(http_proxy = None):
    """
    Pick the HTTP proxy to use for accessing Google.

    A non-empty C{http_proxy} argument takes precedence; otherwise the
    module-level HTTP_PROXY value is returned.

    @param http_proxy: (optional) proxy supplied by the caller
    @type  http_proxy: String
    @return: the proxy
    @rtype:  String
    """
    if http_proxy:
        return http_proxy
    return HTTP_PROXY
def _contentsOf(dirname, filename):
    """
    Read a license-key file and return its contents.

    Leading and trailing whitespace is stripped, so the newline that
    editors put at the end of a key file does not become part of the key.

    @param dirname: directory that may contain the file
    @type  dirname: String
    @param filename: name of the file inside C{dirname}
    @type  filename: String
    @return: the stripped contents, or None if there is no such regular
        file
    @rtype:  String
    """
    filename = os.path.join(dirname, filename)
    # isfile() rather than exists(): a directory that happens to carry the
    # key file's name must be skipped, not opened.
    if not os.path.isfile(filename):
        return None
    fsock = open(filename)
    try:
        contents = fsock.read()
    finally:
        # Close the handle even if the read fails.
        fsock.close()
    return contents.strip()
def _getScriptDir():
    """
    Return the absolute path of the directory holding this module.

    When the module runs as a script the location is taken from
    sys.argv[0]; otherwise from the module's __file__ attribute.
    """
    if __name__ != '__main__':
        location = sys.modules[__name__].__file__
    else:
        location = sys.argv[0]
    return os.path.abspath(os.path.dirname(location))
def _marshalBoolean(value):
    """
    Map any Python truth value onto the module's _true / _false
    constants (taken from GoogleSOAPFacade).
    """
    if not value:
        return _false
    return _true
def _getRemoteServer( http_proxy ):
    """
    Ask GoogleSOAPFacade for a proxy object bound to the module's SOAP
    URL and namespace, passing along the given HTTP proxy.
    """
    server = GoogleSOAPFacade.getProxy( _url, _namespace, http_proxy )
    return server
    
## ----------------------------------------------------------------------
## search results classes
## ----------------------------------------------------------------------
class _SearchBase:
    """
    Base class for the result containers: copies the entries of a
    mapping onto the instance as plain attributes.
    """
    def __init__(self, params):
        """
        Store every item of C{params} as an instance attribute.

        Values that are SOAP structs are converted to dictionaries, and
        sequences whose first element is a SOAP struct are converted to
        lists of dictionaries.  Everything else is stored unchanged.

        @param params: mapping of attribute name to value
        """
        for k, v in params.items():
            if isinstance(v, GoogleSOAPFacade.structType):
                v = GoogleSOAPFacade.toDict( v )

            # Probe for "sequence of structs".  Only the probe itself is
            # allowed to fail (non-indexable, empty, or keyed values);
            # errors in the conversion below are no longer hidden.
            try:
                first = v[0]
            except (TypeError, IndexError, KeyError, AttributeError):
                first = None
            if isinstance(first, GoogleSOAPFacade.structType):
                # The original called toDict on an undefined name here, and
                # the resulting NameError was swallowed, so this conversion
                # never actually happened.
                v = [ GoogleSOAPFacade.toDict( node ) for node in v ]
            self.__dict__[str(k)] = v
## ----------------------------------------------------------------------
class SearchResultsMetaData(_SearchBase):
    """
    Holds the metadata that accompanies the results of one search query.

    @ivar documentFiltering: is duplicate page filtering active?
    @ivar searchComments: human-readable informational message, for
        example::
             "'the' is a very common word and was not included in your search"
    @ivar estimatedTotalResultsCount: estimated total number of results
        for this query.
    @ivar estimateIsExact: is estimatedTotalResultsCount an exact value?
    @ivar searchQuery: search string that initiated this search
    @ivar startIndex: index of the first result returned (zero-based)
    @ivar endIndex: index of the last result returned (zero-based)
    @ivar searchTips: human-readable informational message on how to
       make better use of Google.
    @ivar directoryCategories: list of categories for the search results.
       Each item is a dictionary of this form::
           { 'fullViewableName': 'the Open Directory category',
             'specialEncoding':  'encoding scheme of this directory category'
           }
    @ivar searchTime: total search time, in seconds
    """
## ----------------------------------------------------------------------
class SearchResult(_SearchBase):
    """
    Holds a single hit from a search.

    @ivar URL: URL
    @ivar title: title (HTML)
    @ivar snippet: snippet showing query context (HTML)
    @ivar cachedSize: size of cached version of this result (KB)
    @ivar relatedInformationPresent: is the "related:" keyword supported?
        Flag indicating that the "related:" keyword is supported for
        this URL
    @ivar hostName: used when filtering occurs.
        When filtering occurs, a maximum of two results from any given
        host is returned.  In that case the second resultElement that
        comes from that host carries the host name in this field.
    @ivar directoryCategory: Open Directory category information.
        A dictionary with the following values::
            { 'fullViewableName': 'the Open Directory category',
              'specialEncoding' : 'encoding scheme of this directory category'
            }
    @ivar directoryTitle: Open Directory title of this result (or blank)
    @ivar summary: Open Directory summary for this result (or blank)
    """
## ----------------------------------------------------------------------
class SearchReturnValue:
    """
    Everything returned for a single query: its metadata plus its hits.

    @ivar meta: L{SearchResultsMetaData} instance for this query
    @ivar results: list of L{SearchResult} objects for this query
    """
    def __init__( self, metadata, results ):
        """
        @param metadata: stored as the C{meta} attribute
        @param results: stored as the C{results} attribute
        """
        self.meta, self.results = metadata, results
## ----------------------------------------------------------------------
## main functions
## ----------------------------------------------------------------------
def doGoogleSearch( q, start = 0, maxResults = 10, filter = 1,
                    restrict='', safeSearch = 0, language = '',
                    inputencoding = '', outputencoding = '',
                    license_key = None, http_proxy = None ):
    """
    Run a Google search through the SOAP API and return the results.

    A license key is required; see the
    U{Google APIs <http://www.google.com/apis/>} site to get one.
    Pass it on every call or set it once for the whole module; the
    L{google} module-level docs describe where keys are looked up.

    See U{http://www.google.com/help/features.html} for examples of
    advanced features.  Anything that works at the Google web site will
    work as a query string here.

    The C{start} and C{maxResults} parameters can be used to page
    through multiple pages of results.  Note that 'maxResults' is
    currently limited by Google to 10.

    The API reference has more advanced examples and the full list of
    country codes and topics accepted by the C{restrict} parameter, along
    with the legal values for the C{language}, C{inputencoding}, and
    C{outputencoding} parameters.  The API documentation can be
    downloaded U{here <http://www.google.com/apis/download.html>}.

    @param q: search string.
    @type  q: String
    @param start: (optional) zero-based index of first desired result.
    @type  start: int
    @param maxResults: (optional) maximum number of results to return.
    @type  maxResults: int
    @param filter: (optional) flag to request filtering of similar results
    @type  filter: int
    @param restrict: (optional) restrict results by country or topic.
    @type  restrict: String
    @param safeSearch: (optional)
    @type  safeSearch: int
    @param language: (optional)
    @type  language: String
    @param inputencoding: (optional)
    @type  inputencoding: String
    @param outputencoding: (optional)
    @type  outputencoding: String
    @param license_key: (optional) the Google API license key to use
    @type  license_key: String
    @param http_proxy: (optional) the HTTP proxy to use for talking to Google
    @type  http_proxy: String

    @return: the search results encapsulated in an object
    @rtype:  L{SearchReturnValue}
    @raise NoLicenseKey: if no license key could be found
    """
    key    = getLicense( license_key )
    server = _getRemoteServer( getProxy( http_proxy ) )

    # The two flags are sent as the module's SOAP boolean constants.
    filterFlag = _marshalBoolean( filter )
    safeFlag   = _marshalBoolean( safeSearch )

    response = server.doGoogleSearch( key, q, start, maxResults,
                                      filterFlag, restrict, safeFlag,
                                      language, inputencoding,
                                      outputencoding )

    # Everything except the hit list is metadata.
    fields = GoogleSOAPFacade.toDict( response )
    del fields["resultElements"]
    meta = SearchResultsMetaData( fields )

    hits = []
    for element in response.resultElements:
        hits.append( SearchResult( GoogleSOAPFacade.toDict( element ) ) )

    return SearchReturnValue( meta, hits )
## ----------------------------------------------------------------------
def doGetCachedPage( url, license_key = None, http_proxy = None ):
    """
    Fetch a page from the Google cache.

    A license key is required; see the
    U{Google APIs <http://www.google.com/apis/>} site to get one.
    Pass it on every call or set it once for the whole module; the
    L{google} module-level docs describe where keys are looked up.

    @param url: full URL to the page to retrieve
    @type  url: String

    @param license_key: (optional) the Google API key to use
    @type  license_key: String

    @param http_proxy:  (optional) the HTTP proxy server to use
    @type  http_proxy:  String

    @return: full text of the cached page
    @rtype:  String
    @raise NoLicenseKey: if no license key could be found
    """
    key    = getLicense( license_key )
    server = _getRemoteServer( getProxy( http_proxy ) )
    return server.doGetCachedPage( key, url )
## ----------------------------------------------------------------------
def doSpellingSuggestion( phrase, license_key = None, http_proxy = None ):
    """
    Ask Google for a spelling suggestion.

    A license key is required; see the
    U{Google APIs <http://www.google.com/apis/>} site to get one.
    Pass it on every call or set it once for the whole module; the
    L{google} module-level docs describe where keys are looked up.

    @param phrase: word or phrase to spell-check
    @type  phrase: String

    @param license_key: (optional) the Google API key to use
    @type  license_key: String

    @param http_proxy: (optional) the HTTP proxy to use
    @type  http_proxy: String

    @return: text of any suggested replacement, or None
    @raise NoLicenseKey: if no license key could be found
    """
    key    = getLicense( license_key )
    server = _getRemoteServer( getProxy( http_proxy ) )
    return server.doSpellingSuggestion( key, phrase )
## ----------------------------------------------------------------------
## functional test suite (see googletest.py for unit test suite)
## ----------------------------------------------------------------------
def _test():
    """
    Run the functional test queries and print their results.

    Returns without running anything when no license key is available.
    """
    try:
        getLicense(None)
    except NoLicenseKey:
        return

    print("Searching for Python at google.com...")
    found = doGoogleSearch( "Python" )
    _output( found, { "func": "doGoogleSearch" } )

    print("\nSearching for 5 _French_ pages about Python, ")
    print("encoded in ISO-8859-1...")
    found = doGoogleSearch( "Python", language = 'lang_fr',
                            outputencoding = 'ISO-8859-1',
                            maxResults = 5 )
    _output( found, { "func": "doGoogleSearch" } )

    # Deliberately misspelled input for the spelling service.
    phrase = "Pyhton programming languager"
    print("\nTesting spelling suggestions for '%s'..." % phrase)
    suggestion = doSpellingSuggestion( phrase )
    _output( suggestion, { "func": "doSpellingSuggestion" } )
## ----------------------------------------------------------------------
## Command-line interface
## ----------------------------------------------------------------------
class _OutputFormatter:
    """
    Base class for the command-line output formatters.
    """
    def boil(self, data):
        """
        Encode a unicode string as ISO-8859-1, substituting characters
        that cannot be represented; return any other value untouched.
        """
        if type(data) != type(u""):
            return data
        return data.encode("ISO-8859-1", "replace")
class _TextOutputFormatter(_OutputFormatter):
    """
    Formatter that prints results as plain "name: value" lines on
    standard output.  Each public method is named after the API function
    whose result it prints; _output() looks the method up by that name.
    """
    def common(self, data, params):
        """
        Print the search metadata when the "showMeta" option is set.

        Results that carry no C{meta} attribute (a cached page or a
        spelling suggestion is a plain value) are skipped instead of
        raising AttributeError.

        @param data: the result that was just printed
        @param params: option dictionary; "showMeta" is read here
        """
        if not params.get("showMeta", 0):
            return
        meta = getattr(data, "meta", None)
        if meta is None:
            return
        for category in meta.directoryCategories:
            print("directoryCategory: %s" %
                  self.boil(category["fullViewableName"]))
        for attr in dir(meta):
            # directoryCategories was printed above; skip it together with
            # the double-underscore names that dir() reports.
            if attr != "directoryCategories" and attr[:2] != '__':
                print("%s: %s" % (attr, self.boil(getattr(meta, attr))))

    def doGoogleSearch(self, data, params):
        """
        Print every attribute of each search hit, a blank line after
        each hit, and finally the metadata if requested.

        @param data: object with C{results} (list of hits) and C{meta}
        @param params: option dictionary; "feelingLucky" limits the output
            to the first hit and "reverseOrder" prints the hits last-first
        """
        results = data.results
        if params.get("feelingLucky", 0):
            results = results[:1]
        if params.get("reverseOrder", 0):
            # Reverse a copy so the caller's data.results keeps its order.
            results = list(results)
            results.reverse()
        for result in results:
            for attr in dir(result):
                if attr == "directoryCategory":
                    print("directoryCategory: %s" %
                          self.boil(result.directoryCategory["fullViewableName"]))
                elif attr[:2] != '__':
                    print("%s: %s" % (attr, self.boil(getattr(result, attr))))
            print("")
        self.common(data, params)

    def doGetCachedPage(self, data, params):
        """
        Print a plain result (cached page text or spelling suggestion)
        as-is.

        @param data: the value to print
        @param params: option dictionary, passed on to L{common}
        """
        print(data)
        self.common(data, params)
    # Spelling suggestions are printed exactly like cached pages.
    doSpellingSuggestion = doGetCachedPage
def _makeFormatter(outputFormat):
    """
    Instantiate the formatter class for an output format name.

    The class is looked up among the module globals under the name
    "_<Format>OutputFormatter", e.g. "text" -> _TextOutputFormatter.
    """
    formatterClass = globals()["_%sOutputFormatter" % outputFormat.capitalize()]
    return formatterClass()
def _output(results, params):
    """
    Print C{results} with the formatter selected by params["outputFormat"]
    (default "text"), using the formatter method named by params["func"].
    """
    chosenFormat = params.get("outputFormat", "text")
    printer = getattr(_makeFormatter(chosenFormat), params["func"])
    printer(results, params)
def main(argv):
    """
    Command-line interface.

    Parses the options in C{argv}, optionally runs the functional tests,
    then runs the requested query and prints its result.  A bare argument
    with no query-type option is treated as a plain search.  With no
    arguments at all the usage text is printed.  Invalid options print
    the usage text and exit with status 2.

    @param argv: command-line arguments, without the program name
    @type  argv: list of String
    """
    if not argv:
        _usage()
        return

    q = None
    func = None
    http_proxy = None
    license_key = None
    runTest = 0
    # Options read by the output formatter.  Built explicitly rather than
    # passed as locals(), so renaming a local variable cannot silently
    # change what the formatter sees.
    params = { "feelingLucky": 0,
               "showMeta": 0,
               "reverseOrder": 0,
               "outputFormat": "text" }

    # Query-type options: option -> name of the API function to call.
    queryTypes = { "-s": "doGoogleSearch",       "--search":   "doGoogleSearch",
                   "-c": "doGetCachedPage",      "--cache":    "doGetCachedPage",
                   "-p": "doSpellingSuggestion", "--spelling": "doSpellingSuggestion" }
    # Flag options: option -> formatter parameter to switch on.
    flagOptions = { "-l": "feelingLucky", "-1": "feelingLucky",
                    "--lucky": "feelingLucky",
                    "-m": "showMeta",     "--meta": "showMeta",
                    "-r": "reverseOrder", "--reverse": "reverseOrder" }

    try:
        opts, args = getopt.getopt(argv, "s:c:p:k:lmrx:hvt1",
            ["search=", "cache=", "spelling=", "key=", "lucky", "meta",
             "reverse", "proxy=", "help", "version", "test"])
    except getopt.GetoptError:
        _usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in queryTypes:
            q = arg
            func = queryTypes[opt]
        elif opt in flagOptions:
            params[flagOptions[opt]] = 1
        elif opt in ("-k", "--key"):
            license_key = arg
        elif opt in ("-x", "--proxy"):
            http_proxy = arg
        elif opt in ("-h", "--help"):
            _usage()
        elif opt in ("-v", "--version"):
            _version()
        elif opt in ("-t", "--test"):
            runTest = 1

    if runTest:
        # The test queries take no arguments, so publish the key and proxy
        # module-wide before running them.
        setLicense(license_key)
        setProxy(http_proxy)
        _test()

    if args and not q:
        q = args[0]
        func = "doGoogleSearch"

    if func:
        results = globals()[func]( q, http_proxy=http_proxy,
                                   license_key=license_key )
        params["func"] = func
        _output(results, params)
# Script entry point: pass the arguments without the program name to main().
if __name__ == '__main__':
    main(sys.argv[1:])