2003-10-21 06:44:44 +02:00
|
|
|
#!/usr/bin/env python
|
2003-04-08 21:16:18 +02:00
|
|
|
|
|
|
|
###
|
|
|
|
# Copyright (c) 2002, Jeremiah Fincher
|
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
# modification, are permitted provided that the following conditions are met:
|
|
|
|
#
|
|
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer.
|
|
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer in the
|
|
|
|
# documentation and/or other materials provided with the distribution.
|
|
|
|
# * Neither the name of the author of this software nor the name of
|
|
|
|
# contributors to this software may be used to endorse or promote products
|
|
|
|
# derived from this software without specific prior written consent.
|
|
|
|
#
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
###
|
|
|
|
|
|
|
|
"""
|
|
|
|
Accesses Google for various things.
|
|
|
|
"""
|
|
|
|
|
2003-10-05 14:56:56 +02:00
|
|
|
import plugins
|
2003-04-08 21:16:18 +02:00
|
|
|
|
2003-04-20 00:16:57 +02:00
|
|
|
import re
|
2003-08-11 05:37:15 +02:00
|
|
|
import sets
|
2003-04-08 21:16:18 +02:00
|
|
|
import time
|
2003-04-17 12:05:22 +02:00
|
|
|
import getopt
|
2003-10-22 18:05:18 +02:00
|
|
|
import socket
|
2003-04-20 00:16:57 +02:00
|
|
|
import urllib2
|
2003-04-08 21:16:18 +02:00
|
|
|
|
|
|
|
import google
|
|
|
|
|
2003-10-05 13:22:29 +02:00
|
|
|
import conf
|
2003-10-27 23:58:47 +01:00
|
|
|
import debug
|
2003-04-08 21:16:18 +02:00
|
|
|
import utils
|
2003-04-19 23:38:38 +02:00
|
|
|
import ircmsgs
|
2003-10-29 07:06:56 +01:00
|
|
|
import plugins
|
2003-10-11 23:03:02 +02:00
|
|
|
import ircutils
|
2003-04-08 21:16:18 +02:00
|
|
|
import privmsgs
|
|
|
|
import callbacks
|
2003-05-29 18:35:35 +02:00
|
|
|
import structures
|
2003-04-08 21:16:18 +02:00
|
|
|
|
|
|
|
def configure(onStart, afterConnect, advanced):
    """Interactively build the startup commands needed to use this plugin.

    Asks the user for a Google license key and, if a 32-character key is
    supplied, appends the commands that load and configure the plugin to
    onStart.

    onStart      -- list of startup command strings; appended to in place.
    afterConnect -- accepted for interface compatibility; not used here.
    advanced     -- if true, also ask about the two URL snarfers.

    Returns None.  Nothing is appended when the user has no valid key.
    """
    from questions import something, yn
    print('To use Google\'s Web Services, you must have a license key.')
    if yn('Do you have a license key?') != 'y':
        print('You\'ll need to get a key before you can use this plugin.')
        print('You can apply for a key at http://www.google.com/apis/')
        return
    key = something('What is it?')
    # Keys are accepted purely on length; keep asking until we get a
    # 32-character answer or the user admits not having one.
    while len(key) != 32:
        print('That\'s not a valid Google license key.')
        if yn('Are you sure you have a valid Google license key?') == 'y':
            key = something('What is it?')
        else:
            key = ''
            break
    if not key:
        return
    onStart.append('load Google')
    onStart.append('google licensekey %s' % key)
    if 'load Alias' not in onStart:
        print('Google depends on the Alias module for some commands.')
        if yn('Would you like to load the Alias module now?') == 'y':
            onStart.append('load Alias')
        else:
            print('You can still use the Google module, but you won\'t '
                  'be asked any further questions.')
            return
    # These aliases need the Alias module, which is loaded by this point.
    onStart.append('alias googlelinux "google --restrict=linux $1"')
    onStart.append('alias googlebsd "google --restrict=bsd $1"')
    onStart.append('alias googlemac "google --restrict=mac $1"')
    if advanced:
        print('The Google plugin has the functionality to watch for URLs')
        print('that match a specific pattern (we call this a snarfer).')
        print('When supybot sees such a URL, he will parse the web page')
        print('for information and reply with the results.\n')
        print('Google has two available snarfers: Google Groups link')
        print('snarfing and a google search snarfer.\n')
        # Only non-default choices need a startup command: the groups
        # snarfer defaults to on and the search snarfer to off.
        if yn('Do you want the Google Groups link snarfer enabled by '
              'default?') == 'n':
            onStart.append('Google config groups-snarfer off')
        if yn('Do you want the Google search snarfer enabled by default?') \
           == 'y':
            onStart.append('Google config search-snarfer on')
|
2003-07-30 22:07:25 +02:00
|
|
|
|
2003-08-27 20:06:26 +02:00
|
|
|
|
2003-07-30 22:07:25 +02:00
|
|
|
# Number of searches completed through search() since this module was loaded.
totalSearches = 0
|
|
|
|
# Sum of the searchTime values reported for every search made via search().
totalTime = 0
|
|
|
|
# Timestamps (time.time()) of recent searches; search() drops entries older
# than 86400 seconds, so its length is the number of searches in the past day.
last24hours = structures.queue()
|
|
|
|
|
|
|
|
def search(*args, **kwargs):
    """Run a Google search and record usage statistics.

    All positional and keyword arguments are passed unchanged to
    google.doGoogleSearch.  On success the module-level counters are
    updated (totalSearches, totalTime, and the last24hours timestamp queue,
    which is trimmed to the past 86400 seconds) and the search result
    object is returned.

    If the connection to Google times out, a human-readable error string is
    returned INSTEAD of a result object, so callers must check for a string
    before using the result's attributes.  Any other socket.error is
    re-raised.
    """
    # Imported here so this function does not rely on new file-level imports.
    import errno
    import sys
    global totalSearches, totalTime, last24hours
    try:
        data = google.doGoogleSearch(*args, **kwargs)
    except socket.error:
        # sys.exc_info() is used to fetch the exception so this parses on
        # both old and new Python versions.
        e = sys.exc_info()[1]
        # Compare against the platform's own ETIMEDOUT value rather than a
        # literal 110, which is only correct on Linux.
        if e.args and e.args[0] == errno.ETIMEDOUT:
            return 'Connection timed out to Google.com.'
        raise
    now = time.time()
    totalSearches += 1
    totalTime += data.meta.searchTime
    last24hours.enqueue(now)
    # Forget searches that are more than a day old.
    while last24hours and now - last24hours.peek() > 86400:
        last24hours.dequeue()
    return data
|
2003-08-20 10:54:29 +02:00
|
|
|
|
2003-11-08 09:07:44 +01:00
|
|
|
class Google(callbacks.PrivmsgCommandAndRegexp, plugins.Configurable):
    # Plugin exposing Google searches as commands, plus two regexp-triggered
    # "snarfers" (the methods named in `regexps`).
    #
    # NOTE: docstrings are runtime data in this class.  The docstring of each
    # snarfer method is the regular expression that triggers it, and the
    # docstrings of the command methods appear to be their user-visible help
    # (first line: syntax).  Helper methods are therefore documented with
    # comments instead of docstrings so they cannot be mistaken for either.
    threaded = True
    regexps = sets.Set(['googleSnarfer', 'googleGroups'])
    configurables = plugins.ConfigurableDictionary(
        [('groups-snarfer', plugins.ConfigurableTypes.bool, True,
          """Determines whether the groups snarfer is enabled. If so, URLs at
          groups.google.com will be snarfed and their group/title messaged to
          the channel."""),
         ('search-snarfer', plugins.ConfigurableTypes.bool, False,
          """Determines whether the search snarfer is enabled. If so, messages
          (even unaddressed ones) beginning with the word 'google' will result
          in the first URL Google returns being sent to the channel.""")]
    )

    def __init__(self):
        super(Google, self).__init__()
        # NOTE(review): these three attributes are not read anywhere in this
        # class; the statistics actually reported by `info` are the
        # module-level totalSearches/totalTime/last24hours.  Kept in case
        # external code reads them.
        self.total = 0
        self.totalTime = 0
        self.last24hours = structures.queue()

    def formatData(self, data):
        # Turn the value returned by search() into a single reply string.
        # search() returns a plain string on timeout; pass that through.
        if isinstance(data, basestring):
            return data
        # Named `timing` so the `time` module is not shadowed.
        timing = 'Search took %s seconds: ' % data.meta.searchTime
        results = []
        for result in data.results:
            title = utils.htmlToText(result.title.encode('utf-8'))
            url = result.URL
            if title:
                # \x02 ... \x0F wraps the title in IRC bold on/reset codes.
                results.append('\x02%s\x0F: <%s>' % (title, url))
            else:
                results.append(url)
        if not results:
            return 'No matches found %s' % timing
        else:
            return '%s %s' % (timing, '; '.join(results))

    def _searchOptions(self, args):
        # Shared option parsing for the `google` and `metagoogle` commands.
        # Returns (searchString, kwargs) where kwargs are the keyword
        # arguments to hand to search().
        (optlist, rest) = getopt.getopt(args, '', ['language=', 'restrict=',
                                                   'notsafe', 'similar'])
        kwargs = {'language': 'lang_en', 'safeSearch': 1}
        for (option, argument) in optlist:
            if option == '--notsafe':
                kwargs['safeSearch'] = False
            elif option == '--similar':
                kwargs['filter'] = False
            else:
                # '--language' / '--restrict' map straight onto the keyword
                # of the same name (minus the leading dashes).
                kwargs[option[2:]] = argument
        return (privmsgs.getArgs(rest), kwargs)

    def licensekey(self, irc, msg, args):
        """<key>

        Sets the Google license key for using Google's Web Services API. This
        is necessary before you can do any searching with this module.
        """
        key = privmsgs.getArgs(args)
        google.setLicense(key)
        irc.reply(msg, conf.replySuccess)
    licensekey = privmsgs.checkCapability(licensekey, 'admin')

    def google(self, irc, msg, args):
        """<search> [--{language,restrict}=<value>] [--{notsafe,similar}]

        Searches google.com for the given string. As many results as can fit
        are included. --language accepts a language abbreviation; --restrict
        restricts the results to certain classes of things; --similar tells
        Google not to filter similar results. --notsafe allows possibly
        work-unsafe results.
        """
        (searchString, kwargs) = self._searchOptions(args)
        data = search(searchString, **kwargs)
        # formatData copes with the timeout string search() may return.
        irc.reply(msg, self.formatData(data))

    def metagoogle(self, irc, msg, args):
        """<search> [--{language,restrict}=<value>] [--{similar,notsafe}]

        Searches google and gives all the interesting meta information about
        the search. See the help for the google command for a detailed
        description of the parameters.
        """
        (searchString, kwargs) = self._searchOptions(args)
        data = search(searchString, **kwargs)
        if isinstance(data, basestring):
            # search() timed out and returned its error message, which has
            # no .meta attribute.
            irc.error(msg, data)
            return
        meta = data.meta
        categories = [d['fullViewableName'] for d in meta.directoryCategories]
        categories = [utils.dqrepr(s.replace('_', ' ')) for s in categories]
        if categories:
            categories = utils.commaAndify(categories)
        else:
            categories = ''
        s = 'Search for %r returned %s %s results in %s seconds.%s' % \
            (meta.searchQuery,
             meta.estimateIsExact and 'exactly' or 'approximately',
             meta.estimatedTotalResultsCount,
             meta.searchTime,
             categories and ' Categories include %s.' % categories)
        irc.reply(msg, s)

    def fight(self, irc, msg, args):
        """<search string> <search string> [<search string> ...]

        Returns the results of each search, in order, from greatest number
        of results to least.
        """
        results = []
        for arg in args:
            data = search(arg)
            if isinstance(data, basestring):
                # search() timed out; there is no result count to compare.
                irc.error(msg, data)
                return
            results.append((data.meta.estimatedTotalResultsCount, arg))
        # Sort by result count, largest first.
        results.sort()
        results.reverse()
        s = ', '.join(['%r: %s' % (term, count) for (count, term) in results])
        irc.reply(msg, s)

    def spell(self, irc, msg, args):
        """<word>

        Returns Google's spelling recommendation for <word>.
        """
        word = privmsgs.getArgs(args)
        result = google.doSpellingSuggestion(word)
        if result:
            irc.reply(msg, result)
        else:
            irc.reply(msg, 'No spelling suggestion made.')

    def info(self, irc, msg, args):
        """takes no arguments

        Returns interesting information about this Google module. Mostly
        useful for making sure you don't go over your 1000 requests/day limit.
        """
        # The statistics live at module level and are maintained by search().
        recent = len(last24hours)
        irc.reply(msg, 'This google module has been called %s time%stotal; '\
                       '%s time%sin the past 24 hours. ' \
                       'Google has spent %s seconds searching for me.' % \
                  (totalSearches, totalSearches != 1 and 's ' or ' ',
                   recent, recent != 1 and 's ' or ' ',
                   totalTime))

    def googleSnarfer(self, irc, msg, match):
        r"^google\s+(.*)$"
        if not self.configurables.get('search-snarfer', channel=msg.args[0]):
            return
        searchString = match.group(1)
        try:
            data = search(searchString, safeSearch=1)
        except google.NoLicenseKey:
            return
        if isinstance(data, basestring):
            # search() timed out.  Stay silent, as for a missing license
            # key, since snarfers react to unaddressed messages.
            return
        if data.results:
            url = data.results[0].URL
            irc.reply(msg, url)
        else:
            irc.reply(msg, 'No results for "%s"' % searchString)
    googleSnarfer = privmsgs.urlSnarfer(googleSnarfer)

    _ggThread = re.compile(r'<br>Subject: ([^<]+)<br>')
    _ggGroup = re.compile(r'Newsgroups: <a[^>]+>([^<]+)</a>')
    # Browser-like headers sent with every Google Groups request.
    _ggHeaders = {'User-agent':
                  'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'}

    def _ggFetch(self, url):
        # Download `url` and return the page text, always closing the
        # connection even if the read fails.
        request = urllib2.Request(url, headers=self._ggHeaders)
        fd = urllib2.urlopen(request)
        try:
            return fd.read()
        finally:
            fd.close()

    def googleGroups(self, irc, msg, match):
        r"http://groups\.google\.com/[^\s]+"
        if not self.configurables.get('groups-snarfer', channel=msg.args[0]):
            return
        url = match.group(0)
        text = self._ggFetch(url)
        if url.find('&prev=/') >= 0:
            # Such pages carry a "view the ... no" link (presumably to a
            # no-frames version -- confirm); follow it to the real message.
            path = re.search('view the <a href=([^>]+)>no', text)
            if path is None:
                return
            text = self._ggFetch('%s%s' % ('http://groups.google.com',
                                           path.group(1)))
        mThread = self._ggThread.search(text)
        mGroup = self._ggGroup.search(text)
        if mThread and mGroup:
            irc.reply(msg, 'Google Groups: %s, %s' % (mGroup.group(1),
                      mThread.group(1)), prefixName = False)
        else:
            irc.error(msg, 'That doesn\'t appear to be a proper '\
                           'Google Groups page. (%s)' % conf.replyPossibleBug)
    googleGroups = privmsgs.urlSnarfer(googleGroups)
|
2003-04-20 00:16:57 +02:00
|
|
|
|
|
|
|
|
2003-10-19 23:04:35 +02:00
|
|
|
# Module-level alias for the plugin class.
# NOTE(review): presumably the name the plugin loader looks up -- confirm.
Class = Google
|
2003-10-18 15:25:12 +02:00
|
|
|
|
2003-04-08 21:16:18 +02:00
|
|
|
|
|
|
|
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|