Limnoria/plugins/Google.py

364 lines
15 KiB
Python
Raw Normal View History

2003-10-21 06:44:44 +02:00
#!/usr/bin/env python
2003-04-08 21:16:18 +02:00
###
# Copyright (c) 2002, Jeremiah Fincher
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
Accesses Google for various things.
"""
import plugins
2003-04-08 21:16:18 +02:00
import re
import sets
2003-04-08 21:16:18 +02:00
import time
import getopt
2003-10-22 18:05:18 +02:00
import socket
import urllib2
2003-04-08 21:16:18 +02:00
import google
2003-10-05 13:22:29 +02:00
import conf
2003-10-27 23:58:47 +01:00
import debug
2003-04-08 21:16:18 +02:00
import utils
import ircmsgs
2003-10-11 23:03:02 +02:00
import ircutils
2003-04-08 21:16:18 +02:00
import privmsgs
import callbacks
2003-05-29 18:35:35 +02:00
import structures
2003-04-08 21:16:18 +02:00
def configure(onStart, afterConnect, advanced):
    """Interactively collect the startup commands needed by the Google plugin.

    Asks the user for a Google license key.  When a 32-character key is
    supplied, the commands that load the plugin, set the key and define the
    ``googlelinux``/``googlebsd``/``googlemac`` aliases are appended to
    ``onStart``.

    onStart      -- list of startup command strings; appended to in place.
    afterConnect -- accepted for interface compatibility; not used here.
    advanced     -- when true, also ask whether each snarfer should be
                    enabled by default.

    Returns None.
    """
    # NOTE(review): the original indentation of this function was lost; the
    # nesting below is reconstructed from the logic of the prompts -- confirm
    # against the upstream file.
    from questions import something, yn

    # A parenthesised single-string print behaves the same whether ``print``
    # is a statement or a function.
    print('To use Google\'s Web Services, you must have a license key.')
    if yn('Do you have a license key?') != 'y':
        print('You\'ll need to get a key before you can use this plugin.')
        print('You can apply for a key at http://www.google.com/apis/')
        return

    # Keep asking until the key has the expected length or the user gives up.
    key = something('What is it?')
    while len(key) != 32:
        print('That\'s not a valid Google license key.')
        if yn('Are you sure you have a valid Google license key?') != 'y':
            key = ''
            break
        key = something('What is it?')
    if not key:
        return

    onStart.append('load Google')
    onStart.append('google licensekey %s' % key)

    # The convenience aliases below need the Alias module.
    if 'load Alias' not in onStart:
        print('Google depends on the Alias module for some commands.')
        if yn('Would you like to load the Alias module now?') == 'y':
            onStart.append('load Alias')
        else:
            print('You can still use the Google module, but you won\'t '
                  'be asked any further questions.')
            return
    onStart.append('alias googlelinux "google --restrict=linux $1"')
    onStart.append('alias googlebsd "google --restrict=bsd $1"')
    onStart.append('alias googlemac "google --restrict=mac $1"')

    if advanced:
        print('The Google plugin has the functionality to watch for URLs')
        print('that match a specific pattern (we call this a snarfer).')
        print('When supybot sees such a URL, he will parse the web page')
        print('for information and reply with the results.\n')
        print('Google has two available snarfers: Google Groups link')
        print('snarfing and a google search snarfer.\n')
        # The groups snarfer starts enabled, so only an explicit "no" needs a
        # startup command; the search snarfer starts disabled.
        if yn('Do you want the Google Groups link snarfer enabled by '
              'default?') == 'n':
            onStart.append('Google togglesnarfer groups off')
        if yn('Do you want the Google search snarfer enabled by '
              'default?') == 'y':
            onStart.append('Google togglesnarfer search on')
2003-07-30 22:07:25 +02:00
2003-08-27 20:06:26 +02:00
# Sample IRC session demonstrating the plugin's commands, passed through
# utils.wrapLines().
# NOTE(review): the transcript uses the older google*-prefixed command names
# (googlefight, googleinfo, ...), whereas the methods in this file are named
# fight, info, spell and licensekey -- confirm whether it should be refreshed.
example = utils.wrapLines("""
<jemfinch> @list googletools
<supybot> google, googlefight, googleinfo, googlelicensekey, googlesite, googlespell, metagoogle
<jemfinch> @google jemfinch
<supybot> [Twisted-commits] Like, you know, a bugfix. jemfinch reported.: http://twistedmatrix.com/pipermail/twisted-commits/2002-August/002956.html :: Character Analysis of JemFinch NetEssays.NET - Thousands of FREE ...: http://www.netessays.net/viewpaper/1379.html :: SourceForge.net: Developer Profile: http://sourceforge.net/users/jemfinch/ (search took 0.174663 seconds)
<jemfinch> @googlefight jemfinch supybot moobot ddipaolo
<supybot> 'moobot': 959, 'jemfinch': 236, 'ddipaolo': 229, 'supybot': 80
<jemfinch> @googleinfo
<supybot> This google module has been called 5 times total; 5 times in the past 24 hours. Google has spent 1.229932 seconds searching for me.
<jemfinch> @googlespell recind
<supybot> rescind
<jemfinch> @metagoogle jemfinch
<supybot> Search for 'jemfinch' returned approximately 214 results in 0.072376 seconds.
<jemfinch> @googlesite slashdot.org SCO
<supybot> Slashdot | How SCO Helped Linux Go Enterprise: http://yro.slashdot.org/yro/03/07/22/0528203.shtml?tid=106&tid=185 :: Slashdot | SCO Threatens Red Hat and SuSE: http://science.slashdot.org/articles/03/04/23/1925259.shtml :: Slashdot | Linus Torvalds about SCO , IP, MS and Transmeta: http://slashdot.org/articles/03/07/05/1728201.shtml?tid=106&tid=185 (search took 0.210749 seconds)
""")
2003-07-30 22:07:25 +02:00
# Module-wide usage statistics.  They are updated by search() after every
# successful query and reported by the plugin's info command.
totalSearches = 0  # number of searches completed so far
totalTime = 0  # running sum of data.meta.searchTime over those searches
# Timestamps (from time.time()) of recent searches; search() drops entries
# older than 86400 seconds, so the length is the count for the last 24 hours.
last24hours = structures.queue()
def search(*args, **kwargs):
    """Run a Google search and record usage statistics.

    All positional and keyword arguments are forwarded unchanged to
    google.doGoogleSearch().

    On success the module-level counters (totalSearches, totalTime and
    last24hours) are updated and the search result object is returned.

    If the connection to Google times out, the string
    'Connection timed out to Google.com.' is returned instead of a result
    object, so callers must check the type of the return value.  Any other
    socket error is re-raised.
    """
    import sys
    import errno
    global totalSearches, totalTime, last24hours
    # Only the network call can raise socket.error, so keep the try minimal.
    try:
        data = google.doGoogleSearch(*args, **kwargs)
    except socket.error:
        # sys.exc_info() is used to obtain the exception so this handler is
        # valid syntax on both old and new interpreters.
        e = sys.exc_info()[1]
        # errno.ETIMEDOUT is the platform's "connection timed out" code; the
        # literal 110 is only correct on Linux.
        if e.args and e.args[0] == errno.ETIMEDOUT:
            return 'Connection timed out to Google.com.'
        raise
    now = time.time()
    totalSearches += 1
    totalTime += data.meta.searchTime
    last24hours.enqueue(now)
    # Expire timestamps that have fallen out of the 24-hour window.
    while last24hours and now - last24hours.peek() > 86400:
        last24hours.dequeue()
    return data
2003-10-19 23:04:35 +02:00
class Google(callbacks.PrivmsgCommandAndRegexp):
    # Plugin exposing Google searches as IRC commands (google, metagoogle,
    # fight, spell, info, licensekey, togglesnarfer) plus two regexp-driven
    # "snarfers" (googleSnarfer, googleGroups).
    #
    # NOTE(review): command docstrings appear to double as user-visible help
    # text, and the docstrings of the two snarfer methods appear to be the
    # regexps the framework matches against incoming messages -- treat them
    # as runtime data rather than free-form documentation.
    threaded = True
    regexps = sets.Set(['googleSnarfer', 'googleGroups'])

    # Accepted spellings of the optional <state> argument of togglesnarfer.
    _enable = ('on', 'enable')
    _disable = ('off', 'disable')

    # Long options understood by the google and metagoogle commands.
    _searchOptions = ['language=', 'restrict=', 'notsafe', 'similar']

    # Browser-like User-agent sent when fetching Google Groups pages.
    _userAgent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'

    _ggThread = re.compile(r'<br>Subject: ([^<]+)<br>')
    _ggGroup = re.compile(r'Newsgroups: <a[^>]+>([^<]+)</a>')

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(Google, self).__init__()
        # NOTE(review): these three attributes are not read anywhere in this
        # class (the module-level counters are used instead); kept in case
        # other code relies on them.
        self.total = 0
        self.totalTime = 0
        self.last24hours = structures.queue()
        # Which snarfers are active; keys are the names togglesnarfer accepts.
        self.snarfers = {'groups': True,
                         'search': False}

    def _parseSearchOptions(self, args):
        # Split a command's argument list into (searchString, kwargs), where
        # kwargs are the keyword arguments to hand to search().
        (optlist, rest) = getopt.getopt(args, '', self._searchOptions)
        kwargs = {'language': 'lang_en', 'safeSearch': 1}
        for (option, argument) in optlist:
            if option == '--notsafe':
                kwargs['safeSearch'] = False
            elif option == '--similar':
                kwargs['filter'] = False
            else:
                # --language=X / --restrict=X map straight onto a keyword.
                kwargs[option[2:]] = argument
        return (privmsgs.getArgs(rest), kwargs)

    def _fetchPage(self, url):
        # Return the body of url, always closing the connection afterwards.
        request = urllib2.Request(url,
                                  headers={'User-agent': self._userAgent})
        fd = urllib2.urlopen(request)
        try:
            return fd.read()
        finally:
            fd.close()

    def formatData(self, data):
        # Turn the value returned by search() into a one-line reply.  search()
        # returns a plain string on a connection timeout; pass it through.
        if isinstance(data, basestring):
            return data
        took = 'Search took %s seconds' % data.meta.searchTime
        results = []
        for result in data.results:
            title = utils.htmlToText(result.title.encode('utf-8'))
            url = result.URL
            if title:
                results.append('\x02%s\x0F: <%s>' % (title, url))
            else:
                results.append(url)
        if not results:
            return 'No matches found. %s.' % took
        return '%s: %s' % (took, '; '.join(results))

    def licensekey(self, irc, msg, args):
        """<key>

        Sets the Google license key for using Google's Web Services API. This
        is necessary before you can do any searching with this module.
        """
        key = privmsgs.getArgs(args)
        google.setLicense(key)
        irc.reply(msg, conf.replySuccess)
    licensekey = privmsgs.checkCapability(licensekey, 'admin')

    def _toggleHelper(self, irc, msg, state, snarfer):
        # Apply state ('' means "flip") to one snarfer, then report the state
        # of every snarfer.
        if not state:
            self.snarfers[snarfer] = not self.snarfers[snarfer]
        elif state in self._enable:
            self.snarfers[snarfer] = True
        elif state in self._disable:
            self.snarfers[snarfer] = False
        resp = []
        for name in self.snarfers:
            if self.snarfers[name]:
                status = 'On'
            else:
                status = 'Off'
            resp.append('%s%s: %s' % (name[0].upper(), name[1:], status))
        irc.reply(msg, '%s (%s)' % (conf.replySuccess, '; '.join(resp)))

    def togglesnarfer(self, irc, msg, args):
        """<groups|search> [<on|off>]

        Enables or disables the snarfer that responds to Google Groups links
        or to Google searches. If no state is given, the named snarfer has
        its current state toggled (on -> off, off -> on).
        """
        (snarfer, state) = privmsgs.getArgs(args, optional=1)
        snarfer = snarfer.lower()
        state = state.lower()
        if snarfer not in self.snarfers:
            raise callbacks.ArgumentError
        if state and state not in self._enable and state not in self._disable:
            raise callbacks.ArgumentError
        self._toggleHelper(irc, msg, state, snarfer)
    togglesnarfer = privmsgs.checkCapability(togglesnarfer, 'admin')

    def google(self, irc, msg, args):
        """<search> [--{language,restrict}=<value>] [--{notsafe,similar}]

        Searches google.com for the given string. As many results as can fit
        are included. --language accepts a language abbreviation; --restrict
        restricts the results to certain classes of things; --similar tells
        Google not to filter similar results. --notsafe allows possibly
        work-unsafe results.
        """
        (searchString, kwargs) = self._parseSearchOptions(args)
        data = search(searchString, **kwargs)
        irc.reply(msg, self.formatData(data))

    def metagoogle(self, irc, msg, args):
        """<search> [--{language,restrict}=<value>] [--{similar,notsafe}]

        Searches google and gives all the interesting meta information about
        the search. See the help for the google command for a detailed
        description of the parameters.
        """
        (searchString, kwargs) = self._parseSearchOptions(args)
        data = search(searchString, **kwargs)
        if isinstance(data, basestring):
            # search() returned its timeout message instead of a result.
            irc.error(msg, data)
            return
        meta = data.meta
        categories = [d['fullViewableName'] for d in meta.directoryCategories]
        categories = [utils.dqrepr(s.replace('_', ' ')) for s in categories]
        if categories:
            categories = utils.commaAndify(categories)
        else:
            categories = ''
        s = 'Search for %r returned %s %s results in %s seconds.%s' % \
            (meta.searchQuery,
             meta.estimateIsExact and 'exactly' or 'approximately',
             meta.estimatedTotalResultsCount,
             meta.searchTime,
             categories and ' Categories include %s.' % categories)
        irc.reply(msg, s)

    def fight(self, irc, msg, args):
        """<search string> <search string> [<search string> ...]

        Returns the results of each search, in order, from greatest number
        of results to least.
        """
        if len(args) < 2:
            # The documented syntax needs at least two contenders; without
            # any the reply would be an empty string.
            raise callbacks.ArgumentError
        results = []
        for arg in args:
            data = search(arg)
            if isinstance(data, basestring):
                # search() returned its timeout message instead of a result.
                irc.error(msg, data)
                return
            results.append((data.meta.estimatedTotalResultsCount, arg))
        # Highest estimated result count first.
        results.sort()
        results.reverse()
        s = ', '.join(['%r: %s' % (s, i) for (i, s) in results])
        irc.reply(msg, s)

    def spell(self, irc, msg, args):
        """<word>

        Returns Google's spelling recommendation for <word>.
        """
        word = privmsgs.getArgs(args)
        result = google.doSpellingSuggestion(word)
        if result:
            irc.reply(msg, result)
        else:
            irc.reply(msg, 'No spelling suggestion made.')

    def info(self, irc, msg, args):
        """takes no arguments

        Returns interesting information about this Google module. Mostly
        useful for making sure you don't go over your 1000 requests/day limit.
        """
        # Statistics come from the module-level counters kept by search().
        recent = len(last24hours)
        irc.reply(msg, 'This google module has been called %s time%stotal; '
                       '%s time%sin the past 24 hours. '
                       'Google has spent %s seconds searching for me.' %
                  (totalSearches, totalSearches != 1 and 's ' or ' ',
                   recent, recent != 1 and 's ' or ' ',
                   totalTime))

    def googleSnarfer(self, irc, msg, match):
        r"^google\s+(.*)$"
        # Replies with the URL of the first hit for the captured search text.
        if not self.snarfers['search']:
            return
        searchString = match.group(1)
        try:
            data = search(searchString, safeSearch=1)
        except google.NoLicenseKey:
            return
        if isinstance(data, basestring):
            # Timeout message from search(); stay quiet, as for a missing key.
            return
        if data.results:
            url = data.results[0].URL
            irc.reply(msg, url)
        else:
            irc.reply(msg, 'No results for "%s"' % searchString)

    def googleGroups(self, irc, msg, match):
        r"http://groups.google.com/[^\s]+"
        # Replies with the newsgroup and subject of a linked Groups posting.
        if not self.snarfers['groups']:
            return
        url = match.group(0)
        text = self._fetchPage(url)
        if '&prev=/' in url:
            # This kind of link needs one more hop: follow the link the page
            # offers and scrape that page instead.
            path = re.search('view the <a href=([^>]+)>no', text)
            if path is None:
                return
            text = self._fetchPage('http://groups.google.com%s' %
                                   path.group(1))
        mThread = self._ggThread.search(text)
        mGroup = self._ggGroup.search(text)
        if mThread and mGroup:
            irc.reply(msg, 'Google Groups: %s, %s' % (mGroup.group(1),
                      mThread.group(1)), prefixName=False)
        else:
            irc.error(msg, 'That doesn\'t appear to be a proper '
                           'Google Groups page. (%s)' % conf.replyPossibleBug)
2003-10-19 23:04:35 +02:00
# Module-level alias for the plugin class defined above.
# NOTE(review): presumably the name the plugin loader looks up -- confirm.
Class = Google
2003-04-08 21:16:18 +02:00
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78: