Google: Remove Groups snarfer

The regular expressions were woefully out of date, and since there is no
stable API (or any API, for that matter), keeping things working is a losing
battle.

Closes: Sf#3057485
Signed-off-by: James Vega <jamessan@users.sourceforge.net>
This commit is contained in:
James Vega 2010-09-08 22:20:23 -04:00
parent 7cf61ad046
commit c9274606ce
3 changed files with 4 additions and 108 deletions

View File

@ -1,6 +1,6 @@
### ###
# Copyright (c) 2005, Jeremiah Fincher # Copyright (c) 2005, Jeremiah Fincher
# Copyright (c) 2008-2009, James Vega # Copyright (c) 2008-2010, James Vega
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -37,13 +37,7 @@ def configure(advanced):
output("""The Google plugin has the functionality to watch for URLs output("""The Google plugin has the functionality to watch for URLs
that match a specific pattern. (We call this a snarfer) that match a specific pattern. (We call this a snarfer)
When supybot sees such a URL, it will parse the web page When supybot sees such a URL, it will parse the web page
for information and reply with the results. for information and reply with the results.""")
Google has two available snarfers: Google Groups link
snarfing and a google search snarfer.""")
if yn('Do you want the Google Groups link snarfer enabled by '
'default?'):
conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
if yn('Do you want the Google search snarfer enabled by default?'): if yn('Do you want the Google search snarfer enabled by default?'):
conf.supybot.plugins.Google.searchSnarfer.setValue(True) conf.supybot.plugins.Google.searchSnarfer.setValue(True)
@ -104,10 +98,6 @@ conf.registerGlobalValue(Google, 'referer',
the Referer field of the search requests. If this value is empty, a the Referer field of the search requests. If this value is empty, a
Referer will be generated in the following format: Referer will be generated in the following format:
http://$server/$botName""")) http://$server/$botName"""))
conf.registerChannelValue(Google, 'groupsSnarfer',
registry.Boolean(False, """Determines whether the groups snarfer is
enabled. If so, URLs at groups.google.com will be snarfed and their
group/title messaged to the channel."""))
conf.registerChannelValue(Google, 'searchSnarfer', conf.registerChannelValue(Google, 'searchSnarfer',
registry.Boolean(False, """Determines whether the search snarfer is registry.Boolean(False, """Determines whether the search snarfer is
enabled. If so, messages (even unaddressed ones) beginning with the word enabled. If so, messages (even unaddressed ones) beginning with the word

View File

@ -1,6 +1,6 @@
### ###
# Copyright (c) 2002-2004, Jeremiah Fincher # Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2008-2009, James Vega # Copyright (c) 2008-2010, James Vega
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -66,7 +66,7 @@ except ImportError:
class Google(callbacks.PluginRegexp): class Google(callbacks.PluginRegexp):
threaded = True threaded = True
callBefore = ['Web'] callBefore = ['Web']
regexps = ['googleSnarfer', 'googleGroups'] regexps = ['googleSnarfer']
_colorGoogles = {} _colorGoogles = {}
def _getColorGoogle(self, m): def _getColorGoogle(self, m):
@ -288,44 +288,6 @@ class Google(callbacks.PluginRegexp):
irc.reply(url.encode('utf-8'), prefixNick=False) irc.reply(url.encode('utf-8'), prefixNick=False)
googleSnarfer = urlSnarfer(googleSnarfer) googleSnarfer = urlSnarfer(googleSnarfer)
_ggThread = re.compile(r'Subject: <b>([^<]+)</b>', re.I)
_ggGroup = re.compile(r'<TITLE>Google Groups :\s*([^<]+)</TITLE>', re.I)
_ggThreadm = re.compile(r'src="(/group[^"]+)">', re.I)
_ggSelm = re.compile(r'selm=[^&]+', re.I)
_threadmThread = re.compile(r'TITLE="([^"]+)">', re.I)
_threadmGroup = re.compile(r'class=groupname[^>]+>([^<]+)<', re.I)
def googleGroups(self, irc, msg, match):
r"http://groups.google.[\w.]+/\S+\?(\S+)"
if not self.registryValue('groupsSnarfer', msg.args[0]):
return
queries = cgi.parse_qsl(match.group(1))
queries = [q for q in queries if q[0] in ('threadm', 'selm')]
if not queries:
return
queries.append(('hl', 'en'))
url = 'http://groups.google.com/groups?' + urllib.urlencode(queries)
text = utils.web.getUrl(url)
mThread = None
mGroup = None
if 'threadm=' in url:
path = self._ggThreadm.search(text)
if path is not None:
url = 'http://groups-beta.google.com' + path.group(1)
text = utils.web.getUrl(url)
mThread = self._threadmThread.search(text)
mGroup = self._threadmGroup.search(text)
else:
mThread = self._ggThread.search(text)
mGroup = self._ggGroup.search(text)
if mThread and mGroup:
irc.reply(format('Google Groups: %s, %s',
mGroup.group(1), mThread.group(1)),
prefixNick=False)
else:
self.log.debug('Unable to snarf. %s doesn\'t appear to be a '
'proper Google Groups page.', match.group(1))
googleGroups = urlSnarfer(googleGroups)
def _googleUrl(self, s): def _googleUrl(self, s):
s = s.replace('+', '%2B') s = s.replace('+', '%2B')
s = s.replace(' ', '+') s = s.replace(' ', '+')

View File

@ -64,60 +64,4 @@ class GoogleTestCase(ChannelPluginTestCase):
def testCalcDoesNotHaveExtraSpaces(self): def testCalcDoesNotHaveExtraSpaces(self):
self.assertNotRegexp('google calc 1000^2', r'\s+,\s+') self.assertNotRegexp('google calc 1000^2', r'\s+,\s+')
def testGroupsSnarfer(self):
orig = conf.supybot.plugins.Google.groupsSnarfer()
try:
conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
# This should work, and does work in practice, but is failing
# in the tests.
#self.assertSnarfRegexp(
# 'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
# 'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
# '%40posting.google.com',
# r'comp\.lang\.python.*question: usage of __slots__')
self.assertSnarfRegexp(
'http://groups.google.com/groups?selm=ExDm.8bj.23'
'%40gated-at.bofh.it&oe=UTF-8&output=gplain',
r'linux\.kernel.*NFS client freezes')
self.assertSnarfRegexp(
'http://groups.google.com/groups?q=kernel+hot-pants&'
'hl=en&lr=&ie=UTF-8&oe=UTF-8&selm=1.5.4.32.199703131'
'70853.00674d60%40adan.kingston.net&rnum=1',
r'Madrid Bluegrass Ramble')
self.assertSnarfRegexp(
'http://groups.google.com/groups?selm=1.5.4.32.19970'
'313170853.00674d60%40adan.kingston.net&oe=UTF-8&'
'output=gplain',
r'Madrid Bluegrass Ramble')
self.assertSnarfRegexp(
'http://groups.google.com/groups?dq=&hl=en&lr=&'
'ie=UTF-8&threadm=mailman.1010.1069645289.702.'
'python-list%40python.org&prev=/groups%3Fhl%3Den'
'%26lr%3D%26ie%3DUTF-8%26group%3Dcomp.lang.python',
r'comp\.lang\.python.*What exactly are bound')
# Test for Bug #1002547
self.assertSnarfRegexp(
'http://groups.google.com/groups?q=supybot+is+the&'
'hl=en&lr=&ie=UTF-8&c2coff=1&selm=1028329672'
'%40freshmeat.net&rnum=9',
r'fm\.announce.*SupyBot')
finally:
conf.supybot.plugins.Google.groupsSnarfer.setValue(orig)
def testConfig(self):
orig = conf.supybot.plugins.Google.groupsSnarfer()
try:
conf.supybot.plugins.Google.groupsSnarfer.setValue(False)
self.assertSnarfNoResponse(
'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
'%40posting.google.com')
conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
self.assertSnarfNotError(
'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
'%40posting.google.com')
finally:
conf.supybot.plugins.Google.groupsSnarfer.setValue(orig)
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: