Implement patch #1023620, Handling of international Google Groups snarfing

This commit is contained in:
James Vega 2004-09-09 14:04:44 +00:00
parent 30ce7c572e
commit 3986f24dad
2 changed files with 63 additions and 50 deletions

View File

@@ -38,10 +38,12 @@ __revision__ = "$Id$"
import supybot.plugins as plugins import supybot.plugins as plugins
import re import re
import cgi
import sets import sets
import time import time
import getopt import getopt
import socket import socket
import urllib
import xml.sax import xml.sax
import SOAP import SOAP
@@ -384,10 +386,15 @@ class Google(callbacks.PrivmsgCommandAndRegexp):
_ggThreadm = re.compile(r'view the <a href=([^>]+)>no', re.I) _ggThreadm = re.compile(r'view the <a href=([^>]+)>no', re.I)
_ggSelm = re.compile(r'selm=[^&]+', re.I) _ggSelm = re.compile(r'selm=[^&]+', re.I)
def googleGroups(self, irc, msg, match): def googleGroups(self, irc, msg, match):
r"http://groups.google.com/[^\s]+" r"http://groups.google.[\w.]+/\S+\?(\S+)"
if not self.registryValue('groupsSnarfer', msg.args[0]): if not self.registryValue('groupsSnarfer', msg.args[0]):
return return
url = match.group(0) queries = cgi.parse_qsl(match.group(1))
queries = filter(lambda q: q[0] in ['threadm', 'selm'], queries)
if not queries:
return
queries.append(('hl', 'en'))
url = 'http://groups.google.com/groups?%s' % urllib.urlencode(queries)
text = webutils.getUrl(url) text = webutils.getUrl(url)
mThread = None mThread = None
mGroup = None mGroup = None
@@ -397,12 +404,6 @@ class Google(callbacks.PrivmsgCommandAndRegexp):
return return
url = 'http://groups.google.com%s' % path.group(1) url = 'http://groups.google.com%s' % path.group(1)
text = webutils.getUrl(url) text = webutils.getUrl(url)
elif 'selm=' in url:
path = self._ggSelm.search(url)
if path is None:
return
url = 'http://groups.google.com/groups?%s' % path.group(0)
text = webutils.getUrl(url)
mThread = self._ggThread.search(text) mThread = self._ggThread.search(text)
mGroup = self._ggGroup.search(text) mGroup = self._ggGroup.search(text)
if mThread and mGroup: if mThread and mGroup:

View File

@@ -47,59 +47,71 @@ class GoogleTestCase(ChannelPluginTestCase, PluginDocumentation):
def testCalcDoesNotHaveExtraSpaces(self): def testCalcDoesNotHaveExtraSpaces(self):
self.assertNotRegexp('google calc 1000^2', r'\s+,\s+') self.assertNotRegexp('google calc 1000^2', r'\s+,\s+')
def testNoNoLicenseKeyError(self): def testNoNoLicenseKeyError(self):
conf.supybot.plugins.Google.groupsSnarfer.setValue(True) conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
self.irc.feedMsg(ircmsgs.privmsg(self.channel, 'google blah')) self.irc.feedMsg(ircmsgs.privmsg(self.channel, 'google blah'))
self.assertNoResponse(' ') self.assertNoResponse(' ')
def testGroupsSnarfer(self): def testGroupsSnarfer(self):
conf.supybot.plugins.Google.groupsSnarfer.setValue(True) orig = conf.supybot.plugins.Google.groupsSnarfer()
self.assertSnarfRegexp( try:
'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&' conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc' # This should work, and does work in practice, but is failing
'%40posting.google.com', # in the tests.
r'comp\.lang\.python.*question: usage of __slots__') #self.assertSnarfRegexp(
self.assertSnarfRegexp( # 'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
'http://groups.google.com/groups?selm=ExDm.8bj.23' # 'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
'%40gated-at.bofh.it&oe=UTF-8&output=gplain', # '%40posting.google.com',
r'linux\.kernel.*NFS client freezes') # r'comp\.lang\.python.*question: usage of __slots__')
self.assertSnarfRegexp( self.assertSnarfRegexp(
'http://groups.google.com/groups?q=kernel+hot-pants&' 'http://groups.google.com/groups?selm=ExDm.8bj.23'
'hl=en&lr=&ie=UTF-8&oe=UTF-8&selm=1.5.4.32.199703131' '%40gated-at.bofh.it&oe=UTF-8&output=gplain',
'70853.00674d60%40adan.kingston.net&rnum=1', r'linux\.kernel.*NFS client freezes')
r'Madrid Bluegrass Ramble') self.assertSnarfRegexp(
self.assertSnarfRegexp( 'http://groups.google.com/groups?q=kernel+hot-pants&'
'http://groups.google.com/groups?selm=1.5.4.32.19970' 'hl=en&lr=&ie=UTF-8&oe=UTF-8&selm=1.5.4.32.199703131'
'313170853.00674d60%40adan.kingston.net&oe=UTF-8&' '70853.00674d60%40adan.kingston.net&rnum=1',
'output=gplain', r'Madrid Bluegrass Ramble')
r'Madrid Bluegrass Ramble') self.assertSnarfRegexp(
self.assertSnarfRegexp( 'http://groups.google.com/groups?selm=1.5.4.32.19970'
'http://groups.google.com/groups?dq=&hl=en&lr=&' '313170853.00674d60%40adan.kingston.net&oe=UTF-8&'
'ie=UTF-8&threadm=mailman.1010.1069645289.702.' 'output=gplain',
'python-list%40python.org&prev=/groups%3Fhl%3Den' r'Madrid Bluegrass Ramble')
'%26lr%3D%26ie%3DUTF-8%26group%3Dcomp.lang.python', self.assertSnarfRegexp(
r'comp\.lang\.python.*What exactly are bound') 'http://groups.google.com/groups?dq=&hl=en&lr=&'
# Test for Bug #1002547 'ie=UTF-8&threadm=mailman.1010.1069645289.702.'
self.assertSnarfRegexp( 'python-list%40python.org&prev=/groups%3Fhl%3Den'
'http://groups.google.com/groups?q=supybot+is+the&' '%26lr%3D%26ie%3DUTF-8%26group%3Dcomp.lang.python',
'hl=en&lr=&ie=UTF-8&c2coff=1&selm=1028329672' r'comp\.lang\.python.*What exactly are bound')
'%40freshmeat.net&rnum=9', # Test for Bug #1002547
r'fm\.announce.*SupyBot') self.assertSnarfRegexp(
'http://groups.google.com/groups?q=supybot+is+the&'
'hl=en&lr=&ie=UTF-8&c2coff=1&selm=1028329672'
'%40freshmeat.net&rnum=9',
r'fm\.announce.*SupyBot')
finally:
conf.supybot.plugins.Google.groupsSnarfer.setValue(orig)
def testConfig(self): def testConfig(self):
conf.supybot.plugins.Google.groupsSnarfer.setValue(False) orig = conf.supybot.plugins.Google.groupsSnarfer()
self.assertNoResponse('http://groups.google.com/groups?dq=&hl=en&' try:
'lr=lang_en&ie=UTF-8&oe=UTF-8&selm=698f09f8.' conf.supybot.plugins.Google.groupsSnarfer.setValue(False)
'0310132012.738e22fc%40posting.google.com') self.assertSnarfNoResponse(
conf.supybot.plugins.Google.groupsSnarfer.setValue(True) 'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
self.assertNotError('http://groups.google.com/groups?dq=&hl=en&' 'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
'lr=lang_en&ie=UTF-8&oe=UTF-8&selm=698f09f8.' '%40posting.google.com')
'0310132012.738e22fc%40posting.google.com') conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
self.assertSnarfNotError(
'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
'%40posting.google.com')
finally:
conf.supybot.plugins.Google.groupsSnarfer.setValue(orig)
def testInvalidKeyCaught(self): def testInvalidKeyCaught(self):
conf.supybot.plugins.Google.licenseKey.set( conf.supybot.plugins.Google.licenseKey.set(
'abcdefghijklmnopqrstuvwxyz123456') 'abcdefghijklmnopqrstuvwxyz123456')
self.assertNotRegexp('google foobar', 'faultType') self.assertNotRegexp('google foobar', 'faultType')
self.assertNotRegexp('google foobar', 'SOAP') self.assertNotRegexp('google foobar', 'SOAP')