Fix (and cleanup) thanks to the suggestion in Bug #1002547

This commit is contained in:
James Vega 2004-08-04 02:33:57 +00:00
parent 5327feeeab
commit 9db07346dc
2 changed files with 56 additions and 42 deletions

View File

@ -370,43 +370,48 @@ class Google(callbacks.PrivmsgCommandAndRegexp):
irc.reply(url, prefixName=False)
googleSnarfer = privmsgs.urlSnarfer(googleSnarfer)
# Compiled patterns for scraping Google Groups pages and URLs.
# NOTE(review): this view shows a diff without +/- markers; the first four
# assignments look like the removed versions and the last four (which add
# re.I and drop the _ggPlain* variants) like their replacements -- confirm
# against the raw diff.
_ggThread = re.compile(r'<br>Subject: ([^<]+)<br>')
_ggPlainThread = re.compile(r'Subject: (.*)')
_ggGroup = re.compile(r'Newsgroups: (?:<a[^>]+>)?([^<]+)(?:</a>)?')
_ggPlainGroup = re.compile(r'Newsgroups: (.*)')
# Group 1 is the subject text between '<br>Subject: ' and the next '<br>'.
_ggThread = re.compile(r'<br>Subject: ([^<]+)<br>', re.I)
# Group 1 is the newsgroup name, whether or not it is wrapped in an <a> tag.
_ggGroup = re.compile(r'Newsgroups: (?:<a[^>]+>)?([^<]+)(?:</a>)?', re.I)
# Group 1 is the href value inside 'view the <a href=...>no'.
_ggThreadm = re.compile(r'view the <a href=([^>]+)>no', re.I)
# Matches a 'selm=<value>' query parameter up to (not including) the next '&'.
_ggSelm = re.compile(r'selm=[^&]+', re.I)
def googleGroups(self, irc, msg, match):
r"http://groups.google.com/[^\s]+"
if not self.registryValue('groupsSnarfer', msg.args[0]):
return
request = urllib2.Request(match.group(0), headers= \
{'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'})
m = match.group(0)
header = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; '
'Windows NT 4.0)'}
request = urllib2.Request(m, headers=header)
fd = urllib2.urlopen(request)
text = fd.read()
fd.close()
mThread = None
mGroup = None
if '&prev=/' in match.group(0):
path = re.search('view the <a href=([^>]+)>no',text)
if 'threadm=' in m:
path = self._ggThreadm.search(text)
if path is None:
return
url = 'http://groups.google.com'
request = urllib2.Request('%s%s' % (url,path.group(1)),
headers={'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5;'
'Windows NT 4.0)'})
url = 'http://groups.google.com%s' % path.group(1)
request = urllib2.Request(url, headers=header)
fd = urllib2.urlopen(request)
text = fd.read()
fd.close()
elif 'selm=' in m:
path = self._ggSelm.search(m)
if m is None:
return
url = 'http://groups.google.com/groups?%s' % path.group(0)
request = urllib2.Request(url, headers=header)
fd = urllib2.urlopen(request)
text = fd.read()
fd.close()
mThread = self._ggThread.search(text)
mGroup = self._ggGroup.search(text)
elif '&output=gplain' in match.group(0):
mThread = self._ggPlainThread.search(text)
mGroup = self._ggPlainGroup.search(text)
else:
pass
mThread = self._ggThread.search(text)
mGroup = self._ggGroup.search(text)
if mThread and mGroup:
irc.reply('Google Groups: %s, %s' % (mGroup.group(1),
mThread.group(1)), prefixName = False)
mThread.group(1)), prefixName=False)
else:
irc.errorPossibleBug('That doesn\'t appear to be a proper '
'Google Groups page.')

View File

@ -41,28 +41,37 @@ class GoogleTestCase(ChannelPluginTestCase, PluginDocumentation):
def testGroupsSnarfer(self):
conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
# Each case below hands a Google Groups URL and a regexp for the expected
# reply to the assertion helper.
# NOTE(review): this view shows a diff without +/- markers; the unclosed
# self.assertRegexp( openers look like the removed lines and the
# self.assertSnarfRegexp( calls like their replacements -- confirm against
# the raw diff.
self.assertRegexp('http://groups.google.com/groups?dq=&hl=en&'
'lr=lang_en&ie=UTF-8&oe=UTF-8&selm=698f09f8.'
'0310132012.738e22fc%40posting.google.com',
self.assertSnarfRegexp(
'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
'%40posting.google.com',
r'comp\.lang\.python.*question: usage of __slots__')
self.assertRegexp('http://groups.google.com/groups?selm=ExDm.'
'8bj.23%40gated-at.bofh.it&oe=UTF-8&output=gplain',
self.assertSnarfRegexp(
'http://groups.google.com/groups?selm=ExDm.8bj.23'
'%40gated-at.bofh.it&oe=UTF-8&output=gplain',
r'linux\.kernel.*NFS client freezes')
self.assertRegexp('http://groups.google.com/groups?'
'q=kernel+hot-pants&hl=en&lr=&ie=UTF-8&oe=UTF-8&'
'selm=1.5.4.32.19970313170853.00674d60%40'
'adan.kingston.net&rnum=1',
self.assertSnarfRegexp(
'http://groups.google.com/groups?q=kernel+hot-pants&'
'hl=en&lr=&ie=UTF-8&oe=UTF-8&selm=1.5.4.32.199703131'
'70853.00674d60%40adan.kingston.net&rnum=1',
r'Madrid Bluegrass Ramble')
self.assertRegexp('http://groups.google.com/groups?'
'selm=1.5.4.32.19970313170853.00674d60%40adan.'
'kingston.net&oe=UTF-8&output=gplain',
self.assertSnarfRegexp(
'http://groups.google.com/groups?selm=1.5.4.32.19970'
'313170853.00674d60%40adan.kingston.net&oe=UTF-8&'
'output=gplain',
r'Madrid Bluegrass Ramble')
self.assertRegexp('http://groups.google.com/groups?'
'dq=&hl=en&lr=&ie=UTF-8&threadm=mailman.1010.'
'1069645289.702.python-list%40python.org'
'&prev=/groups%3Fhl%3Den%26lr%3D%26ie%3DUTF-8'
'%26group%3Dcomp.lang.python',
self.assertSnarfRegexp(
'http://groups.google.com/groups?dq=&hl=en&lr=&'
'ie=UTF-8&threadm=mailman.1010.1069645289.702.'
'python-list%40python.org&prev=/groups%3Fhl%3Den'
'%26lr%3D%26ie%3DUTF-8%26group%3Dcomp.lang.python',
r'comp\.lang\.python.*What exactly are bound')
# Test for Bug #1002547
self.assertSnarfRegexp(
'http://groups.google.com/groups?q=supybot+is+the&'
'hl=en&lr=&ie=UTF-8&c2coff=1&selm=1028329672'
'%40freshmeat.net&rnum=9',
r'fm\.announce.*SupyBot')
def testConfig(self):
conf.supybot.plugins.Google.groupsSnarfer.setValue(False)