Fix (and clean up) the Google Groups snarfer, thanks to the suggestion in Bug #1002547

This commit is contained in:
James Vega 2004-08-04 02:33:57 +00:00
parent 5327feeeab
commit 9db07346dc
2 changed files with 56 additions and 42 deletions

View File

@ -370,43 +370,48 @@ class Google(callbacks.PrivmsgCommandAndRegexp):
irc.reply(url, prefixName=False) irc.reply(url, prefixName=False)
googleSnarfer = privmsgs.urlSnarfer(googleSnarfer) googleSnarfer = privmsgs.urlSnarfer(googleSnarfer)
_ggThread = re.compile(r'<br>Subject: ([^<]+)<br>') _ggThread = re.compile(r'<br>Subject: ([^<]+)<br>', re.I)
_ggPlainThread = re.compile(r'Subject: (.*)') _ggGroup = re.compile(r'Newsgroups: (?:<a[^>]+>)?([^<]+)(?:</a>)?', re.I)
_ggGroup = re.compile(r'Newsgroups: (?:<a[^>]+>)?([^<]+)(?:</a>)?') _ggThreadm = re.compile(r'view the <a href=([^>]+)>no', re.I)
_ggPlainGroup = re.compile(r'Newsgroups: (.*)') _ggSelm = re.compile(r'selm=[^&]+', re.I)
def googleGroups(self, irc, msg, match): def googleGroups(self, irc, msg, match):
r"http://groups.google.com/[^\s]+" r"http://groups.google.com/[^\s]+"
if not self.registryValue('groupsSnarfer', msg.args[0]): if not self.registryValue('groupsSnarfer', msg.args[0]):
return return
request = urllib2.Request(match.group(0), headers= \ m = match.group(0)
{'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'}) header = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; '
'Windows NT 4.0)'}
request = urllib2.Request(m, headers=header)
fd = urllib2.urlopen(request) fd = urllib2.urlopen(request)
text = fd.read() text = fd.read()
fd.close() fd.close()
mThread = None mThread = None
mGroup = None mGroup = None
if '&prev=/' in match.group(0): if 'threadm=' in m:
path = re.search('view the <a href=([^>]+)>no',text) path = self._ggThreadm.search(text)
if path is None: if path is None:
return return
url = 'http://groups.google.com' url = 'http://groups.google.com%s' % path.group(1)
request = urllib2.Request('%s%s' % (url,path.group(1)), request = urllib2.Request(url, headers=header)
headers={'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5;' fd = urllib2.urlopen(request)
'Windows NT 4.0)'}) text = fd.read()
fd.close()
elif 'selm=' in m:
path = self._ggSelm.search(m)
if m is None:
return
url = 'http://groups.google.com/groups?%s' % path.group(0)
request = urllib2.Request(url, headers=header)
fd = urllib2.urlopen(request) fd = urllib2.urlopen(request)
text = fd.read() text = fd.read()
fd.close() fd.close()
mThread = self._ggThread.search(text)
mGroup = self._ggGroup.search(text)
elif '&output=gplain' in match.group(0):
mThread = self._ggPlainThread.search(text)
mGroup = self._ggPlainGroup.search(text)
else: else:
mThread = self._ggThread.search(text) pass
mGroup = self._ggGroup.search(text) mThread = self._ggThread.search(text)
mGroup = self._ggGroup.search(text)
if mThread and mGroup: if mThread and mGroup:
irc.reply('Google Groups: %s, %s' % (mGroup.group(1), irc.reply('Google Groups: %s, %s' % (mGroup.group(1),
mThread.group(1)), prefixName = False) mThread.group(1)), prefixName=False)
else: else:
irc.errorPossibleBug('That doesn\'t appear to be a proper ' irc.errorPossibleBug('That doesn\'t appear to be a proper '
'Google Groups page.') 'Google Groups page.')

View File

@ -41,28 +41,37 @@ class GoogleTestCase(ChannelPluginTestCase, PluginDocumentation):
def testGroupsSnarfer(self): def testGroupsSnarfer(self):
conf.supybot.plugins.Google.groupsSnarfer.setValue(True) conf.supybot.plugins.Google.groupsSnarfer.setValue(True)
self.assertRegexp('http://groups.google.com/groups?dq=&hl=en&' self.assertSnarfRegexp(
'lr=lang_en&ie=UTF-8&oe=UTF-8&selm=698f09f8.' 'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&'
'0310132012.738e22fc%40posting.google.com', 'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc'
r'comp\.lang\.python.*question: usage of __slots__') '%40posting.google.com',
self.assertRegexp('http://groups.google.com/groups?selm=ExDm.' r'comp\.lang\.python.*question: usage of __slots__')
'8bj.23%40gated-at.bofh.it&oe=UTF-8&output=gplain', self.assertSnarfRegexp(
r'linux\.kernel.*NFS client freezes') 'http://groups.google.com/groups?selm=ExDm.8bj.23'
self.assertRegexp('http://groups.google.com/groups?' '%40gated-at.bofh.it&oe=UTF-8&output=gplain',
'q=kernel+hot-pants&hl=en&lr=&ie=UTF-8&oe=UTF-8&' r'linux\.kernel.*NFS client freezes')
'selm=1.5.4.32.19970313170853.00674d60%40' self.assertSnarfRegexp(
'adan.kingston.net&rnum=1', 'http://groups.google.com/groups?q=kernel+hot-pants&'
r'Madrid Bluegrass Ramble') 'hl=en&lr=&ie=UTF-8&oe=UTF-8&selm=1.5.4.32.199703131'
self.assertRegexp('http://groups.google.com/groups?' '70853.00674d60%40adan.kingston.net&rnum=1',
'selm=1.5.4.32.19970313170853.00674d60%40adan.' r'Madrid Bluegrass Ramble')
'kingston.net&oe=UTF-8&output=gplain', self.assertSnarfRegexp(
r'Madrid Bluegrass Ramble') 'http://groups.google.com/groups?selm=1.5.4.32.19970'
self.assertRegexp('http://groups.google.com/groups?' '313170853.00674d60%40adan.kingston.net&oe=UTF-8&'
'dq=&hl=en&lr=&ie=UTF-8&threadm=mailman.1010.' 'output=gplain',
'1069645289.702.python-list%40python.org' r'Madrid Bluegrass Ramble')
'&prev=/groups%3Fhl%3Den%26lr%3D%26ie%3DUTF-8' self.assertSnarfRegexp(
'%26group%3Dcomp.lang.python', 'http://groups.google.com/groups?dq=&hl=en&lr=&'
r'comp\.lang\.python.*What exactly are bound') 'ie=UTF-8&threadm=mailman.1010.1069645289.702.'
'python-list%40python.org&prev=/groups%3Fhl%3Den'
'%26lr%3D%26ie%3DUTF-8%26group%3Dcomp.lang.python',
r'comp\.lang\.python.*What exactly are bound')
# Test for Bug #1002547
self.assertSnarfRegexp(
'http://groups.google.com/groups?q=supybot+is+the&'
'hl=en&lr=&ie=UTF-8&c2coff=1&selm=1028329672'
'%40freshmeat.net&rnum=9',
r'fm\.announce.*SupyBot')
def testConfig(self): def testConfig(self):
conf.supybot.plugins.Google.groupsSnarfer.setValue(False) conf.supybot.plugins.Google.groupsSnarfer.setValue(False)