From 9db07346dc2075e0b069d87755481ccb47264bcb Mon Sep 17 00:00:00 2001 From: James Vega Date: Wed, 4 Aug 2004 02:33:57 +0000 Subject: [PATCH] Fix (and cleanup) thanks to the suggestion in Bug #1002547 --- plugins/Google.py | 45 +++++++++++++++++++++----------------- test/test_Google.py | 53 ++++++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 42 deletions(-) diff --git a/plugins/Google.py b/plugins/Google.py index 1f11b5bfa..bf3a353a9 100644 --- a/plugins/Google.py +++ b/plugins/Google.py @@ -370,43 +370,48 @@ class Google(callbacks.PrivmsgCommandAndRegexp): irc.reply(url, prefixName=False) googleSnarfer = privmsgs.urlSnarfer(googleSnarfer) - _ggThread = re.compile(r'
Subject: ([^<]+)
') - _ggPlainThread = re.compile(r'Subject: (.*)') - _ggGroup = re.compile(r'Newsgroups: (?:]+>)?([^<]+)(?:)?') - _ggPlainGroup = re.compile(r'Newsgroups: (.*)') + _ggThread = re.compile(r'
Subject: ([^<]+)
', re.I) + _ggGroup = re.compile(r'Newsgroups: (?:]+>)?([^<]+)(?:)?', re.I) + _ggThreadm = re.compile(r'view the ]+)>no', re.I) + _ggSelm = re.compile(r'selm=[^&]+', re.I) def googleGroups(self, irc, msg, match): r"http://groups.google.com/[^\s]+" if not self.registryValue('groupsSnarfer', msg.args[0]): return - request = urllib2.Request(match.group(0), headers= \ - {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'}) + m = match.group(0) + header = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; ' + 'Windows NT 4.0)'} + request = urllib2.Request(m, headers=header) fd = urllib2.urlopen(request) text = fd.read() fd.close() mThread = None mGroup = None - if '&prev=/' in match.group(0): - path = re.search('view the ]+)>no',text) + if 'threadm=' in m: + path = self._ggThreadm.search(text) if path is None: return - url = 'http://groups.google.com' - request = urllib2.Request('%s%s' % (url,path.group(1)), - headers={'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5;' - 'Windows NT 4.0)'}) + url = 'http://groups.google.com%s' % path.group(1) + request = urllib2.Request(url, headers=header) + fd = urllib2.urlopen(request) + text = fd.read() + fd.close() + elif 'selm=' in m: + path = self._ggSelm.search(m) + if m is None: + return + url = 'http://groups.google.com/groups?%s' % path.group(0) + request = urllib2.Request(url, headers=header) fd = urllib2.urlopen(request) text = fd.read() fd.close() - mThread = self._ggThread.search(text) - mGroup = self._ggGroup.search(text) - elif '&output=gplain' in match.group(0): - mThread = self._ggPlainThread.search(text) - mGroup = self._ggPlainGroup.search(text) else: - mThread = self._ggThread.search(text) - mGroup = self._ggGroup.search(text) + pass + mThread = self._ggThread.search(text) + mGroup = self._ggGroup.search(text) if mThread and mGroup: irc.reply('Google Groups: %s, %s' % (mGroup.group(1), - mThread.group(1)), prefixName = False) + mThread.group(1)), prefixName=False) else: irc.errorPossibleBug('That doesn\'t appear to be a proper ' 'Google Groups page.') diff --git a/test/test_Google.py b/test/test_Google.py index 61f6899b6..00179e6f6 100644 --- a/test/test_Google.py +++ b/test/test_Google.py @@ -41,28 +41,37 @@ class GoogleTestCase(ChannelPluginTestCase, PluginDocumentation): def testGroupsSnarfer(self): conf.supybot.plugins.Google.groupsSnarfer.setValue(True) - self.assertRegexp('http://groups.google.com/groups?dq=&hl=en&' - 'lr=lang_en&ie=UTF-8&oe=UTF-8&selm=698f09f8.' - '0310132012.738e22fc%40posting.google.com', - r'comp\.lang\.python.*question: usage of __slots__') - self.assertRegexp('http://groups.google.com/groups?selm=ExDm.' - '8bj.23%40gated-at.bofh.it&oe=UTF-8&output=gplain', - r'linux\.kernel.*NFS client freezes') - self.assertRegexp('http://groups.google.com/groups?' - 'q=kernel+hot-pants&hl=en&lr=&ie=UTF-8&oe=UTF-8&' - 'selm=1.5.4.32.19970313170853.00674d60%40' - 'adan.kingston.net&rnum=1', - r'Madrid Bluegrass Ramble') - self.assertRegexp('http://groups.google.com/groups?' - 'selm=1.5.4.32.19970313170853.00674d60%40adan.' - 'kingston.net&oe=UTF-8&output=gplain', - r'Madrid Bluegrass Ramble') - self.assertRegexp('http://groups.google.com/groups?' - 'dq=&hl=en&lr=&ie=UTF-8&threadm=mailman.1010.' - '1069645289.702.python-list%40python.org' - '&prev=/groups%3Fhl%3Den%26lr%3D%26ie%3DUTF-8' - '%26group%3Dcomp.lang.python', - r'comp\.lang\.python.*What exactly are bound') + self.assertSnarfRegexp( + 'http://groups.google.com/groups?dq=&hl=en&lr=lang_en&' + 'ie=UTF-8&oe=UTF-8&selm=698f09f8.0310132012.738e22fc' + '%40posting.google.com', + r'comp\.lang\.python.*question: usage of __slots__') + self.assertSnarfRegexp( + 'http://groups.google.com/groups?selm=ExDm.8bj.23' + '%40gated-at.bofh.it&oe=UTF-8&output=gplain', + r'linux\.kernel.*NFS client freezes') + self.assertSnarfRegexp( + 'http://groups.google.com/groups?q=kernel+hot-pants&' + 'hl=en&lr=&ie=UTF-8&oe=UTF-8&selm=1.5.4.32.199703131' + '70853.00674d60%40adan.kingston.net&rnum=1', + r'Madrid Bluegrass Ramble') + self.assertSnarfRegexp( + 'http://groups.google.com/groups?selm=1.5.4.32.19970' + '313170853.00674d60%40adan.kingston.net&oe=UTF-8&' + 'output=gplain', + r'Madrid Bluegrass Ramble') + self.assertSnarfRegexp( + 'http://groups.google.com/groups?dq=&hl=en&lr=&' + 'ie=UTF-8&threadm=mailman.1010.1069645289.702.' + 'python-list%40python.org&prev=/groups%3Fhl%3Den' + '%26lr%3D%26ie%3DUTF-8%26group%3Dcomp.lang.python', + r'comp\.lang\.python.*What exactly are bound') + # Test for Bug #1002547 + self.assertSnarfRegexp( + 'http://groups.google.com/groups?q=supybot+is+the&' + 'hl=en&lr=&ie=UTF-8&c2coff=1&selm=1028329672' + '%40freshmeat.net&rnum=9', + r'fm\.announce.*SupyBot') def testConfig(self): conf.supybot.plugins.Google.groupsSnarfer.setValue(False)