Finished the Google Groups fix. Added a check for links to framed pages so that the corresponding non-framed page is fetched and parsed instead.

This commit is contained in:
James Vega 2003-03-28 17:56:27 +00:00
parent ce741a1f0e
commit f7939bd43b

View File

@ -51,10 +51,20 @@ class Forums(callbacks.PrivmsgRegexp):
_ggGroup = re.compile(r'Newsgroups: <a[^>]+>([^<]+)</a>') _ggGroup = re.compile(r'Newsgroups: <a[^>]+>([^<]+)</a>')
def googlegroups(self, irc, msg, match): def googlegroups(self, irc, msg, match):
r"http://groups.google.com/[^\s]+" r"http://groups.google.com/[^\s]+"
request = urllib2.Request(match.group(0)+'&frame=off', headers=\ request = urllib2.Request(match.group(0), headers=\
{'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'}) {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'})
fd = urllib2.urlopen(request) fd = urllib2.urlopen(request)
text = fd.read() text = fd.read()
fd.close()
if match.group(0).find('&prev=/') >= 0:
path = re.search('view the <a href=([^>]+)>no',text)
url = 'http://groups.google.com'
request = urllib2.Request('%s%s' % (url,path.group(1)),
headers={'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5;'
'Windows NT 4.0)'})
fd = urllib2.urlopen(request)
text = fd.read()
fd.close()
mThread = self._ggThread.search(text) mThread = self._ggThread.search(text)
mGroup = self._ggGroup.search(text) mGroup = self._ggGroup.search(text)
if mThread and mGroup: if mThread and mGroup: