mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-27 05:09:23 +01:00
Finished the Google Groups fix. Added a check for links to framed pages so that the non-framed version of the page is fetched and parsed instead.
This commit is contained in:
parent
ce741a1f0e
commit
f7939bd43b
@ -51,10 +51,20 @@ class Forums(callbacks.PrivmsgRegexp):
|
|||||||
_ggGroup = re.compile(r'Newsgroups: <a[^>]+>([^<]+)</a>')
|
_ggGroup = re.compile(r'Newsgroups: <a[^>]+>([^<]+)</a>')
|
||||||
def googlegroups(self, irc, msg, match):
|
def googlegroups(self, irc, msg, match):
|
||||||
r"http://groups.google.com/[^\s]+"
|
r"http://groups.google.com/[^\s]+"
|
||||||
request = urllib2.Request(match.group(0)+'&frame=off', headers=\
|
request = urllib2.Request(match.group(0), headers=\
|
||||||
{'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'})
|
{'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)'})
|
||||||
fd = urllib2.urlopen(request)
|
fd = urllib2.urlopen(request)
|
||||||
text = fd.read()
|
text = fd.read()
|
||||||
|
fd.close()
|
||||||
|
if match.group(0).find('&prev=/') >= 0:
|
||||||
|
path = re.search('view the <a href=([^>]+)>no',text)
|
||||||
|
url = 'http://groups.google.com'
|
||||||
|
request = urllib2.Request('%s%s' % (url,path.group(1)),
|
||||||
|
headers={'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5;'
|
||||||
|
'Windows NT 4.0)'})
|
||||||
|
fd = urllib2.urlopen(request)
|
||||||
|
text = fd.read()
|
||||||
|
fd.close()
|
||||||
mThread = self._ggThread.search(text)
|
mThread = self._ggThread.search(text)
|
||||||
mGroup = self._ggGroup.search(text)
|
mGroup = self._ggGroup.search(text)
|
||||||
if mThread and mGroup:
|
if mThread and mGroup:
|
||||||
|
Loading…
Reference in New Issue
Block a user