mirror of
https://github.com/Mikaela/Limnoria.git
synced 2024-11-19 08:59:27 +01:00
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files
This commit is contained in:
parent
f355465ad6
commit
fc20715427
@ -40,15 +40,12 @@ from baseplugin import *
|
|||||||
import re
|
import re
|
||||||
import urllib2
|
import urllib2
|
||||||
|
|
||||||
|
import utils
|
||||||
import debug
|
import debug
|
||||||
import ircmsgs
|
import ircmsgs
|
||||||
import ircutils
|
import ircutils
|
||||||
import callbacks
|
import callbacks
|
||||||
|
|
||||||
htmlStripper = re.compile(r'<[^>]+>')
|
|
||||||
def stripHtml(s):
|
|
||||||
return htmlStripper.sub('', s)
|
|
||||||
|
|
||||||
class Forums(callbacks.PrivmsgRegexp):
|
class Forums(callbacks.PrivmsgRegexp):
|
||||||
threaded = True
|
threaded = True
|
||||||
_ggThread = re.compile(r'from thread "<b>(.*?)</b>"')
|
_ggThread = re.compile(r'from thread "<b>(.*?)</b>"')
|
||||||
|
@ -48,17 +48,13 @@ import re
|
|||||||
import time
|
import time
|
||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
import htmlentitydefs
|
|
||||||
import xml.dom.minidom
|
import xml.dom.minidom
|
||||||
|
|
||||||
|
import utils
|
||||||
import debug
|
import debug
|
||||||
import privmsgs
|
import privmsgs
|
||||||
import callbacks
|
import callbacks
|
||||||
|
|
||||||
_htmlstripper = re.compile('<[^>]+>')
|
|
||||||
def stripHtml(s):
|
|
||||||
return _htmlstripper.sub('', s)
|
|
||||||
|
|
||||||
class FreshmeatException(Exception):
|
class FreshmeatException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -132,7 +128,7 @@ class Http(callbacks.Privmsg):
|
|||||||
text = html.split('<P>\n', 2)[1]
|
text = html.split('<P>\n', 2)[1]
|
||||||
text = text.replace('.\n', '. ')
|
text = text.replace('.\n', '. ')
|
||||||
text = text.replace('\n', ' ')
|
text = text.replace('\n', ' ')
|
||||||
text = stripHtml(text)
|
text = utils.htmlToText(text)
|
||||||
irc.reply(msg, text.strip())
|
irc.reply(msg, text.strip())
|
||||||
|
|
||||||
_gkrating = re.compile(r'<font color="#FFFF33">(\d+)</font>')
|
_gkrating = re.compile(r'<font color="#FFFF33">(\d+)</font>')
|
||||||
@ -246,10 +242,8 @@ class Http(callbacks.Privmsg):
|
|||||||
if m is None:
|
if m is None:
|
||||||
irc.error(msg, 'No quote found.')
|
irc.error(msg, 'No quote found.')
|
||||||
return
|
return
|
||||||
quote = m.group(1)
|
quote = utils.htmlToText(m.group(1))
|
||||||
quote = ' // '.join(quote.splitlines())
|
quote = ' // '.join(quote.splitlines())
|
||||||
for (entity, replacement) in htmlentitydefs.entitydefs.iteritems():
|
|
||||||
quote = quote.replace(entity, replacement)
|
|
||||||
irc.reply(msg, quote)
|
irc.reply(msg, quote)
|
||||||
|
|
||||||
_acronymre = re.compile('<td[^>]*><b>[^<]+</b></td>[^<]+<td[^>]*>(\w+)')
|
_acronymre = re.compile('<td[^>]*><b>[^<]+</b></td>[^<]+<td[^>]*>(\w+)')
|
||||||
|
@ -59,6 +59,7 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
|||||||
callbacks.Privmsg.__init__(self)
|
callbacks.Privmsg.__init__(self)
|
||||||
self.ircs = {}
|
self.ircs = {}
|
||||||
self.started = False
|
self.started = False
|
||||||
|
self.channels = set()
|
||||||
self.abbreviations = {}
|
self.abbreviations = {}
|
||||||
|
|
||||||
def startrelay(self, irc, msg, args):
|
def startrelay(self, irc, msg, args):
|
||||||
@ -97,6 +98,7 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
|||||||
def relayjoin(self, irc, msg, args):
|
def relayjoin(self, irc, msg, args):
|
||||||
"<channel>"
|
"<channel>"
|
||||||
channel = privmsgs.getArgs(args)
|
channel = privmsgs.getArgs(args)
|
||||||
|
self.channels.add(channel)
|
||||||
for otherIrc in self.ircs.itervalues():
|
for otherIrc in self.ircs.itervalues():
|
||||||
if channel not in otherIrc.state.channels:
|
if channel not in otherIrc.state.channels:
|
||||||
otherIrc.queueMsg(ircmsgs.join(channel))
|
otherIrc.queueMsg(ircmsgs.join(channel))
|
||||||
@ -105,6 +107,7 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
|||||||
def relaypart(self, irc, msg, args):
|
def relaypart(self, irc, msg, args):
|
||||||
"<channel>"
|
"<channel>"
|
||||||
channel = privmsgs.getArgs(args)
|
channel = privmsgs.getArgs(args)
|
||||||
|
self.channels.remove(channel)
|
||||||
for otherIrc in self.ircs.itervalues():
|
for otherIrc in self.ircs.itervalues():
|
||||||
if channel in otherIrc.state.channels:
|
if channel in otherIrc.state.channels:
|
||||||
otherIrc.queueMsg(ircmsgs.part(channel))
|
otherIrc.queueMsg(ircmsgs.part(channel))
|
||||||
@ -122,6 +125,8 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
|||||||
irc = irc.getRealIrc()
|
irc = irc.getRealIrc()
|
||||||
if self.started and ircutils.isChannel(msg.args[0]):
|
if self.started and ircutils.isChannel(msg.args[0]):
|
||||||
channel = msg.args[0]
|
channel = msg.args[0]
|
||||||
|
if channel not in self.channels:
|
||||||
|
return
|
||||||
#debug.printf('self.abbreviations = %s' % self.abbreviations)
|
#debug.printf('self.abbreviations = %s' % self.abbreviations)
|
||||||
#debug.printf('self.ircs = %s' % self.ircs)
|
#debug.printf('self.ircs = %s' % self.ircs)
|
||||||
#debug.printf('irc = %s' % irc)
|
#debug.printf('irc = %s' % irc)
|
||||||
@ -171,6 +176,8 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
|||||||
rAction = re.compile(r'\* \w+/(?:%s) ' % '|'.join(abbreviations))
|
rAction = re.compile(r'\* \w+/(?:%s) ' % '|'.join(abbreviations))
|
||||||
if not (rPrivmsg.match(msg.args[1]) or rAction.match(msg.args[1])):
|
if not (rPrivmsg.match(msg.args[1]) or rAction.match(msg.args[1])):
|
||||||
channel = msg.args[0]
|
channel = msg.args[0]
|
||||||
|
if channel not in self.channels:
|
||||||
|
return
|
||||||
abbreviation = self.abbreviations[irc]
|
abbreviation = self.abbreviations[irc]
|
||||||
s = self._formatPrivmsg(irc.nick, abbreviation, msg)
|
s = self._formatPrivmsg(irc.nick, abbreviation, msg)
|
||||||
for otherIrc in self.ircs.itervalues():
|
for otherIrc in self.ircs.itervalues():
|
||||||
|
66
src/utils.py
Executable file
66
src/utils.py
Executable file
@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
###
|
||||||
|
# Copyright (c) 2002, Jeremiah Fincher
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# * Redistributions of source code must retain the above copyright notice,
|
||||||
|
# this list of conditions, and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions, and the following disclaimer in the
|
||||||
|
# documentation and/or other materials provided with the distribution.
|
||||||
|
# * Neither the name of the author of this software nor the name of
|
||||||
|
# contributors to this software may be used to endorse or promote products
|
||||||
|
# derived from this software without specific prior written consent.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
###
|
||||||
|
|
||||||
|
"""
|
||||||
|
Simple utility functions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fix import *
|
||||||
|
|
||||||
|
import sgmllib
|
||||||
|
import htmlentitydefs
|
||||||
|
|
||||||
|
class HtmlToText(sgmllib.SGMLParser):
|
||||||
|
"""Taken from some eff-bot code on c.l.p."""
|
||||||
|
entitydefs = htmlentitydefs.entitydefs
|
||||||
|
def __init__(self):
|
||||||
|
self.data = []
|
||||||
|
sgmllib.SGMLParser.__init__(self)
|
||||||
|
|
||||||
|
def unknown_starttag(self, tag, attrib):
|
||||||
|
self.data.append(" ")
|
||||||
|
|
||||||
|
def unknown_endtag(self, tag):
|
||||||
|
self.data.append(" ")
|
||||||
|
|
||||||
|
def handle_data(self, data):
|
||||||
|
self.data.append(data)
|
||||||
|
|
||||||
|
def getText(self):
|
||||||
|
text = ''.join(self.data).strip()
|
||||||
|
return ''.join(text.split()) # normalize whitespace
|
||||||
|
|
||||||
|
def htmlToText(s):
    """Return the text of the HTML string s, as produced by HtmlToText.

    s is fed to a fresh HtmlToText parser and the parser's getText()
    result is returned.
    """
    x = HtmlToText()
    x.feed(s)
    # feed() may hold back a trailing incomplete construct (for example an
    # unterminated entity reference or tag) waiting for more input; close()
    # forces that buffered data to be processed so it is not silently lost.
    x.close()
    return x.getText()
|
||||||
|
|
||||||
|
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
Loading…
Reference in New Issue
Block a user