mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-01-25 19:44:13 +01:00
Added utils.py, with a quality HTML stripper; removed stripHtml from other source files
This commit is contained in:
parent
f355465ad6
commit
fc20715427
@ -40,15 +40,12 @@ from baseplugin import *
|
||||
import re
|
||||
import urllib2
|
||||
|
||||
import utils
|
||||
import debug
|
||||
import ircmsgs
|
||||
import ircutils
|
||||
import callbacks
|
||||
|
||||
htmlStripper = re.compile(r'<[^>]+>')
|
||||
def stripHtml(s):
|
||||
return htmlStripper.sub('', s)
|
||||
|
||||
class Forums(callbacks.PrivmsgRegexp):
|
||||
threaded = True
|
||||
_ggThread = re.compile(r'from thread "<b>(.*?)</b>"')
|
||||
|
@ -48,17 +48,13 @@ import re
|
||||
import time
|
||||
import urllib
|
||||
import urllib2
|
||||
import htmlentitydefs
|
||||
import xml.dom.minidom
|
||||
|
||||
import utils
|
||||
import debug
|
||||
import privmsgs
|
||||
import callbacks
|
||||
|
||||
_htmlstripper = re.compile('<[^>]+>')
|
||||
def stripHtml(s):
|
||||
return _htmlstripper.sub('', s)
|
||||
|
||||
class FreshmeatException(Exception):
|
||||
pass
|
||||
|
||||
@ -132,7 +128,7 @@ class Http(callbacks.Privmsg):
|
||||
text = html.split('<P>\n', 2)[1]
|
||||
text = text.replace('.\n', '. ')
|
||||
text = text.replace('\n', ' ')
|
||||
text = stripHtml(text)
|
||||
text = utils.htmlToText(text)
|
||||
irc.reply(msg, text.strip())
|
||||
|
||||
_gkrating = re.compile(r'<font color="#FFFF33">(\d+)</font>')
|
||||
@ -246,10 +242,8 @@ class Http(callbacks.Privmsg):
|
||||
if m is None:
|
||||
irc.error(msg, 'No quote found.')
|
||||
return
|
||||
quote = m.group(1)
|
||||
quote = utils.htmlToText(m.group(1))
|
||||
quote = ' // '.join(quote.splitlines())
|
||||
for (entity, replacement) in htmlentitydefs.entitydefs.iteritems():
|
||||
quote = quote.replace(entity, replacement)
|
||||
irc.reply(msg, quote)
|
||||
|
||||
_acronymre = re.compile('<td[^>]*><b>[^<]+</b></td>[^<]+<td[^>]*>(\w+)')
|
||||
|
@ -59,6 +59,7 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
||||
callbacks.Privmsg.__init__(self)
|
||||
self.ircs = {}
|
||||
self.started = False
|
||||
self.channels = set()
|
||||
self.abbreviations = {}
|
||||
|
||||
def startrelay(self, irc, msg, args):
|
||||
@ -97,6 +98,7 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
||||
def relayjoin(self, irc, msg, args):
|
||||
"<channel>"
|
||||
channel = privmsgs.getArgs(args)
|
||||
self.channels.add(channel)
|
||||
for otherIrc in self.ircs.itervalues():
|
||||
if channel not in otherIrc.state.channels:
|
||||
otherIrc.queueMsg(ircmsgs.join(channel))
|
||||
@ -105,6 +107,7 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
||||
def relaypart(self, irc, msg, args):
|
||||
"<channel>"
|
||||
channel = privmsgs.getArgs(args)
|
||||
self.channels.remove(channel)
|
||||
for otherIrc in self.ircs.itervalues():
|
||||
if channel in otherIrc.state.channels:
|
||||
otherIrc.queueMsg(ircmsgs.part(channel))
|
||||
@ -122,6 +125,8 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
||||
irc = irc.getRealIrc()
|
||||
if self.started and ircutils.isChannel(msg.args[0]):
|
||||
channel = msg.args[0]
|
||||
if channel not in self.channels:
|
||||
return
|
||||
#debug.printf('self.abbreviations = %s' % self.abbreviations)
|
||||
#debug.printf('self.ircs = %s' % self.ircs)
|
||||
#debug.printf('irc = %s' % irc)
|
||||
@ -171,6 +176,8 @@ class Relay(privmsgs.CapabilityCheckingPrivmsg):
|
||||
rAction = re.compile(r'\* \w+/(?:%s) ' % '|'.join(abbreviations))
|
||||
if not (rPrivmsg.match(msg.args[1]) or rAction.match(msg.args[1])):
|
||||
channel = msg.args[0]
|
||||
if channel not in self.channels:
|
||||
return
|
||||
abbreviation = self.abbreviations[irc]
|
||||
s = self._formatPrivmsg(irc.nick, abbreviation, msg)
|
||||
for otherIrc in self.ircs.itervalues():
|
||||
|
66
src/utils.py
Executable file
66
src/utils.py
Executable file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
###
|
||||
# Copyright (c) 2002, Jeremiah Fincher
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions, and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the author of this software nor the name of
|
||||
# contributors to this software may be used to endorse or promote products
|
||||
# derived from this software without specific prior written consent.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###
|
||||
|
||||
"""
|
||||
Simple utility functions.
|
||||
"""
|
||||
|
||||
from fix import *
|
||||
|
||||
import sgmllib
|
||||
import htmlentitydefs
|
||||
|
||||
class HtmlToText(sgmllib.SGMLParser):
|
||||
"""Taken from some eff-bot code on c.l.p."""
|
||||
entitydefs = htmlentitydefs.entitydefs
|
||||
def __init__(self):
|
||||
self.data = []
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
|
||||
def unknown_starttag(self, tag, attrib):
|
||||
self.data.append(" ")
|
||||
|
||||
def unknown_endtag(self, tag):
|
||||
self.data.append(" ")
|
||||
|
||||
def handle_data(self, data):
|
||||
self.data.append(data)
|
||||
|
||||
def getText(self):
|
||||
text = ''.join(self.data).strip()
|
||||
return ''.join(text.split()) # normalize whitespace
|
||||
|
||||
def htmlToText(s):
    """Return the string s with HTML tags removed and whitespace normalized.

    The markup is run through an HtmlToText parser and the text it
    collected is returned.
    """
    parser = HtmlToText()
    parser.feed(s)
    # feed() may hold back an incomplete trailing construct (e.g. a dangling
    # '<' or an unterminated entity) waiting for more input; close() forces
    # the parser to process whatever is still buffered so it is not lost.
    parser.close()
    return parser.getText()
|
||||
|
||||
# vim:set shiftwidth=4 tabstop=8 expandtab textwidth=78:
|
Loading…
Reference in New Issue
Block a user