Merge branch 'sedregex-merge' into testing

Import SedRegex plugin: History replacer using sed-style expressions.
This commit is contained in:
James Lu 2020-01-03 21:24:21 -08:00
commit 9570ea9fb4
7 changed files with 547 additions and 1 deletions

View File

@ -8,7 +8,7 @@ install:
script:
- echo $TRAVIS_PYTHON_VERSION
- python setup.py install
- supybot-test test -v --plugins-dir=./plugins/ --no-network --disable-multiprocessing
- supybot-test test -v --plugins-dir=./plugins/ --no-network
- if [ "$WITH_OPT_DEPS" = "true" ] -a [[ "$TRAVIS_PYTHON_VERSION" =~ ^3\.[4-9] ]] ; then python -m irctest irctest.controllers.limnoria; fi
notifications:
email: false

View File

@ -0,0 +1,25 @@
History replacer using sed-style expressions.
### Configuration
Enable SedRegex on the desired channels: `config channel #yourchannel plugins.sedregex.enable True`
### Usage
After enabling SedRegex, typing a regex in the form `s/text/replacement/` will make the bot announce replacements.
```
20:24 <~GL> helli world
20:24 <~GL> s/i/o/
20:24 <@Lily> GL meant to say: hello world
```
You can also do `othernick: s/text/replacement/` to only replace messages from a certain user. Supybot ignores are respected by the plugin, and messages from ignored users will only be considered if their nick is explicitly given.
#### Regex flags
The following regex flags (i.e. the `g` in `s/abc/def/g`, etc.) are supported:
- `i`: case insensitive replacement
- `g`: replace all occurrences of the original text
- `s`: *(custom flag specific to this plugin)* replace only messages from the caller

View File

@ -0,0 +1,63 @@
###
# Copyright (c) 2015, Michael Daniel Telatynski <postmaster@webdevguru.co.uk>
# Copyright (c) 2015-2020, James Lu <james@overdrivenetworks.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
"""
History replacer using sed-style expressions.
"""
import supybot
import supybot.world as world
__version__ = supybot.version.version
__author__ = supybot.Author("Michael Daniel Telatynski", "t3chguy", "postmaster@webdevguru.co.uk")
__contributors__ = {supybot.authors.jlu:
["options bolding the replacement text", "misc. bug fixes and enhancements"],
supybot.Author('nyuszika7h', 'nyuszika7h', 'nyuszika7h@openmailbox.org'):
["_unpack_sed method within plugin.py"]
}
__maintainer__ = supybot.authors.limnoria_core
__url__ = 'https://github.com/ProgVal/Limnoria/tree/master/plugins/SedRegex'
from . import config
from . import plugin
from importlib import reload
# Re-execute both submodules so that a fresh import of this package picks up
# the current contents of config.py and plugin.py.
reload(config)
reload(plugin)
# The test module is only imported while world.testing is set.
if world.testing:
    from . import test
# Package-level aliases for the plugin class and the configuration hook.
# NOTE(review): presumably these are the names the plugin loader looks up;
# confirm against the loader.
Class = plugin.Class
configure = config.configure
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -0,0 +1,68 @@
###
# Copyright (c) 2015, Michael Daniel Telatynski <postmaster@webdevguru.co.uk>
# Copyright (c) 2015-2019, James Lu <james@overdrivenetworks.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
import supybot.conf as conf
import supybot.registry as registry
# Use the i18n machinery when it can be imported; otherwise fall back to an
# identity function so that _("...") still works. Only ImportError is caught
# so that unrelated failures (and KeyboardInterrupt / SystemExit) are not
# silently swallowed; this matches the same fallback in plugin.py.
try:
    from supybot.i18n import PluginInternationalization
    _ = PluginInternationalization('SedRegex')
except ImportError:
    _ = lambda x: x
def configure(advanced):
    """Register the SedRegex plugin during interactive configuration.

    :param advanced: truthy when the user asked for the advanced
        configuration; in that case a short description of the plugin is
        printed as well.
    """
    # `output` must be imported here too: it is called below, and without
    # it the advanced branch raises NameError.
    from supybot.questions import expect, anything, something, yn, output
    conf.registerPlugin('SedRegex', True)
    if advanced:
        output("""The SedRegex plugin allows you to make Perl/sed-style regex
replacements to your chat history.""")
# Configuration group for this plugin; every value below is registered on it.
SedRegex = conf.registerPlugin('SedRegex')
# Channel-specific values: error reporting, bolding of the replacement text,
# the per-channel on/off switch (off by default), and whether earlier
# messages flagged as regex-like are skipped.
conf.registerChannelValue(SedRegex, 'displayErrors',
    registry.Boolean(True, _("""Should errors be displayed?""")))
conf.registerChannelValue(SedRegex, 'boldReplacementText',
    registry.Boolean(True, _("""Should the replacement text be bolded?""")))
conf.registerChannelValue(SedRegex, 'enable',
    registry.Boolean(False, _("""Should Perl/sed-style regex replacing
work in this channel?""")))
conf.registerChannelValue(SedRegex, 'ignoreRegex',
    registry.Boolean(True, _("""Should Perl/sed regex replacing
ignore messages which look like valid regex?""")))
# Global value: upper bound, as a positive float defaulting to 0.5, on the
# time spent processing one expression.
conf.registerGlobalValue(SedRegex, 'processTimeout',
    registry.PositiveFloat(0.5, _("""Sets the timeout when processing a single
regexp. The default should be adequate unless
you have a busy or low-powered system that
cannot process regexps quickly enough. However,
you will not want to set this value too high
as that would make your bot vulnerable to ReDoS
attacks.""")))
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

View File

@ -0,0 +1 @@
# Stub so local is a module, used for third-party modules

203
plugins/SedRegex/plugin.py Normal file
View File

@ -0,0 +1,203 @@
###
# Copyright (c) 2015, Michael Daniel Telatynski <postmaster@webdevguru.co.uk>
# Copyright (c) 2015-2019, James Lu <james@overdrivenetworks.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
from supybot.commands import *
from supybot.commands import ProcessTimeoutError
import supybot.plugins as plugins
import supybot.ircmsgs as ircmsgs
import supybot.callbacks as callbacks
import supybot.ircutils as ircutils
import supybot.ircdb as ircdb
import supybot.utils as utils
import re
import sys
try:
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization('SedRegex')
except ImportError:
_ = lambda x: x
if sys.version_info[0] < 3:
raise ImportError('This plugin requires Python 3. For a legacy version of this plugin that still '
'supports Python 2, consult the python2-legacy branch at '
'https://github.com/jlu5/SupyPlugins/tree/python2-legacy')
# Matches a whole message of the form
#   [nick: |nick, ]s<delim>pattern<delim>replacement<delim>flags
# Named groups:
#   nick        - optional; text before a ":" or "," followed by one space
#   delim       - a single character that is neither a word character nor
#                 whitespace; the same character must close both parts
#   pattern     - shortest run of characters up to the next delimiter
#   replacement - shortest run of characters up to the final delimiter
#   flags       - zero or more lowercase letters at the end of the message
SED_REGEX = re.compile(r"^(?:(?P<nick>.+?)[:,] )?s(?P<delim>[^\w\s])(?P<pattern>.*?)(?P=delim)"
                       r"(?P<replacement>.*?)(?P=delim)(?P<flags>[a-z]*)$")
# Replace newlines and friends with things like literal "\n" (backslash and "n")
axe_spaces = utils.str.MultipleReplacer({'\n': '\\n', '\t': '\\t', '\r': '\\r'})
class SearchNotFound(Exception):
    """Signals that no message in the searched history matched the pattern."""
class SedRegex(callbacks.PluginRegexp):
    """History replacer using sed-style regex syntax."""
    threaded = True
    public = True
    unaddressedRegexps = ['replacer']

    @staticmethod
    def _unpack_sed(expr):
        """Parse a sed-style expression into its compiled parts.

        :param expr: the full message text, optionally prefixed with
            ``nick: `` or ``nick, ``.
        :returns: ``(pattern, replacement, count, raw_flags)`` where
            ``pattern`` is a compiled regex, ``count`` is 0 for a global
            replacement and 1 otherwise, and ``raw_flags`` is the set of flag
            letters given; ``None`` when ``expr`` is not a sed expression.
        :raises ValueError: if ``expr`` contains a NUL character.
        :raises re.error: if the pattern is not a valid regular expression.
        """
        # NUL is used below as a placeholder for escaped delimiters, so it
        # must not already be present.
        if '\0' in expr:
            raise ValueError('Expression can\'t contain NUL')

        # Take the delimiter from the regex rather than from a fixed index:
        # with a "nick: " prefix the delimiter is not the second character.
        raw_match = SED_REGEX.search(expr)
        if not raw_match:
            return
        delim = raw_match.group('delim')

        # Replace every backslash-escaped delimiter with NUL so that
        # SED_REGEX does not treat it as a field separator.
        chars = []
        for (i, c) in enumerate(expr):
            if c == delim and i > 0 and expr[i - 1] == '\\':
                chars[-1] = '\0'
                continue
            chars.append(c)
        escaped_expr = ''.join(chars)

        match = SED_REGEX.search(escaped_expr)
        if not match:
            return

        groups = match.groupdict()
        # Put the literal delimiter back where it had been escaped.
        pattern = groups['pattern'].replace('\0', delim)
        replacement = groups['replacement'].replace('\0', delim)

        raw_flags = set(groups['flags'])
        # 'g' replaces every occurrence (count 0); otherwise only the first.
        count = 0 if 'g' in raw_flags else 1
        flags = re.IGNORECASE if 'i' in raw_flags else 0

        pattern = re.compile(pattern, flags)
        return (pattern, replacement, count, raw_flags)

    # No docstring here: replacer.__doc__ is assigned from SED_REGEX.pattern
    # at the end of the class body.
    #
    # Handles a message that matched SED_REGEX: parses the expression, picks
    # the nick whose messages are searched, runs the search through
    # process() with the configured timeout, and replies with the result or
    # an error.
    def replacer(self, irc, msg, regex):
        if not self.registryValue('enable', msg.args[0]):
            return
        iterable = reversed(irc.state.history)
        # Mark this message so later searches can skip it (see ignoreRegex).
        msg.tag('Replacer')

        try:
            (pattern, replacement, count, flags) = self._unpack_sed(msg.args[1])
        except Exception as e:
            self.log.warning(_("SedRegex error: %s"), e, exc_info=True)
            if self.registryValue('displayErrors', msg.args[0]):
                irc.error('%s.%s: %s' % (e.__class__.__module__, e.__class__.__name__, e))
            return

        # Skip the most recent history entry before searching.
        next(iterable)
        if 's' in flags:  # Special 's' flag lets the bot only look at self messages
            target = msg.nick
        else:
            target = regex.group('nick')
        if not ircutils.isNick(str(target), strictRfc=True):
            return

        regex_timeout = self.registryValue('processTimeout')
        try:
            message = process(self._replacer_process, irc, msg,
                              target, pattern, replacement, count, iterable,
                              timeout=regex_timeout, pn=self.name(), cn='replacer')
        except ProcessTimeoutError:
            irc.error(_("Search timed out."))
        except SearchNotFound:
            irc.error(_("Search not found in the last %i messages.") %
                      len(irc.state.history))
        except Exception as e:
            if self.registryValue('displayErrors', msg.args[0]):
                irc.error('%s.%s: %s' % (e.__class__.__module__,
                                         e.__class__.__name__, e))
        else:
            irc.reply(message, prefixNick=False)

    def _replacer_process(self, irc, msg, target, pattern, replacement, count, messages):
        """Find the first message matching ``pattern`` and build the reply.

        :param irc: connection the candidate messages must have been
            received by.
        :param msg: the message carrying the sed expression.
        :param target: nick whose messages are searched, or a false value to
            search every (non-ignored) sender.
        :param pattern: compiled regular expression to look for.
        :param replacement: replacement text.
        :param count: maximum number of substitutions (0 means all).
        :param messages: iterable of messages to search, in search order.
        :returns: the reply text announcing the corrected message.
        :raises SearchNotFound: if no message matched.
        """
        for m in messages:
            if m.command in ('PRIVMSG', 'NOTICE') and \
                    ircutils.strEqual(m.args[0], msg.args[0]) and m.tagged('receivedBy') == irc:
                # Compare nicks the same way the channel is compared above,
                # so "Nick: s/a/b/" also finds messages sent by "nick".
                if target and not ircutils.strEqual(m.nick, target):
                    continue
                # Don't snarf ignored users' messages unless specifically
                # told to.
                if ircdb.checkIgnored(m.prefix) and not target:
                    continue

                # When running substitutions, ignore the "* nick" part of any actions.
                action = ircmsgs.isAction(m)
                if action:
                    text = ircmsgs.unAction(m)
                else:
                    text = m.args[1]

                # Optionally skip earlier messages tagged 'Replacer'.
                if self.registryValue('ignoreRegex', msg.args[0]) and \
                        m.tagged('Replacer'):
                    continue
                if m.nick == msg.nick:
                    messageprefix = msg.nick
                else:
                    messageprefix = '%s thinks %s' % (msg.nick, m.nick)
                try:
                    replace_result = pattern.search(text)
                    if replace_result:
                        if self.registryValue('boldReplacementText', msg.args[0]):
                            replacement = ircutils.bold(replacement)
                        subst = pattern.sub(replacement, text, count)
                        if action:  # If the message was an ACTION, prepend the nick back.
                            subst = '* %s %s' % (m.nick, subst)

                        subst = axe_spaces(subst)

                        return _("%s meant to say: %s") % \
                            (messageprefix, subst)
                except Exception as e:
                    self.log.warning(_("SedRegex error: %s"), e, exc_info=True)
                    raise

        self.log.debug(_("SedRegex: Search %r not found in the last %i messages of %s."),
                       msg.args[1], len(irc.state.history), msg.args[0])
        raise SearchNotFound()
    replacer.__doc__ = SED_REGEX.pattern
Class = SedRegex
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

186
plugins/SedRegex/test.py Normal file
View File

@ -0,0 +1,186 @@
###
# Copyright (c) 2017-2019, James Lu <james@overdrivenetworks.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###
from __future__ import print_function
from supybot.test import *
class SedRegexTestCase(ChannelPluginTestCase):
    # Hostmasks of two additional users, used to send messages that do not
    # come from the default test sender.
    other = "blah!blah@someone.else"
    other2 = "ghost!ghost@spooky"

    plugins = ('SedRegex', 'Utilities')
    # The plugin is enabled and bolding is turned off, so expected strings
    # contain no formatting codes unless a test re-enables bolding.
    config = {'plugins.sedregex.enable': True,
              'plugins.sedregex.boldReplacementText': False}

    # getMsg() stalls if no message is ever sent (i.e. if the plugin fails to respond to a request)
    # We should limit the timeout to prevent the tests from taking forever.
    timeout = 3

    # Default (case-sensitive) replacement: only the lowercase "abcd" changes.
    def testSimpleReplace(self):
        self.feedMsg('Abcd abcdefgh')
        self.feedMsg('s/abcd/test/')
        # Run an empty command so that messages from the previous trigger are caught.
        m = self.getMsg(' ')
        self.assertIn('Abcd testefgh', str(m))

    # The 'i' flag makes the match case-insensitive.
    def testCaseInsensitiveReplace(self):
        self.feedMsg('Aliens Are Invading, Help!')
        self.feedMsg('s/a/e/i')
        m = self.getMsg(' ')
        self.assertIn('eliens', str(m))

    # The 'g' flag replaces every (case-sensitive) occurrence.
    def testGlobalReplace(self):
        self.feedMsg('AAaa aaAa a b')
        self.feedMsg('s/a/e/g')
        m = self.getMsg(' ')
        self.assertIn('AAee eeAe e b', str(m))

    # 'g' and 'i' can be combined.
    def testGlobalCaseInsensitiveReplace(self):
        self.feedMsg('Abba')
        self.feedMsg('s/a/e/gi')
        m = self.getMsg(' ')
        self.assertIn('ebbe', str(m))

    # The 's' flag restricts the search to the caller's own messages, so the
    # more recent message from another user is skipped.
    def testOnlySelfReplace(self):
        self.feedMsg('evil machines')
        self.feedMsg('evil tacocats', frm=self.__class__.other)
        self.feedMsg('s/evil/kind/s')
        m = self.getMsg(' ')
        self.assertIn('kind machines', str(m))

    # All three flags together.
    def testAllFlagsReplace(self):
        self.feedMsg('Terrible, terrible crimes')
        self.feedMsg('Terrible, terrible TV shows', frm=self.__class__.other)
        self.feedMsg('s/terr/horr/sgi')
        m = self.getMsg(' ')
        self.assertIn('horrible, horrible crimes', str(m))

    # Correcting somebody else's message uses the "X thinks Y meant to say" wording.
    def testOtherPersonReplace(self):
        self.feedMsg('yeah, right', frm=self.__class__.other)
        self.feedMsg('s/right/left/', frm=self.__class__.other2)
        m = self.getMsg(' ')
        # Note: using the bot prefix for the s/right/left/ part causes the first nick in "X thinks Y"
        # to be empty? It works fine in runtime though...
        self.assertIn('%s thinks %s meant to say' % (ircutils.nickFromHostmask(self.__class__.other2),
                                                     ircutils.nickFromHostmask(self.__class__.other)), str(m))

    # A "nick: " prefix selects that user's message even when newer messages exist.
    def testExplicitOtherReplace(self):
        self.feedMsg('ouch', frm=self.__class__.other2)
        self.feedMsg('poof', frm=self.__class__.other)
        self.feedMsg('wow!')
        self.feedMsg('%s: s/^/p/' % ircutils.nickFromHostmask(self.__class__.other2))
        m = self.getMsg(' ')
        self.assertIn('pouch', str(m))

    # With boldReplacementText enabled, each replacement is wrapped in \x02.
    @unittest.skipUnless(sys.version_info[0] >= 3, 'Test fails on Python 2.')
    def testBoldReplacement(self):
        with conf.supybot.plugins.sedregex.boldReplacementText.context(True):
            self.feedMsg('hahahaha', frm=self.__class__.other)

            # One replacement
            self.feedMsg('s/h/H/', frm=self.__class__.other2)
            m = self.getMsg(' ')
            self.assertIn('\x02H\x02aha', str(m))

            # Replace all instances
            self.feedMsg('s/h/H/g', frm=self.__class__.other2)
            m = self.getMsg(' ')
            self.assertIn('\x02H\x02a\x02H\x02a', str(m))

            # One whole word
            self.feedMsg('sweet dreams are made of this', frm=self.__class__.other)
            self.feedMsg('s/this/cheese/', frm=self.__class__.other2)
            m = self.getMsg(' ')
            self.assertIn('of \x02cheese\x02', str(m))

    # Symbols other than "/" work as delimiters.
    def testNonSlashSeparator(self):
        self.feedMsg('we are all decelopers on this blessed day')
        self.feedMsg('s.c.v.')
        m = self.getMsg(' ')
        self.assertIn('developers', str(m))

        self.feedMsg('4 / 2 = 8')
        self.feedMsg('s@/@*@')
        m = self.getMsg(' ')
        self.assertIn('4 * 2 = 8', str(m))

    # Whitespace and letters are not accepted as delimiters, so no
    # replacement may appear anywhere in the history.
    def testWeirdSeparatorsFail(self):
        self.feedMsg("can't touch this", frm=self.__class__.other)
        # Only symbols are allowed as separators
        self.feedMsg('blah: s a b ')
        self.feedMsg('blah: sdadbd')
        m = self.getMsg('echo dummy message')

        # XXX: this is a total hack...
        for msg in self.irc.state.history:
            print("Message in history: %s" % msg, end='')
            self.assertNotIn("cbn't", str(msg))

    # ACTION messages are rewritten and shown in "* nick text" form.
    def testActionReplace(self):
        self.feedMsg("\x01ACTION sleeps\x01")
        self.feedMsg('s/sleeps/wakes/')
        m = self.getMsg(' ')
        self.assertIn('meant to say: * %s wakes' % self.nick, str(m))

    # Same as above, for an ACTION sent by another user.
    def testOtherPersonActionReplace(self):
        self.feedMsg("\x01ACTION sleeps\x01", frm=self.__class__.other)
        self.feedMsg('s/sleeps/wakes/')
        m = self.getMsg(' ')
        n = ircutils.nickFromHostmask(self.__class__.other)
        self.assertIn('thinks %s meant to say: * %s wakes' % (n, n), str(m))

    # https://github.com/jlu5/SupyPlugins/commit/e19abe049888667c3d0a4eb4a2c3ae88b8bea511
    # We want to make sure the bot treats channel names case-insensitively, if some client
    # writes to it using a different case.
    def testCaseNormalizationInRead(self):
        assert self.channel != self.channel.title()  # In case Limnoria's defaults change
        self.feedMsg("what a strange bug", to=self.channel.title())
        self.feedMsg('s/strange/hilarious/', to=self.channel)
        m = self.getMsg(' ')
        self.assertIn('what a hilarious bug', str(m))

    # Same check with the differently-cased channel name on the sed expression itself.
    def testCaseNormalizationInReplace(self):
        assert self.channel != self.channel.title()  # In case Limnoria's defaults change
        self.feedMsg("Segmentation fault", to=self.channel)
        self.feedMsg('s/$/ (core dumped)/', to=self.channel.title())
        m = self.getMsg(' ')
        self.assertIn('Segmentation fault (core dumped)', str(m))

    # A pattern with catastrophic backtracking must produce the "timed out" error.
    def testReDoSTimeout(self):
        # From https://snyk.io/blog/redos-and-catastrophic-backtracking/
        for idx in range(500):
            self.feedMsg("ACCCCCCCCCCCCCCCCCCCCCCCCCCCCX")
        self.feedMsg(r"s/A(B|C+)+D/this should abort/")
        m = self.getMsg(' ', timeout=1)
        self.assertIn('timed out', str(m))
# TODO: test ignores
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: