From 9adb4f0e8c13ab2a4bf8775d0d9e84f1435a368d Mon Sep 17 00:00:00 2001 From: James Lu Date: Mon, 12 Oct 2020 19:41:10 -0700 Subject: [PATCH] SedRegex: ignore regexp-like messages sent before the plugin is active Reported by fred` at #limnoria. This adds a new message tag applied to all messages SedRegex has seen, in addition to the one for marking messages parsed as a regexp. SedRegex will now look through the message history and check whether each message without the "seen" tag is in fact a regexp, before marking it as seen so that it doesn't do repeated work. --- plugins/SedRegex/constants.py | 3 +++ plugins/SedRegex/plugin.py | 36 ++++++++++++++++++++++++++++++----- plugins/SedRegex/test.py | 15 ++++++++++++++- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/plugins/SedRegex/constants.py b/plugins/SedRegex/constants.py index 3610e40fd..598aff09c 100755 --- a/plugins/SedRegex/constants.py +++ b/plugins/SedRegex/constants.py @@ -2,6 +2,9 @@ import re +TAG_SEEN = 'SedRegex.seen' +TAG_IS_REGEX = 'SedRegex.isRegex' + SED_REGEX = re.compile( # This part matches an optional nick followed by ":" or ",", used to direct replacement # at a particular user. diff --git a/plugins/SedRegex/plugin.py b/plugins/SedRegex/plugin.py index 1d0c70df1..03458a776 100644 --- a/plugins/SedRegex/plugin.py +++ b/plugins/SedRegex/plugin.py @@ -52,7 +52,7 @@ if sys.version_info[0] < 3: 'supports Python 2, consult the python2-legacy branch at ' 'https://github.com/jlu5/SupyPlugins/tree/python2-legacy') -from .constants import * +from .constants import SED_REGEX, TAG_SEEN, TAG_IS_REGEX # Replace newlines and friends with things like literal "\n" (backslash and "n") axe_spaces = utils.str.MultipleReplacer({'\n': '\\n', '\t': '\\t', '\r': '\\r'}) @@ -110,11 +110,30 @@ class SedRegex(callbacks.PluginRegexp): return (pattern, replacement, count, raw_flags) + # Tag all messages that SedRegex has seen before. 
This slightly optimizes the ignoreRegex + # feature as all messages tagged with SedRegex.seen but not SedRegex.isRegex are NOT regexps. + # If we didn't have this tag, we'd have to run a regexp match on each message in the history + # to check if it's a regexp, as there could've been regexp-like messages sent before + # SedRegex was enabled. + def doNotice(self, irc, msg): + if self.registryValue('enable', msg.channel, irc.network): + msg.tag(TAG_SEEN) + + def doPrivmsg(self, irc, msg): + # callbacks.PluginRegexp works by defining doPrivmsg(), we don't want to overwrite + # its behaviour + super().doPrivmsg(irc, msg) + self.doNotice(irc, msg) + + # SedRegex main routine. This is called automatically by callbacks.PluginRegexp on every + # message that matches the SED_REGEX expression defined in constants.py + # The actual regexp is passed into PluginRegexp by setting __doc__ equal to the regexp. def replacer(self, irc, msg, regex): if not self.registryValue('enable', msg.channel, irc.network): return + self.log.debug("SedRegex: running on %s/%s for %s", irc.network, msg.channel, regex) iterable = reversed(irc.state.history) - msg.tag('Replacer') + msg.tag(TAG_IS_REGEX) try: (pattern, replacement, count, flags) = self._unpack_sed(msg.args[1]) @@ -149,6 +168,7 @@ class SedRegex(callbacks.PluginRegexp): e.__class__.__name__, e)) else: irc.reply(message, prefixNick=False) + replacer.__doc__ = SED_REGEX.pattern def _replacer_process(self, irc, msg, target, pattern, replacement, count, messages): for m in messages: @@ -168,8 +188,15 @@ class SedRegex(callbacks.PluginRegexp): else: text = m.args[1] - if self.registryValue('ignoreRegex', msg.channel, irc.network) and \ - m.tagged('Replacer'): + # Test messages sent before SedRegex was activated. Mark them all as seen + # so we only need to do this check once per message. 
+ if not m.tagged(TAG_SEEN): + m.tag(TAG_SEEN) + if SED_REGEX.match(m.args[1]): + m.tag(TAG_IS_REGEX) + # Ignore messages containing a regexp if ignoreRegex is on. + if self.registryValue('ignoreRegex', msg.channel, irc.network) and m.tagged(TAG_IS_REGEX): + self.log.debug("Skipping message %s because it is tagged as isRegex", m.args[1]) continue if m.nick == msg.nick: messageprefix = msg.nick @@ -197,7 +224,6 @@ class SedRegex(callbacks.PluginRegexp): self.log.debug(_("SedRegex: Search %r not found in the last %i messages of %s."), msg.args[1], len(irc.state.history), msg.args[0]) raise SearchNotFound() - replacer.__doc__ = SED_REGEX.pattern Class = SedRegex diff --git a/plugins/SedRegex/test.py b/plugins/SedRegex/test.py index 296352062..23ff03d93 100644 --- a/plugins/SedRegex/test.py +++ b/plugins/SedRegex/test.py @@ -66,7 +66,7 @@ class SedRegexTestCase(ChannelPluginTestCase): m = self.getMsg(' ') self.assertIn('eliens', str(m)) - def testIgnoreRegexpWithBadCase(self): + def testIgnoreRegexWithBadCase(self): self.feedMsg('aliens are invading, help!') self.assertSnarfNoResponse('S/aliens/monsters/') @@ -225,6 +225,19 @@ class SedRegexTestCase(ChannelPluginTestCase): m = self.getMsg(' ') self.assertIn('see you later, bye', str(m)) + def testIgnoreRegexOnMessagesBeforeEnable(self): + # Before 2020-10-12 SedRegex used a single msg.tag() to track and ignore messages parsed as a regexp. + # However, a common complaint is that this doesn't catch regexps sent before SedRegex was loaded/enabled... + with conf.supybot.plugins.sedregex.enable.context(False): + self.feedMsg('foo') + self.feedMsg('barbell') + self.feedMsg('s/foo/bar/') + self.feedMsg('abcdef') + self.feedMsg('s/bar/door/') + m = self.getMsg(' ') + # The INCORRECT response would be "s/foo/door/" + self.assertIn('doorbell', str(m)) + # TODO: test ignores # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: