SedRegex: ignore regexp-like messages sent before the plugin is active

Reported by fred` at #limnoria.

This adds a new message tag that is applied to every message SedRegex has seen, in addition to the existing tag that marks messages parsed as a regexp.
SedRegex will now look through the message history and check whether each message without the "seen" tag is in fact a regexp, before
marking that message as seen so that the check is not repeated for it.
This commit is contained in:
James Lu 2020-10-12 19:41:10 -07:00
parent ffa24bf56e
commit 9adb4f0e8c
3 changed files with 48 additions and 6 deletions

View File

@ -2,6 +2,9 @@
import re import re
TAG_SEEN = 'SedRegex.seen'
TAG_IS_REGEX = 'SedRegex.isRegex'
SED_REGEX = re.compile( SED_REGEX = re.compile(
# This part matches an optional nick followed by ":" or ",", used to direct replacement # This part matches an optional nick followed by ":" or ",", used to direct replacement
# at a particular user. # at a particular user.

View File

@ -52,7 +52,7 @@ if sys.version_info[0] < 3:
'supports Python 2, consult the python2-legacy branch at ' 'supports Python 2, consult the python2-legacy branch at '
'https://github.com/jlu5/SupyPlugins/tree/python2-legacy') 'https://github.com/jlu5/SupyPlugins/tree/python2-legacy')
from .constants import * from .constants import SED_REGEX, TAG_SEEN, TAG_IS_REGEX
# Replace newlines and friends with things like literal "\n" (backslash and "n") # Replace newlines and friends with things like literal "\n" (backslash and "n")
axe_spaces = utils.str.MultipleReplacer({'\n': '\\n', '\t': '\\t', '\r': '\\r'}) axe_spaces = utils.str.MultipleReplacer({'\n': '\\n', '\t': '\\t', '\r': '\\r'})
@ -110,11 +110,30 @@ class SedRegex(callbacks.PluginRegexp):
return (pattern, replacement, count, raw_flags) return (pattern, replacement, count, raw_flags)
# Mark every message SedRegex handles while enabled with TAG_SEEN. This slightly
# optimizes the ignoreRegex feature: messages tagged with SedRegex.seen but not
# SedRegex.isRegex are known NOT to be regexps. Without this tag, every message in
# the history would need a regexp match to find out whether it is a regexp, since
# regexp-like messages could have been sent before SedRegex was enabled.
def doNotice(self, irc, msg):
    """Tag *msg* as seen when the 'enable' setting is on for its channel and network."""
    enabled = self.registryValue('enable', msg.channel, irc.network)
    if not enabled:
        # Plugin is off here: leave the message untagged.
        return
    msg.tag(TAG_SEEN)
# Handle a PRIVMSG: run the parent class's doPrivmsg() first, then hand the
# same message to doNotice().
def doPrivmsg(self, irc, msg):
# callbacks.PluginRegexp works by defining doPrivmsg(), so call the parent
# implementation here rather than overwriting its behaviour.
super().doPrivmsg(irc, msg)
# Reuse doNotice() so PRIVMSGs receive the same handling as NOTICEs.
self.doNotice(irc, msg)
# SedRegex main routine. This is called automatically by callbacks.PluginRegexp on every
# message that matches the SED_REGEX expression defined in constants.py
# The actual regexp is passed into PluginRegexp by setting __doc__ equal to the regexp.
def replacer(self, irc, msg, regex): def replacer(self, irc, msg, regex):
if not self.registryValue('enable', msg.channel, irc.network): if not self.registryValue('enable', msg.channel, irc.network):
return return
self.log.debug("SedRegex: running on %s/%s for %s", irc.network, msg.channel, regex)
iterable = reversed(irc.state.history) iterable = reversed(irc.state.history)
msg.tag('Replacer') msg.tag(TAG_IS_REGEX)
try: try:
(pattern, replacement, count, flags) = self._unpack_sed(msg.args[1]) (pattern, replacement, count, flags) = self._unpack_sed(msg.args[1])
@ -149,6 +168,7 @@ class SedRegex(callbacks.PluginRegexp):
e.__class__.__name__, e)) e.__class__.__name__, e))
else: else:
irc.reply(message, prefixNick=False) irc.reply(message, prefixNick=False)
replacer.__doc__ = SED_REGEX.pattern
def _replacer_process(self, irc, msg, target, pattern, replacement, count, messages): def _replacer_process(self, irc, msg, target, pattern, replacement, count, messages):
for m in messages: for m in messages:
@ -168,8 +188,15 @@ class SedRegex(callbacks.PluginRegexp):
else: else:
text = m.args[1] text = m.args[1]
if self.registryValue('ignoreRegex', msg.channel, irc.network) and \ # Test messages sent before SedRegex was activated. Mark them all as seen
m.tagged('Replacer'): # so we only need to do this check once per message.
if not m.tagged(TAG_SEEN):
m.tag(TAG_SEEN)
if SED_REGEX.match(m.args[1]):
m.tag(TAG_IS_REGEX)
# Ignore messages containing a regexp if ignoreRegex is on.
if self.registryValue('ignoreRegex', msg.channel, irc.network) and m.tagged(TAG_IS_REGEX):
self.log.debug("Skipping message %s because it is tagged as isRegex", m.args[1])
continue continue
if m.nick == msg.nick: if m.nick == msg.nick:
messageprefix = msg.nick messageprefix = msg.nick
@ -197,7 +224,6 @@ class SedRegex(callbacks.PluginRegexp):
self.log.debug(_("SedRegex: Search %r not found in the last %i messages of %s."), self.log.debug(_("SedRegex: Search %r not found in the last %i messages of %s."),
msg.args[1], len(irc.state.history), msg.args[0]) msg.args[1], len(irc.state.history), msg.args[0])
raise SearchNotFound() raise SearchNotFound()
replacer.__doc__ = SED_REGEX.pattern
Class = SedRegex Class = SedRegex

View File

@ -66,7 +66,7 @@ class SedRegexTestCase(ChannelPluginTestCase):
m = self.getMsg(' ') m = self.getMsg(' ')
self.assertIn('eliens', str(m)) self.assertIn('eliens', str(m))
def testIgnoreRegexpWithBadCase(self): def testIgnoreRegexWithBadCase(self):
self.feedMsg('aliens are invading, help!') self.feedMsg('aliens are invading, help!')
self.assertSnarfNoResponse('S/aliens/monsters/') self.assertSnarfNoResponse('S/aliens/monsters/')
@ -225,6 +225,19 @@ class SedRegexTestCase(ChannelPluginTestCase):
m = self.getMsg(' ') m = self.getMsg(' ')
self.assertIn('see you later, bye', str(m)) self.assertIn('see you later, bye', str(m))
# Regression test: a regexp-like message fed while the 'enable' setting is
# False must not be chosen as the target of a later replacement.
def testIgnoreRegexOnMessagesBeforeEnable(self):
# Before 2020-10-12 SedRegex used a single msg.tag() to track and ignore messages parsed as a regexp.
# However, a common complaint was that this didn't catch regexps sent before SedRegex was loaded/enabled...
# NOTE(review): indentation is lost in this view; the final 's/bar/door/' is
# presumably fed after the disabled context ends, since a reply is expected
# below -- confirm the extent of the with block against the real file.
with conf.supybot.plugins.sedregex.enable.context(False):
self.feedMsg('foo')
self.feedMsg('barbell')
self.feedMsg('s/foo/bar/')
self.feedMsg('abcdef')
self.feedMsg('s/bar/door/')
m = self.getMsg(' ')
# The INCORRECT response would be "s/foo/door/", i.e. the replacement applied
# to the earlier 's/foo/bar/' message instead of to 'barbell'.
self.assertIn('doorbell', str(m))
# TODO: test ignores # TODO: test ignores
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79: