Store IrcUser hostmasks in HostmaskSet to prevent their pattern cache from expiring

When the number of hostmasks exceeds 1000 (the hardcoded size of
_patternCache and _hostmaskPatternEqualCache), this triggers
a pathological case in the LRU caches, which causes every lookup to be
a cache miss.
This means that on every IRC message received, ircdb.checkIgnored triggers
a recompilation of *all* user hostmasks, which is very expensive
computationally.

This commit stores them in their own cache to prevent them from
expiring.
This commit is contained in:
Valentin Lorentz 2021-05-27 22:13:44 +02:00
parent a5cd870bd2
commit e0fdcb67c0
3 changed files with 71 additions and 9 deletions

View File

@ -196,6 +196,7 @@ class UserCapabilitySet(CapabilitySet):
assert capability != '-owner', '"-owner" disallowed.'
self.__parent.add(capability)
class IrcUser(object):
"""This class holds the capabilities and authentications for a user."""
__slots__ = ('id', 'auth', 'name', 'ignore', 'secure', 'hashed',
@ -213,10 +214,10 @@ class IrcUser(object):
self.capabilities = UserCapabilitySet()
for capability in capabilities:
self.capabilities.add(capability)
if hostmasks is None:
self.hostmasks = ircutils.IrcSet() # hostmasks used for recognition
else:
self.hostmasks = hostmasks
# hostmasks used for recognition
self.hostmasks = ircutils.HostmaskSet(hostmasks or [])
if nicks is None:
# {'network1': ['foo', 'bar'], 'network': ['baz']}
self.nicks = ircutils.IrcDict()
@ -289,9 +290,9 @@ class IrcUser(object):
finally:
while removals:
self.auth.remove(removals.pop())
for pat in self.hostmasks:
if ircutils.hostmaskPatternEqual(pat, hostmask):
return pat
matched_pattern = self.hostmasks.match(hostmask)
if matched_pattern is not None:
return matched_pattern
return False
def addHostmask(self, hostmask):

View File

@ -47,6 +47,7 @@ import random
import string
import textwrap
import functools
import collections.abc
from . import utils
from .utils import minisix
@ -169,7 +170,7 @@ def areReceivers(s, strictRfc=True, nicklen=None, chantypes='#&!',
return all([nick(x) or chan(x) for x in s.split(',')])
_patternCache = utils.structures.CacheDict(1000)
def compileHostmaskPattern(pattern):
def _compileHostmaskPattern(pattern):
try:
return _patternCache[pattern]
except KeyError:
@ -203,10 +204,55 @@ def hostmaskPatternEqual(pattern, hostmask):
try:
return _hostmaskPatternEqualCache[(pattern, hostmask)]
except KeyError:
matched = compileHostmaskPattern(pattern)(hostmask) is not None
matched = _compileHostmaskPattern(pattern)(hostmask) is not None
_hostmaskPatternEqualCache[(pattern, hostmask)] = matched
return matched
class HostmaskSet(collections.abc.MutableSet):
    """Stores a set of hostmasks and caches their pattern as compiled
    by _compileHostmaskPattern.

    This is an alternative to hostmaskPatternEqual for sets of patterns that
    do not change often, such as ircdb.IrcUser.

    ircdb.IrcUser used to store a real set of hostmasks as strings, then
    call hostmaskPatternEqual on each of these strings. This is good enough
    most of the time, as hostmaskPatternEqual has a cache.

    Unfortunately, it is an LRU cache, and hostmasks are checked in order.
    This means that as soon as you have more hostmasks than the size of the
    cache, EVERY call to hostmaskPatternEqual will be a cache miss, so the
    regexp will need to be recompiled every time.

    This is VERY expensive, because building the regexp is slow, and
    re.compile() is even slower."""

    def __init__(self, hostmasks=()):
        """Build the set from an iterable of hostmask pattern strings."""
        # {hostmask_str: _compileHostmaskPattern(hostmask_str)}
        self.data = {}
        for hostmask in hostmasks:
            self.add(hostmask)

    def __repr__(self):
        """Show the stored pattern strings instead of an opaque object id."""
        return 'HostmaskSet(%r)' % (list(self.data),)

    def add(self, hostmask):
        """Add a hostmask pattern, compiling it once and keeping the
        compiled matcher for as long as the pattern stays in the set."""
        self.data[hostmask] = _compileHostmaskPattern(hostmask)

    def discard(self, hostmask):
        """Remove a hostmask pattern; do nothing if it is not present."""
        self.data.pop(hostmask, None)

    def __contains__(self, hostmask):
        """Tell whether this exact pattern string is stored in the set."""
        return hostmask in self.data

    def __iter__(self):
        """Iterate over the stored pattern strings."""
        return iter(self.data)

    def __len__(self):
        """Return the number of stored patterns."""
        return len(self.data)

    def match(self, hostname):
        """Return the first stored pattern whose compiled matcher accepts
        ``hostname``, or None if no stored pattern matches."""
        # Potential optimization: join all the patterns into a single one.
        for (pattern, compiled_pattern) in self.data.items():
            # The compiled matcher returns None when there is no match.
            if compiled_pattern(hostname) is not None:
                return pattern
        return None
def banmask(hostmask):
"""Returns a properly generic banning hostmask for a hostmask.

View File

@ -58,6 +58,21 @@ class FunctionsTestCase(SupyTestCase):
'abr-ubr1.sbo-abr.ma.cable.rcn.com'
self.assertTrue(ircutils.hostmaskPatternEqual(s, s))
def testHostmaskSet(self):
    # An empty set matches nothing.
    hs = ircutils.HostmaskSet()
    self.assertIsNone(hs.match("nick!user@host"))

    # Each hostmask is matched by the pattern that was added for it.
    hs.add("*!user@host")
    hs.add("*!user@host2")
    self.assertEqual(hs.match("nick!user@host"), "*!user@host")
    self.assertEqual(hs.match("nick!user@host2"), "*!user@host2")
    # Iterating yields the pattern strings, in the order they were added.
    self.assertEqual(list(hs), ["*!user@host", "*!user@host2"])

    # Removing a pattern stops it from matching; the other one is unaffected.
    hs.remove("*!user@host2")
    self.assertEqual(hs.match("nick!user@host"), "*!user@host")
    self.assertIsNone(hs.match("nick!user@host2"))

    # Patterns can also be supplied to the constructor.
    hs = ircutils.HostmaskSet(["*!user@host"])
    self.assertEqual(hs.match("nick!user@host"), "*!user@host")
def testIsUserHostmask(self):
self.assertTrue(ircutils.isUserHostmask(self.hostmask))
self.assertTrue(ircutils.isUserHostmask('a!b@c'))