From e0fdcb67c09b26a79bbb3cbb2244d18149c5b0c2 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Thu, 27 May 2021 22:13:44 +0200 Subject: [PATCH] Store IrcUser hostmasks in HostmaskSet to prevent their pattern cache from expiring When the number of hostmasks exceeds 1000 (the hardcoded size of _patternCache and _hostmaskPatternEqualCache), this triggers a pathological case in the LRU caches, which causes every call to be a cache miss. This means that on every IRC message received, ircdb.checkIgnored triggers a recompilation of *all* user hostmasks, which is very expensive computationally. This commit stores them in their own cache to prevent them from expiring. --- src/ircdb.py | 15 +++++++------ src/ircutils.py | 50 +++++++++++++++++++++++++++++++++++++++++-- test/test_ircutils.py | 15 +++++++++++++ 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/src/ircdb.py b/src/ircdb.py index 9fa260cad..804d3931b 100644 --- a/src/ircdb.py +++ b/src/ircdb.py @@ -196,6 +196,7 @@ class UserCapabilitySet(CapabilitySet): assert capability != '-owner', '"-owner" disallowed.' 
self.__parent.add(capability) + class IrcUser(object): """This class holds the capabilities and authentications for a user.""" __slots__ = ('id', 'auth', 'name', 'ignore', 'secure', 'hashed', @@ -213,10 +214,10 @@ class IrcUser(object): self.capabilities = UserCapabilitySet() for capability in capabilities: self.capabilities.add(capability) - if hostmasks is None: - self.hostmasks = ircutils.IrcSet() # hostmasks used for recognition - else: - self.hostmasks = hostmasks + + # hostmasks used for recognition + self.hostmasks = ircutils.HostmaskSet(hostmasks or []) + if nicks is None: # {'network1': ['foo', 'bar'], 'network': ['baz']} self.nicks = ircutils.IrcDict() @@ -289,9 +290,9 @@ class IrcUser(object): finally: while removals: self.auth.remove(removals.pop()) - for pat in self.hostmasks: - if ircutils.hostmaskPatternEqual(pat, hostmask): - return pat + matched_pattern = self.hostmasks.match(hostmask) + if matched_pattern is not None: + return matched_pattern return False def addHostmask(self, hostmask): diff --git a/src/ircutils.py b/src/ircutils.py index bcc142c89..10ede24c7 100644 --- a/src/ircutils.py +++ b/src/ircutils.py @@ -47,6 +47,7 @@ import random import string import textwrap import functools +import collections.abc from . 
import utils from .utils import minisix @@ -169,7 +170,7 @@ def areReceivers(s, strictRfc=True, nicklen=None, chantypes='#&!', return all([nick(x) or chan(x) for x in s.split(',')]) _patternCache = utils.structures.CacheDict(1000) -def compileHostmaskPattern(pattern): +def _compileHostmaskPattern(pattern): try: return _patternCache[pattern] except KeyError: @@ -203,10 +204,55 @@ def hostmaskPatternEqual(pattern, hostmask): try: return _hostmaskPatternEqualCache[(pattern, hostmask)] except KeyError: - matched = compileHostmaskPattern(pattern)(hostmask) is not None + matched = _compileHostmaskPattern(pattern)(hostmask) is not None _hostmaskPatternEqualCache[(pattern, hostmask)] = matched return matched +class HostmaskSet(collections.abc.MutableSet): + """Stores a set of hostmasks and caches their pattern as compiled + by _compileHostmaskPattern. + + This is an alternative to hostmaskPatternEqual for sets of patterns that + do not change often, such as ircdb.IrcUser. + ircdb.IrcUser used to store a real set of hostmasks as strings, then + call hostmaskPatternEqual on each of these strings. This is good enough + most of the time, as hostmaskPatternEqual has a cache. + + Unfortunately, it is a LRU cache, and hostmasks are checked in order. + This means that as soon as you have more hostmasks than the size of the + cache, EVERY call to hostmaskPatternEqual will be a cache miss, so the + regexp will need to be recompiled every time. 
+ This is VERY expensive, because building the regexp is slow, and + re.compile() is even slower.""" + + def __init__(self, hostmasks=()): + self.data = {} # {hostmask_str: _compileHostmaskPattern(hostmask_str)} + for hostmask in hostmasks: + self.add(hostmask) + + def add(self, hostmask): + self.data[hostmask] = _compileHostmaskPattern(hostmask) + + def discard(self, hostmask): + self.data.pop(hostmask, None) + + def __contains__(self, hostmask): + return hostmask in self.data + + def __iter__(self): + return iter(self.data) + + def __len__(self): + return len(self.data) + + def match(self, hostname): + # Potential optimization: join all the patterns into a single one. + for (pattern, compiled_pattern) in self.data.items(): + if compiled_pattern(hostname) is not None: + return pattern + return None + + def banmask(hostmask): """Returns a properly generic banning hostmask for a hostmask. diff --git a/test/test_ircutils.py b/test/test_ircutils.py index 1459902ba..88ff5708d 100644 --- a/test/test_ircutils.py +++ b/test/test_ircutils.py @@ -58,6 +58,21 @@ class FunctionsTestCase(SupyTestCase): 'abr-ubr1.sbo-abr.ma.cable.rcn.com' self.assertTrue(ircutils.hostmaskPatternEqual(s, s)) + def testHostmaskSet(self): + hs = ircutils.HostmaskSet() + self.assertEqual(hs.match("nick!user@host"), None) + hs.add("*!user@host") + hs.add("*!user@host2") + self.assertEqual(hs.match("nick!user@host"), "*!user@host") + self.assertEqual(hs.match("nick!user@host2"), "*!user@host2") + self.assertEqual(list(hs), ["*!user@host", "*!user@host2"]) + hs.remove("*!user@host2") + self.assertEqual(hs.match("nick!user@host"), "*!user@host") + self.assertEqual(hs.match("nick!user@host2"), None) + + hs = ircutils.HostmaskSet(["*!user@host"]) + self.assertEqual(hs.match("nick!user@host"), "*!user@host") + def testIsUserHostmask(self): self.assertTrue(ircutils.isUserHostmask(self.hostmask)) self.assertTrue(ircutils.isUserHostmask('a!b@c'))