URL: Lazily deserialize records from the end in @last

Before this commit, the plugin first fetched a list of all
(deserialized) records in a list, then reversed the list, and iterated
on the reverse list.
This proved to be slow, with most of the time being spent in
`dbi.DB._newRecord` (which essentially deserializes one list of CSV).

After this commit, the list is reversed first, then the plugin iterates
on its generator, which calls `_newRecord` on records as they are
requested.

This means that when there are many URLs in the database, `@last` does
not need to waste time deserializing most records, when the result is
near the end (and if the result is the first record, then it does
exactly as much work as before).
This commit is contained in:
Valentin Lorentz 2022-10-30 20:43:43 +01:00
parent f4ac7f88fe
commit 4da1291876
2 changed files with 32 additions and 12 deletions

View File

@ -1,7 +1,7 @@
###
# Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2010, James McCoy
# Copyright (c) 2010-2021, Valentin Lorentz
# Copyright (c) 2010-2022, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@ -29,6 +29,8 @@
# POSSIBILITY OF SUCH DAMAGE.
###
import itertools
import supybot.dbi as dbi
import supybot.conf as conf
import supybot.utils as utils
@ -57,9 +59,7 @@ class DbiUrlDB(plugins.DbiChannelDB):
near=msg.args[1], at=msg.receivedAt)
super(self.__class__, self).add(record)
def urls(self, p):
L = list(self.select(p))
L.reverse()
return L
return self.select(p, reverse=True)
URLDB = plugins.DB('URL', {'flat': DbiUrlDB})
@ -142,16 +142,18 @@ class URL(callbacks.Plugin):
if not predicate(record):
return False
return True
urls = [record.url for record in self.db.urls(channel, predicate)]
if not urls:
urls = (record.url for record in self.db.urls(channel, predicate))
(urls, urls_copy) = itertools.tee(urls)
first_url = next(urls_copy, None)
if first_url is None:
irc.reply(_('No URLs matched that criteria.'))
else:
if nolimit:
urls = [format('%u', url) for url in urls]
urls = (format('%u', url) for url in urls)
s = ', '.join(urls)
else:
# We should optimize this with another URLDB method eventually.
s = urls[0]
s = first_url
irc.reply(s)
last = wrap(last, ['channeldb',
getopts({'from': 'something', 'with': 'something',

View File

@ -1,6 +1,6 @@
###
# Copyright (c) 2002-2005, Jeremiah Fincher
# Copyright (c) 2010-2021, Valentin Lorentz
# Copyright (c) 2010-2022, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@ -358,12 +358,30 @@ class DB(object):
self.map.remove(id)
def __iter__(self):
for (id, s) in self.map:
yield from self._iter()
def _iter(self, *, reverse=False):
if reverse:
if hasattr(self.map, "__reversed__"):
# neither FlatfileMapping nor CdbMapping support __reversed__
# and DirMapping does not support iteration at all, but
# there is no harm in allowing this short-path in case
# plugins use their own mapping.
it = reversed(self.map)
else:
# This does load the whole database in memory instead of
# iterating lazily, but plugins requesting a reverse list
# would probably need do it themselves otherwise, so it does
# not make matters worse to do it here.
it = reversed(list(self.map))
else:
it = self.map
for (id, s) in it:
# We don't need to yield the id because it's in the record.
yield self._newRecord(id, s)
def select(self, p):
for record in self:
def select(self, p, reverse=False):
for record in self._iter(reverse=reverse):
if p(record):
yield record