2005-02-02 07:53:15 +01:00
|
|
|
###
|
|
|
|
# Copyright (c) 2002-2004, Jeremiah Fincher
|
2012-09-01 16:16:48 +02:00
|
|
|
# Copyright (c) 2010, James McCoy
|
URL: Lazily deserialize records from the end in @last
Before this commit, the plugin first fetched a list of all
(deserialized) records in a list, then reversed the list, and iterated
on the reverse list.
This proved to be slow, with most of the time being spent in
`dbi.DB._newRecord` (which essentially deserializes one list of CSV).
After this commit, the list is reversed first, then the plugin iterates
on its generator, which calls `_newRecord` on records as they are
requested.
This means that when there are many URLs in the database, `@last` does
not need to waste time deserializing most records, when the result is
near the end (and if the result is the first record, then it does
exactly as much work as before).
2022-10-30 20:43:43 +01:00
|
|
|
# Copyright (c) 2010-2022, Valentin Lorentz
|
2005-02-02 07:53:15 +01:00
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
# modification, are permitted provided that the following conditions are met:
|
|
|
|
#
|
|
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer.
|
|
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
# this list of conditions, and the following disclaimer in the
|
|
|
|
# documentation and/or other materials provided with the distribution.
|
|
|
|
# * Neither the name of the author of this software nor the name of
|
|
|
|
# contributors to this software may be used to endorse or promote products
|
|
|
|
# derived from this software without specific prior written consent.
|
|
|
|
#
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
###
|
|
|
|
|
URL: Lazily deserialize records from the end in @last
Before this commit, the plugin first fetched a list of all
(deserialized) records in a list, then reversed the list, and iterated
on the reverse list.
This proved to be slow, with most of the time being spent in
`dbi.DB._newRecord` (which essentially deserializes one list of CSV).
After this commit, the list is reversed first, then the plugin iterates
on its generator, which calls `_newRecord` on records as they are
requested.
This means that when there are many URLs in the database, `@last` does
not need to waste time deserializing most records, when the result is
near the end (and if the result is the first record, then it does
exactly as much work as before).
2022-10-30 20:43:43 +01:00
|
|
|
import itertools
|
|
|
|
|
2005-02-02 07:53:15 +01:00
|
|
|
import supybot.dbi as dbi
|
|
|
|
import supybot.conf as conf
|
|
|
|
import supybot.utils as utils
|
|
|
|
from supybot.commands import *
|
2015-08-11 16:50:23 +02:00
|
|
|
import supybot.utils.minisix as minisix
|
2005-02-02 07:53:15 +01:00
|
|
|
import supybot.plugins as plugins
|
|
|
|
import supybot.ircmsgs as ircmsgs
|
|
|
|
import supybot.ircutils as ircutils
|
|
|
|
import supybot.callbacks as callbacks
|
2010-10-20 09:39:44 +02:00
|
|
|
from supybot.i18n import PluginInternationalization, internationalizeDocstring
|
|
|
|
_ = PluginInternationalization('URL')
|
2005-02-02 07:53:15 +01:00
|
|
|
|
|
|
|
class UrlRecord(dbi.Record):
    # One stored URL sighting.  The field names match the keyword arguments
    # passed to self.Record(...) in DbiUrlDB.DB.add: the URL itself, the nick
    # it came from, the text of the message it appeared in, and the time the
    # message was received.
    #
    # NOTE(review): every field uses the builtin eval as its converter;
    # presumably the stored values are repr() text written by the bot itself.
    # Confirm the database file can never be populated from untrusted input.
    __fields__ = [
        ('url', eval),
        ('by', eval),
        ('near', eval),
        ('at', eval),
        ]
|
|
|
|
|
|
|
|
class DbiUrlDB(plugins.DbiChannelDB):
    """Channel-keyed URL database whose records are UrlRecord instances."""

    class DB(dbi.DB):
        """Database of UrlRecord entries."""
        Record = UrlRecord

        def add(self, url, msg):
            """Store `url` along with details taken from `msg`.

            The record keeps the sender's nick (msg.nick), the message text
            the URL appeared in (msg.args[1]) and the reception time
            (msg.receivedAt).
            """
            record = self.Record(url=url, by=msg.nick,
                                 near=msg.args[1], at=msg.receivedAt)
            # Name the class explicitly: super(self.__class__, self) would
            # recurse forever if this class were ever subclassed, because
            # self.__class__ would then be the subclass.
            super(DbiUrlDB.DB, self).add(record)

        def urls(self, p):
            """Return the records selected by predicate `p`, in reverse order.

            This is a thin wrapper around self.select(p, reverse=True); the
            result is whatever iterable select() produces.
            """
            return self.select(p, reverse=True)
|
2005-02-02 07:53:15 +01:00
|
|
|
|
|
|
|
# Database factory for this plugin: built by plugins.DB under the name 'URL',
# with DbiUrlDB registered as the 'flat' implementation.  Calling URLDB()
# (see URL.__init__) yields the database object used by the plugin.
URLDB = plugins.DB('URL', {'flat': DbiUrlDB})
|
|
|
|
|
2005-02-09 08:04:04 +01:00
|
|
|
class URL(callbacks.Plugin):
    """This plugin records how many URLs have been mentioned in
    a channel and what the last URL was."""
    def __init__(self, irc):
        # Keep a handle on the parent proxy (as the original code did) and
        # open the plugin's URL database.
        self.__parent = super(URL, self)
        self.__parent.__init__(irc)
        self.db = URLDB()

    def doPrivmsg(self, irc, msg):
        # Record every URL found in a channel message, unless it matches the
        # channel's 'nonSnarfingRegexp' configuration value.

        # CTCP messages are ignored, with the exception of ACTIONs.
        if ircmsgs.isCtcp(msg) and not ircmsgs.isAction(msg):
            return
        # Only messages that have a channel are recorded.
        if not msg.channel:
            return
        if ircmsgs.isAction(msg):
            text = ircmsgs.unAction(msg)
        else:
            text = msg.args[1]
        urls = utils.web.urlRe.findall(text)
        if not urls:
            # Nothing to record; skip the configuration lookup entirely.
            return
        # The regexp does not depend on the URL, so look it up only once.
        r = self.registryValue('nonSnarfingRegexp',
                               msg.channel, irc.network)
        for url in urls:
            if r and r.search(url):
                self.log.debug('Skipping adding %u to db.', url)
                continue
            self.log.debug('Adding %u to db.', url)
            self.db.add(msg.channel, url, msg)

    @internationalizeDocstring
    def stats(self, irc, msg, args, channel):
        """[<channel>]

        Returns the number of URLs in the URL database. <channel> is only
        required if the message isn't sent in the channel itself.
        """
        self.db.vacuum(channel)
        count = self.db.size(channel)
        irc.reply(format(_('I have %n in my database.'), (count, 'URL')))
    stats = wrap(stats, ['channeldb'])

    @internationalizeDocstring
    def last(self, irc, msg, args, channel, optlist):
        """[<channel>] [--{from,with,without,near,proto} <value>] [--nolimit]

        Gives the last URL matching the given criteria. --from is from whom
        the URL came; --proto is the protocol the URL used; --with is something
        inside the URL; --without is something that should not be in the URL;
        --near is something in the same message as the URL. If --nolimit is
        given, returns all the URLs that are found to just the URL.
        <channel> is only necessary if the message isn't sent in the channel
        itself.
        """
        predicates = []
        nolimit = False
        for (option, arg) in optlist:
            if isinstance(arg, minisix.string_types):
                arg = arg.lower()
            # Start from a clean slate for every option, so that the filter
            # built for a previous option is never appended a second time
            # (e.g. when --nolimit follows --from).
            f = None
            if option == 'nolimit':
                nolimit = True
            elif option == 'from':
                def f(record, arg=arg):
                    return ircutils.strEqual(record.by, arg)
            elif option == 'with':
                def f(record, arg=arg):
                    return arg in record.url.lower()
            elif option == 'without':
                def f(record, arg=arg):
                    return arg not in record.url.lower()
            elif option == 'proto':
                def f(record, arg=arg):
                    return record.url.lower().startswith(arg)
            elif option == 'near':
                def f(record, arg=arg):
                    return arg in record.near.lower()
            if f is not None:
                predicates.append(f)

        def predicate(record):
            # A record matches only if every requested filter accepts it
            # (and trivially matches when no filter was requested).
            return all(check(record) for check in predicates)

        # Stay lazy: self.db.urls() is consumed only as far as needed, so a
        # match near the end of the database does not force every record to
        # be processed.
        urls = (record.url for record in self.db.urls(channel, predicate))
        first_url = next(urls, None)
        if first_url is None:
            irc.reply(_('No URLs matched that criteria.'))
            return
        if nolimit:
            # Put the URL consumed by the peek above back in front of the
            # remaining ones.
            every_url = itertools.chain([first_url], urls)
            s = ', '.join(format('%u', url) for url in every_url)
        else:
            # We should optimize this with another URLDB method eventually.
            s = first_url
        irc.reply(s)
    last = wrap(last, ['channeldb',
                       getopts({'from': 'something', 'with': 'something',
                                'near': 'something', 'proto': 'something',
                                'nolimit': '', 'without': 'something',})])
|
|
|
|
|
|
|
|
# Module-level alias for the plugin class.
# NOTE(review): presumably this is the name the plugin loader looks up; confirm.
Class = URL
|
|
|
|
|
2006-02-11 16:52:51 +01:00
|
|
|
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|