Merge branch 'rewrite-rss' into testing

Conflicts:
	src/utils/str.py
This commit is contained in:
Valentin Lorentz 2014-07-31 19:24:01 +00:00
commit 4a3d39e747
5 changed files with 310 additions and 358 deletions

View File

@ -50,19 +50,33 @@ class FeedItemSortOrder(registry.OnlySomeStrings):
validStrings = ('asInFeed', 'oldestFirst', 'newestFirst')
RSS = conf.registerPlugin('RSS')
conf.registerChannelValue(RSS, 'bold', registry.Boolean(
True, _("""Determines whether the bot will bold the title of the feed when
it announces news.""")))
conf.registerGlobalValue(RSS, 'feeds',
FeedNames([], _("""Determines what feeds should be accessible as
commands.""")))
########
# Format
conf.registerChannelValue(RSS, 'headlineSeparator',
registry.StringSurroundedBySpaces('|', _("""Determines what string is
used to separate headlines in new feeds.""")))
conf.registerChannelValue(RSS, 'announcementPrefix',
registry.StringWithSpaceOnRight(_('News from '), _("""Determines what
prefix is prepended (if any) to the news item announcements made in the
channel.""")))
conf.registerChannelValue(RSS, 'announcementSeparator',
registry.StringWithSpaceOnRight(_(': '), _("""Determines what
suffix is appended to the feed name in a news item.""")))
conf.registerChannelValue(RSS, 'format',
registry.String(_('$date: $title <$link>'), _("""The format the bot
will use for displaying headlines of a RSS feed that is triggered
manually. In addition to fields defined by feedparser ($published
(the entry date), $title, $link, $description, $id, etc.), the following
variables can be used: $feed_name, $date (parsed date, as defined in
supybot.reply.format.time)""")))
conf.registerChannelValue(RSS, 'announceFormat',
registry.String(_('News from $feed_name: $title <$link>'),
_("""The format the bot will use for displaying headlines of a RSS feed
that is announced. See supybot.plugins.RSS.format for the available
variables.""")))
###########
# Announces
conf.registerChannelValue(RSS, 'announce',
registry.SpaceSeparatedSetOfStrings([], _("""Determines which RSS feeds
should be announced in the channel; valid input is a list of strings
@ -75,25 +89,10 @@ conf.registerGlobalValue(RSS, 'sortFeedItems',
FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be
sorted by their update timestamp or kept in the same order as they appear
in a feed.""")))
conf.registerGlobalValue(RSS, 'stripRedirect', registry.Boolean(
True, """Determines whether the bot will attempt to strip url redirection
from headline links, by taking things after the last http://."""))
conf.registerGlobalValue(RSS, 'feeds',
FeedNames([], _("""Determines what feeds should be accessible as
commands.""")))
conf.registerChannelValue(RSS, 'showLinks',
registry.Boolean(True, _("""Determines whether the bot will list the link
along with the title of the feed when the rss command is called.
supybot.plugins.RSS.announce.showLinks affects whether links will be
listed when a feed is automatically announced.""")))
conf.registerChannelValue(RSS, 'showPubDate',
registry.Boolean(False, """Determines whether the bot will list the
publication datetime stamp along with the title of the feed when the rss
command is called.
supybot.plugins.RSS.announce.showPubDate affects whether this will be
listed when a feed is automatically announced."""))
conf.registerGlobalValue(RSS, 'defaultNumberOfHeadlines',
####################
# Headlines filtering
conf.registerChannelValue(RSS, 'defaultNumberOfHeadlines',
registry.PositiveInteger(1, _("""Indicates how many headlines an rss feed
will output by default, if no number is provided.""")))
conf.registerChannelValue(RSS, 'initialAnnounceHeadlines',
@ -108,19 +107,5 @@ conf.registerChannelValue(RSS, 'keywordBlacklist',
strings, lets you filter headlines to those not containing any items
in this blacklist.""")))
conf.registerGroup(RSS, 'announce')
conf.registerChannelValue(RSS.announce, 'showLinks',
registry.Boolean(True, _("""Determines whether the bot will list the link
along with the title of the feed when a feed is automatically
announced.""")))
conf.registerChannelValue(RSS.announce, 'showPubDate',
registry.Boolean(False, """Determines whether the bot will list the
publication datetime stamp along with the title of the feed when a feed
is automatically announced."""))
conf.registerGlobalValue(RSS.announce, 'cachePeriod',
registry.PositiveInteger(604800, """Maximum age of cached RSS headlines,
in seconds. Headline cache is used to avoid re-announcing old news."""))
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -1,6 +1,7 @@
###
# Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2008-2010, James McCoy
# Copyright (c) 2014, Valentin Lorentz
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@ -30,6 +31,7 @@
import time
import types
import string
import socket
import threading
import re
@ -40,300 +42,55 @@ import supybot.conf as conf
import supybot.utils as utils
import supybot.world as world
from supybot.commands import *
import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils
import supybot.registry as registry
import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization('RSS')
def getFeedName(irc, msg, args, state):
def get_feedName(irc, msg, args, state):
if not registry.isValidRegistryName(args[0]):
state.errorInvalid('feed name', args[0],
'Feed names must not include spaces.')
state.args.append(callbacks.canonicalName(args.pop(0)))
addConverter('feedName', getFeedName)
addConverter('feedName', get_feedName)
class RSS(callbacks.Plugin):
"""This plugin is useful both for announcing updates to RSS feeds in a
channel, and for retrieving the headlines of RSS feeds via command. Use
the "add" command to add feeds to this plugin, and use the "announce"
command to determine what feeds should be announced in a given channel."""
threaded = True
def __init__(self, irc):
self.__parent = super(RSS, self)
self.__parent.__init__(irc)
# Schema is feed : [url, command]
self.feedNames = callbacks.CanonicalNameDict()
self.locks = {}
self.lastRequest = {}
self.cachedFeeds = {}
self.cachedHeadlines = {}
self.gettingLockLock = threading.Lock()
for name in self.registryValue('feeds'):
self._registerFeed(name)
try:
url = self.registryValue(registry.join(['feeds', name]))
except registry.NonExistentRegistryEntry:
self.log.warning('%s is not a registered feed, removing.',name)
continue
self.makeFeedCommand(name, url)
self.getFeed(url) # So announced feeds don't announce on startup.
def isCommandMethod(self, name):
if not self.__parent.isCommandMethod(name):
if name in self.feedNames:
return True
else:
return False
else:
return True
def listCommands(self):
return self.__parent.listCommands(self.feedNames.keys())
def getCommandMethod(self, command):
try:
return self.__parent.getCommandMethod(command)
except AttributeError:
return self.feedNames[command[0]][1]
def _registerFeed(self, name, url=''):
self.registryValue('feeds').add(name)
group = self.registryValue('feeds', value=False)
conf.registerGlobalValue(group, name, registry.String(url, ''))
def __call__(self, irc, msg):
self.__parent.__call__(irc, msg)
irc = callbacks.SimpleProxy(irc, msg)
newFeeds = {}
for channel in irc.state.channels:
feeds = self.registryValue('announce', channel)
for name in feeds:
commandName = callbacks.canonicalName(name)
if self.isCommandMethod(commandName):
url = self.feedNames[commandName][0]
else:
class Feed:
__slots__ = ('url', 'name', 'data', 'last_update', 'entries',
'lock', 'announced_entries')
def __init__(self, name, url, plugin_is_loading=False):
assert name, name
if not url:
assert utils.web.httpUrlRe.match(name), name
url = name
if self.willGetNewFeed(url):
newFeeds.setdefault((url, name), []).append(channel)
for ((url, name), channels) in newFeeds.iteritems():
# We check if we can acquire the lock right here because if we
# don't, we'll possibly end up spawning a lot of threads to get
# the feed, because this thread may run for a number of bytecodes
# before it switches to a thread that'll get the lock in
# _newHeadlines.
if self.acquireLock(url, blocking=False):
try:
t = threading.Thread(target=self._newHeadlines,
name=format('Fetching %u', url),
args=(irc, channels, name, url))
self.log.info('Checking for announcements at %u', url)
world.threadsSpawned += 1
t.setDaemon(True)
t.start()
finally:
self.releaseLock(url)
time.sleep(0.1) # So other threads can run.
self.name = name
self.url = url
self.data = None
# We don't want to fetch feeds right after the plugin is
# loaded (the bot could be starting, and thus already busy)
self.last_update = time.time() if plugin_is_loading else 0
self.entries = []
self.lock = threading.Lock()
self.announced_entries = utils.structures.TruncatableSet()
def buildHeadlines(self, headlines, channel, linksconfig='announce.showLinks', dateconfig='announce.showPubDate'):
newheadlines = []
for headline in headlines:
link = ''
pubDate = ''
if self.registryValue(linksconfig, channel):
if headline[1]:
if self.registryValue('stripRedirect'):
link = re.sub('^.*http://', 'http://', headline[1])
else:
link = headline[1]
if self.registryValue(dateconfig, channel):
if headline[2]:
pubDate = ' [%s]' % (headline[2],)
if sys.version_info[0] < 3:
if isinstance(headline[0], unicode):
try:
import charade.universaldetector
u = charade.universaldetector.UniversalDetector()
u.feed(headline[0])
u.close()
encoding = u.result['encoding']
except ImportError:
encoding = 'utf8'
newheadlines.append(format('%s %u%s',
headline[0].encode(encoding,'replace'),
link,
pubDate))
else:
newheadlines.append(format('%s %u%s',
headline[0],
link,
pubDate))
else:
newheadlines.append(format('%s %u%s',
headline[0],
link,
pubDate))
return newheadlines
def get_command(self, plugin):
docstring = format(_("""[<number of headlines>]
def _newHeadlines(self, irc, channels, name, url):
try:
# We acquire the lock here so there's only one announcement thread
# in this code at any given time. Otherwise, several announcement
# threads will getFeed (all blocking, in turn); then they'll all
# want to send their news messages to the appropriate channels.
# Note that we're allowed to acquire this lock twice within the
# same thread because it's an RLock and not just a normal Lock.
self.acquireLock(url)
t = time.time()
try:
#oldresults = self.cachedFeeds[url]
#oldheadlines = self.getHeadlines(oldresults)
oldheadlines = self.cachedHeadlines[url]
oldheadlines = list(filter(lambda x: t - x[3] <
self.registryValue('announce.cachePeriod'), oldheadlines))
except KeyError:
oldheadlines = []
newresults = self.getFeed(url)
newheadlines = self.getHeadlines(newresults)
if len(newheadlines) == 1:
s = newheadlines[0][0]
if s in ('Timeout downloading feed.',
'Unable to download feed.'):
self.log.debug('%s %u', s, url)
return
def normalize(headline):
return (tuple(headline[0].lower().split()), headline[1])
oldheadlinesset = set(map(normalize, oldheadlines))
for (i, headline) in enumerate(newheadlines):
if normalize(headline) in oldheadlinesset:
newheadlines[i] = None
newheadlines = list(filter(None, newheadlines)) # Removes Nones.
number_of_headlines = len(oldheadlines)
oldheadlines.extend(newheadlines)
self.cachedHeadlines[url] = oldheadlines
if newheadlines:
def filter_whitelist(headline):
v = False
for kw in whitelist:
if kw in headline[0] or kw in headline[1]:
v = True
break
return v
def filter_blacklist(headline):
v = True
for kw in blacklist:
if kw in headline[0] or kw in headline[1]:
v = False
break
return v
for channel in channels:
if number_of_headlines == 0:
channelnewheadlines = newheadlines[:self.registryValue('initialAnnounceHeadlines', channel)]
else:
channelnewheadlines = newheadlines[:]
whitelist = self.registryValue('keywordWhitelist', channel)
blacklist = self.registryValue('keywordBlacklist', channel)
if len(whitelist) != 0:
channelnewheadlines = filter(filter_whitelist, channelnewheadlines)
if len(blacklist) != 0:
channelnewheadlines = filter(filter_blacklist, channelnewheadlines)
channelnewheadlines = list(channelnewheadlines)
if len(channelnewheadlines) == 0:
return
bold = self.registryValue('bold', channel)
sep = self.registryValue('headlineSeparator', channel)
prefix = self.registryValue('announcementPrefix', channel)
suffix = self.registryValue('announcementSeparator', channel)
pre = format('%s%s%s', prefix, name, suffix)
if bold:
pre = ircutils.bold(pre)
sep = ircutils.bold(sep)
headlines = self.buildHeadlines(channelnewheadlines, channel)
irc.replies(headlines, prefixer=pre, joiner=sep,
to=channel, prefixNick=False, private=True)
finally:
self.releaseLock(url)
Reports the titles for %s at the RSS feed %u. If
<number of headlines> is given, returns only that many headlines.
RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod
seconds, which defaults to 1800 (30 minutes) since that's what most
websites prefer."""), self.name, self.url)
def f(self2, irc, msg, args):
args.insert(0, self.url)
self2.rss(irc, msg, args)
f = utils.python.changeFunctionName(f, self.name, docstring)
f = types.MethodType(f, plugin)
return f
def willGetNewFeed(self, url):
now = time.time()
wait = self.registryValue('waitPeriod')
if url not in self.lastRequest or now - self.lastRequest[url] > wait:
return True
else:
return False
def acquireLock(self, url, blocking=True):
try:
self.gettingLockLock.acquire()
try:
lock = self.locks[url]
except KeyError:
lock = threading.RLock()
self.locks[url] = lock
return lock.acquire(blocking=blocking)
finally:
self.gettingLockLock.release()
def releaseLock(self, url):
self.locks[url].release()
def getFeed(self, url):
def error(s):
return {'items': [{'title': s}]}
try:
# This is the most obvious place to acquire the lock, because a
# malicious user could conceivably flood the bot with rss commands
# and DoS the website in question.
self.acquireLock(url)
if self.willGetNewFeed(url):
results = {}
try:
self.log.debug('Downloading new feed from %u', url)
results = feedparser.parse(url)
if 'bozo_exception' in results and not results['entries']:
raise results['bozo_exception']
except feedparser.sgmllib.SGMLParseError:
self.log.exception('Uncaught exception from feedparser:')
raise callbacks.Error('Invalid (unparsable) RSS feed.')
except socket.timeout:
return error('Timeout downloading feed.')
except Exception as e:
# These seem mostly harmless. We'll need reports of a
# kind that isn't.
self.log.debug('Allowing bozo_exception %r through.', e)
if results.get('feed', {}) and self.getHeadlines(results):
self.cachedFeeds[url] = results
self.lastRequest[url] = time.time()
else:
self.log.debug('Not caching results; feed is empty.')
try:
return self.cachedFeeds[url]
except KeyError:
wait = self.registryValue('waitPeriod')
# If there's a problem retrieving the feed, we should back off
# for a little bit before retrying so that there is time for
# the error to be resolved.
self.lastRequest[url] = time.time() - .5 * wait
return error('Unable to download feed.')
finally:
self.releaseLock(url)
def _getConverter(self, feed):
toText = utils.web.htmlToText
if 'encoding' in feed:
def conv(s):
# encode() first so there implicit encoding doesn't happen in
# other functions when unicode and bytestring objects are used
# together
s = s.encode(feed['encoding'], 'replace')
s = toText(s).strip()
return s
return conv
else:
return lambda s: toText(s).strip()
def _sortFeedItems(self, items):
def sort_feed_items(items, order):
"""Return feed items, sorted according to sortFeedItems."""
order = self.registryValue('sortFeedItems')
if order not in ['oldestFirst', 'newestFirst']:
return items
if order == 'oldestFirst':
@ -348,40 +105,179 @@ class RSS(callbacks.Plugin):
return items
return sitems
def getHeadlines(self, feed):
headlines = []
t = time.time()
conv = self._getConverter(feed)
for d in self._sortFeedItems(feed['items']):
if 'title' in d:
title = conv(d['title'])
link = d.get('link')
pubDate = d.get('pubDate', d.get('updated'))
headlines.append((title, link, pubDate, t))
return headlines
class RSS(callbacks.Plugin):
"""This plugin is useful both for announcing updates to RSS feeds in a
channel, and for retrieving the headlines of RSS feeds via command. Use
the "add" command to add feeds to this plugin, and use the "announce"
command to determine what feeds should be announced in a given channel."""
threaded = True
def __init__(self, irc):
# Initialise the parent plugin, then rebuild the in-memory feed tables
# from the names stored in the 'feeds' registry value. Nothing is
# downloaded here: register_feed() only constructs Feed objects.
self.__parent = super(RSS, self)
self.__parent.__init__(irc)
# Scheme: {name: url}
self.feed_names = callbacks.CanonicalNameDict()
# Scheme: {url: feed}
self.feeds = {}
for name in self.registryValue('feeds'):
# Raises callbacks.Error if `name` is already a command of this plugin.
self.assert_feed_does_not_exist(name)
# No url is passed, so the per-feed registry value is registered with
# '' as its default.
self.register_feed_config(name)
try:
url = self.registryValue(registry.join(['feeds', name]))
except registry.NonExistentRegistryEntry:
self.log.warning('%s is not a registered feed, removing.',name)
continue
# The last argument is plugin_is_loading.
self.register_feed(name, url, True)
@internationalizeDocstring
def makeFeedCommand(self, name, url):
docstring = format("""[<number of headlines>]
##################
# Feed registering
Reports the titles for %s at the RSS feed %u. If
<number of headlines> is given, returns only that many headlines.
RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod
seconds, which defaults to 1800 (30 minutes) since that's what most
websites prefer.
""", name, url)
if url not in self.locks:
self.locks[url] = threading.RLock()
def assert_feed_does_not_exist(self, name):
    """Raise callbacks.Error if `name` is already a command of this plugin.

    "Command" is whatever isCommandMethod() accepts, which includes
    feeds that are already registered.
    """
    if not self.isCommandMethod(name):
        return
    message = format('I already have a command in this plugin named %s.', name)
    raise callbacks.Error(message)
def f(self, irc, msg, args):
args.insert(0, url)
self.rss(irc, msg, args)
f = utils.python.changeFunctionName(f, name, docstring)
f = types.MethodType(f, self)
self.feedNames[name] = (url, f)
self._registerFeed(name, url)
def register_feed_config(self, name, url=''):
    """Persist a feed in the registry.

    Adds `name` to the plugin's 'feeds' value and registers a global
    string value called `name` under the 'feeds' group, with `url` as
    its default.
    """
    known_names = self.registryValue('feeds')
    known_names.add(name)
    feeds_group = self.registryValue('feeds', value=False)
    conf.registerGlobalValue(feeds_group, name, registry.String(url, ''))
def register_feed(self, name, url, plugin_is_loading):
    """Create the in-memory Feed for `name`/`url` and index it.

    feed_names maps name -> url and feeds maps url -> Feed.
    `plugin_is_loading` is forwarded to the Feed constructor.
    """
    self.feed_names[name] = url
    new_feed = Feed(name, url, plugin_is_loading)
    self.feeds[url] = new_feed
def remove_feed(self, feed):
    """Forget `feed`: drop it from both in-memory tables and from the
    plugin's 'feeds' registry value and group."""
    name = feed.name
    del self.feed_names[name]
    del self.feeds[feed.url]
    feeds_group = conf.supybot.plugins.RSS.feeds
    feeds_group().remove(name)
    feeds_group.unregister(name)
##################
# Methods handling
def isCommandMethod(self, name):
    """Return True if `name` is a command of the parent plugin class or
    resolves to a registered feed (see get_feed)."""
    if self.__parent.isCommandMethod(name):
        return True
    return bool(self.get_feed(name))
def listCommands(self):
    """Return the parent's command list extended with one command per
    registered feed.

    The dynamic commands are the feed *names*. self.feeds is keyed by
    URL (scheme {url: feed}), so its keys must not be used here;
    self.feed_names (scheme {name: url}) holds the command names.
    """
    return self.__parent.listCommands(self.feed_names.keys())
def getCommandMethod(self, command):
    """Resolve `command` (a sequence whose first item is the command
    name) to a callable.

    The parent class is asked first; if it raises AttributeError, the
    first item is looked up as a feed and that feed's generated command
    is returned.
    """
    try:
        method = self.__parent.getCommandMethod(command)
    except AttributeError:
        feed = self.get_feed(command[0])
        method = feed.get_command(self)
    return method
def __call__(self, irc, msg):
# Let the parent class handle the message first.
self.__parent.__call__(irc, msg)
# Then refresh announced feeds: update_feeds() goes through
# update_feed_if_needed(), which calls update_feed() directly (not via
# update_feed_in_thread) for every announced feed that is expired.
self.update_feeds()
##################
# Status accessors
def get_feed(self, name):
    """Return the Feed registered under `name`, or None.

    `name` is first translated through feed_names (name -> url); if it
    is not a known name it is used as the URL key directly, so a
    registered feed's URL is accepted as well.
    """
    url = self.feed_names.get(name, name)
    return self.feeds.get(url, None)
def is_expired(self, feed):
    """Return True if `feed` was last updated more than 'waitPeriod'
    seconds ago. `feed` must be truthy."""
    assert feed
    now = time.time()
    wait_period = self.registryValue('waitPeriod')
    return feed.last_update < now - wait_period
###############
# Feed fetching
def update_feed(self, feed):
    """Download and parse `feed.url`, store the result on `feed`, then
    announce whatever is new."""
    with feed.lock:
        parsed = feedparser.parse(feed.url)
        feed.data = parsed.feed
        feed.entries = parsed.entries
        feed.last_update = time.time()
    # Announce only after the lock is released: announce_feed() ends up in
    # get_new_entries(), which takes feed.lock itself.
    self.announce_feed(feed)
def update_feed_in_thread(self, feed):
    """Run update_feed(feed) in a daemon world.SupyThread."""
    # Stamped before the thread starts; presumably so is_expired() stops
    # reporting the feed as stale while the fetch is in flight.
    feed.last_update = time.time()
    worker = world.SupyThread(target=self.update_feed,
                              name=format('Fetching feed %u', feed.url),
                              args=(feed,))
    worker.setDaemon(True)
    worker.start()
def update_feed_if_needed(self, feed):
    """Refresh `feed` (synchronously) only when is_expired() says so."""
    if not self.is_expired(feed):
        return
    self.update_feed(feed)
def update_feeds(self):
    """Refresh every feed that is announced in at least one channel.

    The 'announce' value of every channel on every connected network is
    collected, and each corresponding feed is refreshed if it has
    expired.
    """
    announced_feeds = set()
    for irc in world.ircs:
        for channel in irc.state.channels:
            announced_feeds.update(self.registryValue('announce', channel))
    for name in announced_feeds:
        feed = self.get_feed(name)
        if not feed:
            # An announced name may have no Feed object (e.g. the feed was
            # removed but is still listed in a channel's 'announce' value).
            # Passing None on would trip the assertion in is_expired().
            # Logged at debug level because this method runs for every
            # incoming message.
            self.log.debug('Feed %s is announced but is not registered; '
                           'skipping.', name)
            continue
        self.update_feed_if_needed(feed)
def get_new_entries(self, feed):
    """Return the entries of `feed` that have not been announced yet and
    remember them as announced.

    An entry is identified by the first non-empty field among 'id',
    'link', 'title' and 'description', since 'id' is optional in feeds.
    Entries with none of these fields cannot be tracked and are skipped.
    Returns a list in feed order; empty when nothing is new.
    """
    def entry_key(entry):
        for field in ('id', 'link', 'title', 'description'):
            value = entry.get(field)
            if value:
                return value
        return None

    with feed.lock:
        entries = feed.entries
        announced = feed.announced_entries
        new_entries = []
        new_keys = []
        for entry in entries:
            key = entry_key(entry)
            if key is not None and key not in announced:
                new_entries.append(entry)
                new_keys.append(key)
        if not new_entries:
            return []
        # Add the keys one by one, in feed order, rather than through an
        # intermediate (unordered) set, so the insertion order tracked by
        # announced_entries is deterministic.
        for key in new_keys:
            announced.add(key)
        # We keep a little more because we don't want to re-announce
        # oldest entries if one of the newest gets removed.
        announced.truncate(2 * len(entries))
    return new_entries
def announce_feed(self, feed):
    """Send the not-yet-announced entries of `feed` to every channel
    whose 'announce' value contains the feed's name.

    Entries are ordered according to the 'sortFeedItems' setting before
    being sent.
    """
    pending = self.get_new_entries(feed)
    pending = sort_feed_items(pending, self.registryValue('sortFeedItems'))
    for irc in world.ircs:
        for channel in irc.state.channels:
            if feed.name in self.registryValue('announce', channel):
                for entry in pending:
                    self.announce_entry(irc, channel, feed, entry)
#################
# Entry rendering
def should_send_entry(self, channel, entry):
    """Apply the channel's keyword filters to `entry`.

    Returns False if a whitelist is set and none of its keywords occur
    in the entry's title or description, or if a blacklist is set and
    any of its keywords occurs there; True otherwise. A missing title or
    description is treated as an empty string.
    """
    whitelist = self.registryValue('keywordWhitelist', channel)
    blacklist = self.registryValue('keywordBlacklist', channel)
    if not whitelist and not blacklist:
        return True
    title = entry.get('title') or ''
    description = entry.get('description') or ''

    # NOTE(review): this module star-imports supybot.commands, which
    # supplies spec helpers such as many()/first()/additional();
    # presumably it also exports `any`, which would shadow the builtin
    # here -- confirm. The explicit loop works either way.
    def mentions_one_of(keywords):
        for keyword in keywords:
            if keyword in title or keyword in description:
                return True
        return False

    if whitelist and not mentions_one_of(whitelist):
        return False
    if blacklist and mentions_one_of(blacklist):
        return False
    return True
def format_entry(self, channel, feed, entry, is_announce):
    """Render `entry` with the channel's headline template.

    The 'announceFormat' value is used when `is_announce` is true,
    'format' otherwise. Placeholders are resolved from the entry's own
    fields plus $feed_name and $date; unknown placeholders are left
    untouched.
    """
    if is_announce:
        template = self.registryValue('announceFormat', channel)
    else:
        template = self.registryValue('format', channel)
    date = entry.get('published_parsed') or entry.get('updated_parsed')
    if date is None:
        # The entry carries no parsed date: fall back to the current time.
        date = time.localtime()
    date = utils.str.timestamp(date)
    # The entry is the mapping and the keyword arguments override it.
    # Passing the template string as the mapping made every unknown
    # placeholder raise TypeError, and expanding **entry could collide
    # with the feed_name/date keywords.
    return string.Template(template).safe_substitute(entry,
                                                     feed_name=feed.name,
                                                     date=date)
def announce_entry(self, irc, channel, feed, entry):
    """Queue `entry` as a PRIVMSG to `channel`, formatted as an
    announcement, unless the channel's keyword filters reject it."""
    if not self.should_send_entry(channel, entry):
        return
    text = self.format_entry(channel, feed, entry, True)
    irc.queueMsg(ircmsgs.privmsg(channel, text))
##########
# Commands
@internationalizeDocstring
def add(self, irc, msg, args, name, url):
@ -390,7 +286,9 @@ class RSS(callbacks.Plugin):
Adds a command to this plugin that will look up the RSS feed at the
given URL.
"""
self.makeFeedCommand(name, url)
self.assert_feed_does_not_exist(name)
self.register_feed_config(name, url)
self.register_feed(name, url, False)
irc.replySuccess()
add = wrap(add, ['feedName', 'url'])
@ -401,12 +299,11 @@ class RSS(callbacks.Plugin):
Removes the command for looking up RSS feeds at <name> from
this plugin.
"""
if name not in self.feedNames:
feed = self.get_feed(name)
if not feed:
irc.error(_('That\'s not a valid RSS feed command name.'))
return
del self.feedNames[name]
conf.supybot.plugins.RSS.feeds().remove(name)
conf.supybot.plugins.RSS.feeds.unregister(name)
self.remove_feed(feed)
irc.replySuccess()
remove = wrap(remove, ['feedName'])
@ -434,10 +331,14 @@ class RSS(callbacks.Plugin):
"""
announce = conf.supybot.plugins.RSS.announce
S = announce.get(channel)()
for feed in feeds:
S.add(feed)
plugin = irc.getCallback('RSS')
for name in feeds:
S.add(name)
announce.get(channel).setValue(S)
irc.replySuccess()
for name in feeds:
feed = plugin.get_feed(name)
plugin.announce_feed(feed)
add = wrap(add, [('checkChannelCapability', 'op'),
many(first('url', 'feedName'))])
@ -467,23 +368,25 @@ class RSS(callbacks.Plugin):
If <number of headlines> is given, return only that many headlines.
"""
self.log.debug('Fetching %u', url)
feed = self.getFeed(url)
feed = self.get_feed(url)
if not feed:
feed = Feed(url, url)
if irc.isChannel(msg.args[0]):
channel = msg.args[0]
else:
channel = None
headlines = self.getHeadlines(feed)
if not headlines:
self.update_feed_if_needed(feed)
entries = feed.entries
if not entries:
irc.error(_('Couldn\'t get RSS feed.'))
return
headlines = self.buildHeadlines(headlines, channel, 'showLinks', 'showPubDate')
if n:
headlines = headlines[:n]
else:
headlines = headlines[:self.registryValue('defaultNumberOfHeadlines')]
n = n or self.registryValue('defaultNumberOfHeadlines', channel)
entries = list(filter(lambda e:self.should_send_entry(channel, e),
feed.entries))
entries = entries[:n]
headlines = map(lambda e:self.format_entry(channel, feed, e, False),
entries)
sep = self.registryValue('headlineSeparator', channel)
if self.registryValue('bold', channel):
sep = ircutils.bold(sep)
irc.replies(headlines, joiner=sep)
rss = wrap(rss, ['url', additional('int')])
@ -498,9 +401,11 @@ class RSS(callbacks.Plugin):
url = self.registryValue('feeds.%s' % url)
except registry.NonExistentRegistryEntry:
pass
feed = self.getFeed(url)
conv = self._getConverter(feed)
info = feed.get('feed')
feed = self.get_feed(url)
if not feed:
feed = Feed(url, url)
self.update_feed_if_needed(feed)
info = feed.data
if not info:
irc.error(_('I couldn\'t retrieve that RSS feed.'))
return
@ -510,10 +415,10 @@ class RSS(callbacks.Plugin):
now = time.mktime(time.gmtime())
when = utils.timeElapsed(now - seconds) + ' ago'
else:
when = 'time unavailable'
title = conv(info.get('title', 'unavailable'))
desc = conv(info.get('description', 'unavailable'))
link = conv(info.get('link', 'unavailable'))
when = _('time unavailable')
title = info.get('title', _('unavailable'))
desc = info.get('description', _('unavailable'))
link = info.get('link', _('unavailable'))
# The rest of the entries are all available in the channel key
response = format(_('Title: %s; URL: %u; '
'Description: %s; Last updated: %s.'),

View File

@ -374,6 +374,7 @@ registerChannelValue(supybot.reply.format, 'time',
def timestamp(t):
    """Format a time according to supybot.reply.format.time.

    `t` may be None (meaning "now"), a number of seconds since the
    epoch (int or float), or a time tuple / struct_time, which is passed
    to time.strftime unchanged.
    """
    if t is None:
        t = time.time()
    # Deliberately a separate `if`, not `elif`: the float produced for
    # None must also be converted, since time.strftime only accepts a
    # time tuple.
    if isinstance(t, (int, float)):
        t = time.localtime(t)
    fmt = get(supybot.reply.format.time, dynamic.channel)
    return time.strftime(fmt, t)

View File

@ -454,5 +454,40 @@ class CacheDict(collections.MutableMapping):
def __len__(self):
return len(self.d)
try:
    from collections.abc import MutableSet as _MutableSet
except ImportError:  # Python 2
    from collections import MutableSet as _MutableSet


class TruncatableSet(_MutableSet):
    """A set that keeps track of the order of inserted elements so
    the oldest can be removed.

    Invariant: `_ordered_items` lists every element of `_items` exactly
    once, oldest first.
    """

    def __init__(self, iterable=()):
        self._ordered_items = []
        self._items = set()
        # Go through add() so duplicates in `iterable` are stored once;
        # otherwise the list and the set would disagree.
        for item in iterable:
            self.add(item)

    def __repr__(self):
        return 'TruncatableSet(%r)' % (self._ordered_items,)

    def __contains__(self, item):
        return item in self._items

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)

    def add(self, item):
        """Add `item` as the newest element; no-op if already present."""
        if item not in self._items:
            self._items.add(item)
            self._ordered_items.append(item)

    def discard(self, item):
        """Remove `item` if present; silently do nothing otherwise."""
        if item in self._items:
            self._items.remove(item)
            self._ordered_items.remove(item)

    def truncate(self, size):
        """Keep only the `size` most recently added elements."""
        assert size >= 0
        removed_size = len(self) - size
        if removed_size <= 0:
            return
        if size == 0:
            # Handled apart because the slice [-0:] is the whole list.
            self._ordered_items = []
            self._items = set()
        elif removed_size < size:
            # Few removals: drop them from the existing set.
            removed = self._ordered_items[:removed_size]
            self._ordered_items = self._ordered_items[removed_size:]
            self._items.difference_update(removed)
        else:
            # Few survivors: rebuild the set from what is kept.
            self._ordered_items = self._ordered_items[-size:]
            self._items = set(self._ordered_items)
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -1133,6 +1133,32 @@ class TestCacheDict(SupyTestCase):
self.failUnless(i in d)
self.failUnless(d[i] == i)
class TestTruncatableSet(SupyTestCase):
    # Covers TruncatableSet: membership, discard/add, and truncate() keeping
    # the most recently inserted elements.
    def testBasics(self):
        items = TruncatableSet(['foo', 'bar', 'baz', 'qux'])
        self.assertEqual(items, {'foo', 'bar', 'baz', 'qux'})
        for present in ('foo', 'bar'):
            self.failUnless(present in items)
        self.failIf('quux' in items)
        items.discard('baz')
        self.failUnless('foo' in items)
        self.failIf('baz' in items)
        items.add('quux')
        self.failUnless('quux' in items)

    def testTruncate(self):
        # 'foo' is the oldest element, so it is the one dropped.
        items = TruncatableSet(['foo', 'bar'])
        for newer in ('baz', 'qux'):
            items.add(newer)
        items.truncate(3)
        self.assertEqual(items, {'bar', 'baz', 'qux'})

    def testTruncateUnion(self):
        # Elements merged in with |= are newer than the initial ones.
        items = TruncatableSet(['bar', 'foo'])
        items |= {'baz', 'qux'}
        items.truncate(3)
        self.assertEqual(items, {'foo', 'baz', 'qux'})
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: