Merge branch 'rewrite-rss' into testing

Conflicts:
	src/utils/str.py
This commit is contained in:
Valentin Lorentz 2014-07-31 19:24:01 +00:00
commit 4a3d39e747
5 changed files with 310 additions and 358 deletions

View File

@ -50,19 +50,33 @@ class FeedItemSortOrder(registry.OnlySomeStrings):
validStrings = ('asInFeed', 'oldestFirst', 'newestFirst') validStrings = ('asInFeed', 'oldestFirst', 'newestFirst')
RSS = conf.registerPlugin('RSS') RSS = conf.registerPlugin('RSS')
conf.registerChannelValue(RSS, 'bold', registry.Boolean(
True, _("""Determines whether the bot will bold the title of the feed when conf.registerGlobalValue(RSS, 'feeds',
it announces news."""))) FeedNames([], _("""Determines what feeds should be accessible as
commands.""")))
########
# Format
conf.registerChannelValue(RSS, 'headlineSeparator', conf.registerChannelValue(RSS, 'headlineSeparator',
registry.StringSurroundedBySpaces('|', _("""Determines what string is registry.StringSurroundedBySpaces('|', _("""Determines what string is
used to separate headlines in new feeds."""))) used to separate headlines in new feeds.""")))
conf.registerChannelValue(RSS, 'announcementPrefix', conf.registerChannelValue(RSS, 'format',
registry.StringWithSpaceOnRight(_('News from '), _("""Determines what registry.String(_('$date: $title <$link>'), _("""The format the bot
prefix is prepended (if any) to the news item announcements made in the will use for displaying headlines of a RSS feed that is triggered
channel."""))) manually. In addition to fields defined by feedparser ($published
conf.registerChannelValue(RSS, 'announcementSeparator', (the entry date), $title, $link, $description, $id, etc.), the following
registry.StringWithSpaceOnRight(_(': '), _("""Determines what variables can be used: $feed_name, $date (parsed date, as defined in
suffix is appended to the feed name in a news item."""))) supybot.reply.format.time)""")))
conf.registerChannelValue(RSS, 'announceFormat',
registry.String(_('News from $feed_name: $title <$link>'),
_("""The format the bot will use for displaying headlines of a RSS feed
that is announced. See supybot.plugins.RSS.format for the available
variables.""")))
###########
# Announces
conf.registerChannelValue(RSS, 'announce', conf.registerChannelValue(RSS, 'announce',
registry.SpaceSeparatedSetOfStrings([], _("""Determines which RSS feeds registry.SpaceSeparatedSetOfStrings([], _("""Determines which RSS feeds
should be announced in the channel; valid input is a list of strings should be announced in the channel; valid input is a list of strings
@ -75,25 +89,10 @@ conf.registerGlobalValue(RSS, 'sortFeedItems',
FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be
sorted by their update timestamp or kept in the same order as they appear sorted by their update timestamp or kept in the same order as they appear
in a feed."""))) in a feed.""")))
conf.registerGlobalValue(RSS, 'stripRedirect', registry.Boolean(
True, """Determines whether the bot will attempt to strip url redirection
from headline links, by taking things after the last http://."""))
conf.registerGlobalValue(RSS, 'feeds', ####################
FeedNames([], _("""Determines what feeds should be accessible as # Headlines filtering
commands."""))) conf.registerChannelValue(RSS, 'defaultNumberOfHeadlines',
conf.registerChannelValue(RSS, 'showLinks',
registry.Boolean(True, _("""Determines whether the bot will list the link
along with the title of the feed when the rss command is called.
supybot.plugins.RSS.announce.showLinks affects whether links will be
listed when a feed is automatically announced.""")))
conf.registerChannelValue(RSS, 'showPubDate',
registry.Boolean(False, """Determines whether the bot will list the
publication datetime stamp along with the title of the feed when the rss
command is called.
supybot.plugins.RSS.announce.showPubDate affects whether this will be
listed when a feed is automatically announced."""))
conf.registerGlobalValue(RSS, 'defaultNumberOfHeadlines',
registry.PositiveInteger(1, _("""Indicates how many headlines an rss feed registry.PositiveInteger(1, _("""Indicates how many headlines an rss feed
will output by default, if no number is provided."""))) will output by default, if no number is provided.""")))
conf.registerChannelValue(RSS, 'initialAnnounceHeadlines', conf.registerChannelValue(RSS, 'initialAnnounceHeadlines',
@ -108,19 +107,5 @@ conf.registerChannelValue(RSS, 'keywordBlacklist',
strings, lets you filter headlines to those not containing any items strings, lets you filter headlines to those not containing any items
in this blacklist."""))) in this blacklist.""")))
conf.registerGroup(RSS, 'announce')
conf.registerChannelValue(RSS.announce, 'showLinks',
registry.Boolean(True, _("""Determines whether the bot will list the link
along with the title of the feed when a feed is automatically
announced.""")))
conf.registerChannelValue(RSS.announce, 'showPubDate',
registry.Boolean(False, """Determines whether the bot will list the
publication datetime stamp along with the title of the feed when a feed
is automatically announced."""))
conf.registerGlobalValue(RSS.announce, 'cachePeriod',
registry.PositiveInteger(604800, """Maximum age of cached RSS headlines,
in seconds. Headline cache is used to avoid re-announcing old news."""))
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -1,6 +1,7 @@
### ###
# Copyright (c) 2002-2004, Jeremiah Fincher # Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2008-2010, James McCoy # Copyright (c) 2008-2010, James McCoy
# Copyright (c) 2014, Valentin Lorentz
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -30,6 +31,7 @@
import time import time
import types import types
import string
import socket import socket
import threading import threading
import re import re
@ -40,18 +42,68 @@ import supybot.conf as conf
import supybot.utils as utils import supybot.utils as utils
import supybot.world as world import supybot.world as world
from supybot.commands import * from supybot.commands import *
import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils import supybot.ircutils as ircutils
import supybot.registry as registry import supybot.registry as registry
import supybot.callbacks as callbacks import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization, internationalizeDocstring from supybot.i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization('RSS') _ = PluginInternationalization('RSS')
def getFeedName(irc, msg, args, state): def get_feedName(irc, msg, args, state):
if not registry.isValidRegistryName(args[0]): if not registry.isValidRegistryName(args[0]):
state.errorInvalid('feed name', args[0], state.errorInvalid('feed name', args[0],
'Feed names must not include spaces.') 'Feed names must not include spaces.')
state.args.append(callbacks.canonicalName(args.pop(0))) state.args.append(callbacks.canonicalName(args.pop(0)))
addConverter('feedName', getFeedName) addConverter('feedName', get_feedName)
class Feed:
    """Per-feed state: its name and URL, the last fetched data and entries,
    and the bookkeeping used for updating and announcing."""
    __slots__ = ('url', 'name', 'data', 'last_update', 'entries',
            'lock', 'announced_entries')

    def __init__(self, name, url, plugin_is_loading=False):
        """Create the feed `name` located at `url`.

        A falsy `url` means the feed has no separate name: `name` must then
        match utils.web.httpUrlRe and is used as the URL as well.
        """
        assert name, name
        if not url:
            assert utils.web.httpUrlRe.match(name), name
            url = name
        self.url = url
        self.name = name
        self.lock = threading.Lock()
        self.data = None
        self.entries = []
        self.announced_entries = utils.structures.TruncatableSet()
        # We don't want to fetch feeds right after the plugin is
        # loaded (the bot could be starting, and thus already busy)
        if plugin_is_loading:
            self.last_update = time.time()
        else:
            self.last_update = 0

    def get_command(self, plugin):
        """Return a method bound to `plugin`, named after this feed, that
        prepends the feed URL to the arguments and calls `plugin.rss`."""
        help_text = format(_("""[<number of headlines>]
        Reports the titles for %s at the RSS feed %u. If
        <number of headlines> is given, returns only that many headlines.
        RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod
        seconds, which defaults to 1800 (30 minutes) since that's what most
        websites prefer."""), self.name, self.url)

        def command(callback, irc, msg, args):
            # self.url is read when the command runs, not when it is built.
            args.insert(0, self.url)
            callback.rss(irc, msg, args)

        renamed = utils.python.changeFunctionName(command, self.name,
                                                  help_text)
        return types.MethodType(renamed, plugin)
def sort_feed_items(items, order):
    """Return feed items, sorted according to sortFeedItems.

    `order` is a sortFeedItems value: 'oldestFirst' and 'newestFirst' sort
    by update time; any other value (e.g. 'asInFeed') returns `items`
    untouched.  When the items carry no usable timestamp, `items` is
    returned unchanged as well.
    """
    if order not in ('oldestFirst', 'newestFirst'):
        return items
    reverse = (order == 'newestFirst')
    # Prefer the parsed timestamp: 'updated' is the date as a string, and
    # comparing such strings orders items alphabetically (by day name for
    # RFC 822 dates) instead of chronologically.  'updated' is kept as a
    # fallback for items that only carry the raw field.
    for field in ('updated_parsed', 'updated'):
        try:
            return sorted(items, key=lambda item: item[field],
                          reverse=reverse)
        except (KeyError, TypeError):
            # KeyError: some item lacks this field.
            # TypeError: values that cannot be compared with each other
            # (e.g. a None timestamp next to a real one).
            continue
    # feedparser normalizes required timestamp fields in ATOM and RSS
    # to the "updated" field. Feeds missing it are unsortable by date.
    return items
class RSS(callbacks.Plugin): class RSS(callbacks.Plugin):
"""This plugin is useful both for announcing updates to RSS feeds in a """This plugin is useful both for announcing updates to RSS feeds in a
@ -62,326 +114,170 @@ class RSS(callbacks.Plugin):
def __init__(self, irc): def __init__(self, irc):
self.__parent = super(RSS, self) self.__parent = super(RSS, self)
self.__parent.__init__(irc) self.__parent.__init__(irc)
# Schema is feed : [url, command] # Scheme: {name: url}
self.feedNames = callbacks.CanonicalNameDict() self.feed_names = callbacks.CanonicalNameDict()
self.locks = {} # Scheme: {url: feed}
self.lastRequest = {} self.feeds = {}
self.cachedFeeds = {}
self.cachedHeadlines = {}
self.gettingLockLock = threading.Lock()
for name in self.registryValue('feeds'): for name in self.registryValue('feeds'):
self._registerFeed(name) self.assert_feed_does_not_exist(name)
self.register_feed_config(name)
try: try:
url = self.registryValue(registry.join(['feeds', name])) url = self.registryValue(registry.join(['feeds', name]))
except registry.NonExistentRegistryEntry: except registry.NonExistentRegistryEntry:
self.log.warning('%s is not a registered feed, removing.',name) self.log.warning('%s is not a registered feed, removing.',name)
continue continue
self.makeFeedCommand(name, url) self.register_feed(name, url, True)
self.getFeed(url) # So announced feeds don't announce on startup.
##################
# Feed registering
def assert_feed_does_not_exist(self, name):
if self.isCommandMethod(name):
s = format('I already have a command in this plugin named %s.',name)
raise callbacks.Error(s)
def register_feed_config(self, name, url=''):
self.registryValue('feeds').add(name)
group = self.registryValue('feeds', value=False)
conf.registerGlobalValue(group, name, registry.String(url, ''))
def register_feed(self, name, url, plugin_is_loading):
self.feed_names[name] = url
self.feeds[url] = Feed(name, url, plugin_is_loading)
def remove_feed(self, feed):
del self.feed_names[feed.name]
del self.feeds[feed.url]
conf.supybot.plugins.RSS.feeds().remove(feed.name)
conf.supybot.plugins.RSS.feeds.unregister(feed.name)
##################
# Methods handling
def isCommandMethod(self, name): def isCommandMethod(self, name):
if not self.__parent.isCommandMethod(name): if not self.__parent.isCommandMethod(name):
if name in self.feedNames: return bool(self.get_feed(name))
return True
else:
return False
else: else:
return True return True
def listCommands(self): def listCommands(self):
return self.__parent.listCommands(self.feedNames.keys()) return self.__parent.listCommands(self.feeds.keys())
def getCommandMethod(self, command): def getCommandMethod(self, command):
try: try:
return self.__parent.getCommandMethod(command) return self.__parent.getCommandMethod(command)
except AttributeError: except AttributeError:
return self.feedNames[command[0]][1] return self.get_feed(command[0]).get_command(self)
def _registerFeed(self, name, url=''):
self.registryValue('feeds').add(name)
group = self.registryValue('feeds', value=False)
conf.registerGlobalValue(group, name, registry.String(url, ''))
def __call__(self, irc, msg): def __call__(self, irc, msg):
self.__parent.__call__(irc, msg) self.__parent.__call__(irc, msg)
irc = callbacks.SimpleProxy(irc, msg) self.update_feeds()
newFeeds = {}
for channel in irc.state.channels:
feeds = self.registryValue('announce', channel)
for name in feeds:
commandName = callbacks.canonicalName(name)
if self.isCommandMethod(commandName):
url = self.feedNames[commandName][0]
else:
url = name
if self.willGetNewFeed(url):
newFeeds.setdefault((url, name), []).append(channel)
for ((url, name), channels) in newFeeds.iteritems():
# We check if we can acquire the lock right here because if we
# don't, we'll possibly end up spawning a lot of threads to get
# the feed, because this thread may run for a number of bytecodes
# before it switches to a thread that'll get the lock in
# _newHeadlines.
if self.acquireLock(url, blocking=False):
try:
t = threading.Thread(target=self._newHeadlines,
name=format('Fetching %u', url),
args=(irc, channels, name, url))
self.log.info('Checking for announcements at %u', url)
world.threadsSpawned += 1
t.setDaemon(True)
t.start()
finally:
self.releaseLock(url)
time.sleep(0.1) # So other threads can run.
def buildHeadlines(self, headlines, channel, linksconfig='announce.showLinks', dateconfig='announce.showPubDate'):
newheadlines = []
for headline in headlines:
link = ''
pubDate = ''
if self.registryValue(linksconfig, channel):
if headline[1]:
if self.registryValue('stripRedirect'):
link = re.sub('^.*http://', 'http://', headline[1])
else:
link = headline[1]
if self.registryValue(dateconfig, channel):
if headline[2]:
pubDate = ' [%s]' % (headline[2],)
if sys.version_info[0] < 3:
if isinstance(headline[0], unicode):
try:
import charade.universaldetector
u = charade.universaldetector.UniversalDetector()
u.feed(headline[0])
u.close()
encoding = u.result['encoding']
except ImportError:
encoding = 'utf8'
newheadlines.append(format('%s %u%s',
headline[0].encode(encoding,'replace'),
link,
pubDate))
else:
newheadlines.append(format('%s %u%s',
headline[0],
link,
pubDate))
else:
newheadlines.append(format('%s %u%s',
headline[0],
link,
pubDate))
return newheadlines
def _newHeadlines(self, irc, channels, name, url): ##################
try: # Status accessors
# We acquire the lock here so there's only one announcement thread
# in this code at any given time. Otherwise, several announcement
# threads will getFeed (all blocking, in turn); then they'll all
# want to send their news messages to the appropriate channels.
# Note that we're allowed to acquire this lock twice within the
# same thread because it's an RLock and not just a normal Lock.
self.acquireLock(url)
t = time.time()
try:
#oldresults = self.cachedFeeds[url]
#oldheadlines = self.getHeadlines(oldresults)
oldheadlines = self.cachedHeadlines[url]
oldheadlines = list(filter(lambda x: t - x[3] <
self.registryValue('announce.cachePeriod'), oldheadlines))
except KeyError:
oldheadlines = []
newresults = self.getFeed(url)
newheadlines = self.getHeadlines(newresults)
if len(newheadlines) == 1:
s = newheadlines[0][0]
if s in ('Timeout downloading feed.',
'Unable to download feed.'):
self.log.debug('%s %u', s, url)
return
def normalize(headline):
return (tuple(headline[0].lower().split()), headline[1])
oldheadlinesset = set(map(normalize, oldheadlines))
for (i, headline) in enumerate(newheadlines):
if normalize(headline) in oldheadlinesset:
newheadlines[i] = None
newheadlines = list(filter(None, newheadlines)) # Removes Nones.
number_of_headlines = len(oldheadlines)
oldheadlines.extend(newheadlines)
self.cachedHeadlines[url] = oldheadlines
if newheadlines:
def filter_whitelist(headline):
v = False
for kw in whitelist:
if kw in headline[0] or kw in headline[1]:
v = True
break
return v
def filter_blacklist(headline):
v = True
for kw in blacklist:
if kw in headline[0] or kw in headline[1]:
v = False
break
return v
for channel in channels:
if number_of_headlines == 0:
channelnewheadlines = newheadlines[:self.registryValue('initialAnnounceHeadlines', channel)]
else:
channelnewheadlines = newheadlines[:]
whitelist = self.registryValue('keywordWhitelist', channel)
blacklist = self.registryValue('keywordBlacklist', channel)
if len(whitelist) != 0:
channelnewheadlines = filter(filter_whitelist, channelnewheadlines)
if len(blacklist) != 0:
channelnewheadlines = filter(filter_blacklist, channelnewheadlines)
channelnewheadlines = list(channelnewheadlines)
if len(channelnewheadlines) == 0:
return
bold = self.registryValue('bold', channel)
sep = self.registryValue('headlineSeparator', channel)
prefix = self.registryValue('announcementPrefix', channel)
suffix = self.registryValue('announcementSeparator', channel)
pre = format('%s%s%s', prefix, name, suffix)
if bold:
pre = ircutils.bold(pre)
sep = ircutils.bold(sep)
headlines = self.buildHeadlines(channelnewheadlines, channel)
irc.replies(headlines, prefixer=pre, joiner=sep,
to=channel, prefixNick=False, private=True)
finally:
self.releaseLock(url)
def willGetNewFeed(self, url): def get_feed(self, name):
now = time.time() return self.feeds.get(self.feed_names.get(name, name), None)
wait = self.registryValue('waitPeriod')
if url not in self.lastRequest or now - self.lastRequest[url] > wait:
return True
else:
return False
def acquireLock(self, url, blocking=True): def is_expired(self, feed):
try: assert feed
self.gettingLockLock.acquire() event_horizon = time.time() - self.registryValue('waitPeriod')
try: return feed.last_update < event_horizon
lock = self.locks[url]
except KeyError:
lock = threading.RLock()
self.locks[url] = lock
return lock.acquire(blocking=blocking)
finally:
self.gettingLockLock.release()
def releaseLock(self, url): ###############
self.locks[url].release() # Feed fetching
def getFeed(self, url): def update_feed(self, feed):
def error(s): with feed.lock:
return {'items': [{'title': s}]} d = feedparser.parse(feed.url)
try: feed.data = d.feed
# This is the most obvious place to acquire the lock, because a feed.entries = d.entries
# malicious user could conceivably flood the bot with rss commands feed.last_update = time.time()
# and DoS the website in question. self.announce_feed(feed)
self.acquireLock(url)
if self.willGetNewFeed(url): def update_feed_in_thread(self, feed):
results = {} feed.last_update = time.time()
try: t = world.SupyThread(target=self.update_feed,
self.log.debug('Downloading new feed from %u', url) name=format('Fetching feed %u', feed.url),
results = feedparser.parse(url) args=(feed,))
if 'bozo_exception' in results and not results['entries']: t.setDaemon(True)
raise results['bozo_exception'] t.start()
except feedparser.sgmllib.SGMLParseError:
self.log.exception('Uncaught exception from feedparser:') def update_feed_if_needed(self, feed):
raise callbacks.Error('Invalid (unparsable) RSS feed.') if self.is_expired(feed):
except socket.timeout: self.update_feed(feed)
return error('Timeout downloading feed.')
except Exception as e: def update_feeds(self):
# These seem mostly harmless. We'll need reports of a announced_feeds = set()
# kind that isn't. for irc in world.ircs:
self.log.debug('Allowing bozo_exception %r through.', e) for channel in irc.state.channels:
if results.get('feed', {}) and self.getHeadlines(results): announced_feeds |= self.registryValue('announce', channel)
self.cachedFeeds[url] = results for name in announced_feeds:
self.lastRequest[url] = time.time() self.update_feed_if_needed(self.get_feed(name))
else:
self.log.debug('Not caching results; feed is empty.') def get_new_entries(self, feed):
try: with feed.lock:
return self.cachedFeeds[url] entries = feed.entries
except KeyError: new_entries = [entry for entry in entries
wait = self.registryValue('waitPeriod') if entry.id not in feed.announced_entries]
# If there's a problem retrieving the feed, we should back off if not new_entries:
# for a little bit before retrying so that there is time for return []
# the error to be resolved. feed.announced_entries |= {entry.id for entry in new_entries}
self.lastRequest[url] = time.time() - .5 * wait # We keep a little more because we don't want to re-announce
return error('Unable to download feed.') # oldest entries if one of the newest gets removed.
finally: feed.announced_entries.truncate(2*len(entries))
self.releaseLock(url) return new_entries
def announce_feed(self, feed):
new_entries = self.get_new_entries(feed)
def _getConverter(self, feed):
toText = utils.web.htmlToText
if 'encoding' in feed:
def conv(s):
# encode() first so there implicit encoding doesn't happen in
# other functions when unicode and bytestring objects are used
# together
s = s.encode(feed['encoding'], 'replace')
s = toText(s).strip()
return s
return conv
else:
return lambda s: toText(s).strip()
def _sortFeedItems(self, items):
"""Return feed items, sorted according to sortFeedItems."""
order = self.registryValue('sortFeedItems') order = self.registryValue('sortFeedItems')
if order not in ['oldestFirst', 'newestFirst']: new_entries = sort_feed_items(new_entries, order)
return items for irc in world.ircs:
if order == 'oldestFirst': for channel in irc.state.channels:
reverse = False if feed.name not in self.registryValue('announce', channel):
if order == 'newestFirst': continue
reverse = True for entry in new_entries:
try: self.announce_entry(irc, channel, feed, entry)
sitems = sorted(items, key=lambda i: i['updated'], reverse=reverse)
except KeyError:
# feedparser normalizes required timestamp fields in ATOM and RSS
# to the "updated" field. Feeds missing it are unsortable by date.
return items
return sitems
def getHeadlines(self, feed):
headlines = []
t = time.time()
conv = self._getConverter(feed)
for d in self._sortFeedItems(feed['items']):
if 'title' in d:
title = conv(d['title'])
link = d.get('link')
pubDate = d.get('pubDate', d.get('updated'))
headlines.append((title, link, pubDate, t))
return headlines
@internationalizeDocstring #################
def makeFeedCommand(self, name, url): # Entry rendering
docstring = format("""[<number of headlines>]
Reports the titles for %s at the RSS feed %u. If def should_send_entry(self, channel, entry):
<number of headlines> is given, returns only that many headlines. whitelist = self.registryValue('keywordWhitelist', channel)
RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod blacklist = self.registryValue('keywordBlacklist', channel)
seconds, which defaults to 1800 (30 minutes) since that's what most if whitelist:
websites prefer. if all(kw not in entry.title and kw not in entry.description
""", name, url) for kw in whitelist):
if url not in self.locks: return False
self.locks[url] = threading.RLock() if blacklist:
if self.isCommandMethod(name): if any(kw in entry.title or kw in entry.description
s = format('I already have a command in this plugin named %s.',name) for kw in blacklist):
raise callbacks.Error(s) return False
def f(self, irc, msg, args): return True
args.insert(0, url)
self.rss(irc, msg, args) def format_entry(self, channel, feed, entry, is_announce):
f = utils.python.changeFunctionName(f, name, docstring) if is_announce:
f = types.MethodType(f, self) template = self.registryValue('announceFormat', channel)
self.feedNames[name] = (url, f) else:
self._registerFeed(name, url) template = self.registryValue('format', channel)
date = entry.get('published_parsed', entry.get('updated_parsed'))
date = utils.str.timestamp(date)
return string.Template(template).safe_substitute(template,
feed_name=feed.name,
date=date,
**entry)
def announce_entry(self, irc, channel, feed, entry):
if self.should_send_entry(channel, entry):
s = self.format_entry(channel, feed, entry, True)
irc.queueMsg(ircmsgs.privmsg(channel, s))
##########
# Commands
@internationalizeDocstring @internationalizeDocstring
def add(self, irc, msg, args, name, url): def add(self, irc, msg, args, name, url):
@ -390,7 +286,9 @@ class RSS(callbacks.Plugin):
Adds a command to this plugin that will look up the RSS feed at the Adds a command to this plugin that will look up the RSS feed at the
given URL. given URL.
""" """
self.makeFeedCommand(name, url) self.assert_feed_does_not_exist(name)
self.register_feed_config(name, url)
self.register_feed(name, url, False)
irc.replySuccess() irc.replySuccess()
add = wrap(add, ['feedName', 'url']) add = wrap(add, ['feedName', 'url'])
@ -401,12 +299,11 @@ class RSS(callbacks.Plugin):
Removes the command for looking up RSS feeds at <name> from Removes the command for looking up RSS feeds at <name> from
this plugin. this plugin.
""" """
if name not in self.feedNames: feed = self.get_feed(name)
if not feed:
irc.error(_('That\'s not a valid RSS feed command name.')) irc.error(_('That\'s not a valid RSS feed command name.'))
return return
del self.feedNames[name] self.remove_feed(feed)
conf.supybot.plugins.RSS.feeds().remove(name)
conf.supybot.plugins.RSS.feeds.unregister(name)
irc.replySuccess() irc.replySuccess()
remove = wrap(remove, ['feedName']) remove = wrap(remove, ['feedName'])
@ -434,10 +331,14 @@ class RSS(callbacks.Plugin):
""" """
announce = conf.supybot.plugins.RSS.announce announce = conf.supybot.plugins.RSS.announce
S = announce.get(channel)() S = announce.get(channel)()
for feed in feeds: plugin = irc.getCallback('RSS')
S.add(feed) for name in feeds:
S.add(name)
announce.get(channel).setValue(S) announce.get(channel).setValue(S)
irc.replySuccess() irc.replySuccess()
for name in feeds:
feed = plugin.get_feed(name)
plugin.announce_feed(feed)
add = wrap(add, [('checkChannelCapability', 'op'), add = wrap(add, [('checkChannelCapability', 'op'),
many(first('url', 'feedName'))]) many(first('url', 'feedName'))])
@ -467,23 +368,25 @@ class RSS(callbacks.Plugin):
If <number of headlines> is given, return only that many headlines. If <number of headlines> is given, return only that many headlines.
""" """
self.log.debug('Fetching %u', url) self.log.debug('Fetching %u', url)
feed = self.getFeed(url) feed = self.get_feed(url)
if not feed:
feed = Feed(url, url)
if irc.isChannel(msg.args[0]): if irc.isChannel(msg.args[0]):
channel = msg.args[0] channel = msg.args[0]
else: else:
channel = None channel = None
headlines = self.getHeadlines(feed) self.update_feed_if_needed(feed)
if not headlines: entries = feed.entries
if not entries:
irc.error(_('Couldn\'t get RSS feed.')) irc.error(_('Couldn\'t get RSS feed.'))
return return
headlines = self.buildHeadlines(headlines, channel, 'showLinks', 'showPubDate') n = n or self.registryValue('defaultNumberOfHeadlines', channel)
if n: entries = list(filter(lambda e:self.should_send_entry(channel, e),
headlines = headlines[:n] feed.entries))
else: entries = entries[:n]
headlines = headlines[:self.registryValue('defaultNumberOfHeadlines')] headlines = map(lambda e:self.format_entry(channel, feed, e, False),
entries)
sep = self.registryValue('headlineSeparator', channel) sep = self.registryValue('headlineSeparator', channel)
if self.registryValue('bold', channel):
sep = ircutils.bold(sep)
irc.replies(headlines, joiner=sep) irc.replies(headlines, joiner=sep)
rss = wrap(rss, ['url', additional('int')]) rss = wrap(rss, ['url', additional('int')])
@ -498,9 +401,11 @@ class RSS(callbacks.Plugin):
url = self.registryValue('feeds.%s' % url) url = self.registryValue('feeds.%s' % url)
except registry.NonExistentRegistryEntry: except registry.NonExistentRegistryEntry:
pass pass
feed = self.getFeed(url) feed = self.get_feed(url)
conv = self._getConverter(feed) if not feed:
info = feed.get('feed') feed = Feed(url, url)
self.update_feed_if_needed(feed)
info = feed.data
if not info: if not info:
irc.error(_('I couldn\'t retrieve that RSS feed.')) irc.error(_('I couldn\'t retrieve that RSS feed.'))
return return
@ -510,10 +415,10 @@ class RSS(callbacks.Plugin):
now = time.mktime(time.gmtime()) now = time.mktime(time.gmtime())
when = utils.timeElapsed(now - seconds) + ' ago' when = utils.timeElapsed(now - seconds) + ' ago'
else: else:
when = 'time unavailable' when = _('time unavailable')
title = conv(info.get('title', 'unavailable')) title = info.get('title', _('unavailable'))
desc = conv(info.get('description', 'unavailable')) desc = info.get('description', _('unavailable'))
link = conv(info.get('link', 'unavailable')) link = info.get('link', _('unavailable'))
# The rest of the entries are all available in the channel key # The rest of the entries are all available in the channel key
response = format(_('Title: %s; URL: %u; ' response = format(_('Title: %s; URL: %u; '
'Description: %s; Last updated: %s.'), 'Description: %s; Last updated: %s.'),

View File

@ -374,7 +374,8 @@ registerChannelValue(supybot.reply.format, 'time',
def timestamp(t):
    """Format a time according to supybot.reply.format.time.

    `t` may be None (meaning the current time), a number of seconds since
    the epoch, or a value time.strftime accepts directly, such as a
    time.struct_time.
    """
    if t is None:
        t = time.time()
    # Deliberately `if`, not `elif`: the float substituted for None above
    # has to be converted too, since time.strftime() rejects bare numbers.
    if isinstance(t, (int, float)):
        t = time.localtime(t)
    format = get(supybot.reply.format.time, dynamic.channel)
    return time.strftime(format, t)
utils.str.timestamp = timestamp utils.str.timestamp = timestamp

View File

@ -454,5 +454,40 @@ class CacheDict(collections.MutableMapping):
def __len__(self): def __len__(self):
return len(self.d) return len(self.d)
try:
    # Python 3.3+: the ABCs live in collections.abc (the aliases in the
    # collections namespace were removed in Python 3.10).
    from collections.abc import MutableSet as _MutableSet
except ImportError:
    # Python 2.
    from collections import MutableSet as _MutableSet

class TruncatableSet(_MutableSet):
    """A set that keeps track of the order of inserted elements so
    the oldest can be removed."""
    def __init__(self, iterable=()):
        """Build the set from `iterable`; duplicates are inserted once, at
        the position of their first occurrence."""
        # Invariant: _ordered_items holds exactly the elements of _items,
        # each once, oldest first.
        self._ordered_items = []
        self._items = set()
        for item in iterable:
            self.add(item)
    def __contains__(self, item):
        return item in self._items
    def __iter__(self):
        return iter(self._items)
    def __len__(self):
        return len(self._items)
    def add(self, item):
        """Add `item` as the newest element; a no-op if already present."""
        if item not in self._items:
            self._items.add(item)
            self._ordered_items.append(item)
    def discard(self, item):
        """Remove `item` if present; absent items are silently ignored."""
        if item in self._items:
            self._items.remove(item)
            self._ordered_items.remove(item)
    def truncate(self, size):
        """Drop the oldest elements so that at most `size` remain."""
        assert size >= 0
        removed_size = len(self._ordered_items) - size
        if removed_size <= 0:
            return
        # Slice from the front rather than with [-size:], which would
        # select the whole list when size is 0.
        removed = self._ordered_items[:removed_size]
        del self._ordered_items[:removed_size]
        # Two cases depending on removed_size<size, in order to make it
        # faster if one is significantly bigger than the other.
        if removed_size < size:
            # More kept items than removed items: take the removed ones out.
            self._items.difference_update(removed)
        else:
            # More removed items than kept ones: rebuild from the survivors.
            self._items = set(self._ordered_items)
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -1133,6 +1133,32 @@ class TestCacheDict(SupyTestCase):
self.failUnless(i in d) self.failUnless(i in d)
self.failUnless(d[i] == i) self.failUnless(d[i] == i)
class TestTruncatableSet(SupyTestCase):
    """Checks membership, discarding and truncation of TruncatableSet."""

    def testBasics(self):
        # Membership must follow the initial items, discard() and add().
        subject = TruncatableSet(['foo', 'bar', 'baz', 'qux'])
        self.assertEqual(subject, {'foo', 'bar', 'baz', 'qux'})
        for member in ('foo', 'bar'):
            self.failUnless(member in subject)
        self.failIf('quux' in subject)
        subject.discard('baz')
        self.failUnless('foo' in subject)
        self.failIf('baz' in subject)
        subject.add('quux')
        self.failUnless('quux' in subject)

    def testTruncate(self):
        # The oldest element ('foo') is the one dropped.
        subject = TruncatableSet(['foo', 'bar'])
        for late_item in ('baz', 'qux'):
            subject.add(late_item)
        subject.truncate(3)
        expected = {'bar', 'baz', 'qux'}
        self.assertEqual(subject, expected)

    def testTruncateUnion(self):
        # Elements merged in with |= count as newer than the initial ones.
        subject = TruncatableSet(['bar', 'foo'])
        subject |= {'baz', 'qux'}
        subject.truncate(3)
        expected = {'foo', 'baz', 'qux'}
        self.assertEqual(subject, expected)
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: