Merge branch 'rewrite-rss' into testing

Conflicts:
	src/utils/str.py
This commit is contained in:
Valentin Lorentz 2014-07-31 19:24:01 +00:00
commit 4a3d39e747
5 changed files with 310 additions and 358 deletions

View File

@ -50,19 +50,33 @@ class FeedItemSortOrder(registry.OnlySomeStrings):
validStrings = ('asInFeed', 'oldestFirst', 'newestFirst') validStrings = ('asInFeed', 'oldestFirst', 'newestFirst')
RSS = conf.registerPlugin('RSS') RSS = conf.registerPlugin('RSS')
conf.registerChannelValue(RSS, 'bold', registry.Boolean(
True, _("""Determines whether the bot will bold the title of the feed when conf.registerGlobalValue(RSS, 'feeds',
it announces news."""))) FeedNames([], _("""Determines what feeds should be accessible as
commands.""")))
########
# Format
conf.registerChannelValue(RSS, 'headlineSeparator', conf.registerChannelValue(RSS, 'headlineSeparator',
registry.StringSurroundedBySpaces('|', _("""Determines what string is registry.StringSurroundedBySpaces('|', _("""Determines what string is
used to separate headlines in new feeds."""))) used to separate headlines in new feeds.""")))
conf.registerChannelValue(RSS, 'announcementPrefix', conf.registerChannelValue(RSS, 'format',
registry.StringWithSpaceOnRight(_('News from '), _("""Determines what registry.String(_('$date: $title <$link>'), _("""The format the bot
prefix is prepended (if any) to the news item announcements made in the will use for displaying headlines of a RSS feed that is triggered
channel."""))) manually. In addition to fields defined by feedparser ($published
conf.registerChannelValue(RSS, 'announcementSeparator', (the entry date), $title, $link, $description, $id, etc.), the following
registry.StringWithSpaceOnRight(_(': '), _("""Determines what variables can be used: $feed_name, $date (parsed date, as defined in
suffix is appended to the feed name in a news item."""))) supybot.reply.format.time)""")))
conf.registerChannelValue(RSS, 'announceFormat',
registry.String(_('News from $feed_name: $title <$link>'),
_("""The format the bot will use for displaying headlines of a RSS feed
that is announced. See supybot.plugins.RSS.format for the available
variables.""")))
###########
# Announces
conf.registerChannelValue(RSS, 'announce', conf.registerChannelValue(RSS, 'announce',
registry.SpaceSeparatedSetOfStrings([], _("""Determines which RSS feeds registry.SpaceSeparatedSetOfStrings([], _("""Determines which RSS feeds
should be announced in the channel; valid input is a list of strings should be announced in the channel; valid input is a list of strings
@ -75,25 +89,10 @@ conf.registerGlobalValue(RSS, 'sortFeedItems',
FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be
sorted by their update timestamp or kept in the same order as they appear sorted by their update timestamp or kept in the same order as they appear
in a feed."""))) in a feed.""")))
conf.registerGlobalValue(RSS, 'stripRedirect', registry.Boolean(
True, """Determines whether the bot will attempt to strip url redirection
from headline links, by taking things after the last http://."""))
conf.registerGlobalValue(RSS, 'feeds', ####################
FeedNames([], _("""Determines what feeds should be accessible as # Headlines filtering
commands."""))) conf.registerChannelValue(RSS, 'defaultNumberOfHeadlines',
conf.registerChannelValue(RSS, 'showLinks',
registry.Boolean(True, _("""Determines whether the bot will list the link
along with the title of the feed when the rss command is called.
supybot.plugins.RSS.announce.showLinks affects whether links will be
listed when a feed is automatically announced.""")))
conf.registerChannelValue(RSS, 'showPubDate',
registry.Boolean(False, """Determines whether the bot will list the
publication datetime stamp along with the title of the feed when the rss
command is called.
supybot.plugins.RSS.announce.showPubDate affects whether this will be
listed when a feed is automatically announced."""))
conf.registerGlobalValue(RSS, 'defaultNumberOfHeadlines',
registry.PositiveInteger(1, _("""Indicates how many headlines an rss feed registry.PositiveInteger(1, _("""Indicates how many headlines an rss feed
will output by default, if no number is provided."""))) will output by default, if no number is provided.""")))
conf.registerChannelValue(RSS, 'initialAnnounceHeadlines', conf.registerChannelValue(RSS, 'initialAnnounceHeadlines',
@ -108,19 +107,5 @@ conf.registerChannelValue(RSS, 'keywordBlacklist',
strings, lets you filter headlines to those not containing any items strings, lets you filter headlines to those not containing any items
in this blacklist."""))) in this blacklist.""")))
conf.registerGroup(RSS, 'announce')
conf.registerChannelValue(RSS.announce, 'showLinks',
registry.Boolean(True, _("""Determines whether the bot will list the link
along with the title of the feed when a feed is automatically
announced.""")))
conf.registerChannelValue(RSS.announce, 'showPubDate',
registry.Boolean(False, """Determines whether the bot will list the
publication datetime stamp along with the title of the feed when a feed
is automatically announced."""))
conf.registerGlobalValue(RSS.announce, 'cachePeriod',
registry.PositiveInteger(604800, """Maximum age of cached RSS headlines,
in seconds. Headline cache is used to avoid re-announcing old news."""))
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -1,6 +1,7 @@
### ###
# Copyright (c) 2002-2004, Jeremiah Fincher # Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2008-2010, James McCoy # Copyright (c) 2008-2010, James McCoy
# Copyright (c) 2014, Valentin Lorentz
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -30,6 +31,7 @@
import time import time
import types import types
import string
import socket import socket
import threading import threading
import re import re
@ -40,300 +42,55 @@ import supybot.conf as conf
import supybot.utils as utils import supybot.utils as utils
import supybot.world as world import supybot.world as world
from supybot.commands import * from supybot.commands import *
import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils import supybot.ircutils as ircutils
import supybot.registry as registry import supybot.registry as registry
import supybot.callbacks as callbacks import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization, internationalizeDocstring from supybot.i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization('RSS') _ = PluginInternationalization('RSS')
def getFeedName(irc, msg, args, state): def get_feedName(irc, msg, args, state):
if not registry.isValidRegistryName(args[0]): if not registry.isValidRegistryName(args[0]):
state.errorInvalid('feed name', args[0], state.errorInvalid('feed name', args[0],
'Feed names must not include spaces.') 'Feed names must not include spaces.')
state.args.append(callbacks.canonicalName(args.pop(0))) state.args.append(callbacks.canonicalName(args.pop(0)))
addConverter('feedName', getFeedName) addConverter('feedName', get_feedName)
class RSS(callbacks.Plugin): class Feed:
"""This plugin is useful both for announcing updates to RSS feeds in a __slots__ = ('url', 'name', 'data', 'last_update', 'entries',
channel, and for retrieving the headlines of RSS feeds via command. Use 'lock', 'announced_entries')
the "add" command to add feeds to this plugin, and use the "announce" def __init__(self, name, url, plugin_is_loading=False):
command to determine what feeds should be announced in a given channel.""" assert name, name
threaded = True if not url:
def __init__(self, irc): assert utils.web.httpUrlRe.match(name), name
self.__parent = super(RSS, self)
self.__parent.__init__(irc)
# Schema is feed : [url, command]
self.feedNames = callbacks.CanonicalNameDict()
self.locks = {}
self.lastRequest = {}
self.cachedFeeds = {}
self.cachedHeadlines = {}
self.gettingLockLock = threading.Lock()
for name in self.registryValue('feeds'):
self._registerFeed(name)
try:
url = self.registryValue(registry.join(['feeds', name]))
except registry.NonExistentRegistryEntry:
self.log.warning('%s is not a registered feed, removing.',name)
continue
self.makeFeedCommand(name, url)
self.getFeed(url) # So announced feeds don't announce on startup.
def isCommandMethod(self, name):
if not self.__parent.isCommandMethod(name):
if name in self.feedNames:
return True
else:
return False
else:
return True
def listCommands(self):
return self.__parent.listCommands(self.feedNames.keys())
def getCommandMethod(self, command):
try:
return self.__parent.getCommandMethod(command)
except AttributeError:
return self.feedNames[command[0]][1]
def _registerFeed(self, name, url=''):
self.registryValue('feeds').add(name)
group = self.registryValue('feeds', value=False)
conf.registerGlobalValue(group, name, registry.String(url, ''))
def __call__(self, irc, msg):
self.__parent.__call__(irc, msg)
irc = callbacks.SimpleProxy(irc, msg)
newFeeds = {}
for channel in irc.state.channels:
feeds = self.registryValue('announce', channel)
for name in feeds:
commandName = callbacks.canonicalName(name)
if self.isCommandMethod(commandName):
url = self.feedNames[commandName][0]
else:
url = name url = name
if self.willGetNewFeed(url): self.name = name
newFeeds.setdefault((url, name), []).append(channel) self.url = url
for ((url, name), channels) in newFeeds.iteritems(): self.data = None
# We check if we can acquire the lock right here because if we # We don't want to fetch feeds right after the plugin is
# don't, we'll possibly end up spawning a lot of threads to get # loaded (the bot could be starting, and thus already busy)
# the feed, because this thread may run for a number of bytecodes self.last_update = time.time() if plugin_is_loading else 0
# before it switches to a thread that'll get the lock in self.entries = []
# _newHeadlines. self.lock = threading.Lock()
if self.acquireLock(url, blocking=False): self.announced_entries = utils.structures.TruncatableSet()
try:
t = threading.Thread(target=self._newHeadlines,
name=format('Fetching %u', url),
args=(irc, channels, name, url))
self.log.info('Checking for announcements at %u', url)
world.threadsSpawned += 1
t.setDaemon(True)
t.start()
finally:
self.releaseLock(url)
time.sleep(0.1) # So other threads can run.
def buildHeadlines(self, headlines, channel, linksconfig='announce.showLinks', dateconfig='announce.showPubDate'): def get_command(self, plugin):
newheadlines = [] docstring = format(_("""[<number of headlines>]
for headline in headlines:
link = ''
pubDate = ''
if self.registryValue(linksconfig, channel):
if headline[1]:
if self.registryValue('stripRedirect'):
link = re.sub('^.*http://', 'http://', headline[1])
else:
link = headline[1]
if self.registryValue(dateconfig, channel):
if headline[2]:
pubDate = ' [%s]' % (headline[2],)
if sys.version_info[0] < 3:
if isinstance(headline[0], unicode):
try:
import charade.universaldetector
u = charade.universaldetector.UniversalDetector()
u.feed(headline[0])
u.close()
encoding = u.result['encoding']
except ImportError:
encoding = 'utf8'
newheadlines.append(format('%s %u%s',
headline[0].encode(encoding,'replace'),
link,
pubDate))
else:
newheadlines.append(format('%s %u%s',
headline[0],
link,
pubDate))
else:
newheadlines.append(format('%s %u%s',
headline[0],
link,
pubDate))
return newheadlines
def _newHeadlines(self, irc, channels, name, url): Reports the titles for %s at the RSS feed %u. If
try: <number of headlines> is given, returns only that many headlines.
# We acquire the lock here so there's only one announcement thread RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod
# in this code at any given time. Otherwise, several announcement seconds, which defaults to 1800 (30 minutes) since that's what most
# threads will getFeed (all blocking, in turn); then they'll all websites prefer."""), self.name, self.url)
# want to send their news messages to the appropriate channels. def f(self2, irc, msg, args):
# Note that we're allowed to acquire this lock twice within the args.insert(0, self.url)
# same thread because it's an RLock and not just a normal Lock. self2.rss(irc, msg, args)
self.acquireLock(url) f = utils.python.changeFunctionName(f, self.name, docstring)
t = time.time() f = types.MethodType(f, plugin)
try: return f
#oldresults = self.cachedFeeds[url]
#oldheadlines = self.getHeadlines(oldresults)
oldheadlines = self.cachedHeadlines[url]
oldheadlines = list(filter(lambda x: t - x[3] <
self.registryValue('announce.cachePeriod'), oldheadlines))
except KeyError:
oldheadlines = []
newresults = self.getFeed(url)
newheadlines = self.getHeadlines(newresults)
if len(newheadlines) == 1:
s = newheadlines[0][0]
if s in ('Timeout downloading feed.',
'Unable to download feed.'):
self.log.debug('%s %u', s, url)
return
def normalize(headline):
return (tuple(headline[0].lower().split()), headline[1])
oldheadlinesset = set(map(normalize, oldheadlines))
for (i, headline) in enumerate(newheadlines):
if normalize(headline) in oldheadlinesset:
newheadlines[i] = None
newheadlines = list(filter(None, newheadlines)) # Removes Nones.
number_of_headlines = len(oldheadlines)
oldheadlines.extend(newheadlines)
self.cachedHeadlines[url] = oldheadlines
if newheadlines:
def filter_whitelist(headline):
v = False
for kw in whitelist:
if kw in headline[0] or kw in headline[1]:
v = True
break
return v
def filter_blacklist(headline):
v = True
for kw in blacklist:
if kw in headline[0] or kw in headline[1]:
v = False
break
return v
for channel in channels:
if number_of_headlines == 0:
channelnewheadlines = newheadlines[:self.registryValue('initialAnnounceHeadlines', channel)]
else:
channelnewheadlines = newheadlines[:]
whitelist = self.registryValue('keywordWhitelist', channel)
blacklist = self.registryValue('keywordBlacklist', channel)
if len(whitelist) != 0:
channelnewheadlines = filter(filter_whitelist, channelnewheadlines)
if len(blacklist) != 0:
channelnewheadlines = filter(filter_blacklist, channelnewheadlines)
channelnewheadlines = list(channelnewheadlines)
if len(channelnewheadlines) == 0:
return
bold = self.registryValue('bold', channel)
sep = self.registryValue('headlineSeparator', channel)
prefix = self.registryValue('announcementPrefix', channel)
suffix = self.registryValue('announcementSeparator', channel)
pre = format('%s%s%s', prefix, name, suffix)
if bold:
pre = ircutils.bold(pre)
sep = ircutils.bold(sep)
headlines = self.buildHeadlines(channelnewheadlines, channel)
irc.replies(headlines, prefixer=pre, joiner=sep,
to=channel, prefixNick=False, private=True)
finally:
self.releaseLock(url)
def willGetNewFeed(self, url): def sort_feed_items(items, order):
now = time.time()
wait = self.registryValue('waitPeriod')
if url not in self.lastRequest or now - self.lastRequest[url] > wait:
return True
else:
return False
def acquireLock(self, url, blocking=True):
try:
self.gettingLockLock.acquire()
try:
lock = self.locks[url]
except KeyError:
lock = threading.RLock()
self.locks[url] = lock
return lock.acquire(blocking=blocking)
finally:
self.gettingLockLock.release()
def releaseLock(self, url):
self.locks[url].release()
def getFeed(self, url):
def error(s):
return {'items': [{'title': s}]}
try:
# This is the most obvious place to acquire the lock, because a
# malicious user could conceivably flood the bot with rss commands
# and DoS the website in question.
self.acquireLock(url)
if self.willGetNewFeed(url):
results = {}
try:
self.log.debug('Downloading new feed from %u', url)
results = feedparser.parse(url)
if 'bozo_exception' in results and not results['entries']:
raise results['bozo_exception']
except feedparser.sgmllib.SGMLParseError:
self.log.exception('Uncaught exception from feedparser:')
raise callbacks.Error('Invalid (unparsable) RSS feed.')
except socket.timeout:
return error('Timeout downloading feed.')
except Exception as e:
# These seem mostly harmless. We'll need reports of a
# kind that isn't.
self.log.debug('Allowing bozo_exception %r through.', e)
if results.get('feed', {}) and self.getHeadlines(results):
self.cachedFeeds[url] = results
self.lastRequest[url] = time.time()
else:
self.log.debug('Not caching results; feed is empty.')
try:
return self.cachedFeeds[url]
except KeyError:
wait = self.registryValue('waitPeriod')
# If there's a problem retrieving the feed, we should back off
# for a little bit before retrying so that there is time for
# the error to be resolved.
self.lastRequest[url] = time.time() - .5 * wait
return error('Unable to download feed.')
finally:
self.releaseLock(url)
def _getConverter(self, feed):
toText = utils.web.htmlToText
if 'encoding' in feed:
def conv(s):
# encode() first so there implicit encoding doesn't happen in
# other functions when unicode and bytestring objects are used
# together
s = s.encode(feed['encoding'], 'replace')
s = toText(s).strip()
return s
return conv
else:
return lambda s: toText(s).strip()
def _sortFeedItems(self, items):
"""Return feed items, sorted according to sortFeedItems.""" """Return feed items, sorted according to sortFeedItems."""
order = self.registryValue('sortFeedItems')
if order not in ['oldestFirst', 'newestFirst']: if order not in ['oldestFirst', 'newestFirst']:
return items return items
if order == 'oldestFirst': if order == 'oldestFirst':
@ -348,40 +105,179 @@ class RSS(callbacks.Plugin):
return items return items
return sitems return sitems
def getHeadlines(self, feed): class RSS(callbacks.Plugin):
headlines = [] """This plugin is useful both for announcing updates to RSS feeds in a
t = time.time() channel, and for retrieving the headlines of RSS feeds via command. Use
conv = self._getConverter(feed) the "add" command to add feeds to this plugin, and use the "announce"
for d in self._sortFeedItems(feed['items']): command to determine what feeds should be announced in a given channel."""
if 'title' in d: threaded = True
title = conv(d['title']) def __init__(self, irc):
link = d.get('link') self.__parent = super(RSS, self)
pubDate = d.get('pubDate', d.get('updated')) self.__parent.__init__(irc)
headlines.append((title, link, pubDate, t)) # Scheme: {name: url}
return headlines self.feed_names = callbacks.CanonicalNameDict()
# Scheme: {url: feed}
self.feeds = {}
for name in self.registryValue('feeds'):
self.assert_feed_does_not_exist(name)
self.register_feed_config(name)
try:
url = self.registryValue(registry.join(['feeds', name]))
except registry.NonExistentRegistryEntry:
self.log.warning('%s is not a registered feed, removing.',name)
continue
self.register_feed(name, url, True)
@internationalizeDocstring ##################
def makeFeedCommand(self, name, url): # Feed registering
docstring = format("""[<number of headlines>]
Reports the titles for %s at the RSS feed %u. If def assert_feed_does_not_exist(self, name):
<number of headlines> is given, returns only that many headlines.
RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod
seconds, which defaults to 1800 (30 minutes) since that's what most
websites prefer.
""", name, url)
if url not in self.locks:
self.locks[url] = threading.RLock()
if self.isCommandMethod(name): if self.isCommandMethod(name):
s = format('I already have a command in this plugin named %s.',name) s = format('I already have a command in this plugin named %s.',name)
raise callbacks.Error(s) raise callbacks.Error(s)
def f(self, irc, msg, args):
args.insert(0, url) def register_feed_config(self, name, url=''):
self.rss(irc, msg, args) self.registryValue('feeds').add(name)
f = utils.python.changeFunctionName(f, name, docstring) group = self.registryValue('feeds', value=False)
f = types.MethodType(f, self) conf.registerGlobalValue(group, name, registry.String(url, ''))
self.feedNames[name] = (url, f)
self._registerFeed(name, url) def register_feed(self, name, url, plugin_is_loading):
self.feed_names[name] = url
self.feeds[url] = Feed(name, url, plugin_is_loading)
def remove_feed(self, feed):
    """Forget *feed*.

    Deletes the feed from the name index and from the URL index, then
    removes its name from the ``feeds`` registry value and unregisters
    the registry entry of the same name.
    """
    del self.feed_names[feed.name]
    del self.feeds[feed.url]
    feed_name = feed.name
    conf.supybot.plugins.RSS.feeds().remove(feed_name)
    conf.supybot.plugins.RSS.feeds.unregister(feed_name)
##################
# Methods handling
def isCommandMethod(self, name):
    """Return True if *name* is a command of the parent class or
    resolves to a feed through ``get_feed``."""
    if self.__parent.isCommandMethod(name):
        return True
    # Not a regular command: it is one only if a feed answers to it.
    return bool(self.get_feed(name))
def listCommands(self):
    """Return the parent's command list extended with the feed commands.

    Feed commands are addressed by feed *name*.  ``self.feeds`` is keyed
    by URL, so the names have to come from ``self.feed_names``.
    """
    return self.__parent.listCommands(list(self.feed_names.keys()))
def getCommandMethod(self, command):
    """Return the callable for *command*.

    The parent class is asked first; if it raises AttributeError, the
    first element of *command* is looked up as a feed and that feed's
    generated command is returned instead.
    """
    try:
        method = self.__parent.getCommandMethod(command)
    except AttributeError:
        feed = self.get_feed(command[0])
        method = feed.get_command(self)
    return method
def __call__(self, irc, msg):
    """Forward the message to the parent class, then run
    ``update_feeds``."""
    parent = self.__parent
    parent.__call__(irc, msg)
    self.update_feeds()
##################
# Status accessors
def get_feed(self, name):
    """Return the feed registered under *name*, or None.

    *name* is first translated through ``self.feed_names``; when it is
    not a known name it is used as the ``self.feeds`` key directly.
    """
    url = self.feed_names.get(name, name)
    return self.feeds.get(url, None)
def is_expired(self, feed):
    """Return True if *feed* was last updated more than ``waitPeriod``
    seconds ago."""
    assert feed
    now = time.time()
    wait_period = self.registryValue('waitPeriod')
    oldest_fresh_update = now - wait_period
    return feed.last_update < oldest_fresh_update
###############
# Feed fetching
def update_feed(self, feed):
    """Download and parse *feed*, store the result on it, then announce.

    The parsed feed metadata, the entries and the update time are
    written while holding ``feed.lock``.
    """
    with feed.lock:
        parsed = feedparser.parse(feed.url)
        feed.data = parsed.feed
        feed.entries = parsed.entries
        feed.last_update = time.time()
    # Must run after the lock is released: announce_feed() goes through
    # get_new_entries(), which takes feed.lock itself.
    self.announce_feed(feed)
def update_feed_in_thread(self, feed):
    """Run ``update_feed`` for *feed* in a daemon ``world.SupyThread``.

    ``feed.last_update`` is stamped before the thread is started.
    """
    feed.last_update = time.time()
    thread_name = format('Fetching feed %u', feed.url)
    worker = world.SupyThread(target=self.update_feed,
                              name=thread_name,
                              args=(feed,))
    worker.setDaemon(True)
    worker.start()
def update_feed_if_needed(self, feed):
    """Update *feed* only when ``is_expired`` reports it as expired."""
    if not self.is_expired(feed):
        return
    self.update_feed(feed)
def update_feeds(self):
    """Update every feed that is announced in at least one channel.

    The ``announce`` value of every channel on every connected network
    is collected, and each named feed is refreshed if it has expired.
    """
    announced_feeds = set()
    for irc in world.ircs:
        for channel in irc.state.channels:
            announced_feeds |= self.registryValue('announce', channel)
    for name in announced_feeds:
        feed = self.get_feed(name)
        if not feed:
            # The announce list can hold names that no longer (or never
            # did) resolve to a feed.  Passing None on would trip the
            # assertion in is_expired() on every incoming message.
            self.log.warning('Feed %s is announced but does not exist.',
                             name)
            continue
        self.update_feed_if_needed(feed)
def get_new_entries(self, feed):
    """Return the entries of *feed* not announced yet, and mark them.

    Runs under ``feed.lock``.  An entry is new when its ``id`` is not in
    ``feed.announced_entries``; the ids of the returned entries are
    added to that set before returning.
    """
    with feed.lock:
        entries = feed.entries
        fresh = []
        for candidate in entries:
            if candidate.id not in feed.announced_entries:
                fresh.append(candidate)
        if not fresh:
            return []
        feed.announced_entries |= {candidate.id for candidate in fresh}
        # We keep a little more because we don't want to re-announce
        # oldest entries if one of the newest gets removed.
        keep = 2 * len(entries)
        feed.announced_entries.truncate(keep)
        return fresh
def announce_feed(self, feed):
    """Send the not-yet-announced entries of *feed* to its channels.

    The new entries are ordered according to the ``sortFeedItems``
    setting, then passed to ``announce_entry`` for every channel, on
    every network, whose ``announce`` value contains the feed's name.
    """
    fresh = sort_feed_items(self.get_new_entries(feed),
                            self.registryValue('sortFeedItems'))
    for irc in world.ircs:
        for channel in irc.state.channels:
            if feed.name in self.registryValue('announce', channel):
                for entry in fresh:
                    self.announce_entry(irc, channel, feed, entry)
#################
# Entry rendering
def should_send_entry(self, channel, entry):
    """Apply the channel's keyword filters to *entry*.

    Returns False if a non-empty whitelist has no keyword occurring in
    the entry's title or description, or if any blacklist keyword
    occurs in either of them; True otherwise.
    """
    whitelist = self.registryValue('keywordWhitelist', channel)
    blacklist = self.registryValue('keywordBlacklist', channel)

    def mentions(keyword):
        return keyword in entry.title or keyword in entry.description

    if whitelist and not any(mentions(kw) for kw in whitelist):
        return False
    if blacklist and any(mentions(kw) for kw in blacklist):
        return False
    return True
def format_entry(self, channel, feed, entry, is_announce):
    """Render *entry* with the channel's headline template.

    The template is the ``announceFormat`` setting when *is_announce*
    is true and the ``format`` setting otherwise.  Every key of *entry*
    is available as a ``$variable``, plus ``$feed_name`` (the feed's
    name) and ``$date`` (the entry's ``published_parsed`` value, or
    ``updated_parsed`` when that is missing, passed through
    ``utils.str.timestamp``).  Unknown variables are left in place.
    """
    if is_announce:
        template = self.registryValue('announceFormat', channel)
    else:
        template = self.registryValue('format', channel)
    date = entry.get('published_parsed', entry.get('updated_parsed'))
    date = utils.str.timestamp(date)
    # Build a single mapping instead of using **entry: an entry key
    # named feed_name or date would otherwise raise TypeError.  The
    # plugin-defined variables are set last so that they win.
    fields = dict(entry)
    fields.update(feed_name=feed.name, date=date)
    # The template string itself must not be passed as the mapping:
    # a str used as the fallback mapping turns any unknown placeholder
    # into a TypeError, defeating safe_substitute.
    return string.Template(template).safe_substitute(fields)
def announce_entry(self, irc, channel, feed, entry):
    """Queue *entry* as a PRIVMSG to *channel*, unless
    ``should_send_entry`` rejects it."""
    if not self.should_send_entry(channel, entry):
        return
    text = self.format_entry(channel, feed, entry, True)
    message = ircmsgs.privmsg(channel, text)
    irc.queueMsg(message)
##########
# Commands
@internationalizeDocstring @internationalizeDocstring
def add(self, irc, msg, args, name, url): def add(self, irc, msg, args, name, url):
@ -390,7 +286,9 @@ class RSS(callbacks.Plugin):
Adds a command to this plugin that will look up the RSS feed at the Adds a command to this plugin that will look up the RSS feed at the
given URL. given URL.
""" """
self.makeFeedCommand(name, url) self.assert_feed_does_not_exist(name)
self.register_feed_config(name, url)
self.register_feed(name, url, False)
irc.replySuccess() irc.replySuccess()
add = wrap(add, ['feedName', 'url']) add = wrap(add, ['feedName', 'url'])
@ -401,12 +299,11 @@ class RSS(callbacks.Plugin):
Removes the command for looking up RSS feeds at <name> from Removes the command for looking up RSS feeds at <name> from
this plugin. this plugin.
""" """
if name not in self.feedNames: feed = self.get_feed(name)
if not feed:
irc.error(_('That\'s not a valid RSS feed command name.')) irc.error(_('That\'s not a valid RSS feed command name.'))
return return
del self.feedNames[name] self.remove_feed(feed)
conf.supybot.plugins.RSS.feeds().remove(name)
conf.supybot.plugins.RSS.feeds.unregister(name)
irc.replySuccess() irc.replySuccess()
remove = wrap(remove, ['feedName']) remove = wrap(remove, ['feedName'])
@ -434,10 +331,14 @@ class RSS(callbacks.Plugin):
""" """
announce = conf.supybot.plugins.RSS.announce announce = conf.supybot.plugins.RSS.announce
S = announce.get(channel)() S = announce.get(channel)()
for feed in feeds: plugin = irc.getCallback('RSS')
S.add(feed) for name in feeds:
S.add(name)
announce.get(channel).setValue(S) announce.get(channel).setValue(S)
irc.replySuccess() irc.replySuccess()
for name in feeds:
feed = plugin.get_feed(name)
plugin.announce_feed(feed)
add = wrap(add, [('checkChannelCapability', 'op'), add = wrap(add, [('checkChannelCapability', 'op'),
many(first('url', 'feedName'))]) many(first('url', 'feedName'))])
@ -467,23 +368,25 @@ class RSS(callbacks.Plugin):
If <number of headlines> is given, return only that many headlines. If <number of headlines> is given, return only that many headlines.
""" """
self.log.debug('Fetching %u', url) self.log.debug('Fetching %u', url)
feed = self.getFeed(url) feed = self.get_feed(url)
if not feed:
feed = Feed(url, url)
if irc.isChannel(msg.args[0]): if irc.isChannel(msg.args[0]):
channel = msg.args[0] channel = msg.args[0]
else: else:
channel = None channel = None
headlines = self.getHeadlines(feed) self.update_feed_if_needed(feed)
if not headlines: entries = feed.entries
if not entries:
irc.error(_('Couldn\'t get RSS feed.')) irc.error(_('Couldn\'t get RSS feed.'))
return return
headlines = self.buildHeadlines(headlines, channel, 'showLinks', 'showPubDate') n = n or self.registryValue('defaultNumberOfHeadlines', channel)
if n: entries = list(filter(lambda e:self.should_send_entry(channel, e),
headlines = headlines[:n] feed.entries))
else: entries = entries[:n]
headlines = headlines[:self.registryValue('defaultNumberOfHeadlines')] headlines = map(lambda e:self.format_entry(channel, feed, e, False),
entries)
sep = self.registryValue('headlineSeparator', channel) sep = self.registryValue('headlineSeparator', channel)
if self.registryValue('bold', channel):
sep = ircutils.bold(sep)
irc.replies(headlines, joiner=sep) irc.replies(headlines, joiner=sep)
rss = wrap(rss, ['url', additional('int')]) rss = wrap(rss, ['url', additional('int')])
@ -498,9 +401,11 @@ class RSS(callbacks.Plugin):
url = self.registryValue('feeds.%s' % url) url = self.registryValue('feeds.%s' % url)
except registry.NonExistentRegistryEntry: except registry.NonExistentRegistryEntry:
pass pass
feed = self.getFeed(url) feed = self.get_feed(url)
conv = self._getConverter(feed) if not feed:
info = feed.get('feed') feed = Feed(url, url)
self.update_feed_if_needed(feed)
info = feed.data
if not info: if not info:
irc.error(_('I couldn\'t retrieve that RSS feed.')) irc.error(_('I couldn\'t retrieve that RSS feed.'))
return return
@ -510,10 +415,10 @@ class RSS(callbacks.Plugin):
now = time.mktime(time.gmtime()) now = time.mktime(time.gmtime())
when = utils.timeElapsed(now - seconds) + ' ago' when = utils.timeElapsed(now - seconds) + ' ago'
else: else:
when = 'time unavailable' when = _('time unavailable')
title = conv(info.get('title', 'unavailable')) title = info.get('title', _('unavailable'))
desc = conv(info.get('description', 'unavailable')) desc = info.get('description', _('unavailable'))
link = conv(info.get('link', 'unavailable')) link = info.get('link', _('unavailable'))
# The rest of the entries are all available in the channel key # The rest of the entries are all available in the channel key
response = format(_('Title: %s; URL: %u; ' response = format(_('Title: %s; URL: %u; '
'Description: %s; Last updated: %s.'), 'Description: %s; Last updated: %s.'),

View File

@ -374,6 +374,7 @@ registerChannelValue(supybot.reply.format, 'time',
def timestamp(t):
    """Format *t* with the ``supybot.reply.format.time`` format.

    *t* may be None (meaning the current time), a number of seconds
    since the epoch, or an already broken-down time tuple such as a
    ``time.struct_time``.
    """
    if t is None:
        t = time.time()
    # Convert anything that is not already a time tuple.  This has to
    # be a separate ``if``: chained as ``elif`` after the None check, the
    # float produced above would reach strftime() unconverted.  Testing
    # for a tuple rather than for float also keeps int timestamps
    # working.
    if not isinstance(t, tuple):
        t = time.localtime(t)
    fmt = get(supybot.reply.format.time, dynamic.channel)
    return time.strftime(fmt, t)

View File

@ -454,5 +454,40 @@ class CacheDict(collections.MutableMapping):
def __len__(self): def __len__(self):
return len(self.d) return len(self.d)
try:
    from collections.abc import MutableSet as _MutableSet
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import MutableSet as _MutableSet


class TruncatableSet(_MutableSet):
    """A set that keeps track of the order of inserted elements so
    the oldest can be removed."""

    def __init__(self, iterable=()):
        # _ordered_items holds each element once, oldest first;
        # _items mirrors it for O(1) membership tests.
        self._ordered_items = []
        self._items = set()
        # Go through add() so duplicates in *iterable* are stored once
        # and the list and the set cannot get out of sync.
        for item in iterable:
            self.add(item)

    def __repr__(self):
        return 'TruncatableSet(%r)' % (self._ordered_items,)

    def __contains__(self, item):
        return item in self._items

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)

    def add(self, item):
        """Add *item* as the newest element; no-op if already present."""
        if item not in self._items:
            self._items.add(item)
            self._ordered_items.append(item)

    def discard(self, item):
        """Remove *item* if present; silently do nothing otherwise."""
        if item in self._items:
            self._items.remove(item)
            self._ordered_items.remove(item)

    def truncate(self, size):
        """Drop the oldest elements so that at most *size* remain."""
        assert size >= 0
        removed_size = len(self) - size
        if removed_size <= 0:
            return
        # Slice from the front: ``[-size:]`` would keep everything when
        # size is 0.
        removed = self._ordered_items[:removed_size]
        kept = self._ordered_items[removed_size:]
        # Two cases so the set is fixed up by touching the smaller of
        # the removed/kept halves.
        if removed_size < size:
            self._items.difference_update(removed)
        else:
            self._items = set(kept)
        self._ordered_items = kept
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:

View File

@ -1133,6 +1133,32 @@ class TestCacheDict(SupyTestCase):
self.failUnless(i in d) self.failUnless(i in d)
self.failUnless(d[i] == i) self.failUnless(d[i] == i)
class TestTruncatableSet(SupyTestCase):
    """Tests for TruncatableSet: membership, discard/add and truncation."""

    # assertIn/assertNotIn are used instead of failUnless/failIf, which
    # are deprecated unittest aliases.

    def testBasics(self):
        s = TruncatableSet(['foo', 'bar', 'baz', 'qux'])
        self.assertEqual(s, {'foo', 'bar', 'baz', 'qux'})
        self.assertIn('foo', s)
        self.assertIn('bar', s)
        self.assertNotIn('quux', s)
        s.discard('baz')
        self.assertIn('foo', s)
        self.assertNotIn('baz', s)
        s.add('quux')
        self.assertIn('quux', s)

    def testTruncate(self):
        # 'foo' is the oldest element, so it is the one dropped.
        s = TruncatableSet(['foo', 'bar'])
        s.add('baz')
        s.add('qux')
        s.truncate(3)
        self.assertEqual(s, {'bar', 'baz', 'qux'})

    def testTruncateUnion(self):
        # Elements added through |= are newer than the initial ones,
        # whatever order the right-hand set is iterated in.
        s = TruncatableSet(['bar', 'foo'])
        s |= {'baz', 'qux'}
        s.truncate(3)
        self.assertEqual(s, {'foo', 'baz', 'qux'})
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: