mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-01-11 04:32:36 +01:00
RSS: Major rewriting. @rss works.
This commit is contained in:
parent
af24192b0f
commit
219c47d1c8
@ -58,47 +58,21 @@ conf.registerGlobalValue(RSS, 'feeds',
|
|||||||
########
|
########
|
||||||
# Format
|
# Format
|
||||||
|
|
||||||
# Common
|
|
||||||
conf.registerChannelValue(RSS, 'bold', registry.Boolean(
|
|
||||||
True, _("""Determines whether the bot will bold the title of the feed when
|
|
||||||
it announces news.""")))
|
|
||||||
conf.registerChannelValue(RSS, 'headlineSeparator',
|
conf.registerChannelValue(RSS, 'headlineSeparator',
|
||||||
registry.StringSurroundedBySpaces('|', _("""Determines what string is
|
registry.StringSurroundedBySpaces('|', _("""Determines what string is
|
||||||
used to separate headlines in new feeds.""")))
|
used to separate headlines in new feeds.""")))
|
||||||
|
conf.registerChannelValue(RSS, 'format',
|
||||||
# Format for non-announced headlines
|
registry.String(_('$date: $title <$link>'), _("""The format the bot
|
||||||
conf.registerChannelValue(RSS, 'showLinks',
|
will use for displaying headlines of a RSS feed that is triggered
|
||||||
registry.Boolean(True, _("""Determines whether the bot will list the link
|
manually. In addition to fields defined by feedparser ($published
|
||||||
along with the title of the feed when the rss command is called.
|
(the entry date), $title, $link, $description, $id, etc.), the following
|
||||||
supybot.plugins.RSS.announce.showLinks affects whether links will be
|
variables can be used: $feed_name, $date (parsed date, as defined in
|
||||||
listed when a feed is automatically announced.""")))
|
supybot.reply.format.time)""")))
|
||||||
conf.registerChannelValue(RSS, 'showPubDate',
|
conf.registerChannelValue(RSS, 'announceFormat',
|
||||||
registry.Boolean(False, """Determines whether the bot will list the
|
registry.String(_('News from $feed_name: $title <$link>'),
|
||||||
publication datetime stamp along with the title of the feed when the rss
|
_("""The format the bot will use for displaying headlines of a RSS feed
|
||||||
command is called.
|
that is announced. See supybot.plugins.RSS.format for the available
|
||||||
supybot.plugins.RSS.announce.showPubDate affects whether this will be
|
variables.""")))
|
||||||
listed when a feed is automatically announced."""))
|
|
||||||
|
|
||||||
# Format for announced headlines
|
|
||||||
conf.registerGroup(RSS, 'announce')
|
|
||||||
conf.registerChannelValue(RSS.announce, 'showLinks',
|
|
||||||
registry.Boolean(True, _("""Determines whether the bot will list the link
|
|
||||||
along with the title of the feed when a feed is automatically
|
|
||||||
announced.""")))
|
|
||||||
conf.registerChannelValue(RSS.announce, 'showPubDate',
|
|
||||||
registry.Boolean(False, """Determines whether the bot will list the
|
|
||||||
publication datetime stamp along with the title of the feed when a feed
|
|
||||||
is automatically announced."""))
|
|
||||||
conf.registerGlobalValue(RSS.announce, 'cachePeriod',
|
|
||||||
registry.PositiveInteger(604800, """Maximum age of cached RSS headlines,
|
|
||||||
in seconds. Headline cache is used to avoid re-announcing old news."""))
|
|
||||||
conf.registerChannelValue(RSS, 'announcementPrefix',
|
|
||||||
registry.StringWithSpaceOnRight(_('News from '), _("""Determines what
|
|
||||||
prefix is prepended (if any) to the news item announcements made in the
|
|
||||||
channel.""")))
|
|
||||||
conf.registerChannelValue(RSS, 'announcementSeparator',
|
|
||||||
registry.StringWithSpaceOnRight(_(': '), _("""Determines what
|
|
||||||
suffix is appended to the feed name in a news item.""")))
|
|
||||||
|
|
||||||
###########
|
###########
|
||||||
# Announces
|
# Announces
|
||||||
@ -115,13 +89,10 @@ conf.registerGlobalValue(RSS, 'sortFeedItems',
|
|||||||
FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be
|
FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be
|
||||||
sorted by their update timestamp or kept in the same order as they appear
|
sorted by their update timestamp or kept in the same order as they appear
|
||||||
in a feed.""")))
|
in a feed.""")))
|
||||||
conf.registerGlobalValue(RSS, 'stripRedirect', registry.Boolean(
|
|
||||||
True, """Determines whether the bot will attempt to strip url redirection
|
|
||||||
from headline links, by taking things after the last http://."""))
|
|
||||||
|
|
||||||
####################
|
####################
|
||||||
# Headlines filtering
|
# Headlines filtering
|
||||||
conf.registerGlobalValue(RSS, 'defaultNumberOfHeadlines',
|
conf.registerChannelValue(RSS, 'defaultNumberOfHeadlines',
|
||||||
registry.PositiveInteger(1, _("""Indicates how many headlines an rss feed
|
registry.PositiveInteger(1, _("""Indicates how many headlines an rss feed
|
||||||
will output by default, if no number is provided.""")))
|
will output by default, if no number is provided.""")))
|
||||||
conf.registerChannelValue(RSS, 'initialAnnounceHeadlines',
|
conf.registerChannelValue(RSS, 'initialAnnounceHeadlines',
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
###
|
###
|
||||||
# Copyright (c) 2002-2004, Jeremiah Fincher
|
# Copyright (c) 2002-2004, Jeremiah Fincher
|
||||||
# Copyright (c) 2008-2010, James McCoy
|
# Copyright (c) 2008-2010, James McCoy
|
||||||
|
# Copyright (c) 2014, Valentin Lorentz
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
@ -30,6 +31,7 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
import types
|
import types
|
||||||
|
import string
|
||||||
import socket
|
import socket
|
||||||
import threading
|
import threading
|
||||||
import re
|
import re
|
||||||
@ -46,12 +48,67 @@ import supybot.callbacks as callbacks
|
|||||||
from supybot.i18n import PluginInternationalization, internationalizeDocstring
|
from supybot.i18n import PluginInternationalization, internationalizeDocstring
|
||||||
_ = PluginInternationalization('RSS')
|
_ = PluginInternationalization('RSS')
|
||||||
|
|
||||||
def getFeedName(irc, msg, args, state):
|
def get_feedName(irc, msg, args, state):
|
||||||
if not registry.isValidRegistryName(args[0]):
|
if not registry.isValidRegistryName(args[0]):
|
||||||
state.errorInvalid('feed name', args[0],
|
state.errorInvalid('feed name', args[0],
|
||||||
'Feed names must not include spaces.')
|
'Feed names must not include spaces.')
|
||||||
state.args.append(callbacks.canonicalName(args.pop(0)))
|
state.args.append(callbacks.canonicalName(args.pop(0)))
|
||||||
addConverter('feedName', getFeedName)
|
addConverter('feedName', get_feedName)
|
||||||
|
|
||||||
|
class Feed:
|
||||||
|
__slots__ = ('url', 'name', 'data', 'last_update', 'entries',
|
||||||
|
'lock', 'announced_entries')
|
||||||
|
def __init__(self, name, url, plugin_is_loading=False):
|
||||||
|
self.name = name
|
||||||
|
self.url = url
|
||||||
|
self.data = None
|
||||||
|
# We don't want to fetch feeds right after the plugin is
|
||||||
|
# loaded (the bot could be starting, and thus already busy)
|
||||||
|
self.last_update = time.time() if plugin_is_loading else 0
|
||||||
|
self.entries = None
|
||||||
|
self.lock = threading.Thread()
|
||||||
|
self.announced_entries = utils.structures.TruncatableSet()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def command(self):
|
||||||
|
docstring = format(_("""[<number of headlines>]
|
||||||
|
|
||||||
|
Reports the titles for %s at the RSS feed %u. If
|
||||||
|
<number of headlines> is given, returns only that many headlines.
|
||||||
|
RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod
|
||||||
|
seconds, which defaults to 1800 (30 minutes) since that's what most
|
||||||
|
websites prefer."""), self.name, self.url)
|
||||||
|
if self.isCommandMethod(name):
|
||||||
|
s = format('I already have a command in this plugin named %s.',name)
|
||||||
|
raise callbacks.Error(s)
|
||||||
|
def f(self, irc, msg, args):
|
||||||
|
args.insert(0, url)
|
||||||
|
self.rss(irc, msg, args)
|
||||||
|
f = utils.python.changeFunctionName(f, name, docstring)
|
||||||
|
f = types.MethodType(f, self)
|
||||||
|
return f
|
||||||
|
|
||||||
|
def lock_feed(f):
|
||||||
|
def newf(feed, *args, **kwargs):
|
||||||
|
with feed.lock:
|
||||||
|
return f(feed, *args, **kwargs)
|
||||||
|
return f
|
||||||
|
|
||||||
|
def sort_feed_items(items, order):
|
||||||
|
"""Return feed items, sorted according to sortFeedItems."""
|
||||||
|
if order not in ['oldestFirst', 'newestFirst']:
|
||||||
|
return items
|
||||||
|
if order == 'oldestFirst':
|
||||||
|
reverse = False
|
||||||
|
if order == 'newestFirst':
|
||||||
|
reverse = True
|
||||||
|
try:
|
||||||
|
sitems = sorted(items, key=lambda i: i['updated'], reverse=reverse)
|
||||||
|
except KeyError:
|
||||||
|
# feedparser normalizes required timestamp fields in ATOM and RSS
|
||||||
|
# to the "updated" field. Feeds missing it are unsortable by date.
|
||||||
|
return items
|
||||||
|
return sitems
|
||||||
|
|
||||||
class RSS(callbacks.Plugin):
|
class RSS(callbacks.Plugin):
|
||||||
"""This plugin is useful both for announcing updates to RSS feeds in a
|
"""This plugin is useful both for announcing updates to RSS feeds in a
|
||||||
@ -62,326 +119,153 @@ class RSS(callbacks.Plugin):
|
|||||||
def __init__(self, irc):
|
def __init__(self, irc):
|
||||||
self.__parent = super(RSS, self)
|
self.__parent = super(RSS, self)
|
||||||
self.__parent.__init__(irc)
|
self.__parent.__init__(irc)
|
||||||
# Schema is feed : [url, command]
|
# Scheme: {name: url}
|
||||||
self.feedNames = callbacks.CanonicalNameDict()
|
self.feed_names = callbacks.CanonicalNameDict()
|
||||||
self.locks = {}
|
# Scheme: {url: feed}
|
||||||
self.lastRequest = {}
|
self.feeds = {}
|
||||||
self.cachedFeeds = {}
|
|
||||||
self.cachedHeadlines = {}
|
|
||||||
self.gettingLockLock = threading.Lock()
|
|
||||||
for name in self.registryValue('feeds'):
|
for name in self.registryValue('feeds'):
|
||||||
self._registerFeed(name)
|
self.register_feed_config(name)
|
||||||
try:
|
try:
|
||||||
url = self.registryValue(registry.join(['feeds', name]))
|
url = self.registryValue(registry.join(['feeds', name]))
|
||||||
except registry.NonExistentRegistryEntry:
|
except registry.NonExistentRegistryEntry:
|
||||||
self.log.warning('%s is not a registered feed, removing.',name)
|
self.log.warning('%s is not a registered feed, removing.',name)
|
||||||
continue
|
continue
|
||||||
self.makeFeedCommand(name, url)
|
self.feed_names[name] = url
|
||||||
self.getFeed(url) # So announced feeds don't announce on startup.
|
self.feeds[url] = Feed(name, url, True)
|
||||||
|
|
||||||
|
##################
|
||||||
|
# Feed registering
|
||||||
|
|
||||||
|
def register_feed_config(self, name, url=''):
|
||||||
|
self.registryValue('feeds').add(name)
|
||||||
|
group = self.registryValue('feeds', value=False)
|
||||||
|
conf.registerGlobalValue(group, name, registry.String(url, ''))
|
||||||
|
|
||||||
|
def remove_feed(self, feed):
|
||||||
|
del self.feed_names[feed.name]
|
||||||
|
del self.feeds[feed.url]
|
||||||
|
conf.supybot.plugins.RSS.feeds().remove(name)
|
||||||
|
conf.supybot.plugins.RSS.feeds.unregister(name)
|
||||||
|
|
||||||
|
##################
|
||||||
|
# Methods handling
|
||||||
|
|
||||||
def isCommandMethod(self, name):
|
def isCommandMethod(self, name):
|
||||||
if not self.__parent.isCommandMethod(name):
|
if not self.__parent.isCommandMethod(name):
|
||||||
if name in self.feedNames:
|
return bool(self.get_feed(name))
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def listCommands(self):
|
def listCommands(self):
|
||||||
return self.__parent.listCommands(self.feedNames.keys())
|
return self.__parent.listCommands(self.feeds.keys())
|
||||||
|
|
||||||
def getCommandMethod(self, command):
|
def getCommandMethod(self, command):
|
||||||
try:
|
try:
|
||||||
return self.__parent.getCommandMethod(command)
|
return self.__parent.getCommandMethod(command)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return self.feedNames[command[0]][1]
|
return self.feeds[command[0]].command
|
||||||
|
|
||||||
def _registerFeed(self, name, url=''):
|
|
||||||
self.registryValue('feeds').add(name)
|
|
||||||
group = self.registryValue('feeds', value=False)
|
|
||||||
conf.registerGlobalValue(group, name, registry.String(url, ''))
|
|
||||||
|
|
||||||
def __call__(self, irc, msg):
|
def __call__(self, irc, msg):
|
||||||
self.__parent.__call__(irc, msg)
|
self.__parent.__call__(irc, msg)
|
||||||
irc = callbacks.SimpleProxy(irc, msg)
|
self.update_feeds()
|
||||||
newFeeds = {}
|
|
||||||
for channel in irc.state.channels:
|
|
||||||
feeds = self.registryValue('announce', channel)
|
|
||||||
for name in feeds:
|
|
||||||
commandName = callbacks.canonicalName(name)
|
|
||||||
if self.isCommandMethod(commandName):
|
|
||||||
url = self.feedNames[commandName][0]
|
|
||||||
else:
|
|
||||||
url = name
|
|
||||||
if self.willGetNewFeed(url):
|
|
||||||
newFeeds.setdefault((url, name), []).append(channel)
|
|
||||||
for ((url, name), channels) in newFeeds.iteritems():
|
|
||||||
# We check if we can acquire the lock right here because if we
|
|
||||||
# don't, we'll possibly end up spawning a lot of threads to get
|
|
||||||
# the feed, because this thread may run for a number of bytecodes
|
|
||||||
# before it switches to a thread that'll get the lock in
|
|
||||||
# _newHeadlines.
|
|
||||||
if self.acquireLock(url, blocking=False):
|
|
||||||
try:
|
|
||||||
t = threading.Thread(target=self._newHeadlines,
|
|
||||||
name=format('Fetching %u', url),
|
|
||||||
args=(irc, channels, name, url))
|
|
||||||
self.log.info('Checking for announcements at %u', url)
|
|
||||||
world.threadsSpawned += 1
|
|
||||||
t.setDaemon(True)
|
|
||||||
t.start()
|
|
||||||
finally:
|
|
||||||
self.releaseLock(url)
|
|
||||||
time.sleep(0.1) # So other threads can run.
|
|
||||||
|
|
||||||
def buildHeadlines(self, headlines, channel, linksconfig='announce.showLinks', dateconfig='announce.showPubDate'):
|
|
||||||
newheadlines = []
|
|
||||||
for headline in headlines:
|
|
||||||
link = ''
|
|
||||||
pubDate = ''
|
|
||||||
if self.registryValue(linksconfig, channel):
|
|
||||||
if headline[1]:
|
|
||||||
if self.registryValue('stripRedirect'):
|
|
||||||
link = re.sub('^.*http://', 'http://', headline[1])
|
|
||||||
else:
|
|
||||||
link = headline[1]
|
|
||||||
if self.registryValue(dateconfig, channel):
|
|
||||||
if headline[2]:
|
|
||||||
pubDate = ' [%s]' % (headline[2],)
|
|
||||||
if sys.version_info[0] < 3:
|
|
||||||
if isinstance(headline[0], unicode):
|
|
||||||
try:
|
|
||||||
import charade.universaldetector
|
|
||||||
u = charade.universaldetector.UniversalDetector()
|
|
||||||
u.feed(headline[0])
|
|
||||||
u.close()
|
|
||||||
encoding = u.result['encoding']
|
|
||||||
except ImportError:
|
|
||||||
encoding = 'utf8'
|
|
||||||
newheadlines.append(format('%s %u%s',
|
|
||||||
headline[0].encode(encoding,'replace'),
|
|
||||||
link,
|
|
||||||
pubDate))
|
|
||||||
else:
|
|
||||||
newheadlines.append(format('%s %u%s',
|
|
||||||
headline[0],
|
|
||||||
link,
|
|
||||||
pubDate))
|
|
||||||
else:
|
|
||||||
newheadlines.append(format('%s %u%s',
|
|
||||||
headline[0],
|
|
||||||
link,
|
|
||||||
pubDate))
|
|
||||||
return newheadlines
|
|
||||||
|
|
||||||
def _newHeadlines(self, irc, channels, name, url):
|
##################
|
||||||
try:
|
# Status accessors
|
||||||
# We acquire the lock here so there's only one announcement thread
|
|
||||||
# in this code at any given time. Otherwise, several announcement
|
|
||||||
# threads will getFeed (all blocking, in turn); then they'll all
|
|
||||||
# want to send their news messages to the appropriate channels.
|
|
||||||
# Note that we're allowed to acquire this lock twice within the
|
|
||||||
# same thread because it's an RLock and not just a normal Lock.
|
|
||||||
self.acquireLock(url)
|
|
||||||
t = time.time()
|
|
||||||
try:
|
|
||||||
#oldresults = self.cachedFeeds[url]
|
|
||||||
#oldheadlines = self.getHeadlines(oldresults)
|
|
||||||
oldheadlines = self.cachedHeadlines[url]
|
|
||||||
oldheadlines = list(filter(lambda x: t - x[3] <
|
|
||||||
self.registryValue('announce.cachePeriod'), oldheadlines))
|
|
||||||
except KeyError:
|
|
||||||
oldheadlines = []
|
|
||||||
newresults = self.getFeed(url)
|
|
||||||
newheadlines = self.getHeadlines(newresults)
|
|
||||||
if len(newheadlines) == 1:
|
|
||||||
s = newheadlines[0][0]
|
|
||||||
if s in ('Timeout downloading feed.',
|
|
||||||
'Unable to download feed.'):
|
|
||||||
self.log.debug('%s %u', s, url)
|
|
||||||
return
|
|
||||||
def normalize(headline):
|
|
||||||
return (tuple(headline[0].lower().split()), headline[1])
|
|
||||||
oldheadlinesset = set(map(normalize, oldheadlines))
|
|
||||||
for (i, headline) in enumerate(newheadlines):
|
|
||||||
if normalize(headline) in oldheadlinesset:
|
|
||||||
newheadlines[i] = None
|
|
||||||
newheadlines = list(filter(None, newheadlines)) # Removes Nones.
|
|
||||||
number_of_headlines = len(oldheadlines)
|
|
||||||
oldheadlines.extend(newheadlines)
|
|
||||||
self.cachedHeadlines[url] = oldheadlines
|
|
||||||
if newheadlines:
|
|
||||||
def filter_whitelist(headline):
|
|
||||||
v = False
|
|
||||||
for kw in whitelist:
|
|
||||||
if kw in headline[0] or kw in headline[1]:
|
|
||||||
v = True
|
|
||||||
break
|
|
||||||
return v
|
|
||||||
def filter_blacklist(headline):
|
|
||||||
v = True
|
|
||||||
for kw in blacklist:
|
|
||||||
if kw in headline[0] or kw in headline[1]:
|
|
||||||
v = False
|
|
||||||
break
|
|
||||||
return v
|
|
||||||
for channel in channels:
|
|
||||||
if number_of_headlines == 0:
|
|
||||||
channelnewheadlines = newheadlines[:self.registryValue('initialAnnounceHeadlines', channel)]
|
|
||||||
else:
|
|
||||||
channelnewheadlines = newheadlines[:]
|
|
||||||
whitelist = self.registryValue('keywordWhitelist', channel)
|
|
||||||
blacklist = self.registryValue('keywordBlacklist', channel)
|
|
||||||
if len(whitelist) != 0:
|
|
||||||
channelnewheadlines = filter(filter_whitelist, channelnewheadlines)
|
|
||||||
if len(blacklist) != 0:
|
|
||||||
channelnewheadlines = filter(filter_blacklist, channelnewheadlines)
|
|
||||||
channelnewheadlines = list(channelnewheadlines)
|
|
||||||
if len(channelnewheadlines) == 0:
|
|
||||||
return
|
|
||||||
bold = self.registryValue('bold', channel)
|
|
||||||
sep = self.registryValue('headlineSeparator', channel)
|
|
||||||
prefix = self.registryValue('announcementPrefix', channel)
|
|
||||||
suffix = self.registryValue('announcementSeparator', channel)
|
|
||||||
pre = format('%s%s%s', prefix, name, suffix)
|
|
||||||
if bold:
|
|
||||||
pre = ircutils.bold(pre)
|
|
||||||
sep = ircutils.bold(sep)
|
|
||||||
headlines = self.buildHeadlines(channelnewheadlines, channel)
|
|
||||||
irc.replies(headlines, prefixer=pre, joiner=sep,
|
|
||||||
to=channel, prefixNick=False, private=True)
|
|
||||||
finally:
|
|
||||||
self.releaseLock(url)
|
|
||||||
|
|
||||||
def willGetNewFeed(self, url):
|
def get_feed(self, name):
|
||||||
now = time.time()
|
return self.feeds.get(self.feed_names.get(name, name), None)
|
||||||
wait = self.registryValue('waitPeriod')
|
|
||||||
if url not in self.lastRequest or now - self.lastRequest[url] > wait:
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def acquireLock(self, url, blocking=True):
|
def is_expired(self, feed):
|
||||||
try:
|
assert feed
|
||||||
self.gettingLockLock.acquire()
|
event_horizon = time.time() - self.registryValue('waitPeriod')
|
||||||
try:
|
return feed.last_update < event_horizon
|
||||||
lock = self.locks[url]
|
|
||||||
except KeyError:
|
|
||||||
lock = threading.RLock()
|
|
||||||
self.locks[url] = lock
|
|
||||||
return lock.acquire(blocking=blocking)
|
|
||||||
finally:
|
|
||||||
self.gettingLockLock.release()
|
|
||||||
|
|
||||||
def releaseLock(self, url):
|
|
||||||
self.locks[url].release()
|
|
||||||
|
|
||||||
def getFeed(self, url):
|
###############
|
||||||
def error(s):
|
# Feed fetching
|
||||||
return {'items': [{'title': s}]}
|
|
||||||
try:
|
@lock_feed
|
||||||
# This is the most obvious place to acquire the lock, because a
|
def update_feed(self, feed):
|
||||||
# malicious user could conceivably flood the bot with rss commands
|
d = feedparser.parse(feed.url)
|
||||||
# and DoS the website in question.
|
feed.data = d.feed
|
||||||
self.acquireLock(url)
|
feed.entries = d.entries
|
||||||
if self.willGetNewFeed(url):
|
self.announce_feed(feed)
|
||||||
results = {}
|
|
||||||
try:
|
def update_feed_in_thread(self, feed):
|
||||||
self.log.debug('Downloading new feed from %u', url)
|
feed.last_update = time.time()
|
||||||
results = feedparser.parse(url)
|
t = world.SupyThread(target=self.update_feed,
|
||||||
if 'bozo_exception' in results and not results['entries']:
|
name=format('Fetching feed %u', feed.url),
|
||||||
raise results['bozo_exception']
|
args=(feed,))
|
||||||
except feedparser.sgmllib.SGMLParseError:
|
t.setDaemon(True)
|
||||||
self.log.exception('Uncaught exception from feedparser:')
|
t.start()
|
||||||
raise callbacks.Error('Invalid (unparsable) RSS feed.')
|
|
||||||
except socket.timeout:
|
def update_feed_if_needed(self, feed):
|
||||||
return error('Timeout downloading feed.')
|
if self.is_expired(feed):
|
||||||
except Exception as e:
|
self.update_feed(feed)
|
||||||
# These seem mostly harmless. We'll need reports of a
|
|
||||||
# kind that isn't.
|
def update_feeds(self):
|
||||||
self.log.debug('Allowing bozo_exception %r through.', e)
|
for name in self.registryValue('feeds'):
|
||||||
if results.get('feed', {}) and self.getHeadlines(results):
|
self.update_feed_if_needed(self.get_feed(name))
|
||||||
self.cachedFeeds[url] = results
|
|
||||||
self.lastRequest[url] = time.time()
|
@lock_feed
|
||||||
else:
|
def announce_feed(self, feed):
|
||||||
self.log.debug('Not caching results; feed is empty.')
|
entries = feed.entries
|
||||||
try:
|
new_entries = [entry for entry in entries
|
||||||
return self.cachedFeeds[url]
|
if entry.id not in feed.announced_entries]
|
||||||
except KeyError:
|
if not new_entries:
|
||||||
wait = self.registryValue('waitPeriod')
|
return
|
||||||
# If there's a problem retrieving the feed, we should back off
|
|
||||||
# for a little bit before retrying so that there is time for
|
|
||||||
# the error to be resolved.
|
|
||||||
self.lastRequest[url] = time.time() - .5 * wait
|
|
||||||
return error('Unable to download feed.')
|
|
||||||
finally:
|
|
||||||
self.releaseLock(url)
|
|
||||||
|
|
||||||
def _getConverter(self, feed):
|
|
||||||
toText = utils.web.htmlToText
|
|
||||||
if 'encoding' in feed:
|
|
||||||
def conv(s):
|
|
||||||
# encode() first so there implicit encoding doesn't happen in
|
|
||||||
# other functions when unicode and bytestring objects are used
|
|
||||||
# together
|
|
||||||
s = s.encode(feed['encoding'], 'replace')
|
|
||||||
s = toText(s).strip()
|
|
||||||
return s
|
|
||||||
return conv
|
|
||||||
else:
|
|
||||||
return lambda s: toText(s).strip()
|
|
||||||
def _sortFeedItems(self, items):
|
|
||||||
"""Return feed items, sorted according to sortFeedItems."""
|
|
||||||
order = self.registryValue('sortFeedItems')
|
order = self.registryValue('sortFeedItems')
|
||||||
if order not in ['oldestFirst', 'newestFirst']:
|
new_entries = sort_feed_items(new_entries, order)
|
||||||
return items
|
for irc in world.ircs:
|
||||||
if order == 'oldestFirst':
|
for channel in irc.state.channels:
|
||||||
reverse = False
|
if feed.name not in self.registryValue('announce', channel):
|
||||||
if order == 'newestFirst':
|
continue
|
||||||
reverse = True
|
for entry in new_entries:
|
||||||
try:
|
self.announce_entry(irc, channel, feed, entry)
|
||||||
sitems = sorted(items, key=lambda i: i['updated'], reverse=reverse)
|
feed.announced_entries |= {entry.id for entry in new_entries}
|
||||||
except KeyError:
|
# We keep a little more because we don't want to re-announce
|
||||||
# feedparser normalizes required timestamp fields in ATOM and RSS
|
# oldest entries if one of the newest gets removed.
|
||||||
# to the "updated" field. Feeds missing it are unsortable by date.
|
feed.announced_entries.truncate(2*len(entries))
|
||||||
return items
|
|
||||||
return sitems
|
|
||||||
|
|
||||||
def getHeadlines(self, feed):
|
|
||||||
headlines = []
|
|
||||||
t = time.time()
|
|
||||||
conv = self._getConverter(feed)
|
|
||||||
for d in self._sortFeedItems(feed['items']):
|
|
||||||
if 'title' in d:
|
|
||||||
title = conv(d['title'])
|
|
||||||
link = d.get('link')
|
|
||||||
pubDate = d.get('pubDate', d.get('updated'))
|
|
||||||
headlines.append((title, link, pubDate, t))
|
|
||||||
return headlines
|
|
||||||
|
|
||||||
@internationalizeDocstring
|
#################
|
||||||
def makeFeedCommand(self, name, url):
|
# Entry rendering
|
||||||
docstring = format("""[<number of headlines>]
|
|
||||||
|
|
||||||
Reports the titles for %s at the RSS feed %u. If
|
def should_send_entry(self, channel, entry):
|
||||||
<number of headlines> is given, returns only that many headlines.
|
whitelist = self.registryValue('keywordWhitelist', channel)
|
||||||
RSS feeds are only looked up every supybot.plugins.RSS.waitPeriod
|
blacklist = self.registryValue('keywordBlacklist', channel)
|
||||||
seconds, which defaults to 1800 (30 minutes) since that's what most
|
if whitelist:
|
||||||
websites prefer.
|
if all(kw not in entry.title and kw not in entry.description
|
||||||
""", name, url)
|
for kw in whitelist):
|
||||||
if url not in self.locks:
|
return False
|
||||||
self.locks[url] = threading.RLock()
|
if blacklist:
|
||||||
if self.isCommandMethod(name):
|
if any(kw in entry.title or kw in entry.description
|
||||||
s = format('I already have a command in this plugin named %s.',name)
|
for kw in blacklist):
|
||||||
raise callbacks.Error(s)
|
return False
|
||||||
def f(self, irc, msg, args):
|
return True
|
||||||
args.insert(0, url)
|
|
||||||
self.rss(irc, msg, args)
|
def format_entry(self, channel, feed, entry, is_announce):
|
||||||
f = utils.python.changeFunctionName(f, name, docstring)
|
if is_announce:
|
||||||
f = types.MethodType(f, self)
|
template = self.registryValue('announceFormat', channel)
|
||||||
self.feedNames[name] = (url, f)
|
else:
|
||||||
self._registerFeed(name, url)
|
template = self.registryValue('format', channel)
|
||||||
|
date = entry.get('published_parsed', entry.get('updated_parsed'))
|
||||||
|
date = utils.str.timestamp(date)
|
||||||
|
return string.Template(template).safe_substitute(template,
|
||||||
|
feed_name=feed.name,
|
||||||
|
date=date,
|
||||||
|
**entry)
|
||||||
|
|
||||||
|
def announce_entry(self, irc, channel, feed, entry):
|
||||||
|
if self.should_send_entry(channel, entry):
|
||||||
|
s = format_entry(channel, feed, entry, True)
|
||||||
|
irc.sendMsg(ircmsgs.privmsg(channel, s))
|
||||||
|
|
||||||
|
|
||||||
|
##########
|
||||||
|
# Commands
|
||||||
|
|
||||||
@internationalizeDocstring
|
@internationalizeDocstring
|
||||||
def add(self, irc, msg, args, name, url):
|
def add(self, irc, msg, args, name, url):
|
||||||
@ -390,7 +274,8 @@ class RSS(callbacks.Plugin):
|
|||||||
Adds a command to this plugin that will look up the RSS feed at the
|
Adds a command to this plugin that will look up the RSS feed at the
|
||||||
given URL.
|
given URL.
|
||||||
"""
|
"""
|
||||||
self.makeFeedCommand(name, url)
|
self.register_feed_config(name, url)
|
||||||
|
self.feeds[name] = Feed(name, url)
|
||||||
irc.replySuccess()
|
irc.replySuccess()
|
||||||
add = wrap(add, ['feedName', 'url'])
|
add = wrap(add, ['feedName', 'url'])
|
||||||
|
|
||||||
@ -401,12 +286,11 @@ class RSS(callbacks.Plugin):
|
|||||||
Removes the command for looking up RSS feeds at <name> from
|
Removes the command for looking up RSS feeds at <name> from
|
||||||
this plugin.
|
this plugin.
|
||||||
"""
|
"""
|
||||||
if name not in self.feedNames:
|
feed = self.get_feed(name)
|
||||||
|
if not feed:
|
||||||
irc.error(_('That\'s not a valid RSS feed command name.'))
|
irc.error(_('That\'s not a valid RSS feed command name.'))
|
||||||
return
|
return
|
||||||
del self.feedNames[name]
|
self.remove_feed(feed)
|
||||||
conf.supybot.plugins.RSS.feeds().remove(name)
|
|
||||||
conf.supybot.plugins.RSS.feeds.unregister(name)
|
|
||||||
irc.replySuccess()
|
irc.replySuccess()
|
||||||
remove = wrap(remove, ['feedName'])
|
remove = wrap(remove, ['feedName'])
|
||||||
|
|
||||||
@ -467,23 +351,25 @@ class RSS(callbacks.Plugin):
|
|||||||
If <number of headlines> is given, return only that many headlines.
|
If <number of headlines> is given, return only that many headlines.
|
||||||
"""
|
"""
|
||||||
self.log.debug('Fetching %u', url)
|
self.log.debug('Fetching %u', url)
|
||||||
feed = self.getFeed(url)
|
feed = self.get_feed(url)
|
||||||
|
if not feed:
|
||||||
|
feed = Feed(url, url)
|
||||||
if irc.isChannel(msg.args[0]):
|
if irc.isChannel(msg.args[0]):
|
||||||
channel = msg.args[0]
|
channel = msg.args[0]
|
||||||
else:
|
else:
|
||||||
channel = None
|
channel = None
|
||||||
headlines = self.getHeadlines(feed)
|
self.update_feed_if_needed(feed)
|
||||||
if not headlines:
|
entries = feed.entries
|
||||||
|
if not entries:
|
||||||
irc.error(_('Couldn\'t get RSS feed.'))
|
irc.error(_('Couldn\'t get RSS feed.'))
|
||||||
return
|
return
|
||||||
headlines = self.buildHeadlines(headlines, channel, 'showLinks', 'showPubDate')
|
n = n or self.registryValue('defaultNumberOfHeadlines', channel)
|
||||||
if n:
|
entries = list(filter(lambda e:self.should_send_entry(channel, e),
|
||||||
headlines = headlines[:n]
|
feed.entries))
|
||||||
else:
|
entries = entries[:n]
|
||||||
headlines = headlines[:self.registryValue('defaultNumberOfHeadlines')]
|
headlines = map(lambda e:self.format_entry(channel, feed, e, False),
|
||||||
|
entries)
|
||||||
sep = self.registryValue('headlineSeparator', channel)
|
sep = self.registryValue('headlineSeparator', channel)
|
||||||
if self.registryValue('bold', channel):
|
|
||||||
sep = ircutils.bold(sep)
|
|
||||||
irc.replies(headlines, joiner=sep)
|
irc.replies(headlines, joiner=sep)
|
||||||
rss = wrap(rss, ['url', additional('int')])
|
rss = wrap(rss, ['url', additional('int')])
|
||||||
|
|
||||||
@ -498,9 +384,9 @@ class RSS(callbacks.Plugin):
|
|||||||
url = self.registryValue('feeds.%s' % url)
|
url = self.registryValue('feeds.%s' % url)
|
||||||
except registry.NonExistentRegistryEntry:
|
except registry.NonExistentRegistryEntry:
|
||||||
pass
|
pass
|
||||||
feed = self.getFeed(url)
|
feed = self.get_feed(url)
|
||||||
conv = self._getConverter(feed)
|
self.update_feed_if_needed(feed)
|
||||||
info = feed.get('feed')
|
info = feed.data
|
||||||
if not info:
|
if not info:
|
||||||
irc.error(_('I couldn\'t retrieve that RSS feed.'))
|
irc.error(_('I couldn\'t retrieve that RSS feed.'))
|
||||||
return
|
return
|
||||||
@ -510,10 +396,10 @@ class RSS(callbacks.Plugin):
|
|||||||
now = time.mktime(time.gmtime())
|
now = time.mktime(time.gmtime())
|
||||||
when = utils.timeElapsed(now - seconds) + ' ago'
|
when = utils.timeElapsed(now - seconds) + ' ago'
|
||||||
else:
|
else:
|
||||||
when = 'time unavailable'
|
when = _('time unavailable')
|
||||||
title = conv(info.get('title', 'unavailable'))
|
title = info.get('title', _('unavailable'))
|
||||||
desc = conv(info.get('description', 'unavailable'))
|
desc = info.get('description', _('unavailable'))
|
||||||
link = conv(info.get('link', 'unavailable'))
|
link = info.get('link', _('unavailable'))
|
||||||
# The rest of the entries are all available in the channel key
|
# The rest of the entries are all available in the channel key
|
||||||
response = format(_('Title: %s; URL: %u; '
|
response = format(_('Title: %s; URL: %u; '
|
||||||
'Description: %s; Last updated: %s.'),
|
'Description: %s; Last updated: %s.'),
|
||||||
|
Loading…
Reference in New Issue
Block a user