Mirror of https://github.com/Mikaela/Limnoria.git, synced 2024-11-26 20:59:27 +01:00
RSS: keep track of headlines over multiple feed fetches, with configurable expiration.

This is better at avoiding repeats than just keeping the last fetch, since
some feeds shuffle items around (like Google News search).

Conflicts:
    plugins/RSS/config.py
Commit: 20bef2dcd0
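
In outline, the change caches every announced headline together with the time
it was first seen, expires cache entries once they are older than
announce.cachePeriod seconds, and treats only headlines absent from the
surviving cache as new. A minimal standalone sketch of that idea, with
illustrative names (unseen_headlines, _cache) and a simplified title
comparison rather than the plugin's actual code:

    import time

    CACHE_PERIOD = 86400  # seconds; the default chosen for announce.cachePeriod

    _cache = {}  # url -> list of (title, link, first_seen) tuples

    def unseen_headlines(url, fetched):
        """Return the (title, link) pairs from `fetched` not announced
        within CACHE_PERIOD, and refresh the cache for `url`."""
        now = time.time()
        # Expire cache entries older than the cache period.
        old = [h for h in _cache.get(url, []) if now - h[2] < CACHE_PERIOD]
        seen = {(title.lower(), link) for (title, link, _) in old}
        new = [(title, link, now) for (title, link) in fetched
               if (title.lower(), link) not in seen]
        _cache[url] = old + new
        return [(title, link) for (title, link, _) in new]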
plugins/RSS/config.py

@@ -75,6 +75,7 @@ conf.registerGlobalValue(RSS, 'sortFeedItems',
     FeedItemSortOrder('asInFeed', _("""Determines whether feed items should be
     sorted by their update timestamp or kept in the same order as they appear
     in a feed.""")))
+
 conf.registerGlobalValue(RSS, 'feeds',
     FeedNames([], _("""Determines what feeds should be accessible as
     commands.""")))
@@ -104,6 +105,9 @@ conf.registerChannelValue(RSS.announce, 'showLinks',
     along with the title of the feed when a feed is automatically
     announced.""")))
 
+conf.registerGlobalValue(RSS.announce, 'cachePeriod',
+    registry.PositiveInteger(86400, """Maximum age of cached RSS headlines,
+    in seconds. Headline cache is used to avoid re-announcing old news."""))
 
 
 # vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
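
Assuming the standard Config plugin and command prefix, the new value could
then be tuned at runtime like any other registry variable, e.g. to keep two
days of headlines instead of one:

    @config plugins.RSS.announce.cachePeriod 172800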
plugins/RSS/plugin.py

@@ -72,6 +72,7 @@ class RSS(callbacks.Plugin):
         self.locks = {}
         self.lastRequest = {}
         self.cachedFeeds = {}
+        self.cachedHeadlines = {}
         self.gettingLockLock = threading.Lock()
         for name in self.registryValue('feeds'):
             self._registerFeed(name)
@@ -164,9 +165,12 @@ class RSS(callbacks.Plugin):
         # Note that we're allowed to acquire this lock twice within the
         # same thread because it's an RLock and not just a normal Lock.
         self.acquireLock(url)
+        t = time.time()
         try:
-            oldresults = self.cachedFeeds[url]
-            oldheadlines = self.getHeadlines(oldresults)
+            #oldresults = self.cachedFeeds[url]
+            #oldheadlines = self.getHeadlines(oldresults)
+            oldheadlines = self.cachedHeadlines[url]
+            oldheadlines = filter(lambda x: t - x[2] < self.registryValue('announce.cachePeriod'), oldheadlines)
         except KeyError:
             oldheadlines = []
         newresults = self.getFeed(url)
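
Each cached entry is a (title, link, first_seen) 3-tuple (see the
getHeadlines hunk below), so the lambda drops entries whose age exceeds
announce.cachePeriod. A tiny self-contained illustration of the same
predicate, with invented data:

    import time

    t = time.time()
    cachePeriod = 86400
    headlines = [('old news', 'http://a.example/', t - 90000),
                 ('recent news', 'http://b.example/', t - 600)]
    kept = [h for h in headlines if t - h[2] < cachePeriod]
    # kept == [('recent news', 'http://b.example/', t - 600)]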
@@ -179,11 +183,13 @@ class RSS(callbacks.Plugin):
             return
         def normalize(headline):
             return (tuple(headline[0].lower().split()), headline[1])
-        oldheadlines = set(map(normalize, oldheadlines))
+        oldheadlinesset = set(map(normalize, oldheadlines))
         for (i, headline) in enumerate(newheadlines):
-            if normalize(headline) in oldheadlines:
+            if normalize(headline) in oldheadlinesset:
                 newheadlines[i] = None
         newheadlines = filter(None, newheadlines) # Removes Nones.
+        oldheadlines.extend(newheadlines)
+        self.cachedHeadlines[url] = oldheadlines
         if newheadlines:
             def filter_whitelist(headline):
                 v = False
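
normalize makes the duplicate check insensitive to case and whitespace while
still distinguishing links, and the cache is then written back as the
surviving old entries plus the genuinely new ones. A standalone sketch of the
normalization, with invented sample data:

    def normalize(headline):
        # headline is (title, link, first_seen); compare folded title plus link
        return (tuple(headline[0].lower().split()), headline[1])

    a = ('Breaking  News', 'http://x.example/', 100.0)
    b = ('breaking news', 'http://x.example/', 200.0)
    assert normalize(a) == normalize(b)  # same headline despite case/spacing

Note that the hunk relies on filter() returning a list, as it does on the
Python 2 interpreters this code targeted; on Python 3 both filter() calls
would need wrapping in list() for the later extend() to work.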
@@ -324,15 +330,13 @@ class RSS(callbacks.Plugin):
 
     def getHeadlines(self, feed):
         headlines = []
+        t = time.time()
         conv = self._getConverter(feed)
         for d in self._sortFeedItems(feed['items']):
             if 'title' in d:
                 title = conv(d['title'])
-                link = d.get('link')
-                if link:
-                    headlines.append((title, link))
-                else:
-                    headlines.append((title, None))
+                link = d.get('link') # defaults to None
+                headlines.append((title, link, t))
         return headlines
 
     @internationalizeDocstring
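
With this change every headline records when it was fetched, which is the
timestamp the cachePeriod filter above compares against. A toy
feedparser-style dict shows the resulting shape (sample data invented):

    import time

    feed = {'items': [{'title': 'Example headline', 'link': 'http://example.com/1'},
                      {'title': 'Item without a link'}]}

    t = time.time()
    headlines = [(d['title'], d.get('link'), t) for d in feed['items'] if 'title' in d]
    # [('Example headline', 'http://example.com/1', t), ('Item without a link', None, t)]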