mirror of
https://github.com/Mikaela/Limnoria.git
synced 2025-02-08 18:44:06 +01:00
RSS: keep track of headlines over multiple feed fetches, with configurable expiration.
This avoids repeats better than keeping only the last fetch, since some feeds (such as Google News search) shuffle their items around between fetches.
This commit is contained in:
parent
5d6a3c5a46
commit
bc0d16a4e1
@ -62,6 +62,7 @@ conf.registerGlobalValue(RSS, 'waitPeriod',
|
|||||||
registry.PositiveInteger(1800, """Indicates how many seconds the bot will
|
registry.PositiveInteger(1800, """Indicates how many seconds the bot will
|
||||||
wait between retrieving RSS feeds; requests made within this period will
|
wait between retrieving RSS feeds; requests made within this period will
|
||||||
return cached results."""))
|
return cached results."""))
|
||||||
|
|
||||||
conf.registerGlobalValue(RSS, 'feeds',
|
conf.registerGlobalValue(RSS, 'feeds',
|
||||||
FeedNames([], """Determines what feeds should be accessible as
|
FeedNames([], """Determines what feeds should be accessible as
|
||||||
commands."""))
|
commands."""))
|
||||||
@ -91,7 +92,9 @@ conf.registerChannelValue(RSS.announce, 'showLinks',
|
|||||||
registry.Boolean(False, """Determines whether the bot will list the link
|
registry.Boolean(False, """Determines whether the bot will list the link
|
||||||
along with the title of the feed when a feed is automatically
|
along with the title of the feed when a feed is automatically
|
||||||
announced."""))
|
announced."""))
|
||||||
|
conf.registerGlobalValue(RSS.announce, 'cachePeriod',
|
||||||
|
registry.PositiveInteger(86400, """Maximum age of cached RSS headlines,
|
||||||
|
in seconds. Headline cache is used to avoid re-announcing old news."""))
|
||||||
|
|
||||||
|
|
||||||
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|
||||||
|
@ -70,6 +70,7 @@ class RSS(callbacks.Plugin):
|
|||||||
self.locks = {}
|
self.locks = {}
|
||||||
self.lastRequest = {}
|
self.lastRequest = {}
|
||||||
self.cachedFeeds = {}
|
self.cachedFeeds = {}
|
||||||
|
self.cachedHeadlines = {}
|
||||||
self.gettingLockLock = threading.Lock()
|
self.gettingLockLock = threading.Lock()
|
||||||
for name in self.registryValue('feeds'):
|
for name in self.registryValue('feeds'):
|
||||||
self._registerFeed(name)
|
self._registerFeed(name)
|
||||||
@ -161,9 +162,12 @@ class RSS(callbacks.Plugin):
|
|||||||
# Note that we're allowed to acquire this lock twice within the
|
# Note that we're allowed to acquire this lock twice within the
|
||||||
# same thread because it's an RLock and not just a normal Lock.
|
# same thread because it's an RLock and not just a normal Lock.
|
||||||
self.acquireLock(url)
|
self.acquireLock(url)
|
||||||
|
t = time.time()
|
||||||
try:
|
try:
|
||||||
oldresults = self.cachedFeeds[url]
|
#oldresults = self.cachedFeeds[url]
|
||||||
oldheadlines = self.getHeadlines(oldresults)
|
#oldheadlines = self.getHeadlines(oldresults)
|
||||||
|
oldheadlines = self.cachedHeadlines[url]
|
||||||
|
oldheadlines = filter(lambda x: t - x[2] < self.registryValue('announce.cachePeriod'), oldheadlines)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
oldheadlines = []
|
oldheadlines = []
|
||||||
newresults = self.getFeed(url)
|
newresults = self.getFeed(url)
|
||||||
@ -176,11 +180,13 @@ class RSS(callbacks.Plugin):
|
|||||||
return
|
return
|
||||||
def normalize(headline):
|
def normalize(headline):
|
||||||
return (tuple(headline[0].lower().split()), headline[1])
|
return (tuple(headline[0].lower().split()), headline[1])
|
||||||
oldheadlines = set(map(normalize, oldheadlines))
|
oldheadlinesset = set(map(normalize, oldheadlines))
|
||||||
for (i, headline) in enumerate(newheadlines):
|
for (i, headline) in enumerate(newheadlines):
|
||||||
if normalize(headline) in oldheadlines:
|
if normalize(headline) in oldheadlinesset:
|
||||||
newheadlines[i] = None
|
newheadlines[i] = None
|
||||||
newheadlines = filter(None, newheadlines) # Removes Nones.
|
newheadlines = filter(None, newheadlines) # Removes Nones.
|
||||||
|
oldheadlines.extend(newheadlines)
|
||||||
|
self.cachedHeadlines[url] = oldheadlines
|
||||||
if newheadlines:
|
if newheadlines:
|
||||||
def filter_whitelist(headline):
|
def filter_whitelist(headline):
|
||||||
v = False
|
v = False
|
||||||
@ -301,15 +307,13 @@ class RSS(callbacks.Plugin):
|
|||||||
|
|
||||||
def getHeadlines(self, feed):
|
def getHeadlines(self, feed):
|
||||||
headlines = []
|
headlines = []
|
||||||
|
t = time.time()
|
||||||
conv = self._getConverter(feed)
|
conv = self._getConverter(feed)
|
||||||
for d in feed['items']:
|
for d in feed['items']:
|
||||||
if 'title' in d:
|
if 'title' in d:
|
||||||
title = conv(d['title'])
|
title = conv(d['title'])
|
||||||
link = d.get('link')
|
link = d.get('link') # defaults to None
|
||||||
if link:
|
headlines.append((title, link, t))
|
||||||
headlines.append((title, link))
|
|
||||||
else:
|
|
||||||
headlines.append((title, None))
|
|
||||||
return headlines
|
return headlines
|
||||||
|
|
||||||
def makeFeedCommand(self, name, url):
|
def makeFeedCommand(self, name, url):
|
||||||
|
Loading…
Reference in New Issue
Block a user