RSS: Add support for $content/$summary_detail/$title_detail

2025-07-15 10:07:31 +02:00 · 2023-10-17 19:00:03 +02:00 · 2023-10-17 19:00:03 +02:00 · 04f0d70113
commit 04f0d70113
parent edb13f65df
2 changed files with 121 additions and 1 deletions
--- a/plugins/RSS/plugin.py
+++ b/plugins/RSS/plugin.py
@ -497,6 +497,35 @@ class RSS(callbacks.Plugin):
                  isinstance(v, str)}
        kwargs["feed_name"] = feed.name
        kwargs.update(entry)
+        for (key, value) in list(kwargs.items()):
+            # First look for plain text
+            if isinstance(value, list):
+                for item in value:
+                    if isinstance(item, dict) and 'value' in item and \
+                            item.get('type') == 'text/plain':
+                        value = item['value']
+                        break
+            # Then look for HTML text or URL
+            if isinstance(value, list):
+                for item in value:
+                    if isinstance(item, dict) and item.get('type') in \
+                            ('text/html', 'application/xhtml+xml'):
+                        if 'value' in item:
+                            value = utils.web.htmlToText(item['value'])
+                        elif 'href' in item:
+                            value = item['href']
+            # Then fall back to any URL
+            if isinstance(value, list):
+                for item in value:
+                    if isinstance(item, dict) and 'href' in item:
+                        value = item['href']
+                        break
+            # Finally, as a last resort, use the value as-is
+            if isinstance(value, list):
+                for item in value:
+                    if isinstance(item, dict) and 'value' in item:
+                        value = item['value']
+            kwargs[key] = value
        s = string.Template(template).safe_substitute(entry, **kwargs, date=date)
        return self._normalize_entry(s)

--- a/plugins/RSS/test.py
+++ b/plugins/RSS/test.py
@ -59,7 +59,6 @@ not_well_formed = """<?xml version="1.0" encoding="utf-8"?>
 </rss>
 """

-
 class MockResponse:
    headers = {}
    url = ''
@ -359,6 +358,98 @@ class RSSTestCase(ChannelPluginTestCase):
            self.assertRegexp('rss http://xkcd.com/rss.xml',
                    'On the other hand, the refractor\'s')

+    @mock_urllib
+    def testContentHtmlOnly(self, mock):
+        timeFastForward(1.1)
+        with conf.supybot.plugins.RSS.format.context('$content'):
+            mock._data = """
+<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
+  <title>Recent Commits to anope:2.0</title>
+  <updated>2023-10-04T16:14:39Z</updated>
+  <entry>
+    <title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
+    <updated>2023-10-04T16:14:39Z</updated>
+    <content type="html">
+      content with &lt;pre&gt;HTML&lt;pre&gt;
+    </content>
+  </entry>
+</feed>"""
+            self.assertRegexp('rss https://example.org',
+                    'content with HTML')
+
+    @mock_urllib
+    def testContentXhtmlOnly(self, mock):
+        timeFastForward(1.1)
+        with conf.supybot.plugins.RSS.format.context('$content'):
+            mock._data = """
+<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
+  <title>Recent Commits to anope:2.0</title>
+  <updated>2023-10-04T16:14:39Z</updated>
+  <entry>
+    <title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
+    <updated>2023-10-04T16:14:39Z</updated>
+    <content type="xhtml">
+      <div xmlns="http://www.w3.org/1999/xhtml">
+        content with <pre>XHTML<pre>
+      </div>
+    </content>
+  </entry>
+</feed>"""
+            self.assertRegexp('rss https://example.org',
+                    'content with XHTML')
+
+    @mock_urllib
+    def testContentHtmlAndPlaintext(self, mock):
+        timeFastForward(1.1)
+        with conf.supybot.plugins.RSS.format.context('$content'):
+            mock._data = """
+<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
+  <title>Recent Commits to anope:2.0</title>
+  <updated>2023-10-04T16:14:39Z</updated>
+  <entry>
+    <title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
+    <updated>2023-10-04T16:14:39Z</updated>
+    <!-- Atom spec says multiple contents is invalid, feedparser says it's not.
+         I like having the option, so let's make sure we support it. -->
+    <content type="html">
+      content with &lt;pre&gt;HTML&lt;pre&gt;
+    </content>
+    <content type="text">
+      content with plaintext
+    </content>
+  </entry>
+</feed>"""
+            self.assertRegexp('rss https://example.org',
+                    'content with plaintext')
+
+    @mock_urllib
+    def testContentPlaintextAndHtml(self, mock):
+        timeFastForward(1.1)
+        with conf.supybot.plugins.RSS.format.context('$content'):
+            mock._data = """
+<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
+  <title>Recent Commits to anope:2.0</title>
+  <updated>2023-10-04T16:14:39Z</updated>
+  <entry>
+    <title>title with &lt;pre&gt;HTML&lt;pre&gt;</title>
+    <updated>2023-10-04T16:14:39Z</updated>
+    <!-- Atom spec says multiple contents is invalid, feedparser says it's not.
+         I like having the option, so let's make sure we support it. -->
+    <content type="text">
+      content with plaintext
+    </content>
+    <content type="html">
+      content with &lt;pre&gt;HTML&lt;pre&gt;
+    </content>
+  </entry>
+</feed>"""
+            self.assertRegexp('rss https://example.org',
+                    'content with plaintext')
+
    @mock_urllib
    def testFeedAttribute(self, mock):
        timeFastForward(1.1)