diff --git a/plugins/Fediverse/activitypub.py b/plugins/Fediverse/activitypub.py index 99d556cbd..895199367 100644 --- a/plugins/Fediverse/activitypub.py +++ b/plugins/Fediverse/activitypub.py @@ -34,6 +34,7 @@ import email import base64 import functools import contextlib +import urllib.error import urllib.parse import xml.etree.ElementTree as ET @@ -129,6 +130,47 @@ def _get_webfinger_url(hostname): return "https://%s/.well-known/webfinger?resource={uri}" +def has_webfinger_support(hostname): + """Returns whether the hostname probably supports webfinger or not. + + This relies on an edge case of the Webfinger specification, + so it may not successfully detect some hosts because they don't follow + the specification.""" + request = urllib.request.Request( + "https://%s/.well-known/webfinger" % hostname, method="HEAD" + ) + try: + urllib.request.urlopen(request) + except urllib.error.HTTPError as e: + if e.code == 400: + # RFC 7033 requires a 400 response when the "resource" parameter + # is missing: https://tools.ietf.org/html/rfc7033#section-4.2 + # + # This works for: + # * Misskey + # * PeerTube + # * Pleroma + return True + elif e.headers.get("Content-Type", "") == "application/jrd+json": + # WriteFreely, and possibly others. + # https://github.com/writeas/writefreely/issues/310 + return True + elif e.code == 404: + if e.headers.get("Server", "").lower() == "mastodon": + # https://github.com/tootsuite/mastodon/issues/13757 + return True + + # Else, the host probably doesn't support Webfinger. + + # Known false negatives: + # * Nextcloud (returns 404) + # * Pixelfed (returns 302 to the homepage): + # https://github.com/pixelfed/pixelfed/issues/2180 + # * Plume (returns 404): + # https://github.com/Plume-org/Plume/issues/770 + return False + + def webfinger(hostname, uri): template = _get_webfinger_url(hostname) assert template diff --git a/plugins/Fediverse/plugin.py b/plugins/Fediverse/plugin.py index a44577205..3e2617239 100644 --- a/plugins/Fediverse/plugin.py +++ b/plugins/Fediverse/plugin.py @@ -125,6 +125,14 @@ class Fediverse(callbacks.PluginRegexp): self._startHttp() self._actor_cache = utils.structures.TimeoutDict(timeout=600) + # Used when snarfing, to cheaply avoid querying non-ActivityPub + # servers. + # Is also written to when using commands that successfully find + # ActivityPub data. + self._webfinger_support_cache = utils.structures.TimeoutDict( + timeout=60 * 60 * 24 + ) + def _startHttp(self): callback = FediverseHttp() callback._plugin = self @@ -137,6 +145,13 @@ class Fediverse(callbacks.PluginRegexp): def _stopHttp(self): httpserver.unhook("fediverse") + def _has_webfinger_support(self, hostname): + if hostname not in self._webfinger_support_cache: + self._webfinger_support_cache[hostname] = ap.has_webfinger_support( + hostname + ) + return self._webfinger_support_cache[hostname] + def _get_actor(self, irc, username): if username in self._actor_cache: return self._actor_cache[username] @@ -167,6 +182,8 @@ class Fediverse(callbacks.PluginRegexp): if username: self._actor_cache[username] = actor + self._webfinger_support_cache[hostname] = True + self._actor_cache[actor["id"]] = actor return actor @@ -257,9 +274,17 @@ class Fediverse(callbacks.PluginRegexp): "snarfers.username", msg.channel, irc.network ): return + + if not self._has_webfinger_support(match.group("hostname")): + self.log.debug( + "Not snarfing, host doesn't have Webfinger support." + ) + return + try: actor = self._get_actor(irc, match.group(0)) - except ap.ActivityPubError: + except ap.ActivityPubError as e: + self.log.info("Could not fetch %s: %s", match.group(0), e) # Be silent on errors return @@ -282,6 +307,14 @@ class Fediverse(callbacks.PluginRegexp): snarf_status = self.registryValue("snarfers.status", channel, network) if not snarf_profile and not snarf_status: return + + hostname = urllib.parse.urlparse(url).hostname + if not self._has_webfinger_support(hostname): + self.log.debug( + "Not snarfing, host doesn't have Webfinger support." + ) + return + try: resource = ap.get_resource_from_url(url) except ap.ActivityPubError: @@ -357,6 +390,9 @@ class Fediverse(callbacks.PluginRegexp): status = ap.get_resource_from_url(url) except ap.ActivityPubError as e: irc.error(_("Could not get status: %s") % e.args[0], Raise=True) + else: + hostname = urllib.parse.urlparse(url).hostname + self._webfinger_support_cache[hostname] = True irc.reply(self._format_status(irc, msg, status)) diff --git a/plugins/Fediverse/test.py b/plugins/Fediverse/test.py index 8a3445460..7c4e6d367 100644 --- a/plugins/Fediverse/test.py +++ b/plugins/Fediverse/test.py @@ -34,9 +34,10 @@ import functools import contextlib from multiprocessing import Manager -from supybot import commands, conf, utils +from supybot import conf, log, utils from supybot.test import ChannelPluginTestCase, network +from . import activitypub as ap from .test_data import ( PRIVATE_KEY, HOSTMETA_URL, @@ -89,10 +90,31 @@ class NetworkedFediverseTestCase(BaseFediverseTestCase): "Error: Unknown user @nonexistinguser@oc.todon.fr.", ) + def testHasWebfingerSupport(self): + self.assertTrue(ap.has_webfinger_support("oc.todon.fr")) + self.assertFalse(ap.has_webfinger_support("example.org")) + class NetworklessFediverseTestCase(BaseFediverseTestCase): timeout = 0.1 + @contextlib.contextmanager + def mockWebfingerSupport(self, value): + original_has_webfinger_support = ap.has_webfinger_support + + @functools.wraps(original_has_webfinger_support) + def newf(hostname): + if value == "not called": + assert False + assert type(value) is bool + return value + + ap.has_webfinger_support = newf + + yield + + ap.has_webfinger_support = original_has_webfinger_support + @contextlib.contextmanager def mockRequests(self, expected_requests): with Manager() as m: @@ -105,6 +127,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): assert expected_requests, url (expected_url, response) = expected_requests.pop(0) self.assertEqual(url, expected_url, "Unexpected URL: %s" % url) + log.debug("Got request to %s", url) if isinstance(response, bytes): return response @@ -225,7 +248,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): ) def testProfileSnarfer(self): - with self.mockRequests([]): + with self.mockWebfingerSupport("not called"), self.mockRequests([]): self.assertSnarfNoResponse("aaa @nonexistinguser@example.org bbb") with conf.supybot.plugins.Fediverse.snarfers.username.context(True): @@ -235,24 +258,46 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): (ACTOR_URL, ACTOR_DATA), ] - with self.mockRequests(expected_requests): + # First request, should work + with self.mockWebfingerSupport(True), self.mockRequests( + expected_requests + ): self.assertSnarfResponse( "aaa @someuser@example.org bbb", "\x02someuser\x02 (@someuser@example.org): My Biography", ) + # Same request; it is all cached + with self.mockWebfingerSupport("not called"), self.mockRequests( + [] + ): + self.assertSnarfResponse( + "aaa @someuser@example.org bbb", + "\x02someuser\x02 (@someuser@example.org): My Biography", + ) + + # Nonexisting user + expected_requests = [ (HOSTMETA_URL, HOSTMETA_DATA), (WEBFINGER_URL, utils.web.Error("blah")), ] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport("not called"), self.mockRequests( + expected_requests + ): self.assertSnarfNoResponse( "aaa @nonexistinguser@example.org bbb" ) + def testProfileSnarferNoWebfinger(self): + with conf.supybot.plugins.Fediverse.snarfers.username.context(False): + # No webfinger support, shouldn't make requests + with self.mockWebfingerSupport(False), self.mockRequests([]): + self.assertSnarfNoResponse("aaa @someuser@example.org bbb") + def testProfileUrlSnarfer(self): - with self.mockRequests([]): + with self.mockWebfingerSupport("not called"), self.mockRequests([]): self.assertSnarfNoResponse( "aaa https://example.org/users/someuser bbb" ) @@ -260,14 +305,18 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): with conf.supybot.plugins.Fediverse.snarfers.profile.context(True): expected_requests = [(ACTOR_URL, utils.web.Error("blah"))] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport(True), self.mockRequests( + expected_requests + ): self.assertSnarfNoResponse( "aaa https://example.org/users/someuser bbb" ) expected_requests = [(ACTOR_URL, ACTOR_DATA)] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport("not called"), self.mockRequests( + expected_requests + ): self.assertSnarfResponse( "aaa https://example.org/users/someuser bbb", "\x02someuser\x02 (@someuser@example.org): My Biography", @@ -363,7 +412,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): ) def testStatusUrlSnarferDisabled(self): - with self.mockRequests([]): + with self.mockWebfingerSupport("not called"), self.mockRequests([]): self.assertSnarfNoResponse( "aaa https://example.org/users/someuser/statuses/1234 bbb" ) @@ -375,7 +424,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): (ACTOR_URL, ACTOR_DATA), ] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport(True), self.mockRequests( + expected_requests + ): self.assertSnarfResponse( "aaa https://example.org/users/someuser/statuses/1234 bbb", "\x02someuser\x02 (@someuser@example.org): " @@ -386,7 +437,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): with conf.supybot.plugins.Fediverse.snarfers.status.context(True): expected_requests = [(STATUS_URL, utils.web.Error("blah"))] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport(True), self.mockRequests( + expected_requests + ): self.assertSnarfNoResponse( "aaa https://example.org/users/someuser/statuses/1234 bbb" ) @@ -396,7 +449,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): (ACTOR_URL, utils.web.Error("blah")), ] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport("not called"), self.mockRequests( + expected_requests + ): self.assertSnarfResponse( "aaa https://example.org/users/someuser/statuses/1234 bbb", ": @ FirstAuthor I am replying to you", @@ -407,7 +462,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): with conf.supybot.plugins.Fediverse.snarfers.profile.context(True): expected_requests = [(STATUS_URL, STATUS_DATA)] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport(True), self.mockRequests( + expected_requests + ): self.assertSnarfNoResponse( "aaa https://example.org/users/someuser/statuses/1234 bbb" ) @@ -416,7 +473,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase): with conf.supybot.plugins.Fediverse.snarfers.profile.context(True): expected_requests = [(ACTOR_URL, ACTOR_DATA)] - with self.mockRequests(expected_requests): + with self.mockWebfingerSupport("not called"), self.mockRequests( + expected_requests + ): self.assertSnarfNoResponse( "aaa https://example.org/users/someuser/ bbb" )