Fediverse: Cache host support for webfinger before snarfing.

This is much cheaper, both for us and for the host, when the host
doesn't support ActivityPub at all (which is the case most of the
time).
This commit is contained in:
Valentin Lorentz 2020-05-14 21:33:34 +02:00
parent 566920070c
commit eaf7222509
3 changed files with 151 additions and 14 deletions

View File

@ -34,6 +34,7 @@ import email
import base64 import base64
import functools import functools
import contextlib import contextlib
import urllib.error
import urllib.parse import urllib.parse
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
@ -129,6 +130,47 @@ def _get_webfinger_url(hostname):
return "https://%s/.well-known/webfinger?resource={uri}" return "https://%s/.well-known/webfinger?resource={uri}"
def has_webfinger_support(hostname):
    """Returns whether the hostname probably supports webfinger or not.

    This relies on an edge case of the Webfinger specification,
    so it may not successfully detect some hosts because they don't follow
    the specification.

    A HEAD request is sent to ``https://<hostname>/.well-known/webfinger``
    without the "resource" parameter, and the error response is inspected.
    Hosts that cannot be reached at all (no such host, refused connection,
    ...) are reported as not supporting Webfinger instead of raising.

    Always returns a bool."""
    request = urllib.request.Request(
        "https://%s/.well-known/webfinger" % hostname, method="HEAD"
    )
    try:
        # Only the status matters; release the response immediately.
        with urllib.request.urlopen(request):
            pass
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        if e.code == 400:
            # RFC 7033 requires a 400 response when the "resource" parameter
            # is missing: https://tools.ietf.org/html/rfc7033#section-4.2
            #
            # This works for:
            # * Misskey
            # * PeerTube
            # * Pleroma
            return True

        # Ignore parameters (eg. "; charset=utf-8") and case when comparing
        # the media type.
        content_type = e.headers.get("Content-Type", "")
        media_type = content_type.split(";")[0].strip().lower()
        if media_type == "application/jrd+json":
            # WriteFreely, and possibly others.
            # https://github.com/writeas/writefreely/issues/310
            return True

        if e.code == 404:
            if e.headers.get("Server", "").lower() == "mastodon":
                # https://github.com/tootsuite/mastodon/issues/13757
                return True
    except urllib.error.URLError:
        # The host could not be contacted at all, so it cannot be queried
        # for ActivityPub data either.
        return False

    # Else, the host probably doesn't support Webfinger.

    # Known false negatives:
    # * Nextcloud (returns 404)
    # * Pixelfed (returns 302 to the homepage):
    #   https://github.com/pixelfed/pixelfed/issues/2180
    # * Plume (returns 404):
    #   https://github.com/Plume-org/Plume/issues/770
    return False
def webfinger(hostname, uri): def webfinger(hostname, uri):
template = _get_webfinger_url(hostname) template = _get_webfinger_url(hostname)
assert template assert template

View File

@ -125,6 +125,14 @@ class Fediverse(callbacks.PluginRegexp):
self._startHttp() self._startHttp()
self._actor_cache = utils.structures.TimeoutDict(timeout=600) self._actor_cache = utils.structures.TimeoutDict(timeout=600)
# Used when snarfing, to cheaply avoid querying non-ActivityPub
# servers.
# Is also written to when using commands that successfully find
# ActivityPub data.
self._webfinger_support_cache = utils.structures.TimeoutDict(
timeout=60 * 60 * 24
)
def _startHttp(self): def _startHttp(self):
callback = FediverseHttp() callback = FediverseHttp()
callback._plugin = self callback._plugin = self
@ -137,6 +145,13 @@ class Fediverse(callbacks.PluginRegexp):
def _stopHttp(self): def _stopHttp(self):
httpserver.unhook("fediverse") httpserver.unhook("fediverse")
def _has_webfinger_support(self, hostname):
    """Cached wrapper around ap.has_webfinger_support().

    Returns the value stored in self._webfinger_support_cache for
    `hostname` if there is one; otherwise queries the host, stores the
    result in the cache, and returns it."""
    try:
        # Single lookup: a separate "in" check followed by a read could
        # fail if the entry expires between the two operations.
        # NOTE(review): assumes the cache raises KeyError for a missing or
        # expired key, as mappings normally do -- confirm for TimeoutDict.
        return self._webfinger_support_cache[hostname]
    except KeyError:
        pass

    supported = ap.has_webfinger_support(hostname)
    self._webfinger_support_cache[hostname] = supported
    # Return the local value rather than reading it back from the cache.
    return supported
def _get_actor(self, irc, username): def _get_actor(self, irc, username):
if username in self._actor_cache: if username in self._actor_cache:
return self._actor_cache[username] return self._actor_cache[username]
@ -167,6 +182,8 @@ class Fediverse(callbacks.PluginRegexp):
if username: if username:
self._actor_cache[username] = actor self._actor_cache[username] = actor
self._webfinger_support_cache[hostname] = True
self._actor_cache[actor["id"]] = actor self._actor_cache[actor["id"]] = actor
return actor return actor
@ -257,9 +274,17 @@ class Fediverse(callbacks.PluginRegexp):
"snarfers.username", msg.channel, irc.network "snarfers.username", msg.channel, irc.network
): ):
return return
if not self._has_webfinger_support(match.group("hostname")):
self.log.debug(
"Not snarfing, host doesn't have Webfinger support."
)
return
try: try:
actor = self._get_actor(irc, match.group(0)) actor = self._get_actor(irc, match.group(0))
except ap.ActivityPubError: except ap.ActivityPubError as e:
self.log.info("Could not fetch %s: %s", match.group(0), e)
# Be silent on errors # Be silent on errors
return return
@ -282,6 +307,14 @@ class Fediverse(callbacks.PluginRegexp):
snarf_status = self.registryValue("snarfers.status", channel, network) snarf_status = self.registryValue("snarfers.status", channel, network)
if not snarf_profile and not snarf_status: if not snarf_profile and not snarf_status:
return return
hostname = urllib.parse.urlparse(url).hostname
if not self._has_webfinger_support(hostname):
self.log.debug(
"Not snarfing, host doesn't have Webfinger support."
)
return
try: try:
resource = ap.get_resource_from_url(url) resource = ap.get_resource_from_url(url)
except ap.ActivityPubError: except ap.ActivityPubError:
@ -357,6 +390,9 @@ class Fediverse(callbacks.PluginRegexp):
status = ap.get_resource_from_url(url) status = ap.get_resource_from_url(url)
except ap.ActivityPubError as e: except ap.ActivityPubError as e:
irc.error(_("Could not get status: %s") % e.args[0], Raise=True) irc.error(_("Could not get status: %s") % e.args[0], Raise=True)
else:
hostname = urllib.parse.urlparse(url).hostname
self._webfinger_support_cache[hostname] = True
irc.reply(self._format_status(irc, msg, status)) irc.reply(self._format_status(irc, msg, status))

View File

@ -34,9 +34,10 @@ import functools
import contextlib import contextlib
from multiprocessing import Manager from multiprocessing import Manager
from supybot import commands, conf, utils from supybot import conf, log, utils
from supybot.test import ChannelPluginTestCase, network from supybot.test import ChannelPluginTestCase, network
from . import activitypub as ap
from .test_data import ( from .test_data import (
PRIVATE_KEY, PRIVATE_KEY,
HOSTMETA_URL, HOSTMETA_URL,
@ -89,10 +90,31 @@ class NetworkedFediverseTestCase(BaseFediverseTestCase):
"Error: Unknown user @nonexistinguser@oc.todon.fr.", "Error: Unknown user @nonexistinguser@oc.todon.fr.",
) )
def testHasWebfingerSupport(self):
    # Calls the real ap.has_webfinger_support() for two hostnames:
    # support is expected to be detected for oc.todon.fr ...
    detected_on_fediverse_host = ap.has_webfinger_support("oc.todon.fr")
    self.assertTrue(detected_on_fediverse_host)

    # ... and not for example.org.
    detected_on_plain_host = ap.has_webfinger_support("example.org")
    self.assertFalse(detected_on_plain_host)
class NetworklessFediverseTestCase(BaseFediverseTestCase): class NetworklessFediverseTestCase(BaseFediverseTestCase):
timeout = 0.1 timeout = 0.1
@contextlib.contextmanager
def mockWebfingerSupport(self, value):
    """Context manager replacing ap.has_webfinger_support() with a stub.

    `value` is either a bool, which the stub returns for any hostname,
    or the string "not called", in which case the stub fails if it is
    called at all.

    The original function is restored when the block exits, even if
    the block raises."""
    original_has_webfinger_support = ap.has_webfinger_support

    @functools.wraps(original_has_webfinger_support)
    def newf(hostname):
        if value == "not called":
            # Raised explicitly so the check survives "python -O".
            raise AssertionError(
                "has_webfinger_support(%r) should not have been called"
                % (hostname,)
            )
        assert isinstance(value, bool), value
        return value

    ap.has_webfinger_support = newf
    try:
        yield
    finally:
        # Without this, a failing test would leave the stub installed
        # for every test that runs after it.
        ap.has_webfinger_support = original_has_webfinger_support
@contextlib.contextmanager @contextlib.contextmanager
def mockRequests(self, expected_requests): def mockRequests(self, expected_requests):
with Manager() as m: with Manager() as m:
@ -105,6 +127,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
assert expected_requests, url assert expected_requests, url
(expected_url, response) = expected_requests.pop(0) (expected_url, response) = expected_requests.pop(0)
self.assertEqual(url, expected_url, "Unexpected URL: %s" % url) self.assertEqual(url, expected_url, "Unexpected URL: %s" % url)
log.debug("Got request to %s", url)
if isinstance(response, bytes): if isinstance(response, bytes):
return response return response
@ -225,7 +248,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
) )
def testProfileSnarfer(self): def testProfileSnarfer(self):
with self.mockRequests([]): with self.mockWebfingerSupport("not called"), self.mockRequests([]):
self.assertSnarfNoResponse("aaa @nonexistinguser@example.org bbb") self.assertSnarfNoResponse("aaa @nonexistinguser@example.org bbb")
with conf.supybot.plugins.Fediverse.snarfers.username.context(True): with conf.supybot.plugins.Fediverse.snarfers.username.context(True):
@ -235,24 +258,46 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
(ACTOR_URL, ACTOR_DATA), (ACTOR_URL, ACTOR_DATA),
] ]
with self.mockRequests(expected_requests): # First request, should work
with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfResponse( self.assertSnarfResponse(
"aaa @someuser@example.org bbb", "aaa @someuser@example.org bbb",
"\x02someuser\x02 (@someuser@example.org): My Biography", "\x02someuser\x02 (@someuser@example.org): My Biography",
) )
# Same request; it is all cached
with self.mockWebfingerSupport("not called"), self.mockRequests(
[]
):
self.assertSnarfResponse(
"aaa @someuser@example.org bbb",
"\x02someuser\x02 (@someuser@example.org): My Biography",
)
# Nonexisting user
expected_requests = [ expected_requests = [
(HOSTMETA_URL, HOSTMETA_DATA), (HOSTMETA_URL, HOSTMETA_DATA),
(WEBFINGER_URL, utils.web.Error("blah")), (WEBFINGER_URL, utils.web.Error("blah")),
] ]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse( self.assertSnarfNoResponse(
"aaa @nonexistinguser@example.org bbb" "aaa @nonexistinguser@example.org bbb"
) )
def testProfileSnarferNoWebfinger(self):
    # The username snarfer must be enabled here: when it is disabled, the
    # snarfer returns before checking Webfinger support, and this test
    # would pass without exercising that check.
    with conf.supybot.plugins.Fediverse.snarfers.username.context(True):
        # No webfinger support, shouldn't make requests
        with self.mockWebfingerSupport(False), self.mockRequests([]):
            self.assertSnarfNoResponse("aaa @someuser@example.org bbb")
def testProfileUrlSnarfer(self): def testProfileUrlSnarfer(self):
with self.mockRequests([]): with self.mockWebfingerSupport("not called"), self.mockRequests([]):
self.assertSnarfNoResponse( self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser bbb" "aaa https://example.org/users/someuser bbb"
) )
@ -260,14 +305,18 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.profile.context(True): with conf.supybot.plugins.Fediverse.snarfers.profile.context(True):
expected_requests = [(ACTOR_URL, utils.web.Error("blah"))] expected_requests = [(ACTOR_URL, utils.web.Error("blah"))]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse( self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser bbb" "aaa https://example.org/users/someuser bbb"
) )
expected_requests = [(ACTOR_URL, ACTOR_DATA)] expected_requests = [(ACTOR_URL, ACTOR_DATA)]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfResponse( self.assertSnarfResponse(
"aaa https://example.org/users/someuser bbb", "aaa https://example.org/users/someuser bbb",
"\x02someuser\x02 (@someuser@example.org): My Biography", "\x02someuser\x02 (@someuser@example.org): My Biography",
@ -363,7 +412,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
) )
def testStatusUrlSnarferDisabled(self): def testStatusUrlSnarferDisabled(self):
with self.mockRequests([]): with self.mockWebfingerSupport("not called"), self.mockRequests([]):
self.assertSnarfNoResponse( self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb" "aaa https://example.org/users/someuser/statuses/1234 bbb"
) )
@ -375,7 +424,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
(ACTOR_URL, ACTOR_DATA), (ACTOR_URL, ACTOR_DATA),
] ]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfResponse( self.assertSnarfResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb", "aaa https://example.org/users/someuser/statuses/1234 bbb",
"\x02someuser\x02 (@someuser@example.org): " "\x02someuser\x02 (@someuser@example.org): "
@ -386,7 +437,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.status.context(True): with conf.supybot.plugins.Fediverse.snarfers.status.context(True):
expected_requests = [(STATUS_URL, utils.web.Error("blah"))] expected_requests = [(STATUS_URL, utils.web.Error("blah"))]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse( self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb" "aaa https://example.org/users/someuser/statuses/1234 bbb"
) )
@ -396,7 +449,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
(ACTOR_URL, utils.web.Error("blah")), (ACTOR_URL, utils.web.Error("blah")),
] ]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfResponse( self.assertSnarfResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb", "aaa https://example.org/users/someuser/statuses/1234 bbb",
"<error: blah>: @ FirstAuthor I am replying to you", "<error: blah>: @ FirstAuthor I am replying to you",
@ -407,7 +462,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.profile.context(True): with conf.supybot.plugins.Fediverse.snarfers.profile.context(True):
expected_requests = [(STATUS_URL, STATUS_DATA)] expected_requests = [(STATUS_URL, STATUS_DATA)]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse( self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb" "aaa https://example.org/users/someuser/statuses/1234 bbb"
) )
@ -416,7 +473,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.profile.context(True): with conf.supybot.plugins.Fediverse.snarfers.profile.context(True):
expected_requests = [(ACTOR_URL, ACTOR_DATA)] expected_requests = [(ACTOR_URL, ACTOR_DATA)]
with self.mockRequests(expected_requests): with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse( self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/ bbb" "aaa https://example.org/users/someuser/ bbb"
) )