Fediverse: Cache host support for webfinger before snarfing.

This is much cheaper, both for us and for the host, when the host
doesn't support ActivityPub at all (which is the case
most of the time).
This commit is contained in:
Valentin Lorentz 2020-05-14 21:33:34 +02:00
parent 566920070c
commit eaf7222509
3 changed files with 151 additions and 14 deletions

View File

@ -34,6 +34,7 @@ import email
import base64
import functools
import contextlib
import urllib.error
import urllib.parse
import xml.etree.ElementTree as ET
@ -129,6 +130,47 @@ def _get_webfinger_url(hostname):
return "https://%s/.well-known/webfinger?resource={uri}"
def has_webfinger_support(hostname):
    """Returns whether the hostname probably supports webfinger or not.

    This relies on an edge case of the Webfinger specification,
    so it may not successfully detect some hosts because they don't follow
    the specification.

    A ``HEAD`` request is sent to ``https://<hostname>/.well-known/webfinger``
    without the ``resource`` parameter, and the error response is inspected
    for one of the known Webfinger signatures.

    :param hostname: host name (optionally followed by ``:port``) to probe.
    :returns: ``True`` if the response matches a known signature; ``False``
        otherwise, including when the host cannot be reached or does not
        answer with a valid HTTP response.
    """
    # Local import: only needed to recognise malformed HTTP exchanges below.
    import http.client

    request = urllib.request.Request(
        "https://%s/.well-known/webfinger" % hostname, method="HEAD"
    )
    try:
        # The response body is irrelevant; just make sure the connection
        # is released.
        with urllib.request.urlopen(request):
            pass
    except urllib.error.HTTPError as e:
        if e.code == 400:
            # RFC 7033 requires a 400 response when the "resource" parameter
            # is missing: https://tools.ietf.org/html/rfc7033#section-4.2
            #
            # This works for:
            # * Misskey
            # * PeerTube
            # * Pleroma
            return True

        # Compare the media type only, so that parameters such as
        # "; charset=utf-8" or a different letter case do not hide a match.
        content_type = e.headers.get("Content-Type", "")
        media_type = content_type.split(";")[0].strip().lower()
        if media_type == "application/jrd+json":
            # WriteFreely, and possibly others.
            # https://github.com/writeas/writefreely/issues/310
            return True

        if e.code == 404 and e.headers.get("Server", "").lower() == "mastodon":
            # https://github.com/tootsuite/mastodon/issues/13757
            return True

        # Else, the host probably doesn't support Webfinger.
        # Known false negatives:
        # * Nextcloud (returns 404)
        # * Pixelfed (returns 302 to the homepage):
        #   https://github.com/pixelfed/pixelfed/issues/2180
        # * Plume (returns 404):
        #   https://github.com/Plume-org/Plume/issues/770
        return False
    except (OSError, http.client.HTTPException):
        # Unresolvable or unreachable host, TLS failure, or a reply that is
        # not valid HTTP: nothing can be fetched from it anyway, so report
        # it as unsupported instead of letting the error escape.
        return False

    # A successful (non-error) answer to a request without "resource" is not
    # one of the known Webfinger signatures.
    return False
def webfinger(hostname, uri):
template = _get_webfinger_url(hostname)
assert template

View File

@ -125,6 +125,14 @@ class Fediverse(callbacks.PluginRegexp):
self._startHttp()
self._actor_cache = utils.structures.TimeoutDict(timeout=600)
# Used when snarfing, to cheaply avoid querying non-ActivityPub
# servers.
# Is also written to when using commands that successfully find
# ActivityPub data.
self._webfinger_support_cache = utils.structures.TimeoutDict(
timeout=60 * 60 * 24
)
def _startHttp(self):
callback = FediverseHttp()
callback._plugin = self
@ -137,6 +145,13 @@ class Fediverse(callbacks.PluginRegexp):
def _stopHttp(self):
    """Unhook the "fediverse" callback from ``httpserver``."""
    httpserver.unhook("fediverse")
def _has_webfinger_support(self, hostname):
    """Return whether ``hostname`` supports Webfinger, using the cache.

    On a cache miss, ``ap.has_webfinger_support`` is queried and its answer
    is stored in ``self._webfinger_support_cache`` before being returned.

    :param hostname: host name to look up.
    :returns: the cached or freshly computed answer.
    """
    cache = self._webfinger_support_cache
    try:
        return cache[hostname]
    except KeyError:
        # Not cached; fall through and ask the host.
        pass

    supported = ap.has_webfinger_support(hostname)
    cache[hostname] = supported
    # Return the local value instead of reading the cache again: the cache
    # is created with a timeout, so a second lookup could presumably miss
    # if the entry is dropped in between.
    return supported
def _get_actor(self, irc, username):
if username in self._actor_cache:
return self._actor_cache[username]
@ -167,6 +182,8 @@ class Fediverse(callbacks.PluginRegexp):
if username:
self._actor_cache[username] = actor
self._webfinger_support_cache[hostname] = True
self._actor_cache[actor["id"]] = actor
return actor
@ -257,9 +274,17 @@ class Fediverse(callbacks.PluginRegexp):
"snarfers.username", msg.channel, irc.network
):
return
if not self._has_webfinger_support(match.group("hostname")):
self.log.debug(
"Not snarfing, host doesn't have Webfinger support."
)
return
try:
actor = self._get_actor(irc, match.group(0))
except ap.ActivityPubError:
except ap.ActivityPubError as e:
self.log.info("Could not fetch %s: %s", match.group(0), e)
# Be silent on errors
return
@ -282,6 +307,14 @@ class Fediverse(callbacks.PluginRegexp):
snarf_status = self.registryValue("snarfers.status", channel, network)
if not snarf_profile and not snarf_status:
return
hostname = urllib.parse.urlparse(url).hostname
if not self._has_webfinger_support(hostname):
self.log.debug(
"Not snarfing, host doesn't have Webfinger support."
)
return
try:
resource = ap.get_resource_from_url(url)
except ap.ActivityPubError:
@ -357,6 +390,9 @@ class Fediverse(callbacks.PluginRegexp):
status = ap.get_resource_from_url(url)
except ap.ActivityPubError as e:
irc.error(_("Could not get status: %s") % e.args[0], Raise=True)
else:
hostname = urllib.parse.urlparse(url).hostname
self._webfinger_support_cache[hostname] = True
irc.reply(self._format_status(irc, msg, status))

View File

@ -34,9 +34,10 @@ import functools
import contextlib
from multiprocessing import Manager
from supybot import commands, conf, utils
from supybot import conf, log, utils
from supybot.test import ChannelPluginTestCase, network
from . import activitypub as ap
from .test_data import (
PRIVATE_KEY,
HOSTMETA_URL,
@ -89,10 +90,31 @@ class NetworkedFediverseTestCase(BaseFediverseTestCase):
"Error: Unknown user @nonexistinguser@oc.todon.fr.",
)
def testHasWebfingerSupport(self):
    """Probe two real hosts: one expected to be detected, one not."""
    probe = ap.has_webfinger_support
    detected = probe("oc.todon.fr")
    self.assertTrue(detected)
    not_detected = probe("example.org")
    self.assertFalse(not_detected)
class NetworklessFediverseTestCase(BaseFediverseTestCase):
timeout = 0.1
@contextlib.contextmanager
def mockWebfingerSupport(self, value):
    """Temporarily replace ``ap.has_webfinger_support`` with a stub.

    :param value: ``True`` or ``False`` to make the stub return that value
        for every hostname, or the string ``"not called"`` to make the stub
        fail as soon as it is called.
    """
    original_has_webfinger_support = ap.has_webfinger_support

    @functools.wraps(original_has_webfinger_support)
    def newf(hostname):
        if value == "not called":
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(
                "has_webfinger_support(%r) should not have been called"
                % (hostname,)
            )
        assert isinstance(value, bool), value
        return value

    ap.has_webfinger_support = newf
    try:
        yield
    finally:
        # Always restore the real function, even when the body of the
        # ``with`` block raises (e.g. a failed assertion); otherwise the
        # stub would leak into the tests that run afterwards.
        ap.has_webfinger_support = original_has_webfinger_support
@contextlib.contextmanager
def mockRequests(self, expected_requests):
with Manager() as m:
@ -105,6 +127,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
assert expected_requests, url
(expected_url, response) = expected_requests.pop(0)
self.assertEqual(url, expected_url, "Unexpected URL: %s" % url)
log.debug("Got request to %s", url)
if isinstance(response, bytes):
return response
@ -225,7 +248,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
)
def testProfileSnarfer(self):
with self.mockRequests([]):
with self.mockWebfingerSupport("not called"), self.mockRequests([]):
self.assertSnarfNoResponse("aaa @nonexistinguser@example.org bbb")
with conf.supybot.plugins.Fediverse.snarfers.username.context(True):
@ -235,24 +258,46 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
(ACTOR_URL, ACTOR_DATA),
]
with self.mockRequests(expected_requests):
# First request, should work
with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfResponse(
"aaa @someuser@example.org bbb",
"\x02someuser\x02 (@someuser@example.org): My Biography",
)
# Same request; it is all cached
with self.mockWebfingerSupport("not called"), self.mockRequests(
[]
):
self.assertSnarfResponse(
"aaa @someuser@example.org bbb",
"\x02someuser\x02 (@someuser@example.org): My Biography",
)
# Nonexisting user
expected_requests = [
(HOSTMETA_URL, HOSTMETA_DATA),
(WEBFINGER_URL, utils.web.Error("blah")),
]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse(
"aaa @nonexistinguser@example.org bbb"
)
def testProfileSnarferNoWebfinger(self):
    """The username snarfer stays silent for hosts without Webfinger.

    The snarfer must be enabled for this to be meaningful: when it is
    disabled it returns before the Webfinger check is reached, and the
    test would pass without exercising that check.
    """
    with conf.supybot.plugins.Fediverse.snarfers.username.context(True):
        # No webfinger support, shouldn't make requests
        with self.mockWebfingerSupport(False), self.mockRequests([]):
            self.assertSnarfNoResponse("aaa @someuser@example.org bbb")
def testProfileUrlSnarfer(self):
with self.mockRequests([]):
with self.mockWebfingerSupport("not called"), self.mockRequests([]):
self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser bbb"
)
@ -260,14 +305,18 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.profile.context(True):
expected_requests = [(ACTOR_URL, utils.web.Error("blah"))]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser bbb"
)
expected_requests = [(ACTOR_URL, ACTOR_DATA)]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfResponse(
"aaa https://example.org/users/someuser bbb",
"\x02someuser\x02 (@someuser@example.org): My Biography",
@ -363,7 +412,7 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
)
def testStatusUrlSnarferDisabled(self):
with self.mockRequests([]):
with self.mockWebfingerSupport("not called"), self.mockRequests([]):
self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb"
)
@ -375,7 +424,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
(ACTOR_URL, ACTOR_DATA),
]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb",
"\x02someuser\x02 (@someuser@example.org): "
@ -386,7 +437,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.status.context(True):
expected_requests = [(STATUS_URL, utils.web.Error("blah"))]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb"
)
@ -396,7 +449,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
(ACTOR_URL, utils.web.Error("blah")),
]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb",
"<error: blah>: @ FirstAuthor I am replying to you",
@ -407,7 +462,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.profile.context(True):
expected_requests = [(STATUS_URL, STATUS_DATA)]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport(True), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/statuses/1234 bbb"
)
@ -416,7 +473,9 @@ class NetworklessFediverseTestCase(BaseFediverseTestCase):
with conf.supybot.plugins.Fediverse.snarfers.profile.context(True):
expected_requests = [(ACTOR_URL, ACTOR_DATA)]
with self.mockRequests(expected_requests):
with self.mockWebfingerSupport("not called"), self.mockRequests(
expected_requests
):
self.assertSnarfNoResponse(
"aaa https://example.org/users/someuser/ bbb"
)