From 66d986e820516787a12ab5d64a80ebf36eb5a2d0 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Thu, 3 Mar 2022 22:16:37 +0100 Subject: [PATCH] Web: Add overrides to support Youtube and Reddit; remove Twitter from tests. --- plugins/Web/plugin.py | 11 +++++++++++ plugins/Web/test.py | 16 +++++++++------- src/utils/web.py | 4 ++-- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py index 397d3d07b..037de9ed0 100644 --- a/plugins/Web/plugin.py +++ b/plugins/Web/plugin.py @@ -149,6 +149,17 @@ class Web(callbacks.PluginRegexp): def getTitle(self, irc, url, raiseErrors, msg): size = conf.supybot.protocols.http.peekSize() + + parsed_url = utils.web.urlparse(url) + if parsed_url.netloc.endswith(('youtube.com', '.youtube.com')): + # there is a lot of Javascript before the <title> + size = 409600 + if parsed_url.netloc in ('reddit.com', 'www.reddit.com', 'new.reddit.com'): + # Since 2022-03, New Reddit has 'Reddit - Dive into anything' as + # <title> on every page. + parsed_url = parsed_url._replace(netloc='old.reddit.com') + url = utils.web.urlunparse(parsed_url) + timeout = self.registryValue('timeout') headers = conf.defaultHttpHeaders(irc.network, msg.channel) try: diff --git a/plugins/Web/test.py b/plugins/Web/test.py index 759d8c839..88fd10cac 100644 --- a/plugins/Web/test.py +++ b/plugins/Web/test.py @@ -71,17 +71,19 @@ class WebTestCase(ChannelPluginTestCase): # part of it. self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D') # Checks that the parser doesn't hang on invalid tags - self.assertNotError( - 'title http://www.youtube.com/watch?v=x4BtiqPN4u8') self.assertResponse( 'title http://www.thefreedictionary.com/don%27t', "Don't - definition of don't by The Free Dictionary") + + def testtitleYoutube(self): self.assertRegexp( - 'title ' - 'https://twitter.com/rlbarnes/status/656554266744586240', - '"PSA: In Firefox 44 Nightly, "http:" pages with ' - '<input type="password"> are now marked insecure. 
' - 'https://t.co/qS9LxuRPdm"$') + 'title https://www.youtube.com/watch?v=GHMjD0Lp5DY', + 'Pianoforte') + + def testtitleReddit(self): + self.assertRegexp( + 'title https://www.reddit.com/r/irc/', + 'Internet Relay Chat') def testTitleSnarfer(self): try: diff --git a/src/utils/web.py b/src/utils/web.py index 901ac4dc7..bde24e1e1 100644 --- a/src/utils/web.py +++ b/src/utils/web.py @@ -46,7 +46,7 @@ if minisix.PY2: import urllib import urllib2 from httplib import InvalidURL - from urlparse import urlsplit, urlunsplit, urlparse + from urlparse import urlsplit, urlunsplit, urlparse, urlunparse from htmlentitydefs import entitydefs, name2codepoint from HTMLParser import HTMLParser from cgi import escape as html_escape @@ -61,7 +61,7 @@ if minisix.PY2: from urllib import splithost, splituser else: from http.client import InvalidURL - from urllib.parse import urlsplit, urlunsplit, urlparse + from urllib.parse import urlsplit, urlunsplit, urlparse, urlunparse from html.entities import entitydefs, name2codepoint from html.parser import HTMLParser from html import escape as html_escape