diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py
index 397d3d07b..037de9ed0 100644
--- a/plugins/Web/plugin.py
+++ b/plugins/Web/plugin.py
@@ -149,6 +149,17 @@ class Web(callbacks.PluginRegexp):
def getTitle(self, irc, url, raiseErrors, msg):
size = conf.supybot.protocols.http.peekSize()
+
+ parsed_url = utils.web.urlparse(url)
+ if parsed_url.netloc.endswith(('youtube.com', '.youtube.com')):
+ # there is a lot of Javascript before the <title>
+ size = 409600
+ if parsed_url.netloc in ('reddit.com', 'www.reddit.com', 'new.reddit.com'):
+ # Since 2022-03, New Reddit has 'Reddit - Dive into anything' as
+ # <title> on every page.
+ parsed_url = parsed_url._replace(netloc='old.reddit.com')
+ url = utils.web.urlunparse(parsed_url)
+
timeout = self.registryValue('timeout')
headers = conf.defaultHttpHeaders(irc.network, msg.channel)
try:
diff --git a/plugins/Web/test.py b/plugins/Web/test.py
index 759d8c839..88fd10cac 100644
--- a/plugins/Web/test.py
+++ b/plugins/Web/test.py
@@ -71,17 +71,19 @@ class WebTestCase(ChannelPluginTestCase):
# part of it.
self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D')
# Checks that the parser doesn't hang on invalid tags
- self.assertNotError(
- 'title http://www.youtube.com/watch?v=x4BtiqPN4u8')
self.assertResponse(
'title http://www.thefreedictionary.com/don%27t',
"Don't - definition of don't by The Free Dictionary")
+
+ def testtitleYoutube(self):
self.assertRegexp(
- 'title '
- 'https://twitter.com/rlbarnes/status/656554266744586240',
- '"PSA: In Firefox 44 Nightly, "http:" pages with '
- ' are now marked insecure. '
- 'https://t.co/qS9LxuRPdm"$')
+ 'title https://www.youtube.com/watch?v=GHMjD0Lp5DY',
+ 'Pianoforte')
+
+ def testtitleReddit(self):
+ self.assertRegexp(
+ 'title https://www.reddit.com/r/irc/',
+ 'Internet Relay Chat')
def testTitleSnarfer(self):
try:
diff --git a/src/utils/web.py b/src/utils/web.py
index 901ac4dc7..bde24e1e1 100644
--- a/src/utils/web.py
+++ b/src/utils/web.py
@@ -46,7 +46,7 @@ if minisix.PY2:
import urllib
import urllib2
from httplib import InvalidURL
- from urlparse import urlsplit, urlunsplit, urlparse
+ from urlparse import urlsplit, urlunsplit, urlparse, urlunparse
from htmlentitydefs import entitydefs, name2codepoint
from HTMLParser import HTMLParser
from cgi import escape as html_escape
@@ -61,7 +61,7 @@ if minisix.PY2:
from urllib import splithost, splituser
else:
from http.client import InvalidURL
- from urllib.parse import urlsplit, urlunsplit, urlparse
+ from urllib.parse import urlsplit, urlunsplit, urlparse, urlunparse
from html.entities import entitydefs, name2codepoint
from html.parser import HTMLParser
from html import escape as html_escape