Web: Add overrides to support YouTube and Reddit; remove Twitter from tests.

This commit is contained in:
Valentin Lorentz 2022-03-03 22:16:37 +01:00
parent 76f7eced5d
commit 66d986e820
3 changed files with 22 additions and 9 deletions

View File

@ -149,6 +149,17 @@ class Web(callbacks.PluginRegexp):
def getTitle(self, irc, url, raiseErrors, msg):
size = conf.supybot.protocols.http.peekSize()
parsed_url = utils.web.urlparse(url)
if parsed_url.netloc.endswith(('youtube.com', '.youtube.com')):
# there is a lot of Javascript before the <title>
size = 409600
if parsed_url.netloc in ('reddit.com', 'www.reddit.com', 'new.reddit.com'):
# Since 2022-03, New Reddit has 'Reddit - Dive into anything' as
# <title> on every page.
parsed_url = parsed_url._replace(netloc='old.reddit.com')
url = utils.web.urlunparse(parsed_url)
timeout = self.registryValue('timeout')
headers = conf.defaultHttpHeaders(irc.network, msg.channel)
try:

View File

@ -71,17 +71,19 @@ class WebTestCase(ChannelPluginTestCase):
# part of it.
self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D')
# Checks that the parser doesn't hang on invalid tags
self.assertNotError(
'title http://www.youtube.com/watch?v=x4BtiqPN4u8')
self.assertResponse(
'title http://www.thefreedictionary.com/don%27t',
"Don't - definition of don't by The Free Dictionary")
def testtitleYoutube(self):
self.assertRegexp(
'title '
'https://twitter.com/rlbarnes/status/656554266744586240',
'"PSA: In Firefox 44 Nightly, "http:" pages with '
'<input type="password"> are now marked insecure. '
'https://t.co/qS9LxuRPdm"$')
'title https://www.youtube.com/watch?v=GHMjD0Lp5DY',
'Pianoforte')
def testtitleReddit(self):
    # Ask the bot for the title of a www.reddit.com subreddit page and
    # check that the reply matches the expected pattern.
    # NOTE(review): presumably this performs a live request to Reddit --
    # confirm how the test suite gates network-dependent tests.
    command = 'title https://www.reddit.com/r/irc/'
    expected_pattern = 'Internet Relay Chat'
    self.assertRegexp(command, expected_pattern)
def testTitleSnarfer(self):
try:

View File

@ -46,7 +46,7 @@ if minisix.PY2:
import urllib
import urllib2
from httplib import InvalidURL
from urlparse import urlsplit, urlunsplit, urlparse
from urlparse import urlsplit, urlunsplit, urlparse, urlunparse
from htmlentitydefs import entitydefs, name2codepoint
from HTMLParser import HTMLParser
from cgi import escape as html_escape
@ -61,7 +61,7 @@ if minisix.PY2:
from urllib import splithost, splituser
else:
from http.client import InvalidURL
from urllib.parse import urlsplit, urlunsplit, urlparse
from urllib.parse import urlsplit, urlunsplit, urlparse, urlunparse
from html.entities import entitydefs, name2codepoint
from html.parser import HTMLParser
from html import escape as html_escape