Web: Add overrides to support YouTube and Reddit; remove Twitter from tests.

This commit is contained in:
Valentin Lorentz 2022-03-03 22:16:37 +01:00
parent 76f7eced5d
commit 66d986e820
3 changed files with 22 additions and 9 deletions

View File

@ -149,6 +149,17 @@ class Web(callbacks.PluginRegexp):
def getTitle(self, irc, url, raiseErrors, msg): def getTitle(self, irc, url, raiseErrors, msg):
size = conf.supybot.protocols.http.peekSize() size = conf.supybot.protocols.http.peekSize()
parsed_url = utils.web.urlparse(url)
if parsed_url.netloc.endswith(('youtube.com', '.youtube.com')):
# there is a lot of Javascript before the <title>
size = 409600
if parsed_url.netloc in ('reddit.com', 'www.reddit.com', 'new.reddit.com'):
# Since 2022-03, New Reddit has 'Reddit - Dive into anything' as
# <title> on every page.
parsed_url = parsed_url._replace(netloc='old.reddit.com')
url = utils.web.urlunparse(parsed_url)
timeout = self.registryValue('timeout') timeout = self.registryValue('timeout')
headers = conf.defaultHttpHeaders(irc.network, msg.channel) headers = conf.defaultHttpHeaders(irc.network, msg.channel)
try: try:

View File

@ -71,17 +71,19 @@ class WebTestCase(ChannelPluginTestCase):
# part of it. # part of it.
self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D') self.assertRegexp('title http://www.n-e-r-d.com/', 'N.*E.*R.*D')
# Checks that the parser doesn't hang on invalid tags # Checks that the parser doesn't hang on invalid tags
self.assertNotError(
'title http://www.youtube.com/watch?v=x4BtiqPN4u8')
self.assertResponse( self.assertResponse(
'title http://www.thefreedictionary.com/don%27t', 'title http://www.thefreedictionary.com/don%27t',
"Don't - definition of don't by The Free Dictionary") "Don't - definition of don't by The Free Dictionary")
def testtitleYoutube(self):
self.assertRegexp( self.assertRegexp(
'title ' 'title https://www.youtube.com/watch?v=GHMjD0Lp5DY',
'https://twitter.com/rlbarnes/status/656554266744586240', 'Pianoforte')
'"PSA: In Firefox 44 Nightly, "http:" pages with '
'<input type="password"> are now marked insecure. ' def testtitleReddit(self):
'https://t.co/qS9LxuRPdm"$') self.assertRegexp(
'title https://www.reddit.com/r/irc/',
'Internet Relay Chat')
def testTitleSnarfer(self): def testTitleSnarfer(self):
try: try:

View File

@ -46,7 +46,7 @@ if minisix.PY2:
import urllib import urllib
import urllib2 import urllib2
from httplib import InvalidURL from httplib import InvalidURL
from urlparse import urlsplit, urlunsplit, urlparse from urlparse import urlsplit, urlunsplit, urlparse, urlunparse
from htmlentitydefs import entitydefs, name2codepoint from htmlentitydefs import entitydefs, name2codepoint
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
from cgi import escape as html_escape from cgi import escape as html_escape
@ -61,7 +61,7 @@ if minisix.PY2:
from urllib import splithost, splituser from urllib import splithost, splituser
else: else:
from http.client import InvalidURL from http.client import InvalidURL
from urllib.parse import urlsplit, urlunsplit, urlparse from urllib.parse import urlsplit, urlunsplit, urlparse, urlunparse
from html.entities import entitydefs, name2codepoint from html.entities import entitydefs, name2codepoint
from html.parser import HTMLParser from html.parser import HTMLParser
from html import escape as html_escape from html import escape as html_escape