Keep closing parenthesis when snarfing URLs

Fixes a bug when handling URLs that contain a closing parenthesis ')', e.g. https://en.wikipedia.org/wiki/Harley_Quinn_(TV_series)
This commit is contained in:
Gordon Shumway 2020-06-27 14:23:08 -04:00 committed by Valentin Lorentz
parent ef59a033e3
commit a3e6887fc9

View File

@@ -87,7 +87,7 @@ _domain = r'%s(?:\.%s)*\.[0-9a-z][-0-9a-z]+' % (_label, _label)
 _urlRe = r'(%s://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % (
 _scheme, _domain, _ipAddr)
 urlRe = re.compile(_urlRe, re.I)
-_httpUrlRe = r'(https?://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % \
+_httpUrlRe = r'(https?://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\]>\s]*)?)' % \
 (_domain, _ipAddr)
 httpUrlRe = re.compile(_httpUrlRe, re.I)