Fix scheme-matching regexp.

The old behavior allowed underscores and schemes starting with a digit,
and did not allow '-', '+', or '.' in the scheme.
New behavior: https://tools.ietf.org/html/rfc3986#section-3.1

Side effect: closes GH-1146
This commit is contained in:
Valentin Lorentz 2016-02-07 09:44:08 +01:00
parent f2e726d3e7
commit 61e5a29351
2 changed files with 7 additions and 2 deletions

View File

@@ -80,6 +80,10 @@ class URLTestCase(ChannelPluginTestCase):
self.feedMsg(urls[0]) self.feedMsg(urls[0])
self.assertResponse('url last', urls[0]) self.assertResponse('url last', urls[0])
def testStripsColors(self):
self.feedMsg('\x031foo \x034' + urls[0])
self.assertResponse('url last', urls[0])
def testAction(self): def testAction(self):
self.irc.feedMsg(ircmsgs.action(self.channel, urls[1])) self.irc.feedMsg(ircmsgs.action(self.channel, urls[1]))
self.assertNotRegexp('url last', '\\x01') self.assertNotRegexp('url last', '\\x01')

View File

@@ -80,9 +80,10 @@ _octet = r'(?:2(?:[0-4]\d|5[0-5])|1\d\d|\d{1,2})'
_ipAddr = r'%s(?:\.%s){3}' % (_octet, _octet) _ipAddr = r'%s(?:\.%s){3}' % (_octet, _octet)
# Base domain regex off RFC 1034 and 1738 # Base domain regex off RFC 1034 and 1738
_label = r'[0-9a-z][-0-9a-z]*[0-9a-z]?' _label = r'[0-9a-z][-0-9a-z]*[0-9a-z]?'
_scheme = r'[a-z][a-z0-9+.-]*'
_domain = r'%s(?:\.%s)*\.[0-9a-z][-0-9a-z]+' % (_label, _label) _domain = r'%s(?:\.%s)*\.[0-9a-z][-0-9a-z]+' % (_label, _label)
_urlRe = r'(\w+://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % (_domain, _urlRe = r'(%s://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % (
_ipAddr) _scheme, _domain, _ipAddr)
urlRe = re.compile(_urlRe, re.I) urlRe = re.compile(_urlRe, re.I)
_httpUrlRe = r'(https?://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % \ _httpUrlRe = r'(https?://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % \
(_domain, _ipAddr) (_domain, _ipAddr)