From 61e5a293518312689d33fc257ef9ac37a6f407d4 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Sun, 7 Feb 2016 09:44:08 +0100 Subject: [PATCH] Fix scheme-matching regexp. Old behavior allowed underscores and schemes starting with a digit, and did not allow -+. New behavior: https://tools.ietf.org/html/rfc3986#section-3.1 Side effect: closes GH-1146 --- plugins/URL/test.py | 4 ++++ src/utils/web.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/plugins/URL/test.py b/plugins/URL/test.py index 059ebb7ba..6b8c435bc 100644 --- a/plugins/URL/test.py +++ b/plugins/URL/test.py @@ -80,6 +80,10 @@ class URLTestCase(ChannelPluginTestCase): self.feedMsg(urls[0]) self.assertResponse('url last', urls[0]) + def testStripsColors(self): + self.feedMsg('\x031foo \x034' + urls[0]) + self.assertResponse('url last', urls[0]) + def testAction(self): self.irc.feedMsg(ircmsgs.action(self.channel, urls[1])) self.assertNotRegexp('url last', '\\x01') diff --git a/src/utils/web.py b/src/utils/web.py index bb175643a..dbed601ce 100644 --- a/src/utils/web.py +++ b/src/utils/web.py @@ -80,9 +80,10 @@ _octet = r'(?:2(?:[0-4]\d|5[0-5])|1\d\d|\d{1,2})' _ipAddr = r'%s(?:\.%s){3}' % (_octet, _octet) # Base domain regex off RFC 1034 and 1738 _label = r'[0-9a-z][-0-9a-z]*[0-9a-z]?' +_scheme = r'[a-z][a-z0-9+.-]*' _domain = r'%s(?:\.%s)*\.[0-9a-z][-0-9a-z]+' % (_label, _label) -_urlRe = r'(\w+://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % (_domain, - _ipAddr) +_urlRe = r'(%s://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % ( + _scheme, _domain, _ipAddr) urlRe = re.compile(_urlRe, re.I) _httpUrlRe = r'(https?://(?:\S+@)?(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % \ (_domain, _ipAddr)