Fix our Perl-style regexp parsing (m// and s///) to handle multiple backslashes before the separator

This also adds support for using brace pairs ({}, [], (), <>) as the separators for m//.
2025-12-09 19:57:25 +01:00 · 2009-02-06 21:33:28 +00:00 · 2009-02-06 21:33:28 +00:00 · 0c42ea111a
commit 0c42ea111a
parent 0d4ff7f3dc
2 changed files with 31 additions and 20 deletions
--- a/src/utils/str.py
+++ b/src/utils/str.py
@ -1,6 +1,6 @@
 ###
 # Copyright (c) 2002-2005, Jeremiah Fincher
-# Copyright (c) 2008, James Vega
+# Copyright (c) 2008-2009, James Vega
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@ -112,36 +112,42 @@ def quoted(s):
    """Returns a quoted s."""
    return '"%s"' % s
-def _getSep(s):
+_openers = '{[(<'
 _closers = '}])>'
 def _getSep(s, allowBraces=False):
    if len(s) < 2:
        raise ValueError, 'string given to _getSep is too short: %r' % s
    if allowBraces:
        braces = _closers
    else:
        braces = _openers + _closers
    if s.startswith('m') or s.startswith('s'):
        separator = s[1]
    else:
        separator = s[0]
-    if separator.isalnum() or separator in '{}[]()<>':
+    if separator.isalnum() or separator in braces:
        raise ValueError, \
              'Invalid separator: separator must not be alphanumeric or in ' \
-              '"{}[]()<>"'
+              '"%s"' % braces
    return separator
 def _getSplitterRe(s):
    separator = _getSep(s)
    return re.compile(r'(?<!\\)%s' % re.escape(separator))
 def perlReToPythonRe(s):
    """Converts a string representation of a Perl regular expression (i.e.,
    m/^foo$/i or /foo|bar/) to a Python regular expression.
    """
-    sep = _getSep(s)
+    opener = closer = _getSep(s, True)
-    splitter = _getSplitterRe(s)
+    if opener in '{[(<':
        closer = _closers[_openers.index(opener)]
    opener = re.escape(opener)
    closer = re.escape(closer)
    matcher = re.compile(r'm?%s((?:\\.|[^\\])*)%s(.*)' % (opener, closer))
    try:
-        (kind, regexp, flags) = splitter.split(s)
+        (regexp, flags) = matcher.match(s).groups()
-    except ValueError: # Unpack list of wrong size.
+    except AttributeError: # matcher.match(s) returned None (no match).
        raise ValueError, 'Must be of the form m/.../ or /.../'
-    regexp = regexp.replace('\\'+sep, sep)
+    regexp = regexp.replace('\\'+opener, opener)
-    if kind not in ('', 'm'):
+    if opener != closer:
-        raise ValueError, 'Invalid kind: must be in ("", "m")'
+        regexp = regexp.replace('\\'+closer, closer)
    flag = 0
    try:
        for c in flags.upper():
@ -159,17 +165,17 @@ def perlReToReplacer(s):
    replacement.
    """
    sep = _getSep(s)
-    splitter = _getSplitterRe(s)
+    escaped = re.escape(sep)
    matcher = re.compile(r's%s((?:\\.|[^\\])*)%s((?:\\%s|[^\\])*)%s(.*)'
                         % (escaped, escaped, escaped, escaped))
    try:
-        (kind, regexp, replace, flags) = splitter.split(s)
+        (regexp, replace, flags) = matcher.match(s).groups()
-    except ValueError: # Unpack list of wrong size.
+    except AttributeError: # matcher.match(s) returned None (no match).
        raise ValueError, 'Must be of the form s/.../.../'
    regexp = regexp.replace('\x08', r'\b')
    replace = replace.replace('\\'+sep, sep)
    for i in xrange(10):
        replace = replace.replace(chr(i), r'\%s' % i)
    if kind != 's':
        raise ValueError, 'Invalid kind: must be "s"'
    g = False
    if 'g' in flags:
        g = True
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -1,5 +1,6 @@
 ###
 # Copyright (c) 2002-2005, Jeremiah Fincher
 # Copyright (c) 2009, James Vega
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@ -276,6 +277,8 @@ class StrTest(SupyTestCase):
    def testP2PReDifferentSeparator(self):
        r = utils.str.perlReToPythonRe('m!foo!')
        self.failUnless(r.search('foo'))
        r = utils.str.perlReToPythonRe('m{cat}')
        self.failUnless(r.search('cat'))
    def testPerlReToReplacer(self):
        PRTR = utils.str.perlReToReplacer
@ -291,6 +294,8 @@ class StrTest(SupyTestCase):
        self.assertEqual(f('foobarbaz'), 'foorz')
        f = PRTR('s/ba\\///g')
        self.assertEqual(f('fooba/rba/z'), 'foorz')
        f = PRTR('s/ba\\\\//g')
        self.assertEqual(f('fooba\\rba\\z'), 'foorz')
        f = PRTR('s/cat/dog/i')
        self.assertEqual(f('CATFISH'), 'dogFISH')
        f = PRTR('s/foo/foo\/bar/')