Fix our RE parsing to handle multiple backslashes before the separator

This also adds support for using brace pairs ({}, [], (), <>) as the
separators for m//.
This commit is contained in:
James Vega 2009-02-06 21:33:28 +00:00
parent 0d4ff7f3dc
commit 0c42ea111a
2 changed files with 31 additions and 20 deletions

View File

@ -1,6 +1,6 @@
### ###
# Copyright (c) 2002-2005, Jeremiah Fincher # Copyright (c) 2002-2005, Jeremiah Fincher
# Copyright (c) 2008, James Vega # Copyright (c) 2008-2009, James Vega
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -112,36 +112,42 @@ def quoted(s):
"""Returns a quoted s.""" """Returns a quoted s."""
return '"%s"' % s return '"%s"' % s
def _getSep(s): _openers = '{[(<'
_closers = '}])>'
def _getSep(s, allowBraces=False):
if len(s) < 2: if len(s) < 2:
raise ValueError, 'string given to _getSep is too short: %r' % s raise ValueError, 'string given to _getSep is too short: %r' % s
if allowBraces:
braces = _closers
else:
braces = _openers + _closers
if s.startswith('m') or s.startswith('s'): if s.startswith('m') or s.startswith('s'):
separator = s[1] separator = s[1]
else: else:
separator = s[0] separator = s[0]
if separator.isalnum() or separator in '{}[]()<>': if separator.isalnum() or separator in braces:
raise ValueError, \ raise ValueError, \
'Invalid separator: separator must not be alphanumeric or in ' \ 'Invalid separator: separator must not be alphanumeric or in ' \
'"{}[]()<>"' '"%s"' % braces
return separator return separator
def _getSplitterRe(s):
separator = _getSep(s)
return re.compile(r'(?<!\\)%s' % re.escape(separator))
def perlReToPythonRe(s): def perlReToPythonRe(s):
"""Converts a string representation of a Perl regular expression (i.e., """Converts a string representation of a Perl regular expression (i.e.,
m/^foo$/i or /foo|bar/) to a Python regular expression. m/^foo$/i or /foo|bar/) to a Python regular expression.
""" """
sep = _getSep(s) opener = closer = _getSep(s, True)
splitter = _getSplitterRe(s) if opener in '{[(<':
closer = _closers[_openers.index(opener)]
opener = re.escape(opener)
closer = re.escape(closer)
matcher = re.compile(r'm?%s((?:\\.|[^\\])*)%s(.*)' % (opener, closer))
try: try:
(kind, regexp, flags) = splitter.split(s) (regexp, flags) = matcher.match(s).groups()
except ValueError: # Unpack list of wrong size. except AttributeError: # Unpack list of wrong size.
raise ValueError, 'Must be of the form m/.../ or /.../' raise ValueError, 'Must be of the form m/.../ or /.../'
regexp = regexp.replace('\\'+sep, sep) regexp = regexp.replace('\\'+opener, opener)
if kind not in ('', 'm'): if opener != closer:
raise ValueError, 'Invalid kind: must be in ("", "m")' regexp = regexp.replace('\\'+closer, closer)
flag = 0 flag = 0
try: try:
for c in flags.upper(): for c in flags.upper():
@ -159,17 +165,17 @@ def perlReToReplacer(s):
replacement. replacement.
""" """
sep = _getSep(s) sep = _getSep(s)
splitter = _getSplitterRe(s) escaped = re.escape(sep)
matcher = re.compile(r's%s((?:\\.|[^\\])*)%s((?:\\%s|[^\\])*)%s(.*)'
% (escaped, escaped, escaped, escaped))
try: try:
(kind, regexp, replace, flags) = splitter.split(s) (regexp, replace, flags) = matcher.match(s).groups()
except ValueError: # Unpack list of wrong size. except AttributeError: # Unpack list of wrong size.
raise ValueError, 'Must be of the form s/.../.../' raise ValueError, 'Must be of the form s/.../.../'
regexp = regexp.replace('\x08', r'\b') regexp = regexp.replace('\x08', r'\b')
replace = replace.replace('\\'+sep, sep) replace = replace.replace('\\'+sep, sep)
for i in xrange(10): for i in xrange(10):
replace = replace.replace(chr(i), r'\%s' % i) replace = replace.replace(chr(i), r'\%s' % i)
if kind != 's':
raise ValueError, 'Invalid kind: must be "s"'
g = False g = False
if 'g' in flags: if 'g' in flags:
g = True g = True

View File

@ -1,5 +1,6 @@
### ###
# Copyright (c) 2002-2005, Jeremiah Fincher # Copyright (c) 2002-2005, Jeremiah Fincher
# Copyright (c) 2009, James Vega
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -276,6 +277,8 @@ class StrTest(SupyTestCase):
def testP2PReDifferentSeparator(self): def testP2PReDifferentSeparator(self):
r = utils.str.perlReToPythonRe('m!foo!') r = utils.str.perlReToPythonRe('m!foo!')
self.failUnless(r.search('foo')) self.failUnless(r.search('foo'))
r = utils.str.perlReToPythonRe('m{cat}')
self.failUnless(r.search('cat'))
def testPerlReToReplacer(self): def testPerlReToReplacer(self):
PRTR = utils.str.perlReToReplacer PRTR = utils.str.perlReToReplacer
@ -291,6 +294,8 @@ class StrTest(SupyTestCase):
self.assertEqual(f('foobarbaz'), 'foorz') self.assertEqual(f('foobarbaz'), 'foorz')
f = PRTR('s/ba\\///g') f = PRTR('s/ba\\///g')
self.assertEqual(f('fooba/rba/z'), 'foorz') self.assertEqual(f('fooba/rba/z'), 'foorz')
f = PRTR('s/ba\\\\//g')
self.assertEqual(f('fooba\\rba\\z'), 'foorz')
f = PRTR('s/cat/dog/i') f = PRTR('s/cat/dog/i')
self.assertEqual(f('CATFISH'), 'dogFISH') self.assertEqual(f('CATFISH'), 'dogFISH')
f = PRTR('s/foo/foo\/bar/') f = PRTR('s/foo/foo\/bar/')