Lib/test/test_sre.py

   1 # FIXME: this is basically test_re.py, with a few minor changes
   2
   3 import sys
   4 sys.path=['.']+sys.path
   5
   6 from test_support import verbose, TestFailed
   7 import sre
   8 import sys, os, string, traceback
   9
  10 # Misc tests from Tim Peters' re.doc
  11
  12 if verbose:
  13     print 'Running tests on sre.search and sre.match'
  14
  15 try:
  16     assert sre.search('x*', 'axx').span(0) == (0, 0)
  17     assert sre.search('x*', 'axx').span() == (0, 0)
  18     assert sre.search('x+', 'axx').span(0) == (1, 3)
  19     assert sre.search('x+', 'axx').span() == (1, 3)
  20     assert sre.search('x', 'aaa') == None
  21 except:
  22     raise TestFailed, "sre.search"
  23
  24 try:
  25     assert sre.match('a*', 'xxx').span(0) == (0, 0)
  26     assert sre.match('a*', 'xxx').span() == (0, 0)
  27     assert sre.match('x*', 'xxxa').span(0) == (0, 3)
  28     assert sre.match('x*', 'xxxa').span() == (0, 3)
  29     assert sre.match('a+', 'xxx') == None
  30 except:
  31     raise TestFailed, "sre.search"
  32
  33 if verbose:
  34     print 'Running tests on sre.sub'
  35
  36 try:
  37     assert sre.sub("(?i)b+", "x", "bbbb BBBB") == 'x x'
  38
  39     def bump_num(matchobj):
  40         int_value = int(matchobj.group(0))
  41         return str(int_value + 1)
  42
  43     assert sre.sub(r'\d+', bump_num, '08.2 -2 23x99y') == '9.3 -3 24x100y'
  44     assert sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3) == '9.3 -3 23x99y'
  45
  46     assert sre.sub('.', lambda m: r"\n", 'x') == '\\n'
  47     assert sre.sub('.', r"\n", 'x') == '\n'
  48
  49     s = r"\1\1"
  50     assert sre.sub('(.)', s, 'x') == 'xx'
  51     assert sre.sub('(.)', sre.escape(s), 'x') == s
  52     assert sre.sub('(.)', lambda m: s, 'x') == s
  53
  54     assert sre.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx'
  55     assert sre.sub('(?P<a>x)', '\g<a>\g<1>', 'xx') == 'xxxx'
  56     assert sre.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx') == 'xxxx'
  57     assert sre.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx') == 'xxxx'
  58
  59     assert sre.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D'
  60     assert sre.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a'
  61     assert sre.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
  62
  63     assert sre.sub('^\s*', 'X', 'test') == 'Xtest'
  64 except AssertionError:
  65     raise TestFailed, "sre.sub"
  66
  67
  68 try:
  69     assert sre.sub('a', 'b', 'aaaaa') == 'bbbbb'
  70     assert sre.sub('a', 'b', 'aaaaa', 1) == 'baaaa'
  71 except AssertionError:
  72     raise TestFailed, "qualified sre.sub"
  73
  74 if verbose:
  75     print 'Running tests on symbolic references'
  76
  77 try:
  78     sre.sub('(?P<a>x)', '\g<a', 'xx')
  79 except sre.error, reason:
  80     pass
  81 else:
  82     raise TestFailed, "symbolic reference"
  83
  84 try:
  85     sre.sub('(?P<a>x)', '\g<', 'xx')
  86 except sre.error, reason:
  87     pass
  88 else:
  89     raise TestFailed, "symbolic reference"
  90
  91 try:
  92     sre.sub('(?P<a>x)', '\g', 'xx')
  93 except sre.error, reason:
  94     pass
  95 else:
  96     raise TestFailed, "symbolic reference"
  97
  98 try:
  99     sre.sub('(?P<a>x)', '\g<a a>', 'xx')
 100 except sre.error, reason:
 101     pass
 102 else:
 103     raise TestFailed, "symbolic reference"
 104
 105 try:
 106     sre.sub('(?P<a>x)', '\g<1a1>', 'xx')
 107 except sre.error, reason:
 108     pass
 109 else:
 110     raise TestFailed, "symbolic reference"
 111
 112 try:
 113     sre.sub('(?P<a>x)', '\g<ab>', 'xx')
 114 except IndexError, reason:
 115     pass
 116 else:
 117     raise TestFailed, "symbolic reference"
 118
 119 try:
 120     sre.sub('(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
 121 except sre.error, reason:
 122     pass
 123 else:
 124     raise TestFailed, "symbolic reference"
 125
 126 try:
 127     sre.sub('(?P<a>x)|(?P<b>y)', '\\2', 'xx')
 128 except sre.error, reason:
 129     pass
 130 else:
 131     raise TestFailed, "symbolic reference"
 132
 133 if verbose:
 134     print 'Running tests on sre.subn'
 135
 136 try:
 137     assert sre.subn("(?i)b+", "x", "bbbb BBBB") == ('x x', 2)
 138     assert sre.subn("b+", "x", "bbbb BBBB") == ('x BBBB', 1)
 139     assert sre.subn("b+", "x", "xyz") == ('xyz', 0)
 140     assert sre.subn("b*", "x", "xyz") == ('xxxyxzx', 4)
 141     assert sre.subn("b*", "x", "xyz", 2) == ('xxxyz', 2)
 142 except AssertionError:
 143     raise TestFailed, "sre.subn"
 144
 145 if verbose:
 146     print 'Running tests on sre.split'
 147
 148 try:
 149     assert sre.split(":", ":a:b::c") == ['', 'a', 'b', '', 'c']
 150     assert sre.split(":*", ":a:b::c") == ['', 'a', 'b', 'c']
 151     assert sre.split("(:*)", ":a:b::c") == ['', ':', 'a', ':', 'b', '::', 'c']
 152     assert sre.split("(?::*)", ":a:b::c") == ['', 'a', 'b', 'c']
 153     assert sre.split("(:)*", ":a:b::c") == ['', ':', 'a', ':', 'b', ':', 'c']
 154     assert sre.split("([b:]+)", ":a:b::c") == ['', ':', 'a', ':b::', 'c']
 155     assert sre.split("(b)|(:+)", ":a:b::c") == \
 156            ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c']
 157     assert sre.split("(?:b)|(?::+)", ":a:b::c") == ['', 'a', '', '', 'c']
 158 except AssertionError:
 159     raise TestFailed, "sre.split"
 160
 161 try:
 162     assert sre.split(":", ":a:b::c", 2) == ['', 'a', 'b::c']
 163     assert sre.split(':', 'a:b:c:d', 2) == ['a', 'b', 'c:d']
 164
 165     assert sre.split("(:)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c']
 166     assert sre.split("(:*)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c']
 167 except AssertionError:
 168     raise TestFailed, "qualified sre.split"
 169
 170 if verbose:
 171     print "Running tests on sre.findall"
 172
 173 try:
 174     assert sre.findall(":+", "abc") == []
 175     assert sre.findall(":+", "a:b::c:::d") == [":", "::", ":::"]
 176     assert sre.findall("(:+)", "a:b::c:::d") == [":", "::", ":::"]
 177     assert sre.findall("(:)(:*)", "a:b::c:::d") == [(":", ""),
 178                                                    (":", ":"),
 179                                                    (":", "::")]
 180     assert sre.findall("(a)|(b)", "abc") == [("a", ""), ("", "b")]
 181 except AssertionError:
 182     raise TestFailed, "sre.findall"
 183
 184 if verbose:
 185     print "Running tests on sre.match"
 186
 187 try:
 188     # No groups at all
 189     m = sre.match('a', 'a') ; assert m.groups() == ()
 190     # A single group
 191     m = sre.match('(a)', 'a') ; assert m.groups() == ('a',)
 192
 193     pat = sre.compile('((a)|(b))(c)?')
 194     assert pat.match('a').groups() == ('a', 'a', None, None)
 195     assert pat.match('b').groups() == ('b', None, 'b', None)
 196     assert pat.match('ac').groups() == ('a', 'a', None, 'c')
 197     assert pat.match('bc').groups() == ('b', None, 'b', 'c')
 198     assert pat.match('bc').groups("") == ('b', "", 'b', 'c')
 199 except AssertionError:
 200     raise TestFailed, "match .groups() method"
 201
 202 try:
 203     # A single group
 204     m = sre.match('(a)', 'a')
 205     assert m.group(0) == 'a' ; assert m.group(0) == 'a'
 206     assert m.group(1) == 'a' ; assert m.group(1, 1) == ('a', 'a')
 207
 208     pat = sre.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
 209     assert pat.match('a').group(1, 2, 3) == ('a', None, None)
 210     assert pat.match('b').group('a1', 'b2', 'c3') == (None, 'b', None)
 211     assert pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c')
 212 except AssertionError:
 213     raise TestFailed, "match .group() method"
 214
 215 if verbose:
 216     print "Running tests on sre.escape"
 217
 218 try:
 219     p=""
 220     for i in range(0, 256):
 221         p = p + chr(i)
 222         assert sre.match(sre.escape(chr(i)), chr(i)) != None
 223         assert sre.match(sre.escape(chr(i)), chr(i)).span() == (0,1)
 224
 225     pat=sre.compile( sre.escape(p) )
 226     assert pat.match(p) != None
 227     assert pat.match(p).span() == (0,256)
 228 except AssertionError:
 229     raise TestFailed, "sre.escape"
 230
 231
 232 if verbose:
 233     print 'Pickling a SRE_Pattern instance'
 234
 235 try:
 236     import pickle
 237     pat = sre.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
 238     s = pickle.dumps(pat)
 239     pat = pickle.loads(s)
 240 except:
 241     print TestFailed, 're module pickle' # expected
 242
 243 try:
 244     import cPickle
 245     pat = sre.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
 246     s = cPickle.dumps(pat)
 247     pat = cPickle.loads(s)
 248 except:
 249     print TestFailed, 're module cPickle' # expected
 250
 251 try:
 252     assert sre.I == sre.IGNORECASE
 253     assert sre.L == sre.LOCALE
 254     assert sre.M == sre.MULTILINE
 255     assert sre.S == sre.DOTALL
 256     assert sre.X == sre.VERBOSE
 257     assert sre.T == sre.TEMPLATE
 258     assert sre.U == sre.UNICODE
 259 except AssertionError:
 260     raise TestFailed, 're module constants'
 261
 262 for flags in [sre.I, sre.M, sre.X, sre.S, sre.L, sre.T, sre.U]:
 263     try:
 264         r = sre.compile('^pattern$', flags)
 265     except:
 266         print 'Exception raised on flag', flags
 267
 268 if verbose:
 269     print 'Test engine limitations'
 270
 271 # Try nasty case that overflows the straightforward recursive
 272 # implementation of repeated groups.
 273 try:
 274     assert sre.match('(x)*', 50000*'x').span() == (0, 50000)
 275 except RuntimeError, v:
 276     print v
 277
 278 from re_tests import *
 279
 280 if verbose:
 281     print 'Running re_tests test suite'
 282 else:
 283     # To save time, only run the first and last 10 tests
 284     #tests = tests[:10] + tests[-10:]
 285     pass
 286
 287 for t in tests:
 288     sys.stdout.flush()
 289     pattern=s=outcome=repl=expected=None
 290     if len(t)==5:
 291         pattern, s, outcome, repl, expected = t
 292     elif len(t)==3:
 293         pattern, s, outcome = t
 294     else:
 295         raise ValueError, ('Test tuples should have 3 or 5 fields',t)
 296
 297     try:
 298         obj=sre.compile(pattern)
 299     except sre.error:
 300         if outcome==SYNTAX_ERROR: pass  # Expected a syntax error
 301         else:
 302             print '=== Syntax error:', t
 303     except KeyboardInterrupt: raise KeyboardInterrupt
 304     except:
 305         print '*** Unexpected error ***', t
 306         if verbose:
 307             traceback.print_exc(file=sys.stdout)
 308     else:
 309         try:
 310             result=obj.search(s)
 311         except (sre.error), msg:
 312             print '=== Unexpected exception', t, repr(msg)
 313         if outcome==SYNTAX_ERROR:
 314             # This should have been a syntax error; forget it.
 315             pass
 316         elif outcome==FAIL:
 317             if result is None: pass   # No match, as expected
 318             else: print '=== Succeeded incorrectly', t
 319         elif outcome==SUCCEED:
 320             if result is not None:
 321                 # Matched, as expected, so now we compute the
 322                 # result string and compare it to our expected result.
 323                 start, end = result.span(0)
 324                 vardict={'found': result.group(0),
 325                          'groups': result.group(),
 326                          'flags': result.re.flags}
 327                 for i in range(1, 100):
 328                     try:
 329                         gi = result.group(i)
 330                         # Special hack because else the string concat fails:
 331                         if gi is None:
 332                             gi = "None"
 333                     except IndexError:
 334                         gi = "Error"
 335                     vardict['g%d' % i] = gi
 336                 for i in result.re.groupindex.keys():
 337                     try:
 338                         gi = result.group(i)
 339                         if gi is None:
 340                             gi = "None"
 341                     except IndexError:
 342                         gi = "Error"
 343                     vardict[i] = gi
 344                 repl=eval(repl, vardict)
 345                 if repl!=expected:
 346                     print '=== grouping error', t,
 347                     print repr(repl)+' should be '+repr(expected)
 348             else:
 349                 print '=== Failed incorrectly', t
 350                 continue
 351
 352             # Try the match on a unicode string, and check that it
 353             # still succeeds.
 354             result=obj.search(unicode(s, "latin-1"))
 355             if result==None:
 356                 print '=== Fails on unicode match', t
 357
 358             # Try the match on a unicode pattern, and check that it
 359             # still succeeds.
 360             obj=sre.compile(unicode(pattern, "latin-1"))
 361             result=obj.search(s)
 362             if result==None:
 363                 print '=== Fails on unicode pattern match', t
 364
 365             # Try the match with the search area limited to the extent
 366             # of the match and see if it still succeeds.  \B will
 367             # break (because it won't match at the end or start of a
 368             # string), so we'll ignore patterns that feature it.
 369
 370             if pattern[:2]!='\\B' and pattern[-2:]!='\\B':
 371                 obj=sre.compile(pattern)
 372                 result=obj.search(s, result.start(0), result.end(0)+1)
 373                 if result==None:
 374                     print '=== Failed on range-limited match', t
 375
 376             # Try the match with IGNORECASE enabled, and check that it
 377             # still succeeds.
 378             obj=sre.compile(pattern, sre.IGNORECASE)
 379             result=obj.search(s)
 380             if result==None:
 381                 print '=== Fails on case-insensitive match', t
 382
 383             # Try the match with LOCALE enabled, and check that it
 384             # still succeeds.
 385             obj=sre.compile(pattern, sre.LOCALE)
 386             result=obj.search(s)
 387             if result==None:
 388                 print '=== Fails on locale-sensitive match', t
 389
 390             # Try the match with UNICODE locale enabled, and check
 391             # that it still succeeds.
 392             obj=sre.compile(pattern, sre.UNICODE)
 393             result=obj.search(s)
 394             if result==None:
 395                 print '=== Fails on unicode-sensitive match', t