#=======================================================================
# Python Lexical Analyser
# Traditional Regular Expression Syntax
#=======================================================================
9 from Regexps
import Alt
, Seq
, Rep
, Rep1
, Opt
, Any
, AnyBut
, Bol
, Eol
, Char
10 from Errors
import PlexError
12 class RegexpSyntaxError(PlexError
):
17 Convert traditional string representation of regular expression |s|
18 into Plex representation.
20 return REParser(s
).parse_re()
22 class REParser(object):
24 def __init__(self
, s
):
33 self
.error("Unexpected %s" % repr(self
.c
))
37 """Parse a set of alternative regexps."""
43 re_list
.append(self
.parse_seq())
48 """Parse a sequence of regexps."""
50 while not self
.end
and not self
.c
in "|)":
51 re_list
.append(self
.parse_mod())
55 """Parse a primitive regexp followed by *, +, ? modifiers."""
56 re
= self
.parse_prim()
57 while not self
.end
and self
.c
in "*+?":
68 """Parse a primitive regexp."""
80 re
= self
.parse_charset()
88 def parse_charset(self
):
89 """Parse a charset. Does not include the surrounding []."""
98 while not self
.end
and self
.c
!= ']':
100 if self
.c
== '-' and self
.lookahead(1) != ']':
103 for a
in xrange(ord(c1
), ord(c2
) + 1):
104 char_list
.append(chr(a
))
107 chars
= ''.join(char_list
)
114 """Advance to the next char."""
116 i
= self
.i
= self
.i
+ 1
125 self
.error("Premature end of string")
130 def lookahead(self
, n
):
131 """Look ahead n chars."""
140 Expect to find character |c| at current position.
141 Raises an exception otherwise.
146 self
.error("Missing %s" % repr(c
))
def error(self, mess):
    """Raise RegexpSyntaxError to signal a syntax error in the regexp.

    The exception message reports the repr of the string being parsed
    (self.s), the current position (self.i), and the caller-supplied
    description |mess|.
    """
    # Gather the format arguments first, in the order the message uses them.
    details = (repr(self.s), self.i, mess)
    raise RegexpSyntaxError(
        "Syntax error in regexp %s at position %d: %s" % details)