Pin Chrome's shortcut to the Win10 Start menu on install and OS upgrade.
[chromium-blink-merge.git] / third_party / cython / src / Cython / Plex / Traditional.py
blob6d3e48fa4a2ed8f4240327222d2e415dba032024
1 #=======================================================================
3 # Python Lexical Analyser
5 # Traditional Regular Expression Syntax
7 #=======================================================================
9 from Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
10 from Errors import PlexError
class RegexpSyntaxError(PlexError):
    """Raised when a traditional regular expression string cannot be parsed."""
def re(s):
    """
    Build the Plex representation of |s|, a regular expression written
    in traditional string syntax.
    """
    parser = REParser(s)
    return parser.parse_re()
class REParser(object):
    """
    Recursive-descent parser that converts a traditional regular
    expression string into a Plex regexp.

    Parser state:
      s    -- the string being parsed
      i    -- index of the current character within |s|
      c    -- the current character, or '' once the end has been reached
      end  -- 0 while characters remain, 1 once |s| is exhausted
    """

    def __init__(self, s):
        """Set up the parser on string |s| and load its first character."""
        self.s = s
        # Start one position before the string so that the initial next()
        # lands on index 0.
        self.i = -1
        self.end = 0
        self.next()

    def parse_re(self):
        """
        Parse the complete string and return the resulting regexp.
        Signals a syntax error if any input is left unconsumed.
        """
        re = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return re

    def parse_alt(self):
        """Parse a set of alternative regexps separated by '|'."""
        re = self.parse_seq()
        if self.c == '|':
            re_list = [re]
            while self.c == '|':
                self.next()
                re_list.append(self.parse_seq())
            re = Alt(*re_list)
        return re

    def parse_seq(self):
        """Parse a sequence of regexps, stopping at '|', ')' or the end."""
        re_list = []
        # The end test must come first: at the end self.c is '', and
        # '' is contained in every string.
        while not self.end and self.c not in "|)":
            re_list.append(self.parse_mod())
        return Seq(*re_list)

    def parse_mod(self):
        """Parse a primitive regexp followed by *, +, ? modifiers."""
        re = self.parse_prim()
        while not self.end and self.c in "*+?":
            if self.c == '*':
                re = Rep(re)
            elif self.c == '+':
                re = Rep1(re)
            else:  # self.c == '?'
                re = Opt(re)
            self.next()
        return re

    def parse_prim(self):
        """
        Parse a primitive regexp: '.', '^', '$', a parenthesised group,
        a bracketed charset, or a single (possibly backslash-escaped)
        character.
        """
        c = self.get()
        if c == '.':
            re = AnyBut("\n")
        elif c == '^':
            re = Bol
        elif c == '$':
            re = Eol
        elif c == '(':
            re = self.parse_alt()
            self.expect(')')
        elif c == '[':
            re = self.parse_charset()
            self.expect(']')
        else:
            if c == '\\':
                # A backslash makes the following character literal.
                c = self.get()
            re = Char(c)
        return re

    def parse_charset(self):
        """Parse a charset. Does not include the surrounding []."""
        char_list = []
        invert = False
        if self.c == '^':
            invert = True
            self.next()
        if self.c == ']':
            # A ']' in first position is a literal member of the set.
            char_list.append(']')
            self.next()
        while not self.end and self.c != ']':
            c1 = self.get()
            # A '-' directly before the closing ']' is a literal, not a range.
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                c2 = self.get()
                # range() rather than xrange() so this also runs on Python 3.
                char_list.extend(
                    chr(a) for a in range(ord(c1), ord(c2) + 1))
            else:
                char_list.append(c1)
        chars = ''.join(char_list)
        if invert:
            return AnyBut(chars)
        else:
            return Any(chars)

    def next(self):
        """Advance to the next char."""
        s = self.s
        i = self.i = self.i + 1
        if i < len(s):
            self.c = s[i]
        else:
            self.c = ''
            self.end = 1

    def get(self):
        """
        Return the current character and advance past it.
        Signals a syntax error if the end of the string has been reached.
        """
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """Look ahead n chars; returns '' when that is past the end."""
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        else:
            return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position.
        Raises an exception otherwise.
        """
        if self.c == c:
            self.next()
        else:
            self.error("Missing %s" % repr(c))

    def error(self, mess):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))