several major improvements to the sparc backend: support for weak linkage
[llvm/avr.git] / utils / lit / ShUtil.py
blobc4bbb3d3731d53dacb835ca1b12e3c5064b658c0
1 import itertools
3 import Util
4 from ShCommands import Command, Pipeline, Seq
class ShLexer:
    """
    ShLexer - Split an 'sh'-style command line into tokens.

    Arguments are produced as plain strings. Operators are produced as
    tuples: a 1-tuple such as ('|',) or ('>>',), or a 2-tuple such as
    ('>', 2) when a redirection operator directly follows an all-digit
    word (e.g. '2>').

    data         - The command line text to tokenize.
    win32Escapes - When true, a backslash outside of quotes is kept as an
                   ordinary character instead of escaping the next one.
    """

    def __init__(self, data, win32Escapes = False):
        self.data = data
        self.pos = 0
        self.end = len(data)
        self.win32Escapes = win32Escapes

    def eat(self):
        """
        eat() - Consume and return the next character. The caller must
        ensure the input is not exhausted. """
        c = self.data[self.pos]
        self.pos += 1
        return c

    def look(self):
        """
        look() - Return the next character without consuming it. The
        caller must ensure the input is not exhausted. """
        return self.data[self.pos]

    def maybe_eat(self, c):
        """
        maybe_eat(c) - Consume the character c if it is the next character,
        returning True if a character was consumed. """
        # Guard against end of input: an operator may be the very last
        # character of the command line (e.g. 'a |'), in which case there
        # is nothing to look at and indexing would raise IndexError.
        if self.pos < self.end and self.data[self.pos] == c:
            self.pos += 1
            return True
        return False

    def lex_arg_fast(self, c):
        """
        lex_arg_fast(c) - Try to lex an argument that contains no special
        characters. Returns the argument, or None (without moving the
        position) when the slow path is required. """
        # Get the leading whitespace free section. The first character, c,
        # has already been consumed, hence the 'pos - 1'.
        chunk = self.data[self.pos - 1:].split(None, 1)[0]

        # If it has special characters, the fast path failed.
        for special in '|&<>\'"\\':
            if special in chunk:
                return None

        self.pos = self.pos - 1 + len(chunk)
        return chunk

    def lex_arg_slow(self, c):
        """
        lex_arg_slow(c) - Lex an argument character by character, handling
        quotes, escapes and numbered redirections. Returns either the
        argument string or an (operator, number) redirection tuple. """
        if c in "'\"":
            arg = self.lex_arg_quoted(c)
        else:
            arg = c
        while self.pos != self.end:
            c = self.look()
            if c.isspace() or c in "|&":
                break
            elif c in '><':
                # This is an annoying case; we treat '2>' as a single token so
                # we don't have to track whitespace tokens.

                # If the parse string isn't an integer, do the usual thing.
                if not arg.isdigit():
                    break

                # Otherwise, lex the operator and convert to a redirection
                # token.
                num = int(arg)
                tok = self.lex_one_token()
                assert isinstance(tok, tuple) and len(tok) == 1
                return (tok[0], num)
            elif c == '"' or c == "'":
                # A quoted section may start in the middle of a word. Both
                # quote styles are accepted here, matching what is accepted
                # at the start of a word above.
                self.eat()
                arg += self.lex_arg_quoted(c)
            elif not self.win32Escapes and c == '\\':
                # Outside of a string, '\\' escapes everything.
                self.eat()
                if self.pos == self.end:
                    Util.warning("escape at end of quoted argument in: %r" %
                                 self.data)
                    return arg
                arg += self.eat()
            else:
                arg += self.eat()
        return arg

    def lex_arg_quoted(self, delim):
        """
        lex_arg_quoted(delim) - Lex the remainder of a quoted string whose
        opening delimiter has already been consumed. Returns the string
        contents without the delimiters; warns if the closing delimiter is
        missing. """
        text = ''
        while self.pos != self.end:
            c = self.eat()
            if c == delim:
                return text
            elif c == '\\' and delim == '"':
                # Inside a '"' quoted string, '\\' only escapes the quote
                # character and backslash, otherwise it is preserved.
                if self.pos == self.end:
                    Util.warning("escape at end of quoted argument in: %r" %
                                 self.data)
                    return text
                c = self.eat()
                if c == '"':
                    text += '"'
                elif c == '\\':
                    text += '\\'
                else:
                    text += '\\' + c
            else:
                text += c
        Util.warning("missing quote character in %r" % self.data)
        return text

    def lex_arg_checked(self, c):
        """
        lex_arg_checked(c) - Debugging variant of lex_arg which runs both
        the fast and slow paths and raises ValueError if they disagree on
        the result or on the final position. """
        pos = self.pos
        res = self.lex_arg_fast(c)
        end = self.pos

        self.pos = pos
        reference = self.lex_arg_slow(c)
        if res is not None:
            if res != reference:
                raise ValueError("Fast path failure: %r != %r" %
                                 (res, reference))
            if self.pos != end:
                raise ValueError("Fast path failure: %r != %r" %
                                 (self.pos, end))
        return reference

    def lex_arg(self, c):
        """
        lex_arg(c) - Lex an argument whose first character, c, has already
        been consumed. """
        # The fast path never yields an empty string (the chunk always
        # contains at least c), so 'or' only falls through on None.
        return self.lex_arg_fast(c) or self.lex_arg_slow(c)

    def lex_one_token(self):
        """
        lex_one_token - Lex a single 'sh' token. """

        c = self.eat()
        if c in ';!':
            return (c,)
        if c == '|':
            if self.maybe_eat('|'):
                return ('||',)
            return (c,)
        if c == '&':
            if self.maybe_eat('&'):
                return ('&&',)
            if self.maybe_eat('>'):
                return ('&>',)
            return (c,)
        if c == '>':
            if self.maybe_eat('&'):
                return ('>&',)
            if self.maybe_eat('>'):
                return ('>>',)
            return (c,)
        if c == '<':
            if self.maybe_eat('&'):
                return ('<&',)
            # The '<<' token is spelled with a second '<'; testing for '>'
            # here would label the unrelated '<>' sequence as '<<'.
            if self.maybe_eat('<'):
                return ('<<',)
            return (c,)

        return self.lex_arg(c)

    def lex(self):
        """
        lex() - Generate the tokens of the input, skipping whitespace
        between them. """
        while self.pos != self.end:
            if self.look().isspace():
                self.eat()
            else:
                yield self.lex_one_token()
class ShParser:
    """
    ShParser - Parse an 'sh'-style command line into Command, Pipeline and
    Seq nodes, using ShLexer to produce the tokens.

    data         - The command line text to parse.
    win32Escapes - Forwarded to ShLexer.
    """

    def __init__(self, data, win32Escapes = False):
        self.data = data
        self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
        # Tokens read ahead by look() which lex() has not yet handed out.
        # A plain buffer keeps look() cheap; re-wrapping the token stream
        # on every look() would nest iterators ever deeper.
        self._lookahead = []

    def lex(self):
        """
        lex() - Consume and return the next token, or None at end of
        input. """
        if self._lookahead:
            return self._lookahead.pop()
        # Pull a single item from the token generator; falling out of the
        # loop means it is exhausted. This form does not depend on the
        # iterator protocol's method name, which differs across Python
        # versions.
        for tok in self.tokens:
            return tok
        return None

    def look(self):
        """
        look() - Return the next token without consuming it, or None at
        end of input. """
        if not self._lookahead:
            tok = self.lex()
            if tok is None:
                return None
            self._lookahead.append(tok)
        return self._lookahead[-1]

    def parse_command(self):
        """
        parse_command() - Parse one command: its arguments followed by any
        redirections. Raises ValueError on a missing command or a missing
        redirection target. """
        tok = self.lex()
        # Compare against None explicitly: an empty string is a valid
        # (quoted) word and must not be mistaken for end of input.
        if tok is None:
            raise ValueError("empty command!")
        if isinstance(tok, tuple):
            raise ValueError("syntax error near unexpected token %r" % tok[0])

        args = [tok]
        redirects = []
        while 1:
            tok = self.look()

            # EOF?
            if tok is None:
                break

            # If this is an argument, just add it to the current command.
            if isinstance(tok, str):
                args.append(self.lex())
                continue

            # Otherwise see if it is a terminator.
            assert isinstance(tok, tuple)
            if tok[0] in ('|',';','&','||','&&'):
                break

            # Otherwise it must be a redirection.
            op = self.lex()
            arg = self.lex()
            # The target must be a word: reject both end of input and an
            # operator token, but allow an empty (quoted) string.
            if arg is None or isinstance(arg, tuple):
                raise ValueError("syntax error near token %r" % op[0])
            redirects.append((op, arg))

        return Command(args, redirects)

    def parse_pipeline(self):
        """
        parse_pipeline() - Parse an optionally '!'-negated sequence of
        commands separated by '|'. """
        negate = False
        if self.look() == ('!',):
            self.lex()
            negate = True

        commands = [self.parse_command()]
        while self.look() == ('|',):
            self.lex()
            commands.append(self.parse_command())
        return Pipeline(commands, negate)

    def parse(self):
        """
        parse() - Parse the whole input. Returns a Pipeline, or
        left-nested Seq nodes when pipelines are joined by operators.
        Raises ValueError when an operator has no right-hand side. """
        lhs = self.parse_pipeline()

        while self.look() is not None:
            operator = self.lex()
            assert isinstance(operator, tuple) and len(operator) == 1

            if self.look() is None:
                raise ValueError("missing argument to operator %r" %
                                 operator[0])

            # FIXME: Operator precedence!!
            lhs = Seq(lhs, operator[0], self.parse_pipeline())

        return lhs
246 import unittest
class TestShLexer(unittest.TestCase):
    """Token-level checks for ShLexer."""

    def lex(self, str, *args, **kwargs):
        # Drain the token generator so results compare as plain lists.
        lexer = ShLexer(str, *args, **kwargs)
        return list(lexer.lex())

    def test_basic(self):
        expected = ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd', ('<',), 'e']
        self.assertEqual(self.lex('a|b>c&d<e'), expected)

    def test_redirection_tokens(self):
        # Digits glued to other characters are an ordinary word ...
        self.assertEqual(self.lex('a2>c'), ['a2', ('>',), 'c'])
        # ... while a standalone number before '>' joins the operator.
        self.assertEqual(self.lex('a 2>c'), ['a', ('>',2), 'c'])

    def test_quoting(self):
        # (input, expected tokens) pairs lexed with default escaping.
        cases = [
            (""" 'a' """, ['a']),
            (""" "hello\\"world" """, ['hello"world']),
            (""" "hello\\'world" """, ["hello\\'world"]),
            (""" "hello\\\\world" """, ["hello\\world"]),
            (""" he"llo wo"rld """, ["hello world"]),
            (""" a\\ b a\\\\b """, ["a b", "a\\b"]),
            (""" "" "" """, ["", ""]),
        ]
        for text, expected in cases:
            self.assertEqual(self.lex(text), expected)

        # With win32Escapes the backslash is kept as a literal character.
        self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
                         ['a\\', 'b'])
class TestShParse(unittest.TestCase):
    """Checks on the command trees built by ShParser."""

    def parse(self, str):
        parser = ShParser(str)
        return parser.parse()

    def test_basic(self):
        expected = Pipeline([Command(['echo', 'hello'], [])], False)
        self.assertEqual(self.parse('echo hello'), expected)

        expected = Pipeline([Command(['echo', ''], [])], False)
        self.assertEqual(self.parse('echo ""'), expected)

    def test_redirection(self):
        def echo_hello(redirects):
            # 'echo hello' carrying the given list of redirections.
            return Pipeline([Command(['echo', 'hello'], redirects)], False)

        self.assertEqual(self.parse('echo hello > c'),
                         echo_hello([(('>',), 'c')]))
        self.assertEqual(self.parse('echo hello > c >> d'),
                         echo_hello([(('>',), 'c'), (('>>',), 'd')]))

        expected = Pipeline([Command(['a'], [(('>&',2), '1')])], False)
        self.assertEqual(self.parse('a 2>&1'), expected)

    def test_pipeline(self):
        def cmd(name):
            # A command with a single word and no redirections.
            return Command([name], [])

        self.assertEqual(self.parse('a | b'),
                         Pipeline([cmd('a'), cmd('b')], False))

        self.assertEqual(self.parse('a | b | c'),
                         Pipeline([cmd('a'), cmd('b'), cmd('c')], False))

        self.assertEqual(self.parse('! a'),
                         Pipeline([cmd('a')], True))

    def test_list(self):
        def pipe(name):
            # A non-negated pipeline holding one single-word command.
            return Pipeline([Command([name], [])], False)

        # Each binary operator joins two pipelines into a Seq node.
        for text, op in [('a ; b', ';'),
                         ('a & b', '&'),
                         ('a && b', '&&'),
                         ('a || b', '||')]:
            self.assertEqual(self.parse(text),
                             Seq(pipe('a'), op, pipe('b')))

        # Operators associate to the left.
        expected = Seq(Seq(pipe('a'), '&&', pipe('b')), '||', pipe('c'))
        self.assertEqual(self.parse('a && b || c'), expected)
# Run this module's unit tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()