This commit was manufactured by cvs2svn to create tag 'r221c2'.
[python/dscho.git] / Lib / compiler / transformer.py
blobcd36aaeac9a137bff762f5e5a29ecb45f723f09d
1 """Parse tree transformation module.
3 Transforms Python source code into an abstract syntax tree (AST)
4 defined in the ast module.
6 The simplest ways to invoke this module are via parse and parseFile.
7 parse(buf) -> AST
8 parseFile(path) -> AST
9 """
11 # Original version written by Greg Stein (gstein@lyra.org)
12 # and Bill Tutt (rassilon@lima.mudlib.org)
13 # February 1997.
15 # Modifications and improvements for Python 2.0 by Jeremy Hylton and
16 # Mark Hammond
18 # Portions of this file are:
19 # Copyright (C) 1997-1998 Greg Stein. All Rights Reserved.
21 # This module is provided under a BSD-ish license. See
22 # http://www.opensource.org/licenses/bsd-license.html
23 # and replace OWNER, ORGANIZATION, and YEAR as appropriate.
25 from ast import *
26 import parser
27 # Care must be taken to use only symbols and tokens defined in Python
28 # 1.5.2 for code branches executed in 1.5.2
29 import symbol
30 import token
31 import string
32 import sys
34 error = 'walker.error'
36 from consts import CO_VARARGS, CO_VARKEYWORDS
37 from consts import OP_ASSIGN, OP_DELETE, OP_APPLY
def parseFile(path):
    """Read the file at *path* and return the AST for its contents.

    The file is closed even if reading it raises.
    """
    f = open(path)
    try:
        src = f.read()
    finally:
        f.close()
    return parse(src)
def parse(buf, mode="exec"):
    """Parse the source text *buf* and return the resulting tree.

    "exec" and "single" parse *buf* as a suite, "eval" parses it as an
    expression; any other *mode* raises ValueError.
    """
    if mode in ("exec", "single"):
        return Transformer().parsesuite(buf)
    if mode == "eval":
        return Transformer().parseexpr(buf)
    raise ValueError("compile() arg 3 must be"
                     " 'exec' or 'eval' or 'single'")
def asList(nodes):
    """Return a list copy of *nodes* with nested items converted.

    Items that have an ``asList`` method are replaced by its result;
    tuples and lists are converted recursively (tuples stay tuples);
    everything else is copied through unchanged.
    """
    converted = []
    for entry in nodes:
        if hasattr(entry, "asList"):
            value = entry.asList()
        elif type(entry) is type((None, None)):
            value = tuple(asList(entry))
        elif type(entry) is type([]):
            value = asList(entry)
        else:
            value = entry
        converted.append(value)
    return converted
68 def Node(*args):
69 kind = args[0]
70 if nodes.has_key(kind):
71 try:
72 return apply(nodes[kind], args[1:])
73 except TypeError:
74 print nodes[kind], len(args), args
75 raise
76 else:
77 raise error, "Can't find appropriate Node type: %s" % str(args)
78 #return apply(ast.Node, args)
80 class Transformer:
81 """Utility object for transforming Python parse trees.
83 Exposes the following methods:
84 tree = transform(ast_tree)
85 tree = parsesuite(text)
86 tree = parseexpr(text)
87 tree = parsefile(fileob | filename)
88 """
90 def __init__(self):
91 self._dispatch = {}
92 for value, name in symbol.sym_name.items():
93 if hasattr(self, name):
94 self._dispatch[value] = getattr(self, name)
95 self._dispatch[token.NEWLINE] = self.com_NEWLINE
96 self._atom_dispatch = {token.LPAR: self.atom_lpar,
97 token.LSQB: self.atom_lsqb,
98 token.LBRACE: self.atom_lbrace,
99 token.BACKQUOTE: self.atom_backquote,
100 token.NUMBER: self.atom_number,
101 token.STRING: self.atom_string,
102 token.NAME: self.atom_name,
def transform(self, tree):
    """Compile a parse tree and return the result of compile_node().

    A *tree* that is neither a tuple nor a list is first converted with
    parser.ast2tuple(), keeping line numbers.
    """
    if type(tree) not in (type(()), type([])):
        tree = parser.ast2tuple(tree, line_info=1)
    return self.compile_node(tree)
def parsesuite(self, text):
    """Return a modified parse tree for the given suite text."""
    # Hack: strip carriage returns so non-native line endings can be
    # handled on non-DOS-like systems.
    cleaned = text.replace('\x0d', '')
    return self.transform(parser.suite(cleaned))
def parseexpr(self, text):
    """Return a modified parse tree for the given expression text."""
    parsed = parser.expr(text)
    return self.transform(parsed)
def parsefile(self, file):
    """Return a modified parse tree for the contents of the given file.

    *file* is either a filename (a string) or an object with a read()
    method.  A file opened here from a filename is closed again, even
    if reading raises; a caller-supplied file object is left open.
    """
    if type(file) == type(''):
        stream = open(file)
        try:
            text = stream.read()
        finally:
            stream.close()
    else:
        text = file.read()
    return self.parsesuite(text)
127 # --------------------------------------------------------------
129 # PRIVATE METHODS
132 def compile_node(self, node):
133 ### emit a line-number node?
134 n = node[0]
135 if n == symbol.single_input:
136 return self.single_input(node[1:])
137 if n == symbol.file_input:
138 return self.file_input(node[1:])
139 if n == symbol.eval_input:
140 return self.eval_input(node[1:])
141 if n == symbol.lambdef:
142 return self.lambdef(node[1:])
143 if n == symbol.funcdef:
144 return self.funcdef(node[1:])
145 if n == symbol.classdef:
146 return self.classdef(node[1:])
148 raise error, ('unexpected node type', n)
def single_input(self, node):
    """Compile a single_input node; a bare NEWLINE becomes Pass()."""
    ### do we want to do anything about being "interactive" ?

    # NEWLINE | simple_stmt | compound_stmt NEWLINE
    first = node[0]
    if first[0] == token.NEWLINE:
        return Pass()
    return self.com_stmt(first)
def file_input(self, nodelist):
    """Build a Module node from the children of a file_input node."""
    doc = self.get_docstring(nodelist, symbol.file_input)
    # When a docstring was found, the first child is not compiled again.
    start = 0
    if doc is not None:
        start = 1
    stmts = []
    for node in nodelist[start:]:
        if node[0] == token.ENDMARKER or node[0] == token.NEWLINE:
            continue
        self.com_append_stmt(stmts, node)
    return Module(doc, Stmt(stmts))
def eval_input(self, nodelist):
    """Wrap the compiled first child of an eval_input node in Expression."""
    # from the built-in function input()
    ### is this sufficient?
    body = self.com_node(nodelist[0])
    return Expression(body)
def funcdef(self, nodelist):
    """Build a Function node from the children of a funcdef node."""
    # funcdef: 'def' NAME parameters ':' suite
    # parameters: '(' [varargslist] ')'
    name_tok = nodelist[1]
    lineno = name_tok[2]
    name = name_tok[1]
    arglist = nodelist[2][2]

    if arglist[0] == symbol.varargslist:
        names, defaults, flags = self.com_arglist(arglist[1:])
    else:
        names = defaults = ()
        flags = 0

    body = nodelist[4]
    doc = self.get_docstring(body)
    code = self.com_node(body)

    if doc is not None:
        # The docstring is carried on the Function node, so its
        # statement is removed from the compiled body.
        assert isinstance(code, Stmt)
        assert isinstance(code.nodes[0], Discard)
        del code.nodes[0]

    func = Function(name, names, defaults, flags, doc, code)
    func.lineno = lineno
    return func
def lambdef(self, nodelist):
    """Build a Lambda node.

    The indices used here match the call made by test(), which passes
    the whole lambdef node including its leading symbol.
    """
    # lambdef: 'lambda' [varargslist] ':' test
    maybe_args = nodelist[2]
    if maybe_args[0] == symbol.varargslist:
        names, defaults, flags = self.com_arglist(maybe_args[1:])
    else:
        names = defaults = ()
        flags = 0

    # code for lambda
    body = self.com_node(nodelist[-1])

    result = Lambda(names, defaults, flags, body)
    result.lineno = nodelist[1][2]
    return result
def classdef(self, nodelist):
    """Build a Class node from the children of a classdef node."""
    # classdef: 'class' NAME ['(' testlist ')'] ':' suite
    name_tok = nodelist[1]
    name = name_tok[1]
    body = nodelist[-1]
    doc = self.get_docstring(body)
    if nodelist[2][0] != token.COLON:
        bases = self.com_bases(nodelist[3])
    else:
        bases = []

    # code for class
    code = self.com_node(body)

    if doc is not None:
        # The docstring is carried on the Class node, so its statement
        # is removed from the compiled body.
        assert isinstance(code, Stmt)
        assert isinstance(code.nodes[0], Discard)
        del code.nodes[0]

    cls = Class(name, bases, doc, code)
    cls.lineno = name_tok[2]
    return cls
240 def stmt(self, nodelist):
241 return self.com_stmt(nodelist[0])
243 small_stmt = stmt
244 flow_stmt = stmt
245 compound_stmt = stmt
def simple_stmt(self, nodelist):
    """Collect every small statement on one line into a Stmt node."""
    # small_stmt (';' small_stmt)* [';'] NEWLINE
    stmts = []
    pos = 0
    total = len(nodelist)
    while pos < total:
        # Even positions are compiled; odd positions are skipped.
        self.com_append_stmt(stmts, nodelist[pos])
        pos = pos + 2
    return Stmt(stmts)
def parameters(self, nodelist):
    """Always raise ``error``; this symbol has no handler of its own."""
    raise error

# The symbols below fail the same way when dispatched to directly.
varargslist = parameters
fpdef = parameters
fplist = parameters
dotted_name = parameters
comp_op = parameters
trailer = parameters
sliceop = parameters
argument = parameters
281 # --------------------------------------------------------------
283 # STATEMENT NODES (invoked by com_node())
def expr_stmt(self, nodelist):
    """Compile an expression statement, assignment or augmented assignment."""
    # augassign testlist | testlist ('=' testlist)*
    last = nodelist[-1]
    value = self.lookup_node(last)(last[1:])
    if len(nodelist) == 1:
        # A bare expression: evaluate and discard.
        result = Discard(value)
        result.lineno = value.lineno
        return result
    if nodelist[1][0] != token.EQUAL:
        target = self.com_augassign(nodelist[0])
        op = self.com_augassign_op(nodelist[1])
        result = AugAssign(target, op[1], value)
        result.lineno = op[2]
        return result
    # Every testlist before the last one is an assignment target.
    targets = []
    for i in range(0, len(nodelist) - 2, 2):
        targets.append(self.com_assign(nodelist[i], OP_ASSIGN))
    result = Assign(targets, value)
    result.lineno = nodelist[1][2]
    return result
def print_stmt(self, nodelist):
    """Build a Print node (trailing comma) or a Printnl node."""
    # print ([ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ])
    dest = None
    start = 1
    if len(nodelist) != 1 and nodelist[1][0] == token.RIGHTSHIFT:
        assert len(nodelist) == 3 \
               or nodelist[3][0] == token.COMMA
        dest = self.com_node(nodelist[2])
        start = 4
    items = []
    for i in range(start, len(nodelist), 2):
        items.append(self.com_node(nodelist[i]))
    if nodelist[-1][0] == token.COMMA:
        result = Print(items, dest)
    else:
        result = Printnl(items, dest)
    result.lineno = nodelist[0][2]
    return result
def del_stmt(self, nodelist):
    """Compile the target of a ``del`` statement with OP_DELETE."""
    target = nodelist[1]
    return self.com_assign(target, OP_DELETE)
def pass_stmt(self, nodelist):
    """Build a Pass node carrying the keyword's line number."""
    result = Pass()
    result.lineno = nodelist[0][2]
    return result
def break_stmt(self, nodelist):
    """Build a Break node carrying the keyword's line number."""
    result = Break()
    result.lineno = nodelist[0][2]
    return result
def continue_stmt(self, nodelist):
    """Build a Continue node carrying the keyword's line number."""
    result = Continue()
    result.lineno = nodelist[0][2]
    return result
def return_stmt(self, nodelist):
    """Build a Return node; a bare ``return`` returns Const(None)."""
    # return: [testlist]
    if len(nodelist) < 2:
        value = Const(None)
    else:
        value = self.com_node(nodelist[1])
    result = Return(value)
    result.lineno = nodelist[0][2]
    return result
def yield_stmt(self, nodelist):
    """Build a Yield node from the expression after the keyword."""
    value = self.com_node(nodelist[1])
    result = Yield(value)
    result.lineno = nodelist[0][2]
    return result
def raise_stmt(self, nodelist):
    """Build a Raise node; absent expressions are passed as None."""
    # raise: [test [',' test [',' test]]]
    count = len(nodelist)
    compiled = {}
    # The expressions sit at positions 1, 3 and 5; they are compiled
    # last-to-first.
    for pos in (5, 3, 1):
        if count > pos:
            compiled[pos] = self.com_node(nodelist[pos])
        else:
            compiled[pos] = None
    result = Raise(compiled[1], compiled[3], compiled[5])
    result.lineno = nodelist[0][2]
    return result
def import_stmt(self, nodelist):
    """Build a From or an Import node, chosen by the leading keyword."""
    # import_stmt: 'import' dotted_as_name (',' dotted_as_name)* |
    # from: 'from' dotted_name 'import'
    #                        ('*' | import_as_name (',' import_as_name)*)
    keyword = nodelist[0]
    if keyword[1] == 'from':
        if nodelist[3][0] == token.NAME:
            # Imported names given as plain NAME tokens.
            names = [(nodelist[i][1], None)
                     for i in range(3, len(nodelist), 2)]
        else:
            names = [self.com_import_as_name(nodelist[i])
                     for i in range(3, len(nodelist), 2)]
        result = From(self.com_dotted_name(nodelist[1]), names)
    else:
        if nodelist[1][0] == symbol.dotted_name:
            names = [(self.com_dotted_name(nodelist[1][1:]), None)]
        else:
            names = [self.com_dotted_as_name(nodelist[i])
                     for i in range(1, len(nodelist), 2)]
        result = Import(names)
    result.lineno = keyword[2]
    return result
def global_stmt(self, nodelist):
    """Build a Global node listing the declared names."""
    # global: NAME (',' NAME)*
    names = [nodelist[i][1] for i in range(1, len(nodelist), 2)]
    result = Global(names)
    result.lineno = nodelist[0][2]
    return result
def exec_stmt(self, nodelist):
    """Build an Exec node; missing ``in`` expressions are None."""
    # exec_stmt: 'exec' expr ['in' expr [',' expr]]
    count = len(nodelist)
    expr1 = self.com_node(nodelist[1])
    expr2 = expr3 = None
    if count >= 4:
        expr2 = self.com_node(nodelist[3])
        if count >= 6:
            expr3 = self.com_node(nodelist[5])

    result = Exec(expr1, expr2, expr3)
    result.lineno = nodelist[0][2]
    return result
def assert_stmt(self, nodelist):
    """Build an Assert node; the message expression is optional."""
    # 'assert': test, [',' test]
    condition = self.com_node(nodelist[1])
    message = None
    if len(nodelist) == 4:
        message = self.com_node(nodelist[3])
    result = Assert(condition, message)
    result.lineno = nodelist[0][2]
    return result
def if_stmt(self, nodelist):
    """Build an If node from the if/elif branches and the optional else."""
    # if: test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
    tests = []
    for base in range(0, len(nodelist) - 3, 4):
        condition = self.com_node(nodelist[base + 1])
        body = self.com_node(nodelist[base + 3])
        tests.append((condition, body))

    elseNode = None
    if len(nodelist) % 4 == 3:
        # Three children left over after the groups of four: an else clause.
        elseNode = self.com_node(nodelist[-1])
        ## elseNode.lineno = nodelist[-1][1][2]
    result = If(tests, elseNode)
    result.lineno = nodelist[0][2]
    return result
def while_stmt(self, nodelist):
    """Build a While node; the else suite is optional."""
    # 'while' test ':' suite ['else' ':' suite]
    condition = self.com_node(nodelist[1])
    body = self.com_node(nodelist[3])

    elseNode = None
    if len(nodelist) > 4:
        elseNode = self.com_node(nodelist[6])

    result = While(condition, body, elseNode)
    result.lineno = nodelist[0][2]
    return result
def for_stmt(self, nodelist):
    """Build a For node; the else suite is optional."""
    # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite]
    target = self.com_assign(nodelist[1], OP_ASSIGN)
    iterable = self.com_node(nodelist[3])
    body = self.com_node(nodelist[5])

    elseNode = None
    if len(nodelist) > 8:
        elseNode = self.com_node(nodelist[8])

    result = For(target, iterable, body, elseNode)
    result.lineno = nodelist[0][2]
    return result
def try_stmt(self, nodelist):
    """Hand a try statement to the try/except or try/finally compiler."""
    # 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite]
    # | 'try' ':' suite 'finally' ':' suite
    if nodelist[3][0] == symbol.except_clause:
        return self.com_try_except(nodelist)
    return self.com_try_finally(nodelist)
def suite(self, nodelist):
    """Compile a suite into a Stmt node."""
    # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT
    if len(nodelist) == 1:
        return self.com_stmt(nodelist[0])

    stmts = []
    for node in nodelist:
        # Only stmt children are compiled; other children are skipped.
        if node[0] != symbol.stmt:
            continue
        self.com_append_stmt(stmts, node)
    return Stmt(stmts)
511 # --------------------------------------------------------------
513 # EXPRESSION NODES (invoked by com_node())
def testlist(self, nodelist):
    """Compile a comma-separated list through com_binary() with Tuple."""
    # testlist: expr (',' expr)* [',']
    # testlist_safe: test [(',' test)+ [',']]
    # exprlist: expr (',' expr)* [',']
    result = self.com_binary(Tuple, nodelist)
    return result

testlist_safe = testlist # XXX
exprlist = testlist
def test(self, nodelist):
    """Compile a test node: a lone lambdef, or an ``or`` chain."""
    # and_test ('or' and_test)* | lambdef
    if len(nodelist) == 1:
        only = nodelist[0]
        if only[0] == symbol.lambdef:
            # lambdef() receives the whole node, leading symbol included.
            return self.lambdef(only)
    return self.com_binary(Or, nodelist)
def and_test(self, nodelist):
    """Compile an ``and`` chain through com_binary() with And."""
    # not_test ('and' not_test)*
    result = self.com_binary(And, nodelist)
    return result
535 def not_test(self, nodelist):
536 # 'not' not_test | comparison
537 result = self.com_node(nodelist[-1])
538 if len(nodelist) == 2:
539 n = Not(result)
540 n.lineno = nodelist[0][2]
541 return n
542 return result
544 def comparison(self, nodelist):
545 # comparison: expr (comp_op expr)*
546 node = self.com_node(nodelist[0])
547 if len(nodelist) == 1:
548 return node
550 results = []
551 for i in range(2, len(nodelist), 2):
552 nl = nodelist[i-1]
554 # comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '=='
555 # | 'in' | 'not' 'in' | 'is' | 'is' 'not'
556 n = nl[1]
557 if n[0] == token.NAME:
558 type = n[1]
559 if len(nl) == 3:
560 if type == 'not':
561 type = 'not in'
562 else:
563 type = 'is not'
564 else:
565 type = _cmp_types[n[0]]
567 lineno = nl[1][2]
568 results.append((type, self.com_node(nodelist[i])))
570 # we need a special "compare" node so that we can distinguish
571 # 3 < x < 5 from (3 < x) < 5
572 # the two have very different semantics and results (note that the
573 # latter form is always true)
575 n = Compare(node, results)
576 n.lineno = lineno
577 return n
def expr(self, nodelist):
    """Compile a ``|`` chain through com_binary() with Bitor."""
    # xor_expr ('|' xor_expr)*
    result = self.com_binary(Bitor, nodelist)
    return result
def xor_expr(self, nodelist):
    """Compile a ``^`` chain through com_binary() with Bitxor."""
    # and_expr ('^' and_expr)*
    result = self.com_binary(Bitxor, nodelist)
    return result
def and_expr(self, nodelist):
    """Compile a ``&`` chain through com_binary() with Bitand."""
    # shift_expr ('&' shift_expr)*
    result = self.com_binary(Bitand, nodelist)
    return result
591 def shift_expr(self, nodelist):
592 # shift_expr ('<<'|'>>' shift_expr)*
593 node = self.com_node(nodelist[0])
594 for i in range(2, len(nodelist), 2):
595 right = self.com_node(nodelist[i])
596 if nodelist[i-1][0] == token.LEFTSHIFT:
597 node = LeftShift([node, right])
598 node.lineno = nodelist[1][2]
599 elif nodelist[i-1][0] == token.RIGHTSHIFT:
600 node = RightShift([node, right])
601 node.lineno = nodelist[1][2]
602 else:
603 raise ValueError, "unexpected token: %s" % nodelist[i-1][0]
604 return node
606 def arith_expr(self, nodelist):
607 node = self.com_node(nodelist[0])
608 for i in range(2, len(nodelist), 2):
609 right = self.com_node(nodelist[i])
610 if nodelist[i-1][0] == token.PLUS:
611 node = Add([node, right])
612 node.lineno = nodelist[1][2]
613 elif nodelist[i-1][0] == token.MINUS:
614 node = Sub([node, right])
615 node.lineno = nodelist[1][2]
616 else:
617 raise ValueError, "unexpected token: %s" % nodelist[i-1][0]
618 return node
620 def term(self, nodelist):
621 node = self.com_node(nodelist[0])
622 for i in range(2, len(nodelist), 2):
623 right = self.com_node(nodelist[i])
624 t = nodelist[i-1][0]
625 if t == token.STAR:
626 node = Mul([node, right])
627 elif t == token.SLASH:
628 node = Div([node, right])
629 elif t == token.PERCENT:
630 node = Mod([node, right])
631 elif t == token.DOUBLESLASH:
632 node = FloorDiv([node, right])
633 else:
634 raise ValueError, "unexpected token: %s" % t
635 node.lineno = nodelist[1][2]
636 return node
638 def factor(self, nodelist):
639 elt = nodelist[0]
640 t = elt[0]
641 node = self.com_node(nodelist[-1])
642 if t == token.PLUS:
643 node = UnaryAdd(node)
644 node.lineno = elt[2]
645 elif t == token.MINUS:
646 node = UnarySub(node)
647 node.lineno = elt[2]
648 elif t == token.TILDE:
649 node = Invert(node)
650 node.lineno = elt[2]
651 return node
653 def power(self, nodelist):
654 # power: atom trailer* ('**' factor)*
655 node = self.com_node(nodelist[0])
656 for i in range(1, len(nodelist)):
657 elt = nodelist[i]
658 if elt[0] == token.DOUBLESTAR:
659 n = Power([node, self.com_node(nodelist[i+1])])
660 n.lineno = elt[2]
661 return n
663 node = self.com_apply_trailer(node, elt)
665 return node
667 def atom(self, nodelist):
668 n = self._atom_dispatch[nodelist[0][0]](nodelist)
669 n.lineno = nodelist[0][2]
670 return n
672 def atom_lpar(self, nodelist):
673 if nodelist[1][0] == token.RPAR:
674 n = Tuple(())
675 n.lineno = nodelist[0][2]
676 return n
677 return self.com_node(nodelist[1])
def atom_lsqb(self, nodelist):
    """Compile a bracketed atom; ``[]`` becomes an empty List."""
    inner = nodelist[1]
    if inner[0] != token.RSQB:
        return self.com_list_constructor(inner)
    empty = List(())
    empty.lineno = nodelist[0][2]
    return empty
def atom_lbrace(self, nodelist):
    """Compile a braced atom; ``{}`` becomes an empty Dict."""
    inner = nodelist[1]
    if inner[0] != token.RBRACE:
        return self.com_dictmaker(inner)
    return Dict(())
def atom_backquote(self, nodelist):
    """Wrap the compiled inner expression in a Backquote node."""
    inner = self.com_node(nodelist[1])
    result = Backquote(inner)
    result.lineno = nodelist[0][2]
    return result
def atom_number(self, nodelist):
    """Build a Const node from a NUMBER token."""
    ### need to verify this matches compile.c
    tok = nodelist[0]
    # NOTE(review): the literal text is evaluated with eval(); it comes
    # from the parse tree passed in.
    value = eval(tok[1])
    result = Const(value)
    result.lineno = tok[2]
    return result
def atom_string(self, nodelist):
    """Build one Const node from adjacent STRING tokens."""
    ### need to verify this matches compile.c
    # Each literal is evaluated with eval() and the pieces are joined
    # in order.
    pieces = [eval(tok[1]) for tok in nodelist]
    result = Const(''.join(pieces))
    result.lineno = nodelist[0][2]
    return result
def atom_name(self, nodelist):
    """Build a Name node from a NAME token."""
    ### any processing to do?
    tok = nodelist[0]
    result = Name(tok[1])
    result.lineno = tok[2]
    return result
718 # --------------------------------------------------------------
720 # INTERNAL PARSING UTILITIES
723 # The use of com_node() introduces a lot of extra stack frames,
724 # enough to cause a stack overflow compiling test.test_parser with
725 # the standard interpreter recursionlimit. The com_node() is a
726 # convenience function that hides the dispatch details, but comes
727 # at a very high cost. It is more efficient to dispatch directly
728 # in the callers. In these cases, use lookup_node() and call the
729 # dispatched node directly.
731 def lookup_node(self, node):
732 return self._dispatch[node[0]]
734 def com_node(self, node):
735 # Note: compile.c has handling in com_node for del_stmt, pass_stmt,
736 # break_stmt, stmt, small_stmt, flow_stmt, simple_stmt,
737 # and compound_stmt.
738 # We'll just dispatch them.
739 return self._dispatch[node[0]](node[1:])
def com_NEWLINE(self, *args):
    """Return a harmless Discard(Const(None)) for a stray NEWLINE token."""
    # A ';' at the end of a line can make a NEWLINE token appear
    # here.  Render it harmless.  (genc discards ('discard',
    # ('const', xxxx)) nodes.)
    placeholder = Const(None)
    return Discard(placeholder)
def com_arglist(self, nodelist):
    """Return (names, defaults, flags) for the children of a varargslist.

    *names* holds the parameter names in order, *defaults* the compiled
    default expressions, and *flags* the CO_VARARGS / CO_VARKEYWORDS
    bits for ``*name`` and ``**name`` parameters.
    """
    # varargslist:
    #     (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME]
    #  | fpdef ['=' test] (',' fpdef ['=' test])* [',']
    #  | ('**'|'*' '*') NAME)
    # fpdef: NAME | '(' fplist ')'
    # fplist: fpdef (',' fpdef)* [',']
    names = []
    defaults = []
    flags = 0

    i = 0
    while i < len(nodelist):
        node = nodelist[i]
        if node[0] == token.STAR or node[0] == token.DOUBLESTAR:
            if node[0] == token.STAR:
                node = nodelist[i+1]
                if node[0] == token.NAME:
                    names.append(node[1])
                    flags = flags | CO_VARARGS
                    i = i + 3

            if i < len(nodelist):
                # should be DOUBLESTAR or STAR STAR
                t = nodelist[i][0]
                if t == token.DOUBLESTAR:
                    node = nodelist[i+1]
                elif t == token.STAR:
                    # '*' '*' NAME: the name follows the two STAR tokens.
                    # The token module has no STARSTAR constant, so the
                    # split spelling has to be detected via STAR.
                    node = nodelist[i+2]
                else:
                    raise ValueError("unexpected token: %s" % t)
                names.append(node[1])
                flags = flags | CO_VARKEYWORDS

            break

        # fpdef: NAME | '(' fplist ')'
        names.append(self.com_fpdef(node))

        i = i + 1
        if i >= len(nodelist):
            break

        if nodelist[i][0] == token.EQUAL:
            defaults.append(self.com_node(nodelist[i + 1]))
            i = i + 2
        elif len(defaults):
            # Treat "(a=1, b)" as "(a=1, b=None)"
            defaults.append(Const(None))

        # step over the separating comma
        i = i + 1

    return names, defaults, flags
def com_fpdef(self, node):
    """Return the name, or nested tuple of names, for an fpdef node."""
    # fpdef: NAME | '(' fplist ')'
    first = node[1]
    if first[0] != token.LPAR:
        return first[1]
    return self.com_fplist(node[2])
807 def com_fplist(self, node):
808 # fplist: fpdef (',' fpdef)* [',']
809 if len(node) == 2:
810 return self.com_fpdef(node[1])
811 list = []
812 for i in range(1, len(node), 2):
813 list.append(self.com_fpdef(node[i]))
814 return tuple(list)
def com_dotted_name(self, node):
    """Join the name parts found in *node* with dots and return the string."""
    # Only tuple children whose type is 1 contribute a part.
    parts = []
    for child in node:
        if type(child) == type(()) and child[0] == 1:
            parts.append(child[1])
    return '.'.join(parts)
824 def com_dotted_as_name(self, node):
825 dot = self.com_dotted_name(node[1])
826 if len(node) <= 2:
827 return dot, None
828 if node[0] == symbol.dotted_name:
829 pass
830 else:
831 assert node[2][1] == 'as'
832 assert node[3][0] == token.NAME
833 return dot, node[3][1]
def com_import_as_name(self, node):
    """Return (name, alias) for one name in a ``from`` import.

    A ``*`` yields ('*', None); the alias is None without ``as``.
    """
    if node[0] == token.STAR:
        return '*', None
    assert node[0] == symbol.import_as_name
    parts = node[1:]
    if len(parts) == 1:
        assert parts[0][0] == token.NAME
        return parts[0][1], None

    assert parts[1][1] == 'as', parts
    assert parts[2][0] == token.NAME
    return parts[0][1], parts[2][1]
848 def com_bases(self, node):
849 bases = []
850 for i in range(1, len(node), 2):
851 bases.append(self.com_node(node[i]))
852 return bases
def com_try_finally(self, nodelist):
    """Build a TryFinally node from the try suite and the finally suite."""
    # try_fin_stmt: "try" ":" suite "finally" ":" suite
    body = self.com_node(nodelist[2])
    final = self.com_node(nodelist[5])
    result = TryFinally(body, final)
    result.lineno = nodelist[0][2]
    return result
def com_try_except(self, nodelist):
    """Build a TryExcept node from a try/except[/else] statement.

    Each handler is stored as (expr1, expr2, body), where expr1 and
    expr2 are None when the clause does not give them.
    """
    # try_except: 'try' ':' suite (except_clause ':' suite)* ['else' suite]
    #tryexcept:  [TryNode, [except_clauses], elseNode)]
    # The try suite is compiled exactly once and reused below.
    body = self.com_node(nodelist[2])
    clauses = []
    elseNode = None
    for i in range(3, len(nodelist), 3):
        node = nodelist[i]
        if node[0] == symbol.except_clause:
            # except_clause: 'except' [expr [',' expr]] */
            if len(node) > 2:
                expr1 = self.com_node(node[2])
                if len(node) > 4:
                    expr2 = self.com_assign(node[4], OP_ASSIGN)
                else:
                    expr2 = None
            else:
                expr1 = expr2 = None
            clauses.append((expr1, expr2, self.com_node(nodelist[i+2])))
        elif node[0] == token.NAME:
            # A NAME token in clause position starts the else part.
            elseNode = self.com_node(nodelist[i+2])
    n = TryExcept(body, clauses, elseNode)
    n.lineno = nodelist[0][2]
    return n
def com_augassign_op(self, node):
    """Return the operator token held by an augassign node."""
    kind = node[0]
    assert kind == symbol.augassign
    return node[1]
def com_augassign(self, node):
    """Return node suitable for lvalue of augmented assignment

    Names, slices, subscripts, and attributes are the only allowable
    nodes; anything else raises SyntaxError.
    """
    l = self.com_node(node)
    if l.__class__ in (Name, Slice, Subscript, Getattr):
        return l
    raise SyntaxError("can't assign to %s" % l.__class__.__name__)
def com_assign(self, node, assigning):
    """Return a node suitable for use as an "lvalue".

    *assigning* is passed through to the node that is built.  Targets
    that cannot be assigned to raise SyntaxError.
    """
    # loop to avoid trivial recursion
    while 1:
        kind = node[0]
        if kind in (symbol.exprlist, symbol.testlist):
            if len(node) > 2:
                return self.com_assign_tuple(node, assigning)
            node = node[1]
        elif kind in _assign_types:
            if len(node) > 2:
                raise SyntaxError("can't assign to operator")
            node = node[1]
        elif kind == symbol.power:
            if node[1][0] != symbol.atom:
                raise SyntaxError("can't assign to operator")
            if len(node) > 2:
                # Apply every trailer but the last to build the primary;
                # the last trailer decides the kind of target.
                primary = self.com_node(node[1])
                for i in range(2, len(node) - 1):
                    trailer = node[i]
                    if trailer[0] == token.DOUBLESTAR:
                        raise SyntaxError("can't assign to operator")
                    primary = self.com_apply_trailer(primary, trailer)
                return self.com_assign_trailer(primary, node[-1],
                                               assigning)
            node = node[1]
        elif kind == symbol.atom:
            opener = node[1][0]
            if opener == token.LPAR:
                node = node[2]
                if node[0] == token.RPAR:
                    raise SyntaxError("can't assign to ()")
            elif opener == token.LSQB:
                node = node[2]
                if node[0] == token.RSQB:
                    raise SyntaxError("can't assign to []")
                return self.com_assign_list(node, assigning)
            elif opener == token.NAME:
                return self.com_assign_name(node[1], assigning)
            else:
                raise SyntaxError("can't assign to literal")
        else:
            raise SyntaxError("bad assignment")
def com_assign_tuple(self, node, assigning):
    """Build an AssTuple from the targets of a tuple assignment."""
    # Odd positions are compiled; the positions between them are skipped.
    targets = [self.com_assign(node[i], assigning)
               for i in range(1, len(node), 2)]
    return AssTuple(targets)
def com_assign_list(self, node, assigning):
    """Build an AssList from the targets of a list assignment.

    A list comprehension in target position raises SyntaxError.
    """
    targets = []
    count = len(node)
    for i in range(1, count, 2):
        if i + 1 < count:
            follower = node[i + 1]
            if follower[0] == symbol.list_for:
                raise SyntaxError("can't assign to list comprehension")
            assert follower[0] == token.COMMA, follower
        targets.append(self.com_assign(node[i], assigning))
    return AssList(targets)
def com_assign_name(self, node, assigning):
    """Build an AssName from a NAME token, keeping its line number."""
    target = AssName(node[1], assigning)
    target.lineno = node[2]
    return target
def com_assign_trailer(self, primary, node, assigning):
    """Build the assignment target selected by the final trailer.

    ``.name`` gives an attribute target and ``[...]`` a subscript
    target; a call or an unknown trailer raises SyntaxError.
    """
    kind = node[1][0]
    if kind == token.DOT:
        return self.com_assign_attr(primary, node[2], assigning)
    elif kind == token.LSQB:
        return self.com_subscriptlist(primary, node[2], assigning)
    elif kind == token.LPAR:
        raise SyntaxError("can't assign to function call")
    raise SyntaxError("unknown trailer type: %s" % kind)
def com_assign_attr(self, primary, node, assigning):
    """Build an AssAttr for the attribute named by the NAME token *node*."""
    attrname = node[1]
    return AssAttr(primary, attrname, assigning)
979 def com_binary(self, constructor, nodelist):
980 "Compile 'NODE (OP NODE)*' into (type, [ node1, ..., nodeN ])."
981 l = len(nodelist)
982 if l == 1:
983 n = nodelist[0]
984 return self.lookup_node(n)(n[1:])
985 items = []
986 for i in range(0, l, 2):
987 n = nodelist[i]
988 items.append(self.lookup_node(n)(n[1:]))
989 return constructor(items)
def com_stmt(self, node):
    """Compile *node* and return the result as a Stmt node."""
    compiled = self.lookup_node(node)(node[1:])
    assert compiled is not None
    if not isinstance(compiled, Stmt):
        # Wrap a single node so the result is always a Stmt.
        compiled = Stmt([compiled])
    return compiled
def com_append_stmt(self, stmts, node):
    """Compile *node* and add the result to the list *stmts* in place.

    The nodes of a Stmt result are spliced in individually.
    """
    compiled = self.com_node(node)
    assert compiled is not None
    if not isinstance(compiled, Stmt):
        stmts.append(compiled)
        return
    stmts.extend(compiled.nodes)
if hasattr(symbol, 'list_for'):
    def com_list_constructor(self, nodelist):
        """Compile a listmaker into a List or a list comprehension."""
        # listmaker: test ( list_for | (',' test)* [','] )
        values = []
        last = len(nodelist) - 1
        for i in range(1, len(nodelist)):
            child = nodelist[i]
            if child[0] == symbol.list_for:
                # list_for must be the final child.
                assert i == last
                return self.com_list_comprehension(values[0], child)
            if child[0] == token.COMMA:
                continue
            values.append(self.com_node(child))
        return List(values)

    def com_list_comprehension(self, expr, node):
        """Build a ListComp for *expr* from a chain of for/if clauses."""
        # list_iter: list_for | list_if
        # list_for: 'for' exprlist 'in' testlist [list_iter]
        # list_if: 'if' test [list_iter]

        # XXX should raise SyntaxError for assignment

        lineno = node[1][2]
        fors = []
        while node:
            keyword = node[1][1]
            if keyword == 'for':
                target = self.com_assign(node[2], OP_ASSIGN)
                source = self.com_node(node[4])
                newfor = ListCompFor(target, source, [])
                newfor.lineno = node[1][2]
                fors.append(newfor)
                tail = 5
            elif keyword == 'if':
                condition = self.com_node(node[2])
                newif = ListCompIf(condition)
                newif.lineno = node[1][2]
                # An ``if`` attaches to the most recent ``for``.
                newfor.ifs.append(newif)
                tail = 3
            else:
                raise SyntaxError(
                    "unexpected list comprehension element: %s %d"
                    % (node, lineno))
            # A clause of exactly *tail* items has no further list_iter.
            if len(node) == tail:
                node = None
            else:
                node = self.com_list_iter(node[tail])
        result = ListComp(expr, fors)
        result.lineno = lineno
        return result

    def com_list_iter(self, node):
        """Return the clause wrapped by a list_iter node."""
        assert node[0] == symbol.list_iter
        return node[1]
else:
    def com_list_constructor(self, nodelist):
        """Compile the elements of a list display into a List."""
        values = [self.com_node(nodelist[i])
                  for i in range(1, len(nodelist), 2)]
        return List(values)
def com_dictmaker(self, nodelist):
    """Build a Dict from the key/value pairs of a dictmaker node."""
    # dictmaker: test ':' test (',' test ':' value)* [',']
    items = []
    for base in range(1, len(nodelist), 4):
        # Each group of four holds the key at +0 and the value at +2.
        key = self.com_node(nodelist[base])
        value = self.com_node(nodelist[base + 2])
        items.append((key, value))
    return Dict(items)
def com_apply_trailer(self, primaryNode, nodelist):
    """Apply one call, attribute or subscript trailer to *primaryNode*.

    Any other trailer raises SyntaxError.
    """
    kind = nodelist[1][0]
    if kind == token.LPAR:
        return self.com_call_function(primaryNode, nodelist[2])
    elif kind == token.DOT:
        return self.com_select_member(primaryNode, nodelist[2])
    elif kind == token.LSQB:
        return self.com_subscriptlist(primaryNode, nodelist[2], OP_APPLY)

    raise SyntaxError('unknown node type: %s' % kind)
def com_select_member(self, primaryNode, nodelist):
    """Build a Getattr for the attribute named by a NAME token.

    A token that is not a NAME raises SyntaxError.
    """
    if nodelist[0] != token.NAME:
        raise SyntaxError("member must be a name")
    attr = Getattr(primaryNode, nodelist[1])
    attr.lineno = nodelist[2]
    return attr
1094 def com_call_function(self, primaryNode, nodelist):
1095 if nodelist[0] == token.RPAR:
1096 return CallFunc(primaryNode, [])
1097 args = []
1098 kw = 0
1099 len_nodelist = len(nodelist)
1100 for i in range(1, len_nodelist, 2):
1101 node = nodelist[i]
1102 if node[0] == token.STAR or node[0] == token.DOUBLESTAR:
1103 break
1104 kw, result = self.com_argument(node, kw)
1105 args.append(result)
1106 else:
1107 # No broken by star arg, so skip the last one we processed.
1108 i = i + 1
1109 if i < len_nodelist and nodelist[i][0] == token.COMMA:
1110 # need to accept an application that looks like "f(a, b,)"
1111 i = i + 1
1112 star_node = dstar_node = None
1113 while i < len_nodelist:
1114 tok = nodelist[i]
1115 ch = nodelist[i+1]
1116 i = i + 3
1117 if tok[0]==token.STAR:
1118 if star_node is not None:
1119 raise SyntaxError, 'already have the varargs indentifier'
1120 star_node = self.com_node(ch)
1121 elif tok[0]==token.DOUBLESTAR:
1122 if dstar_node is not None:
1123 raise SyntaxError, 'already have the kwargs indentifier'
1124 dstar_node = self.com_node(ch)
1125 else:
1126 raise SyntaxError, 'unknown node type: %s' % tok
1128 return CallFunc(primaryNode, args, star_node, dstar_node)
def com_argument(self, nodelist, kw):
    """Convert a single call argument.

    Returns a pair (is_keyword, node): (0, expr) for a positional
    argument, (1, Keyword(...)) for a keyword argument.  kw is the flag
    returned for the previous argument.

    Raises SyntaxError for a positional argument following a keyword
    argument, or when the keyword part does not reduce to a NAME token.
    """
    if len(nodelist) != 2:
        # Keyword form: child 1 is the keyword, child 3 the value.
        value = self.com_node(nodelist[3])
        target = nodelist[1]
        # Unwrap single-child nodes until a NAME token (or something
        # that is not a single-child wrapper) is reached.
        while len(target) == 2 and target[0] != token.NAME:
            target = target[1]
        if target[0] != token.NAME:
            raise SyntaxError("keyword can't be an expression (%s)" % target[0])
        keyword = Keyword(target[1], value)
        keyword.lineno = target[2]
        return 1, keyword

    if kw:
        raise SyntaxError("non-keyword arg after keyword arg")
    return 0, self.com_node(nodelist[1])
def com_subscriptlist(self, primary, nodelist, assigning):
    """Translate a subscript list into a Slice or Subscript node.

    A lone subscript that contains a colon in its first or second
    child and does not end in a sliceop is delegated to com_slice();
    everything else becomes Subscript(primary, assigning, subscripts).
    """
    # slicing:          simple_slicing | extended_slicing
    # simple_slicing:   primary "[" short_slice "]"
    # extended_slicing: primary "[" slice_list "]"
    # slice_list:       slice_item ("," slice_item)* [","]

    if len(nodelist) == 2:
        # backwards compat slice for '[i:j]'
        item = nodelist[1]
        has_colon = item[1][0] == token.COLON or \
                    (len(item) > 2 and item[2][0] == token.COLON)
        if has_colon and item[-1][0] != symbol.sliceop:
            return self.com_slice(primary, item, assigning)

    # Subscripts sit at the odd indices; the entries between them are
    # skipped.
    subscripts = []
    index = 1
    count = len(nodelist)
    while index < count:
        subscripts.append(self.com_subscript(nodelist[index]))
        index = index + 2

    return Subscript(primary, assigning, subscripts)
def com_subscript(self, node):
    """Translate one subscript item.

    Two leading DOT tokens yield Ellipsis(); a leading COLON or more
    than one child yields a slice object via com_sliceobj(); anything
    else is a plain expression handled by com_node().
    """
    # slice_item: expression | proper_slice | ellipsis
    first = node[1]
    kind = first[0]
    if kind == token.DOT and node[2][0] == token.DOT:
        return Ellipsis()
    if kind != token.COLON and len(node) <= 2:
        return self.com_node(first)
    return self.com_sliceobj(node)
def com_sliceobj(self, node):
    """Build a Sliceobj from a subscript node.

    The resulting item list holds the lower bound, the upper bound and
    one entry per remaining child (the strides).  Missing parts are
    represented by Const(None).
    """
    # proper_slice: short_slice | long_slice
    # short_slice:  [lower_bound] ":" [upper_bound]
    # long_slice:   short_slice ":" [stride]
    # lower_bound:  expression
    # upper_bound:  expression
    # stride:       expression
    #
    # Note: a stride may be further slicing...

    if node[1][0] == token.COLON:
        lower = Const(None)
        pos = 2
    else:
        lower = self.com_node(node[1])
        # index 2 is the COLON, so continue after it
        pos = 3

    if pos < len(node) and node[pos][0] == symbol.test:
        upper = self.com_node(node[pos])
        pos = pos + 1
    else:
        upper = Const(None)

    items = [lower, upper]

    # The short_slice is complete; whatever is left are strides.
    for stride in node[pos:]:
        if len(stride) == 2:
            items.append(Const(None))
        else:
            items.append(self.com_node(stride[2]))

    return Sliceobj(items)
def com_slice(self, primary, node, assigning):
    """Build a Slice node for a simple '[lower:upper]' subscript.

    Either bound is None when it is absent from node.
    """
    # short_slice: [lower_bound] ":" [upper_bound]
    lower = None
    upper = None
    size = len(node)
    if size == 4:
        # lower ":" upper
        lower = self.com_node(node[1])
        upper = self.com_node(node[3])
    elif size == 3:
        # Exactly one bound; the position of the COLON tells which.
        if node[1][0] == token.COLON:
            upper = self.com_node(node[2])
        else:
            lower = self.com_node(node[1])
    return Slice(primary, assigning, lower, upper)
def get_docstring(self, node, n=None):
    """Return the docstring found at the start of a parse tree, or None.

    When n is None, node is a full parse-tree node: its first item is
    taken as the node type and the rest as its children.  Otherwise n
    is the node type and node already holds the children.
    """
    if n is None:
        n = node[0]
        node = node[1:]

    if n == symbol.suite and len(node) == 1:
        return self.get_docstring(node[0])

    if n == symbol.suite or n == symbol.file_input:
        # Only the first statement can carry the docstring.
        for child in node:
            if child[0] == symbol.stmt:
                return self.get_docstring(child)
        return None

    if n == symbol.atom:
        if node[0][0] != token.STRING:
            return None
        # Concatenate the string tokens of the atom.
        # NOTE(review): eval() is applied to each token's text; this
        # presumes the tree came from the parser -- confirm for other
        # callers.
        text = ''
        for tok in node:
            text = text + eval(tok[1])
        return text

    if n in (symbol.stmt, symbol.simple_stmt, symbol.small_stmt):
        return self.get_docstring(node[0])

    # Expression wrappers are followed only while they have one child.
    if n in _doc_nodes and len(node) == 1:
        return self.get_docstring(node[0])
    return None
# Node types that get_docstring() looks through when they have exactly
# one child.
_doc_nodes = [
    symbol.expr_stmt,
    symbol.testlist,
    symbol.testlist_safe,
    symbol.test,
    symbol.and_test,
    symbol.not_test,
    symbol.comparison,
    symbol.expr,
    symbol.xor_expr,
    symbol.and_expr,
    symbol.shift_expr,
    symbol.arith_expr,
    symbol.term,
    symbol.factor,
    symbol.power,
]
1274 # comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '=='
1275 # | 'in' | 'not' 'in' | 'is' | 'is' 'not'
1276 _cmp_types = {
1277 token.LESS : '<',
1278 token.GREATER : '>',
1279 token.EQEQUAL : '==',
1280 token.EQUAL : '==',
1281 token.LESSEQUAL : '<=',
1282 token.GREATEREQUAL : '>=',
1283 token.NOTEQUAL : '!=',
# Statement and expression node types.
# NOTE(review): presumably the set of node types the transformer is
# willing to dispatch on; the code using this list is outside this
# section -- confirm.
_legal_node_types = [
    symbol.funcdef,
    symbol.classdef,
    symbol.stmt,
    symbol.small_stmt,
    symbol.flow_stmt,
    symbol.simple_stmt,
    symbol.compound_stmt,
    symbol.expr_stmt,
    symbol.print_stmt,
    symbol.del_stmt,
    symbol.pass_stmt,
    symbol.break_stmt,
    symbol.continue_stmt,
    symbol.return_stmt,
    symbol.raise_stmt,
    symbol.import_stmt,
    symbol.global_stmt,
    symbol.exec_stmt,
    symbol.assert_stmt,
    symbol.if_stmt,
    symbol.while_stmt,
    symbol.for_stmt,
    symbol.try_stmt,
    symbol.suite,
    symbol.testlist,
    symbol.testlist_safe,
    symbol.test,
    symbol.and_test,
    symbol.not_test,
    symbol.comparison,
    symbol.exprlist,
    symbol.expr,
    symbol.xor_expr,
    symbol.and_expr,
    symbol.shift_expr,
    symbol.arith_expr,
    symbol.term,
    symbol.factor,
    symbol.power,
    symbol.atom,
]

# yield_stmt is only added when the running interpreter's symbol module
# defines it.
if hasattr(symbol, 'yield_stmt'):
    _legal_node_types.append(symbol.yield_stmt)
# Expression node types from test down to factor.
# NOTE(review): presumably the wrapper nodes looked through when
# handling an assignment target; the code using this list is outside
# this section -- confirm.
_assign_types = [
    symbol.test,
    symbol.and_test,
    symbol.not_test,
    symbol.comparison,
    symbol.expr,
    symbol.xor_expr,
    symbol.and_expr,
    symbol.shift_expr,
    symbol.arith_expr,
    symbol.term,
    symbol.factor,
]
1346 import types
# Combined number -> name table for grammar symbols and tokens; token
# names are merged last, so they win if a number occurs in both tables.
_names = {}
_names.update(symbol.sym_name)
_names.update(token.tok_name)
def debug_tree(tree):
    """Return a nested list copy of tree with numeric codes made readable.

    Integers are replaced by their entry in _names (or kept as they are
    when no entry exists), strings are copied unchanged and every other
    element is converted recursively.
    """
    readable = []
    for item in tree:
        kind = type(item)
        if kind == types.IntType:
            item = _names.get(item, item)
        elif kind != types.StringType:
            item = debug_tree(item)
        readable.append(item)
    return readable