1 """A flow graph representation for Python bytecode"""
9 from compiler
import misc
10 from compiler
.consts
import CO_OPTIMIZED
, CO_NEWLOCALS
, CO_VARARGS
, \
16 return cmp(a
.bid
, b
.bid
)
22 self
.current
= self
.entry
= Block()
23 self
.exit
= Block("exit")
24 self
.blocks
= misc
.Set()
25 self
.blocks
.add(self
.entry
)
26 self
.blocks
.add(self
.exit
)
28 def startBlock(self
, block
):
31 print "end", repr(self
.current
)
32 print " next", self
.current
.next
33 print " ", self
.current
.get_children()
37 def nextBlock(self
, block
=None):
38 # XXX think we need to specify when there is implicit transfer
39 # from one block to the next. might be better to represent this
40 # with explicit JUMP_ABSOLUTE instructions that are optimized
41 # out when they are unnecessary.
43 # I think this strategy works: each block has a child
44 # designated as "next" which is returned as the last of the
45 # children. because the nodes in a graph are emitted in
46 # reverse post order, the "next" block will always be emitted
47 # immediately after its parent.
48 # Worry: maintaining this invariant could be tricky
50 block
= self
.newBlock()
52 # Note: If the current block ends with an unconditional
53 # control transfer, then it is incorrect to add an implicit
54 # transfer to the block graph. The current code requires
55 # these edges to get the blocks emitted in the right order,
56 # however. :-( If a client needs to remove these edges, call
59 self
.current
.addNext(block
)
60 self
.startBlock(block
)
67 def startExitBlock(self
):
68 self
.startBlock(self
.exit
)
72 def _enable_debug(self
):
75 def _disable_debug(self
):
78 def emit(self
, *inst
):
81 if inst
[0] == 'RETURN_VALUE':
82 self
.current
.addOutEdge(self
.exit
)
83 if len(inst
) == 2 and isinstance(inst
[1], Block
):
84 self
.current
.addOutEdge(inst
[1])
85 self
.current
.emit(inst
)
87 def getBlocksInOrder(self
):
88 """Return the blocks in reverse postorder
90 i.e. each node appears before all of its successors
92 # XXX make sure every node that doesn't have an explicit next
93 # is set so that next points to exit
94 for b
in self
.blocks
.elements():
99 order
= dfs_postorder(self
.entry
, {})
101 self
.fixupOrder(order
, self
.exit
)
103 if not self
.exit
in order
:
104 order
.append(self
.exit
)
108 def fixupOrder(self
, blocks
, default_next
):
109 """Fixup bad order introduced by DFS."""
111 # XXX This is a total mess. There must be a better way to get
112 # the code blocks in the right order.
114 self
.fixupOrderHonorNext(blocks
, default_next
)
115 self
.fixupOrderForward(blocks
, default_next
)
117 def fixupOrderHonorNext(self
, blocks
, default_next
):
118 """Fix one problem with DFS.
120 The DFS uses child block, but doesn't know about the special
121 "next" block. As a result, the DFS can order blocks so that a
122 block isn't next to the right block for implicit control
126 for i
in range(len(blocks
)):
129 for i
in range(0, len(blocks
) - 1):
132 if not b
.next
or b
.next
[0] == default_next
or b
.next
[0] == n
:
134 # The blocks are in the wrong order. Find the chain of
135 # blocks to insert where they belong.
139 while elt
.next
and elt
.next
[0] != default_next
:
140 chain
.append(elt
.next
[0])
142 # Now remove the blocks in the chain from the current
143 # block list, so that they can be re-inserted.
147 l
.append((index
[b
], b
))
152 # Insert the chain in the proper location
153 blocks
[i
:i
+ 1] = [cur
] + chain
154 # Finally, re-compute the block indexes
155 for i
in range(len(blocks
)):
158 def fixupOrderForward(self
, blocks
, default_next
):
159 """Make sure all JUMP_FORWARDs jump forward"""
164 index
[b
] = len(chains
)
166 if b
.next
and b
.next
[0] == default_next
:
174 for i
in range(len(chains
)):
177 for c
in b
.get_children():
181 if inst
[0] == 'JUMP_FORWARD':
186 constraints
.append((index
[c
], i
))
191 # XXX just do one for now
192 # do swaps to get things in the right order
193 goes_before
, a_chain
= constraints
[0]
194 assert a_chain
> goes_before
197 chains
.insert(goes_before
, c
)
205 return self
.blocks
.elements()
208 """Return nodes appropriate for use with dominator"""
211 def getContainedGraphs(self
):
213 for b
in self
.getBlocks():
214 l
.extend(b
.getContainedGraphs())
217 def dfs_postorder(b
, seen
):
218 """Depth-first search of tree rooted at b, return in postorder"""
221 for c
in b
.get_children():
224 order
= order
+ dfs_postorder(c
, seen
)
231 def __init__(self
, label
=''):
233 self
.inEdges
= misc
.Set()
234 self
.outEdges
= misc
.Set()
236 self
.bid
= Block
._count
238 Block
._count
= Block
._count
+ 1
242 return "<block %s id=%d>" % (self
.label
, self
.bid
)
244 return "<block id=%d>" % (self
.bid
)
247 insts
= map(str, self
.insts
)
248 return "<block %s %d:\n%s>" % (self
.label
, self
.bid
,
249 string
.join(insts
, '\n'))
251 def emit(self
, inst
):
254 self
.outEdges
.add(inst
[1])
255 self
.insts
.append(inst
)
257 def getInstructions(self
):
260 def addInEdge(self
, block
):
261 self
.inEdges
.add(block
)
263 def addOutEdge(self
, block
):
264 self
.outEdges
.add(block
)
266 def addNext(self
, block
):
267 self
.next
.append(block
)
268 assert len(self
.next
) == 1, map(str, self
.next
)
270 _uncond_transfer
= ('RETURN_VALUE', 'RAISE_VARARGS',
271 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
274 """Remove bogus edge for unconditional transfers
276 Each block has a next edge that accounts for implicit control
277 transfers, e.g. from a JUMP_IF_FALSE to the block that will be
278 executed if the test is true.
280 These edges must remain for the current assembler code to
281 work. If they are removed, the dfs_postorder gets things in
282 weird orders. However, they shouldn't be there for other
283 purposes, e.g. conversion to SSA form. This method will
284 remove the next edge when it follows an unconditional control
288 op
, arg
= self
.insts
[-1]
289 except (IndexError, ValueError):
291 if op
in self
._uncond
_transfer
:
294 def get_children(self
):
295 if self
.next
and self
.next
[0] in self
.outEdges
:
296 self
.outEdges
.remove(self
.next
[0])
297 return self
.outEdges
.elements() + self
.next
299 def getContainedGraphs(self
):
300 """Return all graphs contained within this block.
302 For example, a MAKE_FUNCTION block will contain a reference to
303 the graph for the function body.
306 for inst
in self
.insts
:
310 if hasattr(op
, 'graph'):
311 contained
.append(op
.graph
)
314 # flags for code objects
316 # the FlowGraph is transformed in place; it exists in one of these states
322 class PyFlowGraph(FlowGraph
):
323 super_init
= FlowGraph
.__init
__
325 def __init__(self
, name
, filename
, args
=(), optimized
=0, klass
=None):
328 self
.filename
= filename
329 self
.docstring
= None
330 self
.args
= args
# XXX
331 self
.argcount
= getArgCount(args
)
334 self
.flags
= CO_OPTIMIZED | CO_NEWLOCALS
339 # Free variables found by the symbol table scan, including
340 # variables used only in nested scopes, are included here.
343 # The closure list is used to track the order of cell
344 # variables and free variables in the resulting code object.
345 # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
346 # kinds of variables.
348 self
.varnames
= list(args
) or []
349 for i
in range(len(self
.varnames
)):
350 var
= self
.varnames
[i
]
351 if isinstance(var
, TupleArg
):
352 self
.varnames
[i
] = var
.getName()
355 def setDocstring(self
, doc
):
358 def setFlag(self
, flag
):
359 self
.flags
= self
.flags | flag
360 if flag
== CO_VARARGS
:
361 self
.argcount
= self
.argcount
- 1
363 def checkFlag(self
, flag
):
364 if self
.flags
& flag
:
367 def setFreeVars(self
, names
):
368 self
.freevars
= list(names
)
370 def setCellVars(self
, names
):
371 self
.cellvars
= names
374 """Get a Python code object"""
375 if self
.stage
== RAW
:
376 self
.computeStackDepth()
378 if self
.stage
== FLAT
:
380 if self
.stage
== CONV
:
382 if self
.stage
== DONE
:
383 return self
.newCodeObject()
384 raise RuntimeError, "inconsistent PyFlowGraph state"
386 def dump(self
, io
=None):
393 if opname
== "SET_LINENO":
396 print "\t", "%3d" % pc
, opname
399 print "\t", "%3d" % pc
, opname
, t
[1]
404 def computeStackDepth(self
):
405 """Compute the max stack depth.
407 Approach is to compute the stack effect of each basic block.
408 Then find the path through the code with the largest total
413 for b
in self
.getBlocks():
414 depth
[b
] = findDepth(b
.getInstructions())
423 children
= b
.get_children()
425 return max([max_depth(c
, d
) for c
in children
])
427 if not b
.label
== "exit":
428 return max_depth(self
.exit
, d
)
432 self
.stacksize
= max_depth(self
.entry
, 0)
434 def flattenGraph(self
):
435 """Arrange the blocks in order and resolve jumps"""
436 assert self
.stage
== RAW
437 self
.insts
= insts
= []
441 for b
in self
.getBlocksInOrder():
443 for inst
in b
.getInstructions():
452 for i
in range(len(insts
)):
459 if self
.hasjrel
.has_elt(opname
):
461 offset
= begin
[oparg
] - pc
462 insts
[i
] = opname
, offset
463 elif self
.hasjabs
.has_elt(opname
):
464 insts
[i
] = opname
, begin
[inst
[1]]
468 for i
in dis
.hasjrel
:
469 hasjrel
.add(dis
.opname
[i
])
471 for i
in dis
.hasjabs
:
472 hasjabs
.add(dis
.opname
[i
])
474 def convertArgs(self
):
475 """Convert arguments from symbolic to concrete form"""
476 assert self
.stage
== FLAT
477 self
.consts
.insert(0, self
.docstring
)
479 for i
in range(len(self
.insts
)):
483 conv
= self
._converters
.get(opname
, None)
485 self
.insts
[i
] = opname
, conv(self
, oparg
)
488 def sort_cellvars(self
):
489 """Sort cellvars in the order of varnames and prune from freevars.
492 for name
in self
.cellvars
:
494 self
.cellvars
= [name
for name
in self
.varnames
495 if cells
.has_key(name
)]
496 for name
in self
.cellvars
:
498 self
.cellvars
= self
.cellvars
+ cells
.keys()
499 self
.closure
= self
.cellvars
+ self
.freevars
501 def _lookupName(self
, name
, list):
502 """Return index of name in list, appending if necessary
504 This routine uses a list instead of a dictionary, because a
505 dictionary can't store two different keys if the keys have the
506 same value but different types, e.g. 2 and 2L. The compiler
507 must treat these two separately, so it does an explicit type
508 comparison before comparing the values.
511 for i
in range(len(list)):
512 if t
== type(list[i
]) and list[i
] == name
:
519 def _convert_LOAD_CONST(self
, arg
):
520 if hasattr(arg
, 'getCode'):
522 return self
._lookupName
(arg
, self
.consts
)
524 def _convert_LOAD_FAST(self
, arg
):
525 self
._lookupName
(arg
, self
.names
)
526 return self
._lookupName
(arg
, self
.varnames
)
527 _convert_STORE_FAST
= _convert_LOAD_FAST
528 _convert_DELETE_FAST
= _convert_LOAD_FAST
530 def _convert_LOAD_NAME(self
, arg
):
531 if self
.klass
is None:
532 self
._lookupName
(arg
, self
.varnames
)
533 return self
._lookupName
(arg
, self
.names
)
535 def _convert_NAME(self
, arg
):
536 if self
.klass
is None:
537 self
._lookupName
(arg
, self
.varnames
)
538 return self
._lookupName
(arg
, self
.names
)
539 _convert_STORE_NAME
= _convert_NAME
540 _convert_DELETE_NAME
= _convert_NAME
541 _convert_IMPORT_NAME
= _convert_NAME
542 _convert_IMPORT_FROM
= _convert_NAME
543 _convert_STORE_ATTR
= _convert_NAME
544 _convert_LOAD_ATTR
= _convert_NAME
545 _convert_DELETE_ATTR
= _convert_NAME
546 _convert_LOAD_GLOBAL
= _convert_NAME
547 _convert_STORE_GLOBAL
= _convert_NAME
548 _convert_DELETE_GLOBAL
= _convert_NAME
550 def _convert_DEREF(self
, arg
):
551 self
._lookupName
(arg
, self
.names
)
552 self
._lookupName
(arg
, self
.varnames
)
553 return self
._lookupName
(arg
, self
.closure
)
554 _convert_LOAD_DEREF
= _convert_DEREF
555 _convert_STORE_DEREF
= _convert_DEREF
557 def _convert_LOAD_CLOSURE(self
, arg
):
558 self
._lookupName
(arg
, self
.varnames
)
559 return self
._lookupName
(arg
, self
.closure
)
561 _cmp
= list(dis
.cmp_op
)
562 def _convert_COMPARE_OP(self
, arg
):
563 return self
._cmp
.index(arg
)
565 # similarly for other opcodes...
567 for name
, obj
in locals().items():
568 if name
[:9] == "_convert_":
570 _converters
[opname
] = obj
571 del name
, obj
, opname
573 def makeByteCode(self
):
574 assert self
.stage
== CONV
575 self
.lnotab
= lnotab
= LineAddrTable()
579 lnotab
.addCode(self
.opnum
[opname
])
582 if opname
== "SET_LINENO":
583 lnotab
.nextLine(oparg
)
584 hi
, lo
= twobyte(oparg
)
586 lnotab
.addCode(self
.opnum
[opname
], lo
, hi
)
589 print self
.opnum
[opname
], lo
, hi
594 for num
in range(len(dis
.opname
)):
595 opnum
[dis
.opname
[num
]] = num
598 def newCodeObject(self
):
599 assert self
.stage
== DONE
600 if (self
.flags
& CO_NEWLOCALS
) == 0:
603 nlocals
= len(self
.varnames
)
604 argcount
= self
.argcount
605 if self
.flags
& CO_VARKEYWORDS
:
606 argcount
= argcount
- 1
607 return new
.code(argcount
, nlocals
, self
.stacksize
, self
.flags
,
608 self
.lnotab
.getCode(), self
.getConsts(),
609 tuple(self
.names
), tuple(self
.varnames
),
610 self
.filename
, self
.name
, self
.lnotab
.firstline
,
611 self
.lnotab
.getTable(), tuple(self
.freevars
),
612 tuple(self
.cellvars
))
615 """Return a tuple for the const slot of the code object
617 Must convert references to code (MAKE_FUNCTION) to code
621 for elt
in self
.consts
:
622 if isinstance(elt
, PyFlowGraph
):
628 if opname
[:4] == 'JUMP':
632 """Helper for marking func defs with nested tuples in arglist"""
633 def __init__(self
, count
, names
):
637 return "TupleArg(%s, %s)" % (self
.count
, self
.names
)
639 return ".%d" % self
.count
641 def getArgCount(args
):
645 if isinstance(arg
, TupleArg
):
646 numNames
= len(misc
.flatten(arg
.names
))
647 argcount
= argcount
- numNames
651 """Convert an int argument into high and low bytes"""
652 assert type(val
) == types
.IntType
653 return divmod(val
, 256)
658 This class builds the lnotab, which is documented in compile.c.
659 Here's a brief recap:
661 For each SET_LINENO instruction after the first one, two bytes are
662 added to lnotab. (In some cases, multiple two-byte entries are
663 added.) The first byte is the distance in bytes between the
664 instruction for the last SET_LINENO and the current SET_LINENO.
665 The second byte is offset in line numbers. If either offset is
666 greater than 255, multiple two-byte entries are added -- see
667 compile.c for the delicate details.
678 def addCode(self
, *args
):
680 self
.code
.append(chr(arg
))
681 self
.codeOffset
= self
.codeOffset
+ len(args
)
683 def nextLine(self
, lineno
):
684 if self
.firstline
== 0:
685 self
.firstline
= lineno
686 self
.lastline
= lineno
689 addr
= self
.codeOffset
- self
.lastoff
690 line
= lineno
- self
.lastline
691 # Python assumes that lineno always increases with
692 # increasing bytecode address (lnotab is unsigned char).
693 # Depending on when SET_LINENO instructions are emitted
694 # this is not always true. Consider the code:
697 # In the bytecode stream, the assignment to "a" occurs
698 # after the loading of "b". This works with the C Python
699 # compiler because it only generates a SET_LINENO instruction
700 # for the assignment.
702 push
= self
.lnotab
.append
707 push(addr
); push(255)
710 if addr
> 0 or line
> 0:
711 push(addr
); push(line
)
712 self
.lastline
= lineno
713 self
.lastoff
= self
.codeOffset
716 return string
.join(self
.code
, '')
719 return string
.join(map(chr, self
.lnotab
), '')
721 class StackDepthTracker
:
722 # XXX 1. need to keep track of stack depth on jumps
723 # XXX 2. at least partly as a result, this code is broken
725 def findDepth(self
, insts
, debug
=0):
732 delta
= self
.effect
.get(opname
, None)
733 if delta
is not None:
734 depth
= depth
+ delta
737 for pat
, pat_delta
in self
.patterns
:
738 if opname
[:len(pat
)] == pat
:
740 depth
= depth
+ delta
742 # if we still haven't found a match
744 meth
= getattr(self
, opname
, None)
746 depth
= depth
+ meth(i
[1])
750 print depth
, maxDepth
763 'DELETE_SLICE+0': -1,
764 'DELETE_SLICE+1': -2,
765 'DELETE_SLICE+2': -2,
766 'DELETE_SLICE+3': -3,
784 'LOAD_ATTR': 0, # unlike other loads
796 def UNPACK_SEQUENCE(self
, count
):
798 def BUILD_TUPLE(self
, count
):
800 def BUILD_LIST(self
, count
):
802 def CALL_FUNCTION(self
, argc
):
803 hi
, lo
= divmod(argc
, 256)
804 return -(lo
+ hi
* 2)
805 def CALL_FUNCTION_VAR(self
, argc
):
806 return self
.CALL_FUNCTION(argc
)-1
807 def CALL_FUNCTION_KW(self
, argc
):
808 return self
.CALL_FUNCTION(argc
)-1
809 def CALL_FUNCTION_VAR_KW(self
, argc
):
810 return self
.CALL_FUNCTION(argc
)-2
811 def MAKE_FUNCTION(self
, argc
):
813 def MAKE_CLOSURE(self
, argc
):
814 # XXX need to account for free variables too!
816 def BUILD_SLICE(self
, argc
):
821 def DUP_TOPX(self
, argc
):
824 findDepth
= StackDepthTracker().findDepth