1 """A flow graph representation for Python bytecode"""
8 from compiler
import misc
10 import CO_OPTIMIZED
, CO_NEWLOCALS
, CO_VARARGS
, CO_VARKEYWORDS
14 self
.current
= self
.entry
= Block()
15 self
.exit
= Block("exit")
16 self
.blocks
= misc
.Set()
17 self
.blocks
.add(self
.entry
)
18 self
.blocks
.add(self
.exit
)
20 def startBlock(self
, block
):
23 print "end", repr(self
.current
)
24 print " next", self
.current
.next
25 print " ", self
.current
.get_children()
29 def nextBlock(self
, block
=None):
30 # XXX think we need to specify when there is implicit transfer
31 # from one block to the next. might be better to represent this
32 # with explicit JUMP_ABSOLUTE instructions that are optimized
33 # out when they are unnecessary.
35 # I think this strategy works: each block has a child
36 # designated as "next" which is returned as the last of the
37 # children. because the nodes in a graph are emitted in
38 # reverse post order, the "next" block will always be emitted
39 # immediately after its parent.
40 # Worry: maintaining this invariant could be tricky
42 block
= self
.newBlock()
44 # Note: If the current block ends with an unconditional
45 # control transfer, then it is incorrect to add an implicit
46 # transfer to the block graph. The current code requires
47 # these edges to get the blocks emitted in the right order,
48 # however. :-( If a client needs to remove these edges, call
51 self
.current
.addNext(block
)
52 self
.startBlock(block
)
59 def startExitBlock(self
):
60 self
.startBlock(self
.exit
)
64 def _enable_debug(self
):
67 def _disable_debug(self
):
70 def emit(self
, *inst
):
73 if inst
[0] in ['RETURN_VALUE', 'YIELD_VALUE']:
74 self
.current
.addOutEdge(self
.exit
)
75 if len(inst
) == 2 and isinstance(inst
[1], Block
):
76 self
.current
.addOutEdge(inst
[1])
77 self
.current
.emit(inst
)
79 def getBlocksInOrder(self
):
80 """Return the blocks in reverse postorder
82 i.e. each node appears before all of its successors
84 # XXX make sure every node that doesn't have an explicit next
85 # is set so that next points to exit
86 for b
in self
.blocks
.elements():
91 order
= dfs_postorder(self
.entry
, {})
93 self
.fixupOrder(order
, self
.exit
)
95 if not self
.exit
in order
:
96 order
.append(self
.exit
)
100 def fixupOrder(self
, blocks
, default_next
):
101 """Fixup bad order introduced by DFS."""
103 # XXX This is a total mess. There must be a better way to get
104 # the code blocks in the right order.
106 self
.fixupOrderHonorNext(blocks
, default_next
)
107 self
.fixupOrderForward(blocks
, default_next
)
109 def fixupOrderHonorNext(self
, blocks
, default_next
):
110 """Fix one problem with DFS.
112 The DFS uses child block, but doesn't know about the special
113 "next" block. As a result, the DFS can order blocks so that a
114 block isn't next to the right block for implicit control
118 for i
in range(len(blocks
)):
121 for i
in range(0, len(blocks
) - 1):
124 if not b
.next
or b
.next
[0] == default_next
or b
.next
[0] == n
:
126 # The blocks are in the wrong order. Find the chain of
127 # blocks to insert where they belong.
131 while elt
.next
and elt
.next
[0] != default_next
:
132 chain
.append(elt
.next
[0])
134 # Now remove the blocks in the chain from the current
135 # block list, so that they can be re-inserted.
139 l
.append((index
[b
], b
))
144 # Insert the chain in the proper location
145 blocks
[i
:i
+ 1] = [cur
] + chain
146 # Finally, re-compute the block indexes
147 for i
in range(len(blocks
)):
150 def fixupOrderForward(self
, blocks
, default_next
):
151 """Make sure all JUMP_FORWARDs jump forward"""
156 index
[b
] = len(chains
)
158 if b
.next
and b
.next
[0] == default_next
:
166 for i
in range(len(chains
)):
169 for c
in b
.get_children():
173 if inst
[0] == 'JUMP_FORWARD':
178 constraints
.append((index
[c
], i
))
183 # XXX just do one for now
184 # do swaps to get things in the right order
185 goes_before
, a_chain
= constraints
[0]
186 assert a_chain
> goes_before
189 chains
.insert(goes_before
, c
)
197 return self
.blocks
.elements()
200 """Return nodes appropriate for use with dominator"""
203 def getContainedGraphs(self
):
205 for b
in self
.getBlocks():
206 l
.extend(b
.getContainedGraphs())
209 def dfs_postorder(b
, seen
):
210 """Depth-first search of tree rooted at b, return in postorder"""
213 for c
in b
.get_children():
216 order
= order
+ dfs_postorder(c
, seen
)
223 def __init__(self
, label
=''):
225 self
.inEdges
= misc
.Set()
226 self
.outEdges
= misc
.Set()
228 self
.bid
= Block
._count
230 Block
._count
= Block
._count
+ 1
234 return "<block %s id=%d>" % (self
.label
, self
.bid
)
236 return "<block id=%d>" % (self
.bid
)
239 insts
= map(str, self
.insts
)
240 return "<block %s %d:\n%s>" % (self
.label
, self
.bid
,
243 def emit(self
, inst
):
246 self
.outEdges
.add(inst
[1])
247 self
.insts
.append(inst
)
249 def getInstructions(self
):
252 def addInEdge(self
, block
):
253 self
.inEdges
.add(block
)
255 def addOutEdge(self
, block
):
256 self
.outEdges
.add(block
)
258 def addNext(self
, block
):
259 self
.next
.append(block
)
260 assert len(self
.next
) == 1, map(str, self
.next
)
262 _uncond_transfer
= ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE',
263 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
266 """Remove bogus edge for unconditional transfers
268 Each block has a next edge that accounts for implicit control
269 transfers, e.g. from a JUMP_IF_FALSE to the block that will be
270 executed if the test is true.
272 These edges must remain for the current assembler code to
273 work. If they are removed, the dfs_postorder gets things in
274 weird orders. However, they shouldn't be there for other
275 purposes, e.g. conversion to SSA form. This method will
276 remove the next edge when it follows an unconditional control
280 op
, arg
= self
.insts
[-1]
281 except (IndexError, ValueError):
283 if op
in self
._uncond
_transfer
:
286 def get_children(self
):
287 if self
.next
and self
.next
[0] in self
.outEdges
:
288 self
.outEdges
.remove(self
.next
[0])
289 return self
.outEdges
.elements() + self
.next
291 def getContainedGraphs(self
):
292 """Return all graphs contained within this block.
294 For example, a MAKE_FUNCTION block will contain a reference to
295 the graph for the function body.
298 for inst
in self
.insts
:
302 if hasattr(op
, 'graph'):
303 contained
.append(op
.graph
)
306 # flags for code objects
308 # the FlowGraph is transformed in place; it exists in one of these states
314 class PyFlowGraph(FlowGraph
):
315 super_init
= FlowGraph
.__init
__
317 def __init__(self
, name
, filename
, args
=(), optimized
=0, klass
=None):
320 self
.filename
= filename
321 self
.docstring
= None
322 self
.args
= args
# XXX
323 self
.argcount
= getArgCount(args
)
326 self
.flags
= CO_OPTIMIZED | CO_NEWLOCALS
331 # Free variables found by the symbol table scan, including
332 # variables used only in nested scopes, are included here.
335 # The closure list is used to track the order of cell
336 # variables and free variables in the resulting code object.
337 # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
338 # kinds of variables.
340 self
.varnames
= list(args
) or []
341 for i
in range(len(self
.varnames
)):
342 var
= self
.varnames
[i
]
343 if isinstance(var
, TupleArg
):
344 self
.varnames
[i
] = var
.getName()
347 def setDocstring(self
, doc
):
350 def setFlag(self
, flag
):
351 self
.flags
= self
.flags | flag
352 if flag
== CO_VARARGS
:
353 self
.argcount
= self
.argcount
- 1
355 def checkFlag(self
, flag
):
356 if self
.flags
& flag
:
359 def setFreeVars(self
, names
):
360 self
.freevars
= list(names
)
362 def setCellVars(self
, names
):
363 self
.cellvars
= names
366 """Get a Python code object"""
367 if self
.stage
== RAW
:
368 self
.computeStackDepth()
370 if self
.stage
== FLAT
:
372 if self
.stage
== CONV
:
374 if self
.stage
== DONE
:
375 return self
.newCodeObject()
376 raise RuntimeError, "inconsistent PyFlowGraph state"
378 def dump(self
, io
=None):
385 if opname
== "SET_LINENO":
388 print "\t", "%3d" % pc
, opname
391 print "\t", "%3d" % pc
, opname
, t
[1]
396 def computeStackDepth(self
):
397 """Compute the max stack depth.
399 Approach is to compute the stack effect of each basic block.
400 Then find the path through the code with the largest total
405 for b
in self
.getBlocks():
406 depth
[b
] = findDepth(b
.getInstructions())
415 children
= b
.get_children()
417 return max([max_depth(c
, d
) for c
in children
])
419 if not b
.label
== "exit":
420 return max_depth(self
.exit
, d
)
424 self
.stacksize
= max_depth(self
.entry
, 0)
426 def flattenGraph(self
):
427 """Arrange the blocks in order and resolve jumps"""
428 assert self
.stage
== RAW
429 self
.insts
= insts
= []
433 for b
in self
.getBlocksInOrder():
435 for inst
in b
.getInstructions():
439 elif inst
[0] != "SET_LINENO":
444 for i
in range(len(insts
)):
448 elif inst
[0] != "SET_LINENO":
451 if self
.hasjrel
.has_elt(opname
):
453 offset
= begin
[oparg
] - pc
454 insts
[i
] = opname
, offset
455 elif self
.hasjabs
.has_elt(opname
):
456 insts
[i
] = opname
, begin
[inst
[1]]
460 for i
in dis
.hasjrel
:
461 hasjrel
.add(dis
.opname
[i
])
463 for i
in dis
.hasjabs
:
464 hasjabs
.add(dis
.opname
[i
])
466 def convertArgs(self
):
467 """Convert arguments from symbolic to concrete form"""
468 assert self
.stage
== FLAT
469 self
.consts
.insert(0, self
.docstring
)
471 for i
in range(len(self
.insts
)):
475 conv
= self
._converters
.get(opname
, None)
477 self
.insts
[i
] = opname
, conv(self
, oparg
)
480 def sort_cellvars(self
):
481 """Sort cellvars in the order of varnames and prune from freevars.
484 for name
in self
.cellvars
:
486 self
.cellvars
= [name
for name
in self
.varnames
487 if cells
.has_key(name
)]
488 for name
in self
.cellvars
:
490 self
.cellvars
= self
.cellvars
+ cells
.keys()
491 self
.closure
= self
.cellvars
+ self
.freevars
493 def _lookupName(self
, name
, list):
494 """Return index of name in list, appending if necessary
496 This routine uses a list instead of a dictionary, because a
497 dictionary can't store two different keys if the keys have the
498 same value but different types, e.g. 2 and 2L. The compiler
499 must treat these two separately, so it does an explicit type
500 comparison before comparing the values.
503 for i
in range(len(list)):
504 if t
== type(list[i
]) and list[i
] == name
:
511 def _convert_LOAD_CONST(self
, arg
):
512 if hasattr(arg
, 'getCode'):
514 return self
._lookupName
(arg
, self
.consts
)
516 def _convert_LOAD_FAST(self
, arg
):
517 self
._lookupName
(arg
, self
.names
)
518 return self
._lookupName
(arg
, self
.varnames
)
519 _convert_STORE_FAST
= _convert_LOAD_FAST
520 _convert_DELETE_FAST
= _convert_LOAD_FAST
522 def _convert_LOAD_NAME(self
, arg
):
523 if self
.klass
is None:
524 self
._lookupName
(arg
, self
.varnames
)
525 return self
._lookupName
(arg
, self
.names
)
527 def _convert_NAME(self
, arg
):
528 if self
.klass
is None:
529 self
._lookupName
(arg
, self
.varnames
)
530 return self
._lookupName
(arg
, self
.names
)
531 _convert_STORE_NAME
= _convert_NAME
532 _convert_DELETE_NAME
= _convert_NAME
533 _convert_IMPORT_NAME
= _convert_NAME
534 _convert_IMPORT_FROM
= _convert_NAME
535 _convert_STORE_ATTR
= _convert_NAME
536 _convert_LOAD_ATTR
= _convert_NAME
537 _convert_DELETE_ATTR
= _convert_NAME
538 _convert_LOAD_GLOBAL
= _convert_NAME
539 _convert_STORE_GLOBAL
= _convert_NAME
540 _convert_DELETE_GLOBAL
= _convert_NAME
542 def _convert_DEREF(self
, arg
):
543 self
._lookupName
(arg
, self
.names
)
544 self
._lookupName
(arg
, self
.varnames
)
545 return self
._lookupName
(arg
, self
.closure
)
546 _convert_LOAD_DEREF
= _convert_DEREF
547 _convert_STORE_DEREF
= _convert_DEREF
549 def _convert_LOAD_CLOSURE(self
, arg
):
550 self
._lookupName
(arg
, self
.varnames
)
551 return self
._lookupName
(arg
, self
.closure
)
553 _cmp
= list(dis
.cmp_op
)
554 def _convert_COMPARE_OP(self
, arg
):
555 return self
._cmp
.index(arg
)
557 # similarly for other opcodes...
559 for name
, obj
in locals().items():
560 if name
[:9] == "_convert_":
562 _converters
[opname
] = obj
563 del name
, obj
, opname
565 def makeByteCode(self
):
566 assert self
.stage
== CONV
567 self
.lnotab
= lnotab
= LineAddrTable()
571 lnotab
.addCode(self
.opnum
[opname
])
574 if opname
== "SET_LINENO":
575 lnotab
.nextLine(oparg
)
577 hi
, lo
= twobyte(oparg
)
579 lnotab
.addCode(self
.opnum
[opname
], lo
, hi
)
582 print self
.opnum
[opname
], lo
, hi
587 for num
in range(len(dis
.opname
)):
588 opnum
[dis
.opname
[num
]] = num
591 def newCodeObject(self
):
592 assert self
.stage
== DONE
593 if (self
.flags
& CO_NEWLOCALS
) == 0:
596 nlocals
= len(self
.varnames
)
597 argcount
= self
.argcount
598 if self
.flags
& CO_VARKEYWORDS
:
599 argcount
= argcount
- 1
600 return new
.code(argcount
, nlocals
, self
.stacksize
, self
.flags
,
601 self
.lnotab
.getCode(), self
.getConsts(),
602 tuple(self
.names
), tuple(self
.varnames
),
603 self
.filename
, self
.name
, self
.lnotab
.firstline
,
604 self
.lnotab
.getTable(), tuple(self
.freevars
),
605 tuple(self
.cellvars
))
608 """Return a tuple for the const slot of the code object
610 Must convert references to code (MAKE_FUNCTION) to code
614 for elt
in self
.consts
:
615 if isinstance(elt
, PyFlowGraph
):
621 if opname
[:4] == 'JUMP':
625 """Helper for marking func defs with nested tuples in arglist"""
626 def __init__(self
, count
, names
):
630 return "TupleArg(%s, %s)" % (self
.count
, self
.names
)
632 return ".%d" % self
.count
634 def getArgCount(args
):
638 if isinstance(arg
, TupleArg
):
639 numNames
= len(misc
.flatten(arg
.names
))
640 argcount
= argcount
- numNames
644 """Convert an int argument into high and low bytes"""
645 assert type(val
) == types
.IntType
646 return divmod(val
, 256)
651 This class builds the lnotab, which is documented in compile.c.
652 Here's a brief recap:
654 For each SET_LINENO instruction after the first one, two bytes are
655 added to lnotab. (In some cases, multiple two-byte entries are
656 added.) The first byte is the distance in bytes between the
657 instruction for the last SET_LINENO and the current SET_LINENO.
658 The second byte is offset in line numbers. If either offset is
659 greater than 255, multiple two-byte entries are added -- see
660 compile.c for the delicate details.
671 def addCode(self
, *args
):
673 self
.code
.append(chr(arg
))
674 self
.codeOffset
= self
.codeOffset
+ len(args
)
676 def nextLine(self
, lineno
):
677 if self
.firstline
== 0:
678 self
.firstline
= lineno
679 self
.lastline
= lineno
682 addr
= self
.codeOffset
- self
.lastoff
683 line
= lineno
- self
.lastline
684 # Python assumes that lineno always increases with
685 # increasing bytecode address (lnotab is unsigned char).
686 # Depending on when SET_LINENO instructions are emitted
687 # this is not always true. Consider the code:
690 # In the bytecode stream, the assignment to "a" occurs
691 # after the loading of "b". This works with the C Python
692 # compiler because it only generates a SET_LINENO instruction
693 # for the assignment.
695 push
= self
.lnotab
.append
700 push(addr
); push(255)
703 if addr
> 0 or line
> 0:
704 push(addr
); push(line
)
705 self
.lastline
= lineno
706 self
.lastoff
= self
.codeOffset
709 return ''.join(self
.code
)
712 return ''.join(map(chr, self
.lnotab
))
714 class StackDepthTracker
:
715 # XXX 1. need to keep track of stack depth on jumps
716 # XXX 2. at least partly as a result, this code is broken
718 def findDepth(self
, insts
, debug
=0):
725 delta
= self
.effect
.get(opname
, None)
726 if delta
is not None:
727 depth
= depth
+ delta
730 for pat
, pat_delta
in self
.patterns
:
731 if opname
[:len(pat
)] == pat
:
733 depth
= depth
+ delta
735 # if we still haven't found a match
737 meth
= getattr(self
, opname
, None)
739 depth
= depth
+ meth(i
[1])
743 print depth
, maxDepth
756 'DELETE_SLICE+0': -1,
757 'DELETE_SLICE+1': -2,
758 'DELETE_SLICE+2': -2,
759 'DELETE_SLICE+3': -3,
778 'LOAD_ATTR': 0, # unlike other loads
790 def UNPACK_SEQUENCE(self
, count
):
792 def BUILD_TUPLE(self
, count
):
794 def BUILD_LIST(self
, count
):
796 def CALL_FUNCTION(self
, argc
):
797 hi
, lo
= divmod(argc
, 256)
798 return -(lo
+ hi
* 2)
799 def CALL_FUNCTION_VAR(self
, argc
):
800 return self
.CALL_FUNCTION(argc
)-1
801 def CALL_FUNCTION_KW(self
, argc
):
802 return self
.CALL_FUNCTION(argc
)-1
803 def CALL_FUNCTION_VAR_KW(self
, argc
):
804 return self
.CALL_FUNCTION(argc
)-2
805 def MAKE_FUNCTION(self
, argc
):
807 def MAKE_CLOSURE(self
, argc
):
808 # XXX need to account for free variables too!
810 def BUILD_SLICE(self
, argc
):
815 def DUP_TOPX(self
, argc
):
818 findDepth
= StackDepthTracker().findDepth