1 """A flow graph representation for Python bytecode"""
9 from compiler
import misc
14 return cmp(a
.bid
, b
.bid
)
20 self
.current
= self
.entry
= Block()
21 self
.exit
= Block("exit")
22 self
.blocks
= misc
.Set()
23 self
.blocks
.add(self
.entry
)
24 self
.blocks
.add(self
.exit
)
26 def startBlock(self
, block
):
29 print "end", repr(self
.current
)
30 print " next", self
.current
.next
31 print " ", self
.current
.get_children()
35 def nextBlock(self
, block
=None):
36 # XXX think we need to specify when there is implicit transfer
37 # from one block to the next. might be better to represent this
38 # with explicit JUMP_ABSOLUTE instructions that are optimized
39 # out when they are unnecessary.
41 # I think this strategy works: each block has a child
42 # designated as "next" which is returned as the last of the
43 # children. because the nodes in a graph are emitted in
44 # reverse post order, the "next" block will always be emitted
45 # immediately after its parent.
46 # Worry: maintaining this invariant could be tricky
48 block
= self
.newBlock()
50 # Note: If the current block ends with an unconditional
51 # control transfer, then it is incorrect to add an implicit
52 # transfer to the block graph. The current code requires
53 # these edges to get the blocks emitted in the right order,
54 # however. :-( If a client needs to remove these edges, call
57 self
.current
.addNext(block
)
58 self
.startBlock(block
)
65 def startExitBlock(self
):
66 self
.startBlock(self
.exit
)
70 def _enable_debug(self
):
73 def _disable_debug(self
):
76 def emit(self
, *inst
):
79 if inst
[0] == 'RETURN_VALUE':
80 self
.current
.addOutEdge(self
.exit
)
81 if len(inst
) == 2 and isinstance(inst
[1], Block
):
82 self
.current
.addOutEdge(inst
[1])
83 self
.current
.emit(inst
)
85 def getBlocksInOrder(self
):
86 """Return the blocks in reverse postorder
88 i.e. each node appears before all of its successors
90 # XXX make sure every node that doesn't have an explicit next
91 # is set so that next points to exit
92 for b
in self
.blocks
.elements():
97 order
= dfs_postorder(self
.entry
, {})
99 self
.fixupOrder(order
, self
.exit
)
101 if not self
.exit
in order
:
102 order
.append(self
.exit
)
106 def fixupOrder(self
, blocks
, default_next
):
107 """Fixup bad order introduced by DFS."""
109 # XXX This is a total mess. There must be a better way to get
110 # the code blocks in the right order.
112 self
.fixupOrderHonorNext(blocks
, default_next
)
113 self
.fixupOrderForward(blocks
, default_next
)
115 def fixupOrderHonorNext(self
, blocks
, default_next
):
116 """Fix one problem with DFS.
118 The DFS uses child block, but doesn't know about the special
119 "next" block. As a result, the DFS can order blocks so that a
120 block isn't next to the right block for implicit control
124 for i
in range(len(blocks
)):
127 for i
in range(0, len(blocks
) - 1):
130 if not b
.next
or b
.next
[0] == default_next
or b
.next
[0] == n
:
132 # The blocks are in the wrong order. Find the chain of
133 # blocks to insert where they belong.
137 while elt
.next
and elt
.next
[0] != default_next
:
138 chain
.append(elt
.next
[0])
140 # Now remove the blocks in the chain from the current
141 # block list, so that they can be re-inserted.
145 l
.append((index
[b
], b
))
150 # Insert the chain in the proper location
151 blocks
[i
:i
+ 1] = [cur
] + chain
152 # Finally, re-compute the block indexes
153 for i
in range(len(blocks
)):
156 def fixupOrderForward(self
, blocks
, default_next
):
157 """Make sure all JUMP_FORWARDs jump forward"""
162 index
[b
] = len(chains
)
164 if b
.next
and b
.next
[0] == default_next
:
172 for i
in range(len(chains
)):
175 for c
in b
.get_children():
179 if inst
[0] == 'JUMP_FORWARD':
184 constraints
.append((index
[c
], i
))
189 # XXX just do one for now
190 # do swaps to get things in the right order
191 goes_before
, a_chain
= constraints
[0]
192 assert a_chain
> goes_before
195 chains
.insert(goes_before
, c
)
204 return self
.blocks
.elements()
207 """Return nodes appropriate for use with dominator"""
210 def getContainedGraphs(self
):
212 for b
in self
.getBlocks():
213 l
.extend(b
.getContainedGraphs())
216 def dfs_postorder(b
, seen
):
217 """Depth-first search of tree rooted at b, return in postorder"""
220 for c
in b
.get_children():
223 order
= order
+ dfs_postorder(c
, seen
)
230 def __init__(self
, label
=''):
232 self
.inEdges
= misc
.Set()
233 self
.outEdges
= misc
.Set()
235 self
.bid
= Block
._count
237 Block
._count
= Block
._count
+ 1
241 return "<block %s id=%d>" % (self
.label
, self
.bid
)
243 return "<block id=%d>" % (self
.bid
)
246 insts
= map(str, self
.insts
)
247 return "<block %s %d:\n%s>" % (self
.label
, self
.bid
,
248 string
.join(insts
, '\n'))
250 def emit(self
, inst
):
253 self
.outEdges
.add(inst
[1])
254 self
.insts
.append(inst
)
256 def getInstructions(self
):
259 def addInEdge(self
, block
):
260 self
.inEdges
.add(block
)
262 def addOutEdge(self
, block
):
263 self
.outEdges
.add(block
)
265 def addNext(self
, block
):
266 self
.next
.append(block
)
267 assert len(self
.next
) == 1, map(str, self
.next
)
269 _uncond_transfer
= ('RETURN_VALUE', 'RAISE_VARARGS',
270 'JUMP_ABSOLUTE', 'JUMP_FORWARD')
273 """Remove bogus edge for unconditional transfers
275 Each block has a next edge that accounts for implicit control
276 transfers, e.g. from a JUMP_IF_FALSE to the block that will be
277 executed if the test is true.
279 These edges must remain for the current assembler code to
280 work. If they are removed, the dfs_postorder gets things in
281 weird orders. However, they shouldn't be there for other
282 purposes, e.g. conversion to SSA form. This method will
283 remove the next edge when it follows an unconditional control
287 op
, arg
= self
.insts
[-1]
288 except (IndexError, ValueError):
290 if op
in self
._uncond
_transfer
:
293 def get_children(self
):
294 if self
.next
and self
.next
[0] in self
.outEdges
:
295 self
.outEdges
.remove(self
.next
[0])
296 return self
.outEdges
.elements() + self
.next
298 def getContainedGraphs(self
):
299 """Return all graphs contained within this block.
301 For example, a MAKE_FUNCTION block will contain a reference to
302 the graph for the function body.
305 for inst
in self
.insts
:
309 if hasattr(op
, 'graph'):
310 contained
.append(op
.graph
)
313 # flags for code objects
314 CO_OPTIMIZED
= 0x0001
315 CO_NEWLOCALS
= 0x0002
317 CO_VARKEYWORDS
= 0x0008
320 # the FlowGraph is transformed in place; it exists in one of these states
326 class PyFlowGraph(FlowGraph
):
327 super_init
= FlowGraph
.__init
__
329 def __init__(self
, name
, filename
, args
=(), optimized
=0):
332 self
.filename
= filename
333 self
.docstring
= None
334 self
.args
= args
# XXX
335 self
.argcount
= getArgCount(args
)
337 self
.flags
= CO_OPTIMIZED | CO_NEWLOCALS
342 # Free variables found by the symbol table scan, including
343 # variables used only in nested scopes, are included here.
346 # The closure list is used to track the order of cell
347 # variables and free variables in the resulting code object.
348 # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
349 # kinds of variables.
351 self
.varnames
= list(args
) or []
352 for i
in range(len(self
.varnames
)):
353 var
= self
.varnames
[i
]
354 if isinstance(var
, TupleArg
):
355 self
.varnames
[i
] = var
.getName()
358 def setDocstring(self
, doc
):
361 def setFlag(self
, flag
):
362 self
.flags
= self
.flags | flag
363 if flag
== CO_VARARGS
:
364 self
.argcount
= self
.argcount
- 1
366 def setFreeVars(self
, names
):
367 self
.freevars
= list(names
)
369 def setCellVars(self
, names
):
370 self
.cellvars
= names
373 """Get a Python code object"""
374 if self
.stage
== RAW
:
376 if self
.stage
== FLAT
:
378 if self
.stage
== CONV
:
380 if self
.stage
== DONE
:
381 return self
.newCodeObject()
382 raise RuntimeError, "inconsistent PyFlowGraph state"
384 def dump(self
, io
=None):
391 if opname
== "SET_LINENO":
394 print "\t", "%3d" % pc
, opname
397 print "\t", "%3d" % pc
, opname
, t
[1]
402 def flattenGraph(self
):
403 """Arrange the blocks in order and resolve jumps"""
404 assert self
.stage
== RAW
405 self
.insts
= insts
= []
409 for b
in self
.getBlocksInOrder():
411 for inst
in b
.getInstructions():
420 for i
in range(len(insts
)):
427 if self
.hasjrel
.has_elt(opname
):
429 offset
= begin
[oparg
] - pc
430 insts
[i
] = opname
, offset
431 elif self
.hasjabs
.has_elt(opname
):
432 insts
[i
] = opname
, begin
[inst
[1]]
433 self
.stacksize
= findDepth(self
.insts
)
437 for i
in dis
.hasjrel
:
438 hasjrel
.add(dis
.opname
[i
])
440 for i
in dis
.hasjabs
:
441 hasjabs
.add(dis
.opname
[i
])
443 def convertArgs(self
):
444 """Convert arguments from symbolic to concrete form"""
445 assert self
.stage
== FLAT
446 self
.consts
.insert(0, self
.docstring
)
448 for i
in range(len(self
.insts
)):
453 conv
= self
._converters
.get(opname
, None)
455 self
.insts
[i
] = opname
, conv(self
, oparg
)
458 def sort_cellvars(self
):
459 """Sort cellvars in the order of varnames and prune from freevars.
462 for name
in self
.cellvars
:
464 self
.cellvars
= [name
for name
in self
.varnames
465 if cells
.has_key(name
)]
466 for name
in self
.cellvars
:
468 self
.cellvars
= self
.cellvars
+ cells
.keys()
469 self
.closure
= self
.cellvars
+ self
.freevars
471 def _lookupName(self
, name
, list):
472 """Return index of name in list, appending if necessary"""
474 for i
in range(len(list)):
475 # must do a comparison on type first to prevent UnicodeErrors
476 if t
== type(list[i
]) and list[i
] == name
:
483 def _convert_LOAD_CONST(self
, arg
):
484 if hasattr(arg
, 'getCode'):
486 return self
._lookupName
(arg
, self
.consts
)
488 def _convert_LOAD_FAST(self
, arg
):
489 self
._lookupName
(arg
, self
.names
)
490 return self
._lookupName
(arg
, self
.varnames
)
491 _convert_STORE_FAST
= _convert_LOAD_FAST
492 _convert_DELETE_FAST
= _convert_LOAD_FAST
494 def _convert_NAME(self
, arg
):
495 return self
._lookupName
(arg
, self
.names
)
496 _convert_LOAD_NAME
= _convert_NAME
497 _convert_STORE_NAME
= _convert_NAME
498 _convert_DELETE_NAME
= _convert_NAME
499 _convert_IMPORT_NAME
= _convert_NAME
500 _convert_IMPORT_FROM
= _convert_NAME
501 _convert_STORE_ATTR
= _convert_NAME
502 _convert_LOAD_ATTR
= _convert_NAME
503 _convert_DELETE_ATTR
= _convert_NAME
504 _convert_LOAD_GLOBAL
= _convert_NAME
505 _convert_STORE_GLOBAL
= _convert_NAME
506 _convert_DELETE_GLOBAL
= _convert_NAME
508 def _convert_DEREF(self
, arg
):
509 self
._lookupName
(arg
, self
.names
)
510 self
._lookupName
(arg
, self
.varnames
)
511 return self
._lookupName
(arg
, self
.closure
)
512 _convert_LOAD_DEREF
= _convert_DEREF
513 _convert_STORE_DEREF
= _convert_DEREF
515 def _convert_LOAD_CLOSURE(self
, arg
):
516 self
._lookupName
(arg
, self
.varnames
)
517 return self
._lookupName
(arg
, self
.closure
)
519 _cmp
= list(dis
.cmp_op
)
520 def _convert_COMPARE_OP(self
, arg
):
521 return self
._cmp
.index(arg
)
523 # similarly for other opcodes...
525 for name
, obj
in locals().items():
526 if name
[:9] == "_convert_":
528 _converters
[opname
] = obj
529 del name
, obj
, opname
531 def makeByteCode(self
):
532 assert self
.stage
== CONV
533 self
.lnotab
= lnotab
= LineAddrTable()
537 lnotab
.addCode(self
.opnum
[opname
])
540 if opname
== "SET_LINENO":
541 lnotab
.nextLine(oparg
)
542 hi
, lo
= twobyte(oparg
)
544 lnotab
.addCode(self
.opnum
[opname
], lo
, hi
)
547 print self
.opnum
[opname
], lo
, hi
552 for num
in range(len(dis
.opname
)):
553 opnum
[dis
.opname
[num
]] = num
556 def newCodeObject(self
):
557 assert self
.stage
== DONE
561 nlocals
= len(self
.varnames
)
562 argcount
= self
.argcount
563 if self
.flags
& CO_VARKEYWORDS
:
564 argcount
= argcount
- 1
565 return new
.code(argcount
, nlocals
, self
.stacksize
, self
.flags
,
566 self
.lnotab
.getCode(), self
.getConsts(),
567 tuple(self
.names
), tuple(self
.varnames
),
568 self
.filename
, self
.name
, self
.lnotab
.firstline
,
569 self
.lnotab
.getTable(), tuple(self
.freevars
),
570 tuple(self
.cellvars
))
573 """Return a tuple for the const slot of the code object
575 Must convert references to code (MAKE_FUNCTION) to code
579 for elt
in self
.consts
:
580 if isinstance(elt
, PyFlowGraph
):
586 if opname
[:4] == 'JUMP':
590 """Helper for marking func defs with nested tuples in arglist"""
591 def __init__(self
, count
, names
):
595 return "TupleArg(%s, %s)" % (self
.count
, self
.names
)
597 return ".%d" % self
.count
599 def getArgCount(args
):
603 if isinstance(arg
, TupleArg
):
604 numNames
= len(misc
.flatten(arg
.names
))
605 argcount
= argcount
- numNames
609 """Convert an int argument into high and low bytes"""
610 assert type(val
) == types
.IntType
611 return divmod(val
, 256)
616 This class builds the lnotab, which is documented in compile.c.
617 Here's a brief recap:
619 For each SET_LINENO instruction after the first one, two bytes are
620 added to lnotab. (In some cases, multiple two-byte entries are
621 added.) The first byte is the distance in bytes between the
622 instruction for the last SET_LINENO and the current SET_LINENO.
623 The second byte is offset in line numbers. If either offset is
624 greater than 255, multiple two-byte entries are added -- see
625 compile.c for the delicate details.
636 def addCode(self
, *args
):
638 self
.code
.append(chr(arg
))
639 self
.codeOffset
= self
.codeOffset
+ len(args
)
641 def nextLine(self
, lineno
):
642 if self
.firstline
== 0:
643 self
.firstline
= lineno
644 self
.lastline
= lineno
647 addr
= self
.codeOffset
- self
.lastoff
648 line
= lineno
- self
.lastline
649 # Python assumes that lineno always increases with
650 # increasing bytecode address (lnotab is unsigned char).
651 # Depending on when SET_LINENO instructions are emitted
652 # this is not always true. Consider the code:
655 # In the bytecode stream, the assignment to "a" occurs
656 # after the loading of "b". This works with the C Python
657 # compiler because it only generates a SET_LINENO instruction
658 # for the assignment.
660 push
= self
.lnotab
.append
665 push(addr
); push(255)
668 if addr
> 0 or line
> 0:
669 push(addr
); push(line
)
670 self
.lastline
= lineno
671 self
.lastoff
= self
.codeOffset
674 return string
.join(self
.code
, '')
677 return string
.join(map(chr, self
.lnotab
), '')
679 class StackDepthTracker
:
680 # XXX 1. need to keep track of stack depth on jumps
681 # XXX 2. at least partly as a result, this code is broken
683 def findDepth(self
, insts
):
688 delta
= self
.effect
.get(opname
, 0)
690 depth
= depth
+ delta
694 depth
= depth
+ delta
699 for pat
, pat_delta
in self
.patterns
:
700 if opname
[:len(pat
)] == pat
:
702 depth
= depth
+ delta
704 # if we still haven't found a match
706 meth
= getattr(self
, opname
, None)
708 depth
= depth
+ meth(i
[1])
723 'DELETE_SLICE+0': -1,
724 'DELETE_SLICE+1': -2,
725 'DELETE_SLICE+2': -2,
726 'DELETE_SLICE+3': -3,
753 # UNPACK_SEQUENCE, BUILD_TUPLE,
754 # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
755 def UNPACK_SEQUENCE(self
, count
):
757 def BUILD_TUPLE(self
, count
):
759 def BUILD_LIST(self
, count
):
761 def CALL_FUNCTION(self
, argc
):
762 hi
, lo
= divmod(argc
, 256)
764 def CALL_FUNCTION_VAR(self
, argc
):
765 return self
.CALL_FUNCTION(argc
)+1
766 def CALL_FUNCTION_KW(self
, argc
):
767 return self
.CALL_FUNCTION(argc
)+1
768 def CALL_FUNCTION_VAR_KW(self
, argc
):
769 return self
.CALL_FUNCTION(argc
)+2
770 def MAKE_FUNCTION(self
, argc
):
772 def BUILD_SLICE(self
, argc
):
778 findDepth
= StackDepthTracker().findDepth