1 """A flow graph representation for Python bytecode"""
8 from compiler
import misc
12 self
.current
= self
.entry
= Block()
13 self
.exit
= Block("exit")
14 self
.blocks
= misc
.Set()
15 self
.blocks
.add(self
.entry
)
16 self
.blocks
.add(self
.exit
)
18 def startBlock(self
, block
):
21 def nextBlock(self
, block
=None):
23 block
= self
.newBlock()
24 # XXX think we need to specify when there is implicit transfer
25 # from one block to the next
27 # I think this strategy works: each block has a child
28 # designated as "next" which is returned as the last of the
29 # children. because the nodes in a graph are emitted in
30 # reverse post order, the "next" block will always be emitted
31 # immediately after its parent.
32 # Worry: maintaining this invariant could be tricky
33 self
.current
.addNext(block
)
34 self
.startBlock(block
)
41 def startExitBlock(self
):
42 self
.startBlock(self
.exit
)
44 def emit(self
, *inst
):
45 # XXX should jump instructions implicitly call nextBlock?
46 if inst
[0] == 'RETURN_VALUE':
47 self
.current
.addOutEdge(self
.exit
)
48 self
.current
.emit(inst
)
51 """Return the blocks in reverse postorder
53 i.e. each node appears before all of its successors
55 # XXX make sure every node that doesn't have an explicit next
56 # is set so that next points to exit
57 for b
in self
.blocks
.elements():
62 order
= dfs_postorder(self
.entry
, {})
65 if not self
.exit
in order
:
66 order
.append(self
.exit
)
69 def dfs_postorder(b
, seen
):
70 """Depth-first search of tree rooted at b, return in postorder"""
73 for c
in b
.children():
76 order
= order
+ dfs_postorder(c
, seen
)
83 def __init__(self
, label
=''):
85 self
.inEdges
= misc
.Set()
86 self
.outEdges
= misc
.Set()
88 self
.bid
= Block
._count
90 Block
._count
= Block
._count
+ 1
94 return "<block %s id=%d len=%d>" % (self
.label
, self
.bid
,
97 return "<block id=%d len=%d>" % (self
.bid
, len(self
.insts
))
100 insts
= map(str, self
.insts
)
101 return "<block %s %d:\n%s>" % (self
.label
, self
.bid
,
102 string
.join(insts
, '\n'))
104 def emit(self
, inst
):
107 self
.outEdges
.add(inst
[1])
108 self
.insts
.append(inst
)
110 def getInstructions(self
):
113 def addInEdge(self
, block
):
114 self
.inEdges
.add(block
)
116 def addOutEdge(self
, block
):
117 self
.outEdges
.add(block
)
119 def addNext(self
, block
):
120 self
.next
.append(block
)
121 assert len(self
.next
) == 1, map(str, self
.next
)
124 return self
.outEdges
.elements() + self
.next
126 # flags for code objects
127 CO_OPTIMIZED
= 0x0001
128 CO_NEWLOCALS
= 0x0002
130 CO_VARKEYWORDS
= 0x0008
132 # the FlowGraph is transformed in place; it exists in one of these states
138 class PyFlowGraph(FlowGraph
):
139 super_init
= FlowGraph
.__init
__
141 def __init__(self
, name
, filename
, args
=(), optimized
=0):
144 self
.filename
= filename
145 self
.docstring
= None
146 self
.args
= args
# XXX
147 self
.argcount
= getArgCount(args
)
149 self
.flags
= CO_OPTIMIZED | CO_NEWLOCALS
154 self
.varnames
= list(args
) or []
155 for i
in range(len(self
.varnames
)):
156 var
= self
.varnames
[i
]
157 if isinstance(var
, TupleArg
):
158 self
.varnames
[i
] = var
.getName()
161 def setDocstring(self
, doc
):
163 self
.consts
.insert(0, doc
)
165 def setFlag(self
, flag
):
166 self
.flags
= self
.flags | flag
167 if flag
== CO_VARARGS
:
168 self
.argcount
= self
.argcount
- 1
171 """Get a Python code object"""
172 if self
.stage
== RAW
:
174 if self
.stage
== FLAT
:
176 if self
.stage
== CONV
:
178 if self
.stage
== DONE
:
179 return self
.newCodeObject()
180 raise RuntimeError, "inconsistent PyFlowGraph state"
182 def dump(self
, io
=None):
189 if opname
== "SET_LINENO":
192 print "\t", "%3d" % pc
, opname
195 print "\t", "%3d" % pc
, opname
, t
[1]
200 def flattenGraph(self
):
201 """Arrange the blocks in order and resolve jumps"""
202 assert self
.stage
== RAW
203 self
.insts
= insts
= []
207 for b
in self
.getBlocks():
209 for inst
in b
.getInstructions():
218 for i
in range(len(insts
)):
225 if self
.hasjrel
.has_elt(opname
):
227 offset
= begin
[oparg
] - pc
228 insts
[i
] = opname
, offset
229 elif self
.hasjabs
.has_elt(opname
):
230 insts
[i
] = opname
, begin
[inst
[1]]
231 self
.stacksize
= findDepth(self
.insts
)
235 for i
in dis
.hasjrel
:
236 hasjrel
.add(dis
.opname
[i
])
238 for i
in dis
.hasjabs
:
239 hasjabs
.add(dis
.opname
[i
])
241 def convertArgs(self
):
242 """Convert arguments from symbolic to concrete form"""
243 assert self
.stage
== FLAT
244 for i
in range(len(self
.insts
)):
249 conv
= self
._converters
.get(opname
, None)
251 self
.insts
[i
] = opname
, conv(self
, oparg
)
254 def _lookupName(self
, name
, list):
255 """Return index of name in list, appending if necessary"""
258 # this is cheap, but incorrect in some cases, e.g 2 vs. 2L
259 if type(name
) == type(list[i
]):
261 for i
in range(len(list)):
263 if type(elt
) == type(name
) and elt
== name
:
270 def _convert_LOAD_CONST(self
, arg
):
271 return self
._lookupName
(arg
, self
.consts
)
273 def _convert_LOAD_FAST(self
, arg
):
274 self
._lookupName
(arg
, self
.names
)
275 return self
._lookupName
(arg
, self
.varnames
)
276 _convert_STORE_FAST
= _convert_LOAD_FAST
277 _convert_DELETE_FAST
= _convert_LOAD_FAST
279 def _convert_NAME(self
, arg
):
280 return self
._lookupName
(arg
, self
.names
)
281 _convert_LOAD_NAME
= _convert_NAME
282 _convert_STORE_NAME
= _convert_NAME
283 _convert_DELETE_NAME
= _convert_NAME
284 _convert_IMPORT_NAME
= _convert_NAME
285 _convert_IMPORT_FROM
= _convert_NAME
286 _convert_STORE_ATTR
= _convert_NAME
287 _convert_LOAD_ATTR
= _convert_NAME
288 _convert_DELETE_ATTR
= _convert_NAME
289 _convert_LOAD_GLOBAL
= _convert_NAME
290 _convert_STORE_GLOBAL
= _convert_NAME
291 _convert_DELETE_GLOBAL
= _convert_NAME
293 _cmp
= list(dis
.cmp_op
)
294 def _convert_COMPARE_OP(self
, arg
):
295 return self
._cmp
.index(arg
)
297 # similarly for other opcodes...
299 for name
, obj
in locals().items():
300 if name
[:9] == "_convert_":
302 _converters
[opname
] = obj
303 del name
, obj
, opname
305 def makeByteCode(self
):
306 assert self
.stage
== CONV
307 self
.lnotab
= lnotab
= LineAddrTable()
311 lnotab
.addCode(self
.opnum
[opname
])
314 if opname
== "SET_LINENO":
315 lnotab
.nextLine(oparg
)
316 hi
, lo
= twobyte(oparg
)
318 lnotab
.addCode(self
.opnum
[opname
], lo
, hi
)
321 print self
.opnum
[opname
], lo
, hi
326 for num
in range(len(dis
.opname
)):
327 opnum
[dis
.opname
[num
]] = num
330 def newCodeObject(self
):
331 assert self
.stage
== DONE
335 nlocals
= len(self
.varnames
)
336 argcount
= self
.argcount
337 if self
.flags
& CO_VARKEYWORDS
:
338 argcount
= argcount
- 1
339 return new
.code(argcount
, nlocals
, self
.stacksize
, self
.flags
,
340 self
.lnotab
.getCode(), self
.getConsts(),
341 tuple(self
.names
), tuple(self
.varnames
),
342 self
.filename
, self
.name
, self
.lnotab
.firstline
,
343 self
.lnotab
.getTable())
346 """Return a tuple for the const slot of the code object
348 Must convert references to code (MAKE_FUNCTION) to code
352 for elt
in self
.consts
:
353 if isinstance(elt
, PyFlowGraph
):
359 if opname
[:4] == 'JUMP':
363 """Helper for marking func defs with nested tuples in arglist"""
364 def __init__(self
, count
, names
):
368 return "TupleArg(%s, %s)" % (self
.count
, self
.names
)
370 return ".nested%d" % self
.count
372 def getArgCount(args
):
376 if isinstance(arg
, TupleArg
):
377 numNames
= len(misc
.flatten(arg
.names
))
378 argcount
= argcount
- numNames
382 """Convert an int argument into high and low bytes"""
383 assert type(val
) == types
.IntType
384 return divmod(val
, 256)
389 This class builds the lnotab, which is undocumented but described
390 by com_set_lineno in compile.c. Here's an attempt at explanation:
392 For each SET_LINENO instruction after the first one, two bytes are
393 added to lnotab. (In some cases, multiple two-byte entries are
394 added.) The first byte is the distance in bytes between the
395 instruction for the last SET_LINENO and the current SET_LINENO.
396 The second byte is offset in line numbers. If either offset is
397 greater than 255, multiple two-byte entries are added -- one entry
398 for each factor of 255.
409 def addCode(self
, *args
):
411 self
.code
.append(chr(arg
))
412 self
.codeOffset
= self
.codeOffset
+ len(args
)
414 def nextLine(self
, lineno
):
415 if self
.firstline
== 0:
416 self
.firstline
= lineno
417 self
.lastline
= lineno
420 addr
= self
.codeOffset
- self
.lastoff
421 line
= lineno
- self
.lastline
422 # Python assumes that lineno always increases with
423 # increasing bytecode address (lnotab is unsigned char).
424 # Depending on when SET_LINENO instructions are emitted
425 # this is not always true. Consider the code:
428 # In the bytecode stream, the assignment to "a" occurs
429 # after the loading of "b". This works with the C Python
430 # compiler because it only generates a SET_LINENO instruction
431 # for the assignment.
433 while addr
> 0 or line
> 0:
434 # write the values in 1-byte chunks that sum
442 self
.lnotab
.append(trunc_addr
)
443 self
.lnotab
.append(trunc_line
)
444 addr
= addr
- trunc_addr
445 line
= line
- trunc_line
446 self
.lastline
= lineno
447 self
.lastoff
= self
.codeOffset
450 return string
.join(self
.code
, '')
453 return string
.join(map(chr, self
.lnotab
), '')
455 class StackDepthTracker
:
456 # XXX 1. need to keep track of stack depth on jumps
457 # XXX 2. at least partly as a result, this code is broken
459 def findDepth(self
, insts
):
464 delta
= self
.effect
.get(opname
, 0)
466 depth
= depth
+ delta
470 depth
= depth
+ delta
475 for pat
, pat_delta
in self
.patterns
:
476 if opname
[:len(pat
)] == pat
:
478 depth
= depth
+ delta
480 # if we still haven't found a match
482 meth
= getattr(self
, opname
, None)
484 depth
= depth
+ meth(i
[1])
499 'DELETE_SLICE+0': -1,
500 'DELETE_SLICE+1': -2,
501 'DELETE_SLICE+2': -2,
502 'DELETE_SLICE+3': -3,
527 # UNPACK_SEQUENCE, BUILD_TUPLE,
528 # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE
529 def UNPACK_SEQUENCE(self
, count
):
531 def BUILD_TUPLE(self
, count
):
533 def BUILD_LIST(self
, count
):
535 def CALL_FUNCTION(self
, argc
):
536 hi
, lo
= divmod(argc
, 256)
538 def CALL_FUNCTION_VAR(self
, argc
):
539 return self
.CALL_FUNCTION(argc
)+1
540 def CALL_FUNCTION_KW(self
, argc
):
541 return self
.CALL_FUNCTION(argc
)+1
542 def CALL_FUNCTION_VAR_KW(self
, argc
):
543 return self
.CALL_FUNCTION(argc
)+2
544 def MAKE_FUNCTION(self
, argc
):
546 def BUILD_SLICE(self
, argc
):
552 findDepth
= StackDepthTracker().findDepth