# Specification, compiler, disassembler, and interpreter
# for LLDB dataformatter bytecode.
#
# See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
8 from __future__
import annotations
def define_opcode(n, mnemonic, name):
    """Publish opcode number ``n`` as the module-level constant ``op_<name>``.

    Args:
        n: Numeric value of the opcode.
        mnemonic: Assembler spelling of the opcode, or ``None`` for the
            literal opcodes, which are registered without one.  Accepted
            for interface compatibility; not read by the code below.
        name: Identifier suffix; the constant is stored in this module's
            globals under ``"op_" + name``.
    """
    # NOTE(review): ``mnemonic`` is never consumed here, yet other code in
    # this file indexes an ``opcode`` table by opcode byte -- confirm whether
    # the mnemonic registration is supposed to live in this function.
    globals()["op_" + name] = n
# Opcode table: define_opcode(number, assembler mnemonic, constant suffix).
# Each call creates the module-level constant op_<suffix>.

# Data stack manipulation.
define_opcode(1, "dup", "dup")
define_opcode(2, "drop", "drop")
define_opcode(3, "pick", "pick")
define_opcode(4, "over", "over")
define_opcode(5, "swap", "swap")
define_opcode(6, "rot", "rot")

# Control stack manipulation.
define_opcode(0x10, "{", "begin")
define_opcode(0x11, "if", "if")
define_opcode(0x12, "ifelse", "ifelse")
define_opcode(0x13, "return", "return")

# Literals; these are registered without an assembler mnemonic.
define_opcode(0x20, None, "lit_uint")
define_opcode(0x21, None, "lit_int")
define_opcode(0x22, None, "lit_string")
define_opcode(0x23, None, "lit_selector")

# as_int / as_uint / is_null.
define_opcode(0x2A, "as_int", "as_int")
define_opcode(0x2B, "as_uint", "as_uint")
define_opcode(0x2C, "is_null", "is_null")

# Arithmetic and shifts.
define_opcode(0x30, "+", "plus")
define_opcode(0x31, "-", "minus")
define_opcode(0x32, "*", "mul")
define_opcode(0x33, "/", "div")
define_opcode(0x34, "%", "mod")
define_opcode(0x35, "<<", "shl")
define_opcode(0x36, ">>", "shr")

# Bitwise / logic.
define_opcode(0x40, "&", "and")
define_opcode(0x41, "|", "or")
define_opcode(0x42, "^", "xor")
define_opcode(0x43, "~", "not")

# Comparisons.  Note the mnemonic for "le" is spelled "=<".
define_opcode(0x50, "=", "eq")
define_opcode(0x51, "!=", "neq")
define_opcode(0x52, "<", "lt")
define_opcode(0x53, ">", "gt")
define_opcode(0x54, "=<", "le")
define_opcode(0x55, ">=", "ge")

# Selector call.
define_opcode(0x60, "call", "call")
# Numeric ids for the sig_* names.
# NOTE(review): presumably formatter function signatures; ids 0 and 1 are not
# assigned in this span -- confirm against the bytecode specification.
sig_get_num_children = 2
sig_get_child_index = 3
sig_get_child_at_index = 4
def define_selector(n, name):
    """Register selector number ``n`` under ``name``.

    Publishes the module-level constant ``sel_<name>`` and records the
    selector in the global ``selector`` table in both directions, so it can
    be looked up by spelling or by number:

        selector["@" + name] == n
        selector[n] == "@" + name

    Args:
        n: Numeric value of the selector.
        name: Selector name without the leading ``@``.
    """
    spelling = "@" + name
    globals()["sel_" + name] = n
    selector[spelling] = n
    selector[n] = spelling
# Selector table: define_selector(number, name).  Each call creates the
# module-level constant sel_<name> and the "@<name>" <-> number entries in
# ``selector``.
define_selector(0, "summary")
define_selector(1, "type_summary")

# Note: 0x14 is not assigned in this table.
define_selector(0x10, "get_num_children")
define_selector(0x11, "get_child_at_index")
define_selector(0x12, "get_child_with_name")
define_selector(0x13, "get_child_index")
define_selector(0x15, "get_type")
define_selector(0x16, "get_template_argument_type")
define_selector(0x17, "cast")
define_selector(0x20, "get_value")
define_selector(0x21, "get_value_as_unsigned")
define_selector(0x22, "get_value_as_signed")
define_selector(0x23, "get_value_as_address")

define_selector(0x40, "read_memory_byte")
define_selector(0x41, "read_memory_uint32")
define_selector(0x42, "read_memory_int32")
define_selector(0x43, "read_memory_unsigned")
define_selector(0x44, "read_memory_signed")
define_selector(0x45, "read_memory_address")
define_selector(0x46, "read_memory")

define_selector(0x50, "fmt")
define_selector(0x51, "sprintf")
define_selector(0x52, "strlen")
################################################################################
################################################################################
121 def compile(assembler
: str) -> bytearray
:
122 """Compile assembler into bytecode"""
123 # This is a stack of all in-flight/unterminated blocks.
124 bytecode
= [bytearray()]
127 bytecode
[-1].append(byte
)
129 tokens
= list(assembler
.split(" "))
136 bytecode
.append(bytearray())
138 block
= bytecode
.pop()
140 emit(len(block
)) # FIXME: uleb
141 bytecode
[-1].extend(block
)
142 elif tok
[0].isdigit():
145 emit(int(tok
[:-1])) # FIXME
148 emit(int(tok
)) # FIXME
150 emit(op_lit_selector
)
160 s
.append(ord(c
)) # FIXME
167 # FIXME assert this is last in token
176 bytecode
[-1].extend(s
)
179 assert len(bytecode
) == 1 # unterminated {
################################################################################
################################################################################
188 def disassemble(bytecode
: bytearray
) -> (str, int):
189 """Disassemble bytecode into (assembler, token starts)"""
191 all_bytes
= list(bytecode
)
197 """Fetch the next byte in the bytecode and keep track of all
199 for i
in range(len(blocks
)):
201 tokens
.append(len(asm
))
202 return all_bytes
.pop()
209 blocks
.append(length
)
210 elif b
== op_lit_uint
:
212 asm
+= str(b
) # FIXME uleb
214 elif b
== op_lit_int
:
217 elif b
== op_lit_selector
:
220 elif b
== op_lit_string
:
224 s
+= chr(next_byte())
226 asm
+= '"' + repr(s
)[2:]
230 while blocks
and blocks
[-1] == 0:
################################################################################
################################################################################
247 def count_fmt_params(fmt
: str) -> int:
248 """Count the number of parameters in a format string"""
249 from string
import Formatter
253 for _
, name
, _
, _
in f
.parse(fmt
):
259 def interpret(bytecode
: bytearray
, control
: list, data
: list, tracing
: bool = False):
260 """Interpret bytecode"""
262 frame
.append((0, len(bytecode
)))
265 """print a trace of the execution for debugging purposes"""
268 if isinstance(d
, int):
270 if isinstance(d
, str):
275 asm
, tokens
= disassemble(bytecode
)
277 "=== frame = {1}, data = {2}, opcode = {0}".format(
278 opcode
[b
], frame
, [fmt(d
) for d
in data
]
282 print(" " * (tokens
[pc
]) + "^")
285 """Fetch the next byte and update the PC"""
287 assert pc
< len(bytecode
)
289 frame
[-1] = pc
+ 1, end
290 # At the end of a block?
299 frame
[-1] = pc
+ 1, end
302 while frame
[-1][0] < len(bytecode
):
308 # Data stack manipulation.
310 data
.append(data
[-1])
314 data
.append(data
[data
.pop()])
316 data
.append(data
[-2])
330 # Control stack manipulation.
334 control
.append((pc
, pc
+ length
))
335 frame
[-1] = pc
+ length
, end
338 frame
.append(control
.pop())
342 frame
.append(control
.pop())
344 frame
.append(control
.pop())
351 elif b
== op_lit_uint
:
352 b
= next_byte() # FIXME uleb
354 elif b
== op_lit_int
:
355 b
= next_byte() # FIXME uleb
357 elif b
== op_lit_selector
:
360 elif b
== op_lit_string
:
364 s
+= chr(next_byte())
368 elif b
== op_as_uint
:
372 elif b
== op_is_null
:
373 data
.append(1 if data
.pop() == None else 0)
375 # Arithmetic, logic, etc.
377 data
.append(data
.pop() + data
.pop())
379 data
.append(-data
.pop() + data
.pop())
381 data
.append(data
.pop() * data
.pop())
384 data
.append(data
.pop() / y
)
387 data
.append(data
.pop() % y
)
390 data
.append(data
.pop() << y
)
393 data
.append(data
.pop() >> y
)
395 data
.append(data
.pop() & data
.pop())
397 data
.append(data
.pop() | data
.pop())
399 data
.append(data
.pop() ^ data
.pop())
401 data
.append(not data
.pop())
403 data
.append(data
.pop() == data
.pop())
405 data
.append(data
.pop() != data
.pop())
407 data
.append(data
.pop() > data
.pop())
409 data
.append(data
.pop() < data
.pop())
411 data
.append(data
.pop() >= data
.pop())
413 data
.append(data
.pop() <= data
.pop())
418 if sel
== sel_summary
:
419 data
.append(data
.pop().GetSummary())
420 elif sel
== sel_get_num_children
:
421 data
.append(data
.pop().GetNumChildren())
422 elif sel
== sel_get_child_at_index
:
425 data
.append(valobj
.GetChildAtIndex(index
))
426 elif sel
== sel_get_child_with_name
:
429 data
.append(valobj
.GetChildMemberWithName(name
))
430 elif sel
== sel_get_child_index
:
433 data
.append(valobj
.GetIndexOfChildWithName(name
))
434 elif sel
== sel_get_type
:
435 data
.append(data
.pop().GetType())
436 elif sel
== sel_get_template_argument_type
:
439 data
.append(valobj
.GetTemplateArgumentType(n
))
440 elif sel
== sel_get_value
:
441 data
.append(data
.pop().GetValue())
442 elif sel
== sel_get_value_as_unsigned
:
443 data
.append(data
.pop().GetValueAsUnsigned())
444 elif sel
== sel_get_value_as_signed
:
445 data
.append(data
.pop().GetValueAsSigned())
446 elif sel
== sel_get_value_as_address
:
447 data
.append(data
.pop().GetValueAsAddress())
448 elif sel
== sel_cast
:
451 data
.append(valobj
.Cast(sbtype
))
452 elif sel
== sel_strlen
:
454 data
.append(len(s
) if s
else 0)
457 n
= count_fmt_params(fmt
)
460 args
.append(data
.pop())
461 data
.append(fmt
.format(*args
))
463 print("not implemented: " + selector
[sel
])
469 if __name__
== "__main__":
470 # Work around the fact that one of the local files is called
471 # types.py, which breaks some versions of python.
474 path
= os
.path
.abspath(os
.path
.dirname(__file__
))
475 sys
.path
.remove(path
)
478 parser
= argparse
.ArgumentParser(
480 Compiler, disassembler, and interpreter for LLDB dataformatter bytecode.
481 See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
485 "-c", "--compile", type=str, help="compile assembler into bytecode"
487 parser
.add_argument("-d", "--disassemble", type=str, help="disassemble bytecode")
488 parser
.add_argument("-t", "--test", action
="store_true", help="run unit tests")
489 args
= parser
.parse_args()
491 print(compile(str(args
.compile)).hex())
494 print(disassemble(bytearray
.fromhex(str(args
.disassemble
))))
############################################################################
############################################################################
502 class TestCompiler(unittest
.TestCase
):
504 self
.assertEqual(compile("1u dup").hex(), "200101")
505 self
.assertEqual(compile('"1u dup"').hex(), "2206317520647570")
506 self
.assertEqual(compile("16 < { dup } if").hex(), "21105210010111")
507 self
.assertEqual(compile('{ { " } " } }').hex(), "100710052203207d20")
510 self
.assertEqual(disassemble(compile(asm
))[0], asm
)
513 roundtrip('1u dup "1u dup"')
514 roundtrip("16 < { dup } if")
515 roundtrip('{ { " } " } }')
517 self
.assertEqual(interpret(compile("1 1 +"), [], []), 2)
518 self
.assertEqual(interpret(compile("2 1 1 + *"), [], []), 4)
520 interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes"
526 path
= os
.path
.dirname(__file__
)