[libc] Switch to using the generic `<gpuintrin.h>` implementations (#121810)
[llvm-project.git] / lldb / examples / python / formatter_bytecode.py
blob36a14be283f319bc024ba23d8fd72cc1f721f8bc
"""
Specification, compiler, disassembler, and interpreter
for LLDB dataformatter bytecode.

See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
"""
8 from __future__ import annotations
# Types
# Numeric tags for the value types of the formatter bytecode.
type_String = 1
type_Int = 2
type_UInt = 3
type_Object = 4
type_Type = 5
# Opcodes
# Two-way table: mnemonic -> opcode number and opcode number -> mnemonic.
opcode = dict()


def define_opcode(n, mnemonic, name):
    """Register opcode number `n`.

    Creates the module-level constant ``op_<name>`` with value `n`. When
    `mnemonic` is truthy, the assembler spelling is also recorded in `opcode`
    in both directions. Literal opcodes pass ``None`` as the mnemonic and
    therefore get no entry in the table.
    """
    globals()["op_" + name] = n
    if mnemonic:
        opcode[mnemonic] = n
        opcode[n] = mnemonic


# Data stack manipulation.
define_opcode(1, "dup", "dup")
define_opcode(2, "drop", "drop")
define_opcode(3, "pick", "pick")
define_opcode(4, "over", "over")
define_opcode(5, "swap", "swap")
define_opcode(6, "rot", "rot")

# Control flow.
define_opcode(0x10, "{", "begin")
define_opcode(0x11, "if", "if")
define_opcode(0x12, "ifelse", "ifelse")
define_opcode(0x13, "return", "return")

# Literals (no mnemonic: the assembler recognizes them by their syntax).
define_opcode(0x20, None, "lit_uint")
define_opcode(0x21, None, "lit_int")
define_opcode(0x22, None, "lit_string")
define_opcode(0x23, None, "lit_selector")

# Conversions and predicates.
define_opcode(0x2A, "as_int", "as_int")
define_opcode(0x2B, "as_uint", "as_uint")
define_opcode(0x2C, "is_null", "is_null")

# Arithmetic.
define_opcode(0x30, "+", "plus")
define_opcode(0x31, "-", "minus")
define_opcode(0x32, "*", "mul")
define_opcode(0x33, "/", "div")
define_opcode(0x34, "%", "mod")
define_opcode(0x35, "<<", "shl")
define_opcode(0x36, ">>", "shr")

# Logic.
define_opcode(0x40, "&", "and")
define_opcode(0x41, "|", "or")
define_opcode(0x42, "^", "xor")
define_opcode(0x43, "~", "not")

# Comparisons.
define_opcode(0x50, "=", "eq")
define_opcode(0x51, "!=", "neq")
define_opcode(0x52, "<", "lt")
define_opcode(0x53, ">", "gt")
define_opcode(0x54, "=<", "le")
define_opcode(0x55, ">=", "ge")

# Function calls.
define_opcode(0x60, "call", "call")
# Function signatures
# Numeric identifiers for the formatter entry points.
sig_summary = 0
sig_init = 1
sig_get_num_children = 2
sig_get_child_index = 3
sig_get_child_at_index = 4
# Selectors
# Two-way table: "@name" -> selector number and selector number -> "@name".
selector = dict()


def define_selector(n, name):
    """Register selector number `n` under `name`.

    Creates the module-level constant ``sel_<name>`` with value `n` and
    records the assembler spelling ``@<name>`` in `selector` in both
    directions.
    """
    globals()["sel_" + name] = n
    selector["@" + name] = n
    selector[n] = "@" + name


define_selector(0, "summary")
define_selector(1, "type_summary")

define_selector(0x10, "get_num_children")
define_selector(0x11, "get_child_at_index")
define_selector(0x12, "get_child_with_name")
define_selector(0x13, "get_child_index")
define_selector(0x15, "get_type")
define_selector(0x16, "get_template_argument_type")
define_selector(0x17, "cast")
define_selector(0x20, "get_value")
define_selector(0x21, "get_value_as_unsigned")
define_selector(0x22, "get_value_as_signed")
define_selector(0x23, "get_value_as_address")

define_selector(0x40, "read_memory_byte")
define_selector(0x41, "read_memory_uint32")
define_selector(0x42, "read_memory_int32")
define_selector(0x43, "read_memory_unsigned")
define_selector(0x44, "read_memory_signed")
define_selector(0x45, "read_memory_address")
define_selector(0x46, "read_memory")

define_selector(0x50, "fmt")
define_selector(0x51, "sprintf")
define_selector(0x52, "strlen")
################################################################################
# Compiler.
################################################################################


def compile(assembler: str) -> bytearray:
    """Compile assembler into bytecode.

    `assembler` is a sequence of tokens separated by single spaces:
    mnemonics from the `opcode` table, ``{`` / ``}`` block delimiters,
    integer literals (a trailing ``u`` marks an unsigned literal),
    ``@selector`` names and double-quoted strings. Inside a string a
    backslash takes the following character literally.

    Literal values, string lengths and block lengths are each emitted as a
    single byte (ULEB encoding is still a FIXME), so values above 255 make
    ``bytearray.append`` raise ``ValueError``. Unknown mnemonics or selectors
    raise ``KeyError``.
    """
    # This is a stack of all in-flight/unterminated blocks.
    bytecode = [bytearray()]

    def emit(byte):
        # Append to the innermost block that is still open.
        bytecode[-1].append(byte)

    # Reversed so that pop() hands out the tokens in source order.
    tokens = list(assembler.split(" "))
    tokens.reverse()
    while tokens:
        tok = tokens.pop()
        if tok == "":
            # Produced by consecutive spaces; nothing to emit.
            pass
        elif tok == "{":
            bytecode.append(bytearray())
        elif tok == "}":
            # Close the innermost block and splice it into its parent,
            # prefixed with the begin opcode and the block length.
            block = bytecode.pop()
            emit(op_begin)
            emit(len(block))  # FIXME: uleb
            bytecode[-1].extend(block)
        elif tok[0].isdigit():
            if tok[-1] == "u":
                emit(op_lit_uint)
                emit(int(tok[:-1]))  # FIXME
            else:
                emit(op_lit_int)
                emit(int(tok))  # FIXME
        elif tok[0] == "@":
            emit(op_lit_selector)
            emit(selector[tok])
        elif tok[0] == '"':
            # A string may span several space-separated tokens; keep pulling
            # tokens (re-inserting the separating space) until the closing
            # quote is found.
            s = bytearray()
            done = False
            chrs = tok[1:]
            while not done:
                quoted = False
                for c in chrs:
                    if quoted:
                        s.append(ord(c))  # FIXME
                        quoted = False
                    elif c == "\\":
                        quoted = True
                    elif c == '"':
                        done = True
                        break
                        # FIXME assert this is last in token
                    else:
                        s.append(ord(c))
                if not done:
                    s.append(ord(" "))
                    chrs = tokens.pop()

            emit(op_lit_string)
            emit(len(s))
            bytecode[-1].extend(s)
        else:
            emit(opcode[tok])
    assert len(bytecode) == 1, "unterminated {"
    return bytecode[0]
################################################################################
# Disassembler.
################################################################################


def disassemble(bytecode: bytearray) -> tuple[str, list[int]]:
    """Disassemble bytecode into (assembler, token starts).

    Returns the assembler text together with a list of offsets into that
    text: after the initial 0, one offset is recorded per byte consumed,
    equal to the length of the text produced so far. The interpreter's
    tracing uses this list to place a marker under the current instruction.

    String literals are written between double quotes with ``"`` and ``\\``
    escaped by a backslash, which is the escaping `compile` understands.
    If a block's recorded length runs past the end of the bytecode, the text
    ends in ``ERROR``.
    """
    asm = ""
    # Reversed so that pop() hands out the bytes in order.
    all_bytes = list(bytecode)
    all_bytes.reverse()
    # Remaining byte count of every block that is still open.
    blocks = []
    tokens = [0]

    def next_byte():
        """Fetch the next byte in the bytecode and keep track of all
        in-flight blocks"""
        for i in range(len(blocks)):
            blocks[i] -= 1
        tokens.append(len(asm))
        return all_bytes.pop()

    while all_bytes:
        b = next_byte()
        if b == op_begin:
            asm += "{"
            length = next_byte()
            blocks.append(length)
        elif b == op_lit_uint:
            b = next_byte()
            asm += str(b)  # FIXME uleb
            asm += "u"
        elif b == op_lit_int:
            b = next_byte()
            asm += str(b)
        elif b == op_lit_selector:
            b = next_byte()
            asm += selector[b]
        elif b == op_lit_string:
            length = next_byte()
            chars = []
            while length:
                c = chr(next_byte())
                # Escape the two characters the compiler treats specially so
                # that the output compiles back to the same bytes.
                if c == '"' or c == "\\":
                    chars.append("\\")
                chars.append(c)
                length -= 1
            asm += '"' + "".join(chars) + '"'
        else:
            asm += opcode[b]

        # Close every block whose last byte was just consumed.
        while blocks and blocks[-1] == 0:
            asm += " }"
            blocks.pop()

        if all_bytes:
            asm += " "

    if blocks:
        asm += "ERROR"
    return asm, tokens
################################################################################
# Interpreter.
################################################################################


def count_fmt_params(fmt: str) -> int:
    """Count the number of positional parameters in a format string.

    Returns how many positional arguments ``fmt.format(*args)`` needs:
    one more than the highest explicit index (``{0}``, ``{2.attr}``,
    ``{1[0]}``), or the number of auto-numbered ``{}`` fields, whichever is
    larger. Fields nested inside a format spec (``{0:{1}}``) are counted as
    well. Keyword fields (``{name}``) are ignored because they cannot be
    supplied positionally. A string without replacement fields yields 0.
    """
    from string import Formatter

    parser = Formatter()
    explicit = 0  # highest explicit index seen, plus one
    automatic = 0  # number of auto-numbered "{}" fields
    pending = [fmt]
    while pending:
        for _, name, spec, _ in parser.parse(pending.pop()):
            if name is None:
                # Trailing literal text; there is no field in this chunk.
                continue
            if spec:
                # A format spec may itself contain replacement fields.
                pending.append(spec)
            # Strip attribute/index access: "0.real" -> "0", "1[2]" -> "1".
            head = name.split(".", 1)[0].split("[", 1)[0]
            if head == "":
                automatic += 1
            elif head.isdecimal():
                explicit = max(explicit, int(head) + 1)
    return max(explicit, automatic)
def interpret(bytecode: bytearray, control: list, data: list, tracing: bool = False):
    """Interpret bytecode.

    `data` is the data stack and `control` the control stack; both are
    mutated in place. `bytecode` is executed from its first byte. With
    `tracing` set, the disassembly and the current position are printed
    before every instruction.

    Returns the top of the data stack, either when a ``return`` opcode is
    executed or when the bytecode runs out. Selector calls expect the
    receiver on the data stack to provide the corresponding methods
    (``GetSummary``, ``GetChildAtIndex``, ...). A selector without an
    implementation prints a message and fails an assertion.
    """
    # Stack of (pc, end) pairs: one per block currently being executed.
    frame = []
    frame.append((0, len(bytecode)))

    def trace(current):
        """print a trace of the execution for debugging purposes"""

        def fmt(d):
            if isinstance(d, int):
                return str(d)
            if isinstance(d, str):
                return d
            return repr(type(d))

        pc, end = frame[-1]
        asm, tokens = disassemble(bytecode)
        print(
            "=== frame = {1}, data = {2}, opcode = {0}".format(
                # .get: opcodes registered without a mnemonic may have no
                # entry in the table; tracing must not raise for them.
                opcode.get(current),
                frame,
                [fmt(d) for d in data],
            )
        )
        print(asm)
        print(" " * (tokens[pc]) + "^")

    def next_byte():
        """Fetch the next byte and update the PC"""
        pc, end = frame[-1]
        assert pc < len(bytecode)
        b = bytecode[pc]
        frame[-1] = pc + 1, end
        # At the end of a block?
        while pc >= end:
            # The current block is exhausted: resume in the enclosing frame.
            frame.pop()
            if not frame:
                return None
            pc, end = frame[-1]
            if pc >= end:
                return None
            b = bytecode[pc]
            frame[-1] = pc + 1, end
        return b

    while frame[-1][0] < len(bytecode):
        b = next_byte()
        if b is None:
            break
        if tracing:
            trace(b)
        # Data stack manipulation.
        if b == op_dup:
            data.append(data[-1])
        elif b == op_drop:
            data.pop()
        elif b == op_pick:
            data.append(data[data.pop()])
        elif b == op_over:
            data.append(data[-2])
        elif b == op_swap:
            x = data.pop()
            y = data.pop()
            data.append(x)
            data.append(y)
        elif b == op_rot:
            z = data.pop()
            y = data.pop()
            x = data.pop()
            data.append(z)
            data.append(x)
            data.append(y)

        # Control stack manipulation.
        elif b == op_begin:
            # Record the block's extent on the control stack and skip over
            # its body; it only runs if a later if/ifelse selects it.
            length = next_byte()
            pc, end = frame[-1]
            control.append((pc, pc + length))
            frame[-1] = pc + length, end
        elif b == op_if:
            if data.pop():
                frame.append(control.pop())
        elif b == op_ifelse:
            # The else-block is on top of the control stack, the then-block
            # below it; run one and discard the other.
            if data.pop():
                control.pop()
                frame.append(control.pop())
            else:
                frame.append(control.pop())
                control.pop()
        elif b == op_return:
            control.clear()
            return data[-1]

        # Literals.
        elif b == op_lit_uint:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_int:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_selector:
            b = next_byte()
            data.append(b)
        elif b == op_lit_string:
            length = next_byte()
            s = ""
            while length:
                s += chr(next_byte())
                length -= 1
            data.append(s)

        elif b == op_as_uint:
            pass
        elif b == op_as_int:
            pass
        elif b == op_is_null:
            data.append(1 if data.pop() is None else 0)

        # Arithmetic, logic, etc.
        elif b == op_plus:
            data.append(data.pop() + data.pop())
        elif b == op_minus:
            # The first pop is the right-hand operand: computes lhs - rhs.
            data.append(-data.pop() + data.pop())
        elif b == op_mul:
            data.append(data.pop() * data.pop())
        elif b == op_div:
            y = data.pop()
            data.append(data.pop() / y)
        elif b == op_mod:
            y = data.pop()
            data.append(data.pop() % y)
        elif b == op_shl:
            y = data.pop()
            data.append(data.pop() << y)
        elif b == op_shr:
            y = data.pop()
            data.append(data.pop() >> y)
        elif b == op_and:
            data.append(data.pop() & data.pop())
        elif b == op_or:
            data.append(data.pop() | data.pop())
        elif b == op_xor:
            data.append(data.pop() ^ data.pop())
        elif b == op_not:
            data.append(not data.pop())
        elif b == op_eq:
            data.append(data.pop() == data.pop())
        elif b == op_neq:
            data.append(data.pop() != data.pop())
        # For the ordered comparisons the first pop is the right-hand
        # operand, hence the mirrored Python operators.
        elif b == op_lt:
            data.append(data.pop() > data.pop())
        elif b == op_gt:
            data.append(data.pop() < data.pop())
        elif b == op_le:
            data.append(data.pop() >= data.pop())
        elif b == op_ge:
            data.append(data.pop() <= data.pop())

        # Function calls.
        elif b == op_call:
            sel = data.pop()
            if sel == sel_summary:
                data.append(data.pop().GetSummary())
            elif sel == sel_get_num_children:
                data.append(data.pop().GetNumChildren())
            elif sel == sel_get_child_at_index:
                index = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildAtIndex(index))
            elif sel == sel_get_child_with_name:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildMemberWithName(name))
            elif sel == sel_get_child_index:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetIndexOfChildWithName(name))
            elif sel == sel_get_type:
                data.append(data.pop().GetType())
            elif sel == sel_get_template_argument_type:
                n = data.pop()
                valobj = data.pop()
                data.append(valobj.GetTemplateArgumentType(n))
            elif sel == sel_get_value:
                data.append(data.pop().GetValue())
            elif sel == sel_get_value_as_unsigned:
                data.append(data.pop().GetValueAsUnsigned())
            elif sel == sel_get_value_as_signed:
                data.append(data.pop().GetValueAsSigned())
            elif sel == sel_get_value_as_address:
                data.append(data.pop().GetValueAsAddress())
            elif sel == sel_cast:
                sbtype = data.pop()
                valobj = data.pop()
                data.append(valobj.Cast(sbtype))
            elif sel == sel_strlen:
                s = data.pop()
                data.append(len(s) if s else 0)
            elif sel == sel_fmt:
                fmt_string = data.pop()
                n = count_fmt_params(fmt_string)
                # Arguments are taken from the top of the stack downwards,
                # so the topmost value becomes positional argument 0.
                args = []
                for i in range(n):
                    args.append(data.pop())
                data.append(fmt_string.format(*args))
            else:
                print("not implemented: " + selector[sel])
                assert False
    return data[-1]
if __name__ == "__main__":
    # Work around the fact that one of the local files is called
    # types.py, which breaks some versions of python.
    import os
    import sys

    path = os.path.abspath(os.path.dirname(__file__))
    # Guarded: the script directory is not guaranteed to be on sys.path in
    # exactly this spelling, and list.remove raises ValueError if it is not.
    if path in sys.path:
        sys.path.remove(path)
    # Imported only after the script directory is off sys.path (see above).
    import argparse

    parser = argparse.ArgumentParser(
        description="""
    Compiler, disassembler, and interpreter for LLDB dataformatter bytecode.
    See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
    """
    )
    parser.add_argument(
        "-c", "--compile", type=str, help="compile assembler into bytecode"
    )
    parser.add_argument("-d", "--disassemble", type=str, help="disassemble bytecode")
    parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
    args = parser.parse_args()
    if args.compile:
        print(compile(str(args.compile)).hex())

    if args.disassemble:
        print(disassemble(bytearray.fromhex(str(args.disassemble))))

    ############################################################################
    # Tests.
    ############################################################################
    if args.test:
        import unittest

        class TestCompiler(unittest.TestCase):
            def test(self):
                self.assertEqual(compile("1u dup").hex(), "200101")
                self.assertEqual(compile('"1u dup"').hex(), "2206317520647570")
                self.assertEqual(compile("16 < { dup } if").hex(), "21105210010111")
                self.assertEqual(compile('{ { " } " } }').hex(), "100710052203207d20")

                def roundtrip(asm):
                    self.assertEqual(disassemble(compile(asm))[0], asm)

                roundtrip("1u dup")
                roundtrip('1u dup "1u dup"')
                roundtrip("16 < { dup } if")
                roundtrip('{ { " } " } }')

                self.assertEqual(interpret(compile("1 1 +"), [], []), 2)
                self.assertEqual(interpret(compile("2 1 1 + *"), [], []), 4)
                self.assertEqual(
                    interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes"
                )

        # Hand unittest only the program name so that it does not try to
        # parse this script's own options, wherever they appear.
        unittest.main(argv=[sys.argv[0]])