[docs] Update HowToReleaseLLVM documentation.
[llvm-project.git] / lldb / examples / python / symbolication.py
blob9784c28a4476663ec5b9db520df63f651a254525
#!/usr/bin/env python

#----------------------------------------------------------------------
# Be sure to add the python path that points to the LLDB shared library.
#
# To use this in the embedded python interpreter using "lldb":
#
#   cd /path/containing/crashlog.py
#   lldb
#   (lldb) script import crashlog
#   "crashlog" command installed, type "crashlog --help" for detailed help
#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
#
# The benefit of running the crashlog command inside lldb in the
# embedded python interpreter is when the command completes, there
# will be a target with all of the files loaded at the locations
# described in the crash log. Only the files that have stack frames
# in the backtrace will be loaded unless the "--load-all" option
# has been specified. This allows users to explore the program in the
# state it was in right at crash time.
#
# On MacOSX csh, tcsh:
#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
#
# On MacOSX sh, bash:
#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
#----------------------------------------------------------------------
29 import lldb
30 import optparse
31 import os
32 import plistlib
33 import re
34 import shlex
35 import sys
36 import time
37 import uuid
38 import json
39 import tempfile
class Address:
    """Class that represents an address that will be symbolicated"""

    def __init__(self, target, load_addr):
        """Store the target and load address; nothing is resolved yet."""
        self.target = target
        self.load_addr = load_addr  # The load address that this object represents
        # the resolved lldb.SBAddress (if any), named so_addr for
        # section/offset address
        self.so_addr = None
        self.sym_ctx = None  # The cached symbol context for this address
        # Any original textual description of this address to be used as a
        # backup in case symbolication fails
        self.description = None
        self.symbolication = None  # The cached symbolicated string that describes this address
        self.inlined = False

    def __str__(self):
        """Return the load address followed by the first available of: the
        symbolication, the description, or the resolved address."""
        s = "%#16.16x" % (self.load_addr)
        if self.symbolication:
            s += " %s" % (self.symbolication)
        elif self.description:
            s += " %s" % (self.description)
        elif self.so_addr:
            s += " %s" % (self.so_addr)
        return s

    def resolve_addr(self):
        """Resolve the load address in the target once and cache the result
        in self.so_addr."""
        if self.so_addr is None:
            self.so_addr = self.target.ResolveLoadAddress(self.load_addr)
        return self.so_addr

    def is_inlined(self):
        """Return the inlined flag set by the last symbolicate() attempt."""
        return self.inlined

    def get_symbol_context(self):
        """Return the symbol context for this address, computing and caching
        it on first use. An empty lldb.SBSymbolContext is cached when the
        address cannot be resolved."""
        if self.sym_ctx is None:
            sb_addr = self.resolve_addr()
            if sb_addr:
                self.sym_ctx = self.target.ResolveSymbolContextForAddress(
                    sb_addr, lldb.eSymbolContextEverything)
            else:
                self.sym_ctx = lldb.SBSymbolContext()
        return self.sym_ctx

    def get_instructions(self):
        """Return the instructions of the function containing this address,
        falling back to the symbol's instructions when there is no function.
        Returns None when there is no valid symbol context."""
        sym_ctx = self.get_symbol_context()
        if sym_ctx:
            function = sym_ctx.GetFunction()
            if function:
                return function.GetInstructions(self.target)
            return sym_ctx.GetSymbol().GetInstructions(self.target)
        return None

    def symbolicate(self, verbose=False):
        """Build and cache the symbolicated string for this address.

        The text has the form "module`name [+ offset] [at file:line[:col]]".
        With verbose=True full module and source paths are used instead of
        just the file names.

        Returns True when the address was symbolicated and False otherwise.
        The work is only done once: later calls report the outcome of the
        first attempt from the cached string.
        """
        if self.symbolication is not None:
            # Already attempted. A failed attempt leaves an empty string
            # behind, a successful one always contains at least the module
            # name, so the cached text tells us how the first attempt went.
            return bool(self.symbolication)

        self.symbolication = ''
        self.inlined = False
        sym_ctx = self.get_symbol_context()
        if not sym_ctx:
            return False
        module = sym_ctx.GetModule()
        if not module:
            return False

        # Print full source file path in verbose mode
        if verbose:
            self.symbolication += str(module.GetFileSpec()) + '`'
        else:
            self.symbolication += module.GetFileSpec().GetFilename() + '`'
        function_start_load_addr = -1
        function = sym_ctx.GetFunction()
        block = sym_ctx.GetBlock()
        line_entry = sym_ctx.GetLineEntry()
        symbol = sym_ctx.GetSymbol()
        inlined_block = block.GetContainingInlinedBlock()
        if function:
            self.symbolication += function.GetName()

            if inlined_block:
                self.inlined = True
                self.symbolication += ' [inlined] ' + \
                    inlined_block.GetInlinedName()
                block_range_idx = inlined_block.GetRangeIndexForBlockAddress(
                    self.so_addr)
                if block_range_idx < lldb.UINT32_MAX:
                    # Measure the offset from the start of the inlined
                    # block range rather than from the function start.
                    block_range_start_addr = inlined_block.GetRangeStartAddress(
                        block_range_idx)
                    function_start_load_addr = block_range_start_addr.GetLoadAddress(
                        self.target)
            if function_start_load_addr == -1:
                function_start_load_addr = function.GetStartAddress().GetLoadAddress(self.target)
        elif symbol:
            self.symbolication += symbol.GetName()
            function_start_load_addr = symbol.GetStartAddress().GetLoadAddress(self.target)
        else:
            # Neither a function nor a symbol: drop the module prefix so
            # the failure is visible in the cached string as well.
            self.symbolication = ''
            return False

        # Dump the offset from the current function or symbol if it
        # is non zero
        function_offset = self.load_addr - function_start_load_addr
        if function_offset > 0:
            self.symbolication += " + %u" % (function_offset)
        elif function_offset < 0:
            self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset

        # Print out any line information if any is available
        if line_entry.GetFileSpec():
            # Print full source file path in verbose mode
            if verbose:
                self.symbolication += ' at %s' % line_entry.GetFileSpec()
            else:
                self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename()
            self.symbolication += ':%u' % line_entry.GetLine()
            column = line_entry.GetColumn()
            if column > 0:
                self.symbolication += ':%u' % column
        return True
class Section:
    """Class that represents an load address range"""
    # Raw strings keep "\s" a regex escape instead of an invalid string escape.
    sect_info_regex = re.compile(r'(?P<name>[^=]+)=(?P<range>.*)')
    addr_regex = re.compile(r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    range_regex = re.compile(
        r'^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')

    def __init__(self, start_addr=None, end_addr=None, name=None):
        """Create a section covering [start_addr, end_addr) with a name."""
        self.start_addr = start_addr
        self.end_addr = end_addr
        self.name = name

    @classmethod
    def InitWithSBTargetAndSBSection(cls, target, section):
        """Build a Section from an lldb section loaded in target.

        Returns None when the section has no valid load address.
        """
        sect_load_addr = section.GetLoadAddress(target)
        if sect_load_addr == lldb.LLDB_INVALID_ADDRESS:
            return None
        return cls(sect_load_addr, sect_load_addr + section.size, section.name)

    def contains(self, addr):
        """Return True when addr lies in [start_addr, end_addr).

        A section whose start or end is unknown (for example one parsed
        from the "<name>=<base>" form) contains nothing.
        """
        if self.start_addr is None or self.end_addr is None:
            return False
        return self.start_addr <= addr < self.end_addr

    def set_from_string(self, s):
        """Parse "<name>=<base>", "<name>=<base>-<end>" or
        "<name>=<base>+<size>" (hex values with a 0x prefix).

        Returns True on success. On failure a description of the accepted
        formats is printed and False is returned.
        """
        match = self.sect_info_regex.match(s)
        if match:
            self.name = match.group('name')
            range_str = match.group('range')
            addr_match = self.addr_regex.match(range_str)
            if addr_match:
                self.start_addr = int(addr_match.group('start'), 16)
                self.end_addr = None
                return True

            range_match = self.range_regex.match(range_str)
            if range_match:
                self.start_addr = int(range_match.group('start'), 16)
                self.end_addr = int(range_match.group('end'), 16)
                op = range_match.group('op')
                if op == '+':
                    # "<base>+<size>": the second number is a size
                    self.end_addr += self.start_addr
                return True
        print('error: invalid section info string "%s"' % s)
        print('Valid section info formats are:')
        print('Format Example Description')
        print('--------------------- -----------------------------------------------')
        print('<name>=<base> __TEXT=0x123000 Section from base address only')
        print('<name>=<base>-<end> __TEXT=0x123000-0x124000 Section from base address and end address')
        print('<name>=<base>+<size> __TEXT=0x123000+0x1000 Section from base address and size')
        return False

    def __str__(self):
        """Return "name=[start - end)", "name=start", the bare name, or
        "<invalid>" when the section has no name."""
        if self.name:
            if self.end_addr is not None:
                if self.start_addr is not None:
                    return "%s=[0x%16.16x - 0x%16.16x)" % (
                        self.name, self.start_addr, self.end_addr)
            else:
                if self.start_addr is not None:
                    return "%s=0x%16.16x" % (self.name, self.start_addr)
            return self.name
        return "<invalid>"
class Image:
    """A class that represents an executable image and any associated data"""

    def __init__(self, path, uuid=None):
        """Create an image for the file at path, optionally tagged with a UUID."""
        self.path = path
        self.resolved_path = None
        self.resolve = False
        self.resolved = False
        self.unavailable = False
        self.uuid = uuid
        self.section_infos = list()
        self.identifier = None
        self.version = None
        self.arch = None
        self.module = None
        self.symfile = None
        self.slide = None
        self.symbols = dict()

    @classmethod
    def InitWithSBTargetAndSBModule(cls, target, module):
        '''Initialize this Image object with a module from a target.'''
        obj = cls(module.file.fullpath, module.uuid)
        obj.resolved_path = module.platform_file.fullpath
        obj.resolved = True
        for section in module.sections:
            symb_section = Section.InitWithSBTargetAndSBSection(
                target, section)
            if symb_section:
                obj.section_infos.append(symb_section)
        obj.arch = module.triple
        obj.module = module
        obj.symfile = None
        obj.slide = None
        return obj

    def dump(self, prefix):
        """Print the one-line description of this image after prefix."""
        print("%s%s" % (prefix, self))

    def debug_dump(self):
        """Print every attribute of this image, one per line."""
        print('path = "%s"' % (self.path))
        print('resolved_path = "%s"' % (self.resolved_path))
        print('resolved = %i' % (self.resolved))
        print('unavailable = %i' % (self.unavailable))
        print('uuid = %s' % (self.uuid))
        print('section_infos = %s' % (self.section_infos))
        print('identifier = "%s"' % (self.identifier))
        print('version = %s' % (self.version))
        print('arch = %s' % (self.arch))
        print('module = %s' % (self.module))
        print('symfile = "%s"' % (self.symfile))
        if self.slide is None:
            # slide defaults to None, which "%i" cannot format
            print('slide = None')
        else:
            print('slide = %i (0x%x)' % (self.slide, self.slide))

    def __str__(self):
        """Return uuid, arch, version, path, sections and slide (those that
        are set) as a single line."""
        s = ''
        if self.uuid:
            s += "%s " % (self.get_uuid())
        if self.arch:
            s += "%s " % (self.arch)
        if self.version:
            s += "%s " % (self.version)
        resolved_path = self.get_resolved_path()
        if resolved_path:
            s += "%s " % (resolved_path)
        for section_info in self.section_infos:
            s += ", %s" % (section_info)
        if self.slide is not None:
            s += ', slide = 0x%16.16x' % self.slide
        return s

    def add_section(self, section):
        """Append a section to this image's section load information."""
        self.section_infos.append(section)

    def get_section_containing_load_addr(self, load_addr):
        """Return the first section that contains load_addr, or None."""
        for section_info in self.section_infos:
            if section_info.contains(load_addr):
                return section_info
        return None

    def get_resolved_path(self):
        """Return resolved_path when set, else path, else None."""
        if self.resolved_path:
            return self.resolved_path
        elif self.path:
            return self.path
        return None

    def get_resolved_path_basename(self):
        """Return the file name part of get_resolved_path(), or None."""
        path = self.get_resolved_path()
        if path:
            return os.path.basename(path)
        return None

    def symfile_basename(self):
        """Return the file name part of symfile, or None when unset."""
        if self.symfile:
            return os.path.basename(self.symfile)
        return None

    def has_section_load_info(self):
        """Return a truthy value when section infos or a slide are known."""
        return self.section_infos or self.slide is not None

    def load_module(self, target):
        """Set the load addresses of this image's module in target.

        Uses the per-section start addresses when section infos exist and
        the slide otherwise. Returns None on success (or when the image is
        already known to be unavailable) and an error string on failure.
        """
        if self.unavailable:
            return None  # We already warned that we couldn't find this module, so don't return an error string
        if not self.has_section_load_info():
            return 'error: no section infos'
        if not target:
            return 'error: invalid target'
        if not self.module:
            return 'error: invalid module'

        # Load this module into "target" using the section infos to
        # set the section load addresses
        if self.section_infos:
            num_sections_loaded = 0
            for section_info in self.section_infos:
                if not section_info.name:
                    return 'error: unable to find "%s" section in "%s"' % (
                        section_info.name, self.get_resolved_path())
                section = self.module.FindSection(section_info.name)
                if not section:
                    return 'error: unable to find the section named "%s"' % section_info.name
                error = target.SetSectionLoadAddress(
                    section, section_info.start_addr)
                if not error.Success():
                    return 'error: %s' % error.GetCString()
                num_sections_loaded += 1
            if num_sections_loaded == 0:
                return 'error: no sections were successfully loaded'
        else:
            err = target.SetModuleLoadAddress(self.module, self.slide)
            if err.Fail():
                return err.GetCString()
        return None

    def add_module(self, target):
        '''Add the Image described in this object to "target" and load the sections if "load" is True.'''
        if not target:
            return 'error: invalid target'

        # Try and find using UUID only first so that paths need not match
        # up
        uuid_str = self.get_normalized_uuid_string()
        if uuid_str:
            self.module = target.AddModule(None, None, uuid_str)
        if not self.module and self.resolve:
            self.locate_module_and_debug_symbols()
            if not self.unavailable:
                resolved_path = self.get_resolved_path()
                self.module = target.AddModule(
                    resolved_path, None, uuid_str, self.symfile)
        if not self.module and self.section_infos:
            # Last resort: describe the image (triple, uuid, section sizes
            # and known symbols) in a temporary JSON file and hand that
            # file to the target as the module.
            name = os.path.basename(self.path)
            with tempfile.NamedTemporaryFile(suffix='.' + name) as tf:
                data = {
                    'triple': target.triple,
                    'uuid': uuid_str,
                    'type': 'sharedlibrary',
                    'sections': list(),
                    'symbols': list()
                }
                for section in self.section_infos:
                    data['sections'].append({
                        'name': section.name,
                        'size': section.end_addr - section.start_addr
                    })
                data['symbols'] = list(self.symbols.values())
                with open(tf.name, 'w') as f:
                    f.write(json.dumps(data, indent=4))
                self.module = target.AddModule(tf.name, None, uuid_str)
        if not self.module and not self.unavailable:
            return 'error: unable to get module for (%s) "%s"' % (
                self.arch, self.get_resolved_path())
        if self.has_section_load_info():
            return self.load_module(target)
        return None  # No sections, the module was added to the target, so success

    def locate_module_and_debug_symbols(self):
        """Locate the binary and its debug symbols; returns True on success."""
        # By default, just use the paths that were supplied in:
        # self.path
        # self.resolved_path
        # self.module
        # self.symfile
        # Subclasses can inherit from this class and override this function
        self.resolved = True
        return True

    def get_uuid(self):
        """Return the UUID, reading it from the module when not yet known."""
        if not self.uuid and self.module:
            self.uuid = uuid.UUID(self.module.GetUUIDString())
        return self.uuid

    def get_normalized_uuid_string(self):
        """Return the UUID as an upper-case string, or None when unset."""
        if self.uuid:
            return str(self.uuid).upper()
        return None

    def create_target(self, debugger):
        '''Create a target using the information in this Image object.'''
        if self.unavailable:
            return None

        if self.locate_module_and_debug_symbols():
            resolved_path = self.get_resolved_path()
            path_spec = lldb.SBFileSpec(resolved_path)
            error = lldb.SBError()
            target = debugger.CreateTarget(
                resolved_path, self.arch, None, False, error)
            if target:
                self.module = target.FindModule(path_spec)
                if self.has_section_load_info():
                    err = self.load_module(target)
                    if err:
                        print('ERROR: ', err)
                return target
            else:
                print('error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path))
        else:
            print('error: unable to locate main executable (%s) "%s"' % (self.arch, self.path))
        return None
class Symbolicator:

    def __init__(self, debugger=None, target=None, images=None):
        """A class the represents the information needed to symbolicate
        addresses in a program.

        Do not call this initializer directly, but rather use the factory
        methods.
        """
        self.debugger = debugger
        self.target = target
        # a list of images to be used when symbolicating. A fresh list is
        # created per instance when none is supplied so that instances
        # never share (and mutate) one default list; a list passed in by
        # the caller is used as-is.
        self.images = images if images is not None else list()
        self.addr_mask = 0xffffffffffffffff

    @classmethod
    def InitWithSBTarget(cls, target):
        """Initialize a new Symbolicator with an existing SBTarget."""
        obj = cls(target=target)
        triple = target.triple
        if triple:
            arch = triple.split('-')[0]
            if "arm" in arch:
                # clear bit zero of addresses on arm
                obj.addr_mask = 0xfffffffffffffffe

        for module in target.modules:
            image = Image.InitWithSBTargetAndSBModule(target, module)
            obj.images.append(image)
        return obj

    @classmethod
    def InitWithSBDebugger(cls, debugger, images):
        """Initialize a new Symbolicator with an existing debugger and list of
        images. The Symbolicator will create the target."""
        obj = cls(debugger=debugger, images=images)
        return obj

    def __str__(self):
        """Describe the target (when there is one), its modules and the
        images known to this symbolicator."""
        s = "Symbolicator:\n"
        if self.target:
            s += "Target = '%s'\n" % (self.target)
            s += "Target modules:\n"
            for m in self.target.modules:
                s += str(m) + "\n"
        s += "Images:\n"
        for image in self.images:
            s += ' %s\n' % (image)
        return s

    def find_images_with_identifier(self, identifier):
        """Return the images whose identifier equals identifier.

        When there is no exact match, images whose identifier ends in
        ".<identifier>" are returned instead. Images without an identifier
        never match the suffix search.
        """
        images = [image for image in self.images
                  if image.identifier == identifier]
        if not images:
            regex = re.compile(r'^.*\.%s$' % (re.escape(identifier)))
            for image in self.images:
                # identifier defaults to None, which a regex cannot match
                if image.identifier is not None and regex.match(image.identifier):
                    images.append(image)
        return images

    def find_image_containing_load_addr(self, load_addr):
        """Return the first image with a section containing load_addr, or
        None."""
        for image in self.images:
            if image.get_section_containing_load_addr(load_addr):
                return image
        return None

    def create_target(self):
        """Return the existing target, or create one from the first image
        that yields a valid target. Returns None when no image does."""
        if self.target:
            return self.target

        if self.images:
            for image in self.images:
                self.target = image.create_target(self.debugger)
                if self.target:
                    if self.target.GetAddressByteSize() == 4:
                        triple = self.target.triple
                        if triple:
                            arch = triple.split('-')[0]
                            if "arm" in arch:
                                self.addr_mask = 0xfffffffffffffffe
                    return self.target
        return None

    def symbolicate(self, load_addr, verbose=False):
        """Symbolicate load_addr in the target.

        Returns a list of Address objects: the address itself followed by
        one entry per enclosing inlined scope. Returns None when there is
        no target or the address could not be symbolicated.
        """
        if not self.target:
            self.create_target()
        if self.target:
            live_process = False
            process = self.target.process
            if process:
                state = process.state
                if state > lldb.eStateUnloaded and state < lldb.eStateDetached:
                    live_process = True
            # If we don't have a live process, we can attempt to find the image
            # that a load address belongs to and lazily load its module in the
            # target, but we shouldn't do any of this if we have a live process
            if not live_process:
                image = self.find_image_containing_load_addr(load_addr)
                if image:
                    image.add_module(self.target)
            symbolicated_address = Address(self.target, load_addr)
            if symbolicated_address.symbolicate(verbose):
                if symbolicated_address.so_addr:
                    symbolicated_addresses = list()
                    symbolicated_addresses.append(symbolicated_address)
                    # See if we were able to reconstruct anything?
                    while True:
                        inlined_parent_so_addr = lldb.SBAddress()
                        inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope(
                            symbolicated_address.so_addr, inlined_parent_so_addr)
                        if not inlined_parent_sym_ctx:
                            break
                        if not inlined_parent_so_addr:
                            break

                        symbolicated_address = Address(
                            self.target, inlined_parent_so_addr.GetLoadAddress(
                                self.target))
                        symbolicated_address.sym_ctx = inlined_parent_sym_ctx
                        symbolicated_address.so_addr = inlined_parent_so_addr
                        symbolicated_address.symbolicate(verbose)

                        # push the new frame onto the new frame stack
                        symbolicated_addresses.append(symbolicated_address)

                    if symbolicated_addresses:
                        return symbolicated_addresses
        else:
            print('error: no target in Symbolicator')
        return None
def disassemble_instructions(
        target,
        instructions,
        pc,
        insts_before_pc,
        insts_after_pc,
        non_zeroeth_frame):
    """Print a window of disassembly around pc, marking the pc line with "->".

    target is passed to each instruction to get load addresses and text.
    insts_before_pc / insts_after_pc give the number of instructions to
    show on each side of the pc line; -1 means "all the way to that end".
    When non_zeroeth_frame is true the marked line is the instruction just
    before pc. Nothing is printed when no instruction starts at pc.
    """
    lines = list()
    pc_index = -1
    comment_column = 50
    for inst_idx, inst in enumerate(instructions):
        inst_pc = inst.GetAddress().GetLoadAddress(target)
        if pc == inst_pc:
            pc_index = inst_idx
        mnemonic = inst.GetMnemonic(target)
        operands = inst.GetOperands(target)
        comment = inst.GetComment(target)
        lines.append("%#16.16x: %8s %s" % (inst_pc, mnemonic, operands))
        if comment:
            # pad so that comments start in the same column
            line_len = len(lines[-1])
            if line_len < comment_column:
                lines[-1] += ' ' * (comment_column - line_len)
            lines[-1] += "; %s" % comment

    if pc_index < 0:
        return

    last_idx = len(lines) - 1
    # If we are disassembling the non-zeroeth frame, we need to backup the
    # PC by 1
    if non_zeroeth_frame and pc_index > 0:
        pc_index = pc_index - 1
    if insts_before_pc == -1:
        start_idx = 0
    else:
        start_idx = max(pc_index - insts_before_pc, 0)
    # The upper bound is governed by insts_after_pc (not insts_before_pc)
    if insts_after_pc == -1:
        end_idx = last_idx
    else:
        end_idx = min(pc_index + insts_after_pc, last_idx)
    for i in range(start_idx, end_idx + 1):
        if i == pc_index:
            print(' -> ', lines[i])
        else:
            print(' ', lines[i])
def print_module_section_data(section):
    """Print section, then the description of its data when it has any."""
    print(section)
    data = section.GetSectionData()
    if not data:
        return
    # Render the data into a stream, passing the section's file address
    # to GetDescription, then print what was collected.
    stream = lldb.SBStream()
    data.GetDescription(stream, section.GetFileAddress())
    print(stream.GetData())
def print_module_section(section, depth):
    """Print section and, while depth is positive, recurse into its
    sub-sections with depth reduced by one per level."""
    print(section)
    if depth <= 0:
        return
    sub_section_count = section.GetNumSubSections()
    for idx in range(sub_section_count):
        child = section.GetSubSectionAtIndex(idx)
        print_module_section(child, depth - 1)
def print_module_sections(module, depth):
    """Print every section yielded by module.section_iter(), descending
    depth levels into sub-sections."""
    for top_level_section in module.section_iter():
        print_module_section(top_level_section, depth)
def print_module_symbols(module):
    """Print each item obtained by iterating over module, one per line."""
    for symbol in module:
        print(symbol)
def Symbolicate(debugger, command_args):
    """Parse command_args and print the symbolication of each address given.

    debugger is handed to the Symbolicator used to create the target.
    command_args is the list of option and address strings. Addresses are
    parsed with int(text, 0), so decimal, 0x-prefixed hex and the other
    prefixed forms are accepted.

    Returns None. Calls sys.exit(1) when a --section string is malformed.
    """
    usage = "usage: %prog [options] <addr1> [addr2 ...]"
    description = '''Symbolicate one or more addresses using LLDB's python scripting API..'''
    parser = optparse.OptionParser(
        description=description,
        prog='crashlog.py',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    parser.add_option(
        '-p',
        '--platform',
        type='string',
        metavar='platform',
        dest='platform',
        help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    parser.add_option(
        '-f',
        '--file',
        type='string',
        metavar='file',
        dest='file',
        help='Specify a file to use when symbolicating')
    parser.add_option(
        '-a',
        '--arch',
        type='string',
        metavar='arch',
        dest='arch',
        help='Specify a architecture to use when symbolicating')
    parser.add_option(
        '-s',
        '--slide',
        type='int',
        metavar='slide',
        dest='slide',
        help='Specify the slide to use on the file specified with the --file option',
        default=None)
    parser.add_option(
        '--section',
        type='string',
        action='append',
        dest='section_strings',
        help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    try:
        (options, args) = parser.parse_args(command_args)
    except SystemExit:
        # optparse reports bad options (and handles --help) by calling
        # sys.exit(); return to the caller instead of exiting.
        return
    symbolicator = Symbolicator(debugger)
    if options.file:
        image = Image(options.file)
        image.arch = options.arch
        # Add any sections that were specified with one or more --section
        # options
        if options.section_strings:
            for section_str in options.section_strings:
                section = Section()
                if section.set_from_string(section_str):
                    image.add_section(section)
                else:
                    sys.exit(1)
        if options.slide is not None:
            image.slide = options.slide
        symbolicator.images.append(image)

    target = symbolicator.create_target()
    if options.verbose:
        print(symbolicator)
    if target:
        for addr_str in args:
            try:
                addr = int(addr_str, 0)
            except ValueError:
                print('error: invalid address "%s"' % addr_str)
                continue
            symbolicated_addrs = symbolicator.symbolicate(
                addr, options.verbose)
            # symbolicate() returns None when the address can't be resolved
            if symbolicated_addrs:
                for symbolicated_addr in symbolicated_addrs:
                    print(symbolicated_addr)
            print()
    else:
        print('error: no target for %s' % (symbolicator))
if __name__ == '__main__':
    # Create a new debugger instance
    debugger = lldb.SBDebugger.Create()
    Symbolicate(debugger, sys.argv[1:])
    # SBDebugger is only reachable through the lldb module; the bare name
    # is undefined in this file.
    lldb.SBDebugger.Destroy(debugger)