3 # ----------------------------------------------------------------------
4 # Be sure to add the python path that points to the LLDB shared library.
6 # To use this in the embedded python interpreter using "lldb":
8 # cd /path/containing/crashlog.py
10 # (lldb) script import crashlog
11 # "crashlog" command installed, type "crashlog --help" for detailed help
12 # (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
14 # The benefit of running the crashlog command inside lldb in the
15 # embedded python interpreter is when the command completes, there
16 # will be a target with all of the files loaded at the locations
17 # described in the crash log. Only the files that have stack frames
18 # in the backtrace will be loaded unless the "--load-all" option
19 # has been specified. This allows users to explore the program in the
20 # state it was in right at crash time.
22 # On MacOSX csh, tcsh:
23 # ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
26 # PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27 # ----------------------------------------------------------------------
43 """Class that represents an address that will be symbolicated"""
45 def __init__(self
, target
, load_addr
):
47 self
.load_addr
= load_addr
# The load address that this object represents
48 # the resolved lldb.SBAddress (if any), named so_addr for
49 # section/offset address
51 self
.sym_ctx
= None # The cached symbol context for this address
52 # Any original textual description of this address to be used as a
53 # backup in case symbolication fails
54 self
.description
= None
55 self
.symbolication
= (
56 None # The cached symbolicated string that describes this address
61 s
= "%#16.16x" % (self
.load_addr
)
62 if self
.symbolication
:
63 s
+= " %s" % (self
.symbolication
)
64 elif self
.description
:
65 s
+= " %s" % (self
.description
)
67 s
+= " %s" % (self
.so_addr
)
70 def resolve_addr(self
):
71 if self
.so_addr
is None:
72 self
.so_addr
= self
.target
.ResolveLoadAddress(self
.load_addr
)
78 def get_symbol_context(self
):
79 if self
.sym_ctx
is None:
80 sb_addr
= self
.resolve_addr()
82 self
.sym_ctx
= self
.target
.ResolveSymbolContextForAddress(
83 sb_addr
, lldb
.eSymbolContextEverything
86 self
.sym_ctx
= lldb
.SBSymbolContext()
89 def get_instructions(self
):
90 sym_ctx
= self
.get_symbol_context()
92 function
= sym_ctx
.GetFunction()
94 return function
.GetInstructions(self
.target
)
95 return sym_ctx
.GetSymbol().GetInstructions(self
.target
)
98 def symbolicate(self
, verbose
=False):
99 if self
.symbolication
is None:
100 self
.symbolication
= ""
102 sym_ctx
= self
.get_symbol_context()
104 module
= sym_ctx
.GetModule()
106 # Print full source file path in verbose mode
108 self
.symbolication
+= str(module
.GetFileSpec()) + "`"
110 self
.symbolication
+= module
.GetFileSpec().GetFilename() + "`"
111 function_start_load_addr
= -1
112 function
= sym_ctx
.GetFunction()
113 block
= sym_ctx
.GetBlock()
114 line_entry
= sym_ctx
.GetLineEntry()
115 symbol
= sym_ctx
.GetSymbol()
116 inlined_block
= block
.GetContainingInlinedBlock()
118 self
.symbolication
+= function
.GetName()
122 self
.symbolication
+= (
123 " [inlined] " + inlined_block
.GetInlinedName()
126 inlined_block
.GetRangeIndexForBlockAddress(self
.so_addr
)
128 if block_range_idx
< lldb
.UINT32_MAX
:
129 block_range_start_addr
= (
130 inlined_block
.GetRangeStartAddress(block_range_idx
)
132 function_start_load_addr
= (
133 block_range_start_addr
.GetLoadAddress(self
.target
)
135 if function_start_load_addr
== -1:
136 function_start_load_addr
= (
137 function
.GetStartAddress().GetLoadAddress(self
.target
)
140 self
.symbolication
+= symbol
.GetName()
141 function_start_load_addr
= (
142 symbol
.GetStartAddress().GetLoadAddress(self
.target
)
145 self
.symbolication
= ""
148 # Dump the offset from the current function or symbol if it
150 function_offset
= self
.load_addr
- function_start_load_addr
151 if function_offset
> 0:
152 self
.symbolication
+= " + %u" % (function_offset
)
153 elif function_offset
< 0:
154 self
.symbolication
+= (
155 " %i (invalid negative offset, file a bug) "
159 # Print out any line information if any is available
160 if line_entry
.GetFileSpec():
161 # Print full source file path in verbose mode
163 self
.symbolication
+= " at %s" % line_entry
.GetFileSpec()
165 self
.symbolication
+= (
166 " at %s" % line_entry
.GetFileSpec().GetFilename()
168 self
.symbolication
+= ":%u" % line_entry
.GetLine()
169 column
= line_entry
.GetColumn()
171 self
.symbolication
+= ":%u" % column
177 """Class that represents a load address range"""
179 sect_info_regex
= re
.compile("(?P<name>[^=]+)=(?P<range>.*)")
180 addr_regex
= re
.compile("^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$")
181 range_regex
= re
.compile(
182 "^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$"
185 def __init__(self
, start_addr
=None, end_addr
=None, name
=None):
186 self
.start_addr
= start_addr
187 self
.end_addr
= end_addr
191 def InitWithSBTargetAndSBSection(cls
, target
, section
):
192 sect_load_addr
= section
.GetLoadAddress(target
)
193 if sect_load_addr
!= lldb
.LLDB_INVALID_ADDRESS
:
194 obj
= cls(sect_load_addr
, sect_load_addr
+ section
.size
, section
.name
)
def contains(self, addr):
    """Return whether ``addr`` falls inside this address range.

    The range is half-open: ``start_addr`` is included and ``end_addr``
    is excluded.
    """
    return self.start_addr <= addr < self.end_addr
202 def set_from_string(self
, s
):
203 match
= self
.sect_info_regex
.match(s
)
205 self
.name
= match
.group("name")
206 range_str
= match
.group("range")
207 addr_match
= self
.addr_regex
.match(range_str
)
209 self
.start_addr
= int(addr_match
.group("start"), 16)
213 range_match
= self
.range_regex
.match(range_str
)
215 self
.start_addr
= int(range_match
.group("start"), 16)
216 self
.end_addr
= int(range_match
.group("end"), 16)
217 op
= range_match
.group("op")
219 self
.end_addr
+= self
.start_addr
221 print('error: invalid section info string "%s"' % s
)
222 print("Valid section info formats are:")
223 print("Format Example Description")
224 print("--------------------- -----------------------------------------------")
226 "<name>=<base> __TEXT=0x123000 Section from base address only"
229 "<name>=<base>-<end> __TEXT=0x123000-0x124000 Section from base address and end address"
232 "<name>=<base>+<size> __TEXT=0x123000+0x1000 Section from base address and size"
238 if self
.end_addr
is not None:
239 if self
.start_addr
is not None:
240 return "%s=[0x%16.16x - 0x%16.16x)" % (
246 if self
.start_addr
is not None:
247 return "%s=0x%16.16x" % (self
.name
, self
.start_addr
)
253 """A class that represents an executable image and any associated data"""
255 def __init__(self
, path
, uuid
=None):
257 self
.resolved_path
= None
259 self
.resolved
= False
260 self
.unavailable
= False
262 self
.section_infos
= list()
263 self
.identifier
= None
269 self
.symbols
= dict()
272 def InitWithSBTargetAndSBModule(cls
, target
, module
):
273 """Initialize this Image object with a module from a target."""
274 obj
= cls(module
.file.fullpath
, module
.uuid
)
275 obj
.resolved_path
= module
.platform_file
.fullpath
277 for section
in module
.sections
:
278 symb_section
= Section
.InitWithSBTargetAndSBSection(target
, section
)
280 obj
.section_infos
.append(symb_section
)
281 obj
.arch
= module
.triple
def dump(self, prefix):
    """Print ``prefix`` immediately followed by this object's string form."""
    text = "%s%s" % (prefix, self)
    print(text)
def debug_dump(self):
    """Print every attribute of this image to stdout, one per line.

    Each line has the form ``name = value``.  ``slide`` is optional on an
    image (other methods check ``self.slide is not None``), so an unset
    slide is reported as ``slide = None`` instead of raising ``TypeError``
    from the integer format specifiers.
    """
    # One-element tuples keep the formatting correct even if an attribute
    # happens to hold a tuple itself.
    print('path = "%s"' % (self.path,))
    print('resolved_path = "%s"' % (self.resolved_path,))
    print("resolved = %i" % (self.resolved,))
    print("unavailable = %i" % (self.unavailable,))
    print("uuid = %s" % (self.uuid,))
    print("section_infos = %s" % (self.section_infos,))
    print('identifier = "%s"' % (self.identifier,))
    print("version = %s" % (self.version,))
    print("arch = %s" % (self.arch,))
    print("module = %s" % (self.module,))
    print('symfile = "%s"' % (self.symfile,))
    if self.slide is None:
        # "%i" / "%x" cannot format None; report the unset slide verbatim.
        print("slide = None")
    else:
        print("slide = %i (0x%x)" % (self.slide, self.slide))
307 s
+= "%s " % (self
.get_uuid())
309 s
+= "%s " % (self
.arch
)
311 s
+= "%s " % (self
.version
)
312 resolved_path
= self
.get_resolved_path()
314 s
+= "%s " % (resolved_path
)
315 for section_info
in self
.section_infos
:
316 s
+= ", %s" % (section_info
)
317 if self
.slide
is not None:
318 s
+= ", slide = 0x%16.16x" % self
.slide
def add_section(self, section):
    """Append ``section`` to this image's list of section infos."""
    section_list = self.section_infos
    section_list.append(section)
325 def get_section_containing_load_addr(self
, load_addr
):
326 for section_info
in self
.section_infos
:
327 if section_info
.contains(load_addr
):
331 def get_resolved_path(self
):
332 if self
.resolved_path
:
333 return self
.resolved_path
338 def get_resolved_path_basename(self
):
339 path
= self
.get_resolved_path()
341 return os
.path
.basename(path
)
344 def symfile_basename(self
):
346 return os
.path
.basename(self
.symfile
)
def has_section_load_info(self):
    """Tell whether this image carries any load-address information.

    Returns the ``section_infos`` collection itself when it is non-empty
    (a truthy value); otherwise returns a bool stating whether ``slide``
    has been set.
    """
    infos = self.section_infos
    if infos:
        return infos
    return self.slide is not None
352 def load_module(self
, target
):
354 return None # We already warned that we couldn't find this module, so don't return an error string
355 # Load this module into "target" using the section infos to
356 # set the section load addresses
357 if self
.has_section_load_info():
360 if self
.section_infos
:
361 num_sections_loaded
= 0
362 for section_info
in self
.section_infos
:
363 if section_info
.name
:
364 section
= self
.module
.FindSection(section_info
.name
)
366 error
= target
.SetSectionLoadAddress(
367 section
, section_info
.start_addr
370 num_sections_loaded
+= 1
372 return "error: %s" % error
.GetCString()
375 'error: unable to find the section named "%s"'
379 return 'error: unable to find "%s" section in "%s"' % (
381 self
.get_resolved_path(),
383 if num_sections_loaded
== 0:
384 return "error: no sections were successfully loaded"
386 err
= target
.SetModuleLoadAddress(self
.module
, self
.slide
)
388 return err
.GetCString()
391 return "error: invalid module"
393 return "error: invalid target"
395 return "error: no section infos"
397 def add_module(self
, target
, obj_dir
=None):
398 """Add the Image described in this object to "target" and load the sections if "load" is True."""
399 if not self
.path
and self
.uuid
== uuid
.UUID(int=0):
400 return "error: invalid image"
403 # Try and find using UUID only first so that paths need not match
405 uuid_str
= self
.get_normalized_uuid_string()
407 self
.module
= target
.AddModule(None, None, uuid_str
)
408 if not self
.module
and self
.resolve
:
409 self
.locate_module_and_debug_symbols()
410 if not self
.unavailable
:
411 resolved_path
= self
.get_resolved_path()
412 self
.module
= target
.AddModule(
413 resolved_path
, None, uuid_str
, self
.symfile
415 if not self
.module
and self
.section_infos
:
416 name
= os
.path
.basename(self
.path
)
417 if obj_dir
and os
.path
.isdir(obj_dir
):
419 "triple": target
.triple
,
421 "type": "sharedlibrary",
425 for section
in self
.section_infos
:
426 data
["sections"].append(
428 "name": section
.name
,
429 "size": section
.end_addr
- section
.start_addr
,
432 data
["symbols"] = list(self
.symbols
.values())
433 obj_file
= os
.path
.join(obj_dir
, name
)
434 with
open(obj_file
, "w") as f
:
435 f
.write(json
.dumps(data
, indent
=4))
436 self
.module
= target
.AddModule(obj_file
, None, uuid_str
)
438 # If we were able to add the module with inlined
439 # symbols, we should mark it as available so load_module
440 # does not exit early.
441 self
.unavailable
= False
442 if not self
.module
and not self
.unavailable
:
443 return 'error: unable to get module for (%s) "%s"' % (
445 self
.get_resolved_path(),
447 if self
.has_section_load_info():
448 return self
.load_module(target
)
451 None # No sections, the module was added to the target, so success
454 return "error: invalid target"
456 def locate_module_and_debug_symbols(self
):
457 # By default, just use the paths that were supplied in:
462 # Subclasses can inherit from this class and override this function
467 if not self
.uuid
and self
.module
:
468 self
.uuid
= uuid
.UUID(self
.module
.GetUUIDString())
471 def get_normalized_uuid_string(self
):
473 return str(self
.uuid
).upper()
476 def create_target(self
, debugger
):
477 """Create a target using the information in this Image object."""
481 if self
.locate_module_and_debug_symbols():
482 resolved_path
= self
.get_resolved_path()
483 path_spec
= lldb
.SBFileSpec(resolved_path
)
484 error
= lldb
.SBError()
485 target
= debugger
.CreateTarget(resolved_path
, self
.arch
, None, False, error
)
487 self
.module
= target
.FindModule(path_spec
)
488 if self
.has_section_load_info():
489 err
= self
.load_module(target
)
491 print("ERROR: ", err
)
495 'error: unable to create a valid target for (%s) "%s"'
496 % (self
.arch
, self
.path
)
500 'error: unable to locate main executable (%s) "%s"'
501 % (self
.arch
, self
.path
)
507 def __init__(self
, debugger
=None, target
=None, images
=None):
508 """A class that represents the information needed to symbolicate
509 addresses in a program.
511 Do not call this initializer directly, but rather use the factory
514 self
.debugger
= debugger
516 # a list of images to be used when symbolicating
517 self
.images
= images
if images
else list()
518 self
.addr_mask
= 0xFFFFFFFFFFFFFFFF
521 def InitWithSBTarget(cls
, target
):
522 """Initialize a new Symbolicator with an existing SBTarget."""
523 obj
= cls(target
=target
)
524 triple
= target
.triple
526 arch
= triple
.split("-")[0]
528 obj
.addr_mask
= 0xFFFFFFFFFFFFFFFE
530 for module
in target
.modules
:
531 image
= Image
.InitWithSBTargetAndSBModule(target
, module
)
532 obj
.images
.append(image
)
536 def InitWithSBDebugger(cls
, debugger
, images
):
537 """Initialize a new Symbolicator with an existing debugger and list of
538 images. The Symbolicator will create the target."""
539 obj
= cls(debugger
=debugger
, images
=images
)
543 s
= "Symbolicator:\n"
545 s
+= "Target = '%s'\n" % (self
.target
)
546 s
+= "Target modules:\n"
547 for m
in self
.target
.modules
:
550 for image
in self
.images
:
551 s
+= " %s\n" % (image
)
554 def find_images_with_identifier(self
, identifier
):
556 for image
in self
.images
:
557 if image
.identifier
== identifier
:
560 regex_text
= "^.*\.%s$" % (re
.escape(identifier
))
561 regex
= re
.compile(regex_text
)
562 for image
in self
.images
:
563 if regex
.match(image
.identifier
):
567 def find_image_containing_load_addr(self
, load_addr
):
568 for image
in self
.images
:
569 if image
.get_section_containing_load_addr(load_addr
):
573 def create_target(self
):
578 for image
in self
.images
:
579 self
.target
= image
.create_target(self
.debugger
)
581 if self
.target
.GetAddressByteSize() == 4:
582 triple
= self
.target
.triple
584 arch
= triple
.split("-")[0]
586 self
.addr_mask
= 0xFFFFFFFFFFFFFFFE
590 def symbolicate(self
, load_addr
, verbose
=False):
595 process
= self
.target
.process
597 state
= process
.state
598 if state
> lldb
.eStateUnloaded
and state
< lldb
.eStateDetached
:
600 # If we don't have a live process, we can attempt to find the image
601 # that a load address belongs to and lazily load its module in the
602 # target, but we shouldn't do any of this if we have a live process
604 image
= self
.find_image_containing_load_addr(load_addr
)
606 image
.add_module(self
.target
)
607 symbolicated_address
= Address(self
.target
, load_addr
)
608 if symbolicated_address
.symbolicate(verbose
):
609 if symbolicated_address
.so_addr
:
610 symbolicated_addresses
= list()
611 symbolicated_addresses
.append(symbolicated_address
)
612 # See if we were able to reconstruct anything?
614 inlined_parent_so_addr
= lldb
.SBAddress()
615 inlined_parent_sym_ctx
= (
616 symbolicated_address
.sym_ctx
.GetParentOfInlinedScope(
617 symbolicated_address
.so_addr
, inlined_parent_so_addr
620 if not inlined_parent_sym_ctx
:
622 if not inlined_parent_so_addr
:
625 symbolicated_address
= Address(
627 inlined_parent_so_addr
.GetLoadAddress(self
.target
),
629 symbolicated_address
.sym_ctx
= inlined_parent_sym_ctx
630 symbolicated_address
.so_addr
= inlined_parent_so_addr
631 symbolicated_address
.symbolicate(verbose
)
633 # push the new frame onto the new frame stack
634 symbolicated_addresses
.append(symbolicated_address
)
636 if symbolicated_addresses
:
637 return symbolicated_addresses
639 print("error: no target in Symbolicator")
643 def disassemble_instructions(
644 target
, instructions
, pc
, insts_before_pc
, insts_after_pc
, non_zeroeth_frame
649 for inst_idx
, inst
in enumerate(instructions
):
650 inst_pc
= inst
.GetAddress().GetLoadAddress(target
)
653 mnemonic
= inst
.GetMnemonic(target
)
654 operands
= inst
.GetOperands(target
)
655 comment
= inst
.GetComment(target
)
656 lines
.append("%#16.16x: %8s %s" % (inst_pc
, mnemonic
, operands
))
658 line_len
= len(lines
[-1])
659 if line_len
< comment_column
:
660 lines
[-1] += " " * (comment_column
- line_len
)
661 lines
[-1] += "; %s" % comment
664 # If we are disassembling the non-zeroeth frame, we need to backup the
666 if non_zeroeth_frame
and pc_index
> 0:
667 pc_index
= pc_index
- 1
668 if insts_before_pc
== -1:
671 start_idx
= pc_index
- insts_before_pc
674 if insts_before_pc
== -1:
677 end_idx
= pc_index
+ insts_after_pc
678 if end_idx
> inst_idx
:
680 for i
in range(start_idx
, end_idx
+ 1):
682 print(" -> ", lines
[i
])
687 def print_module_section_data(section
):
689 section_data
= section
.GetSectionData()
691 ostream
= lldb
.SBStream()
692 section_data
.GetDescription(ostream
, section
.GetFileAddress())
693 print(ostream
.GetData())
696 def print_module_section(section
, depth
):
699 num_sub_sections
= section
.GetNumSubSections()
700 for sect_idx
in range(num_sub_sections
):
701 print_module_section(section
.GetSubSectionAtIndex(sect_idx
), depth
- 1)
def print_module_sections(module, depth):
    """Print each section yielded by ``module.section_iter()``.

    Every section is handed to ``print_module_section`` together with
    ``depth``.
    """
    sections = module.section_iter()
    for current in sections:
        print_module_section(current, depth)
709 def print_module_symbols(module
):
714 def Symbolicate(debugger
, command_args
):
715 usage
= "usage: %prog [options] <addr1> [addr2 ...]"
717 """Symbolicate one or more addresses using LLDB's python scripting API.."""
719 parser
= optparse
.OptionParser(
720 description
=description
, prog
="crashlog.py", usage
=usage
727 help="display verbose debug info",
736 help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".',
744 help="Specify a file to use when symbolicating",
752 help="Specify a architecture to use when symbolicating",
760 help="Specify the slide to use on the file specified with the --file option",
767 dest
="section_strings",
768 help="specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>",
771 (options
, args
) = parser
.parse_args(command_args
)
774 symbolicator
= Symbolicator(debugger
)
777 image
= Image(options
.file)
778 image
.arch
= options
.arch
779 # Add any sections that were specified with one or more --section
781 if options
.section_strings
:
782 for section_str
in options
.section_strings
:
784 if section
.set_from_string(section_str
):
785 image
.add_section(section
)
788 if options
.slide
is not None:
789 image
.slide
= options
.slide
790 symbolicator
.images
.append(image
)
792 target
= symbolicator
.create_target()
796 for addr_str
in args
:
797 addr
= int(addr_str
, 0)
798 symbolicated_addrs
= symbolicator
.symbolicate(addr
, options
.verbose
)
799 for symbolicated_addr
in symbolicated_addrs
:
800 print(symbolicated_addr
)
803 print("error: no target for %s" % (symbolicator
))
if __name__ == "__main__":
    # Running as a stand-alone script: create a private debugger instance,
    # symbolicate the addresses given on the command line, then tear the
    # debugger down again.
    debugger = lldb.SBDebugger.Create()
    try:
        Symbolicate(debugger, sys.argv[1:])
    finally:
        # SBDebugger must be reached through the lldb module, matching the
        # Create() call above; the bare name is not defined in this block.
        lldb.SBDebugger.Destroy(debugger)