2 # ===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 # ===------------------------------------------------------------------------===#
11 asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log
15 This script provides a way for external plug-ins to hook into the behaviour of
16 various parts of this script (see `--plugins`). This is useful for situations
17 where it is necessary to handle site-specific quirks (e.g. binaries with debug
18 symbols only accessible via a remote service) without having to modify the
35 binutils_prefix
= None
36 fix_filename_patterns
= None
38 allow_system_symbolizer
= True
39 force_system_symbolizer
= False
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a source location string produced by a symbolizer.

    Every pattern in the module-level `fix_filename_patterns` (when set) is
    treated as a regular expression; the pattern and everything before it
    are cut from `file_name`. Locations inside ASan runtime sources
    (`asan_*.cc` / `asan_*.cpp`) are collapsed to `_asan_rtl_`, and
    `crtstuff.c:0` is replaced by `???:0`.

    Returns the rewritten string. Callers assign the result and call string
    methods on it, so the value must always be returned.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub(".*" + path_to_cut, "", file_name)
    # The dots before the file extensions are literal, so escape them.
    file_name = re.sub(r".*asan_[a-z_]*\.(cc|cpp):[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
    return file_name
72 # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
79 class Symbolizer(object):
83 def symbolize(self
, addr
, binary
, offset
):
84 """Symbolize the given address (pair of binary and offset).
86 Overridden in subclasses.
88 addr: virtual address of an instruction.
89 binary: path to executable/shared object containing this instruction.
90 offset: instruction offset in the @binary.
92 list of strings (one string for each inlined frame) describing
93 the code locations for this instruction (that is, function name, file
94 name, line and column numbers).
99 class LLVMSymbolizer(Symbolizer
):
100 def __init__(self
, symbolizer_path
, default_arch
, system
, dsym_hints
=[]):
101 super(LLVMSymbolizer
, self
).__init
__()
102 self
.symbolizer_path
= symbolizer_path
103 self
.default_arch
= default_arch
105 self
.dsym_hints
= dsym_hints
106 self
.pipe
= self
.open_llvm_symbolizer()
108 def open_llvm_symbolizer(self
):
110 self
.symbolizer_path
,
111 ("--demangle" if demangle
else "--no-demangle"),
112 "--functions=linkage",
114 "--default-arch=%s" % self
.default_arch
,
116 if self
.system
== "Darwin":
117 for hint
in self
.dsym_hints
:
118 cmd
.append("--dsym-hint=%s" % hint
)
119 logging
.debug(" ".join(cmd
))
121 result
= subprocess
.Popen(
123 stdin
=subprocess
.PIPE
,
124 stdout
=subprocess
.PIPE
,
126 universal_newlines
=True,
132 def symbolize(self
, addr
, binary
, offset
):
133 """Overrides Symbolizer.symbolize."""
138 symbolizer_input
= '"%s" %s' % (binary
, offset
)
139 logging
.debug(symbolizer_input
)
140 self
.pipe
.stdin
.write("%s\n" % symbolizer_input
)
142 function_name
= self
.pipe
.stdout
.readline().rstrip()
143 if not function_name
:
145 file_name
= self
.pipe
.stdout
.readline().rstrip()
146 file_name
= fix_filename(file_name
)
147 if not function_name
.startswith("??") or not file_name
.startswith("??"):
148 # Append only non-trivial frames.
149 result
.append("%s in %s %s" % (addr
, function_name
, file_name
))
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Build an LLVMSymbolizer, locating the llvm-symbolizer executable.

    The executable path is taken from the `LLVM_SYMBOLIZER_PATH` environment
    variable, then from `ASAN_SYMBOLIZER_PATH`; an unset or empty value
    falls through to the next choice.

    system: platform name forwarded to LLVMSymbolizer.
    default_arch: architecture forwarded to LLVMSymbolizer.
    dsym_hints: optional iterable of dSYM hints; defaults to no hints.
    """
    # Avoid a shared mutable default; `None` means "no hints".
    if dsym_hints is None:
        dsym_hints = []
    symbolizer_path = (
        os.getenv("LLVM_SYMBOLIZER_PATH")
        or os.getenv("ASAN_SYMBOLIZER_PATH")
        # Assume llvm-symbolizer is in PATH.
        or "llvm-symbolizer"
    )
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
167 class Addr2LineSymbolizer(Symbolizer
):
168 def __init__(self
, binary
):
169 super(Addr2LineSymbolizer
, self
).__init
__()
171 self
.pipe
= self
.open_addr2line()
172 self
.output_terminator
= -1
174 def open_addr2line(self
):
175 addr2line_tool
= "addr2line"
177 addr2line_tool
= binutils_prefix
+ addr2line_tool
178 logging
.debug("addr2line binary is %s" % shutil
.which(addr2line_tool
))
179 cmd
= [addr2line_tool
, "-fi"]
181 cmd
+= ["--demangle"]
182 cmd
+= ["-e", self
.binary
]
183 logging
.debug(" ".join(cmd
))
184 return subprocess
.Popen(
186 stdin
=subprocess
.PIPE
,
187 stdout
=subprocess
.PIPE
,
189 universal_newlines
=True,
192 def symbolize(self
, addr
, binary
, offset
):
193 """Overrides Symbolizer.symbolize."""
194 if self
.binary
!= binary
:
198 self
.pipe
.stdin
.write("%s\n" % offset
)
199 self
.pipe
.stdin
.write("%s\n" % self
.output_terminator
)
200 is_first_frame
= True
202 function_name
= self
.pipe
.stdout
.readline().rstrip()
203 logging
.debug("read function_name='%s' from addr2line" % function_name
)
204 # If llvm-symbolizer is installed as addr2line, older versions of
205 # llvm-symbolizer will print -1 when presented with -1 and not print
206 # a second line. In that case we will block for ever trying to read the
207 # file name. This also happens for non-existent files, in which case GNU
208 # addr2line exits immediately, but llvm-symbolizer does not (see
209 # https://llvm.org/PR42754).
210 if function_name
== "-1":
211 logging
.debug("got function '-1' -> no more input")
213 file_name
= self
.pipe
.stdout
.readline().rstrip()
214 logging
.debug("read file_name='%s' from addr2line" % file_name
)
216 is_first_frame
= False
217 elif function_name
== "??":
218 assert file_name
== "??:0", file_name
219 logging
.debug("got function '??' -> no more input")
221 elif not function_name
:
222 assert not file_name
, file_name
223 logging
.debug("got empty function name -> no more input")
225 if not function_name
and not file_name
:
227 "got empty function and file name -> unknown function"
231 lines
.append((function_name
, file_name
))
233 # EPIPE happens if addr2line exits early (which some implementations do
234 # if an invalid file is passed).
235 if e
.errno
== errno
.EPIPE
:
237 f
"addr2line exited early (broken pipe) returncode={self.pipe.poll()}"
241 "unexpected I/O exception communicating with addr2line", exc_info
=e
243 lines
.append(("??", "??:0"))
244 except Exception as e
:
246 "got unknown exception communicating with addr2line", exc_info
=e
248 lines
.append(("??", "??:0"))
250 "%s in %s %s" % (addr
, function
, fix_filename(file))
251 for (function
, file) in lines
255 class UnbufferedLineConverter(object):
257 Wrap a child process that responds to each line of input with one line of
258 output. Uses pty to trick the child into providing unbuffered output.
261 def __init__(self
, args
, close_stderr
=False):
262 # Local imports so that the script can start on Windows.
268 # We're the child. Transfer control to command.
270 dev_null
= os
.open("/dev/null", 0)
272 os
.execvp(args
[0], args
)
275 attr
= termios
.tcgetattr(fd
)
276 attr
[3] = attr
[3] & ~termios
.ECHO
277 termios
.tcsetattr(fd
, termios
.TCSANOW
, attr
)
278 # Set up a file()-like interface to the child process
279 self
.r
= os
.fdopen(fd
, "r", 1)
280 self
.w
= os
.fdopen(os
.dup(fd
), "w", 1)
282 def convert(self
, line
):
283 self
.w
.write(line
+ "\n")
284 return self
.readline()
287 return self
.r
.readline().rstrip()
290 class DarwinSymbolizer(Symbolizer
):
291 def __init__(self
, addr
, binary
, arch
):
292 super(DarwinSymbolizer
, self
).__init
__()
298 logging
.debug("atos -o %s -arch %s", self
.binary
, self
.arch
)
299 cmdline
= ["atos", "-o", self
.binary
, "-arch", self
.arch
]
300 self
.atos
= UnbufferedLineConverter(cmdline
, close_stderr
=True)
302 def symbolize(self
, addr
, binary
, offset
):
303 """Overrides Symbolizer.symbolize."""
304 if self
.binary
!= binary
:
306 if not os
.path
.exists(binary
):
307 # If the binary doesn't exist atos will exit which will lead to IOError
308 # exceptions being raised later on so just don't try to symbolize.
309 return ["{} ({}:{}+{})".format(addr
, binary
, self
.arch
, offset
)]
310 atos_line
= self
.atos
.convert("0x%x" % int(offset
, 16))
311 while "got symbolicator for" in atos_line
:
312 atos_line
= self
.atos
.readline()
313 # A well-formed atos response looks like this:
314 # foo(type1, type2) (in object.name) (filename.cc:80)
316 # * For C functions atos omits parentheses and argument types.
317 # * For C++ functions the function name (i.e., `foo` above) may contain
318 # templates which may contain parentheses.
319 match
= re
.match(r
"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line
)
320 logging
.debug("atos_line: %s", atos_line
)
322 function_name
= match
.group(1)
323 file_name
= fix_filename(match
.group(3))
324 return ["%s in %s %s" % (addr
, function_name
, file_name
)]
326 return ["%s in %s" % (addr
, atos_line
)]
329 # Chain several symbolizers so that if one symbolizer fails, we fall back
330 # to the next symbolizer in chain.
331 class ChainSymbolizer(Symbolizer
):
332 def __init__(self
, symbolizer_list
):
333 super(ChainSymbolizer
, self
).__init
__()
334 self
.symbolizer_list
= symbolizer_list
336 def symbolize(self
, addr
, binary
, offset
):
337 """Overrides Symbolizer.symbolize."""
338 for symbolizer
in self
.symbolizer_list
:
340 result
= symbolizer
.symbolize(addr
, binary
, offset
)
def append_symbolizer(self, symbolizer):
    """Add `symbolizer` to the end of this chain's symbolizer list."""
    chain = self.symbolizer_list
    chain.append(symbolizer)
349 def BreakpadSymbolizerFactory(binary
):
350 suffix
= os
.getenv("BREAKPAD_SUFFIX")
352 filename
= binary
+ suffix
353 if os
.access(filename
, os
.F_OK
):
354 return BreakpadSymbolizer(filename
)
def SystemSymbolizerFactory(system, addr, binary, arch):
    """Create the native symbolizer for `system`.

    Darwin gets a DarwinSymbolizer built from `addr`, `binary` and `arch`.
    Linux, FreeBSD, NetBSD and SunOS get an Addr2LineSymbolizer for `binary`.
    Any other system yields None.
    """
    if system == "Darwin":
        return DarwinSymbolizer(addr, binary, arch)
    if system in ("Linux", "FreeBSD", "NetBSD", "SunOS"):
        return Addr2LineSymbolizer(binary)
    return None
365 class BreakpadSymbolizer(Symbolizer
):
366 def __init__(self
, filename
):
367 super(BreakpadSymbolizer
, self
).__init
__()
368 self
.filename
= filename
369 lines
= file(filename
).readlines()
372 self
.address_list
= []
374 # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
375 fragments
= lines
[0].rstrip().split()
376 self
.arch
= fragments
[2]
377 self
.debug_id
= fragments
[3]
378 self
.binary
= " ".join(fragments
[4:])
379 self
.parse_lines(lines
[1:])
381 def parse_lines(self
, lines
):
382 cur_function_addr
= ""
384 fragments
= line
.split()
385 if fragments
[0] == "FILE":
386 assert int(fragments
[1]) == len(self
.files
)
387 self
.files
.append(" ".join(fragments
[2:]))
388 elif fragments
[0] == "PUBLIC":
389 self
.symbols
[int(fragments
[1], 16)] = " ".join(fragments
[3:])
390 elif fragments
[0] in ["CFI", "STACK"]:
392 elif fragments
[0] == "FUNC":
393 cur_function_addr
= int(fragments
[1], 16)
394 if not cur_function_addr
in self
.symbols
.keys():
395 self
.symbols
[cur_function_addr
] = " ".join(fragments
[4:])
397 # Line starting with an address.
398 addr
= int(fragments
[0], 16)
399 self
.address_list
.append(addr
)
400 # Tuple of symbol address, size, line, file number.
401 self
.addresses
[addr
] = (
403 int(fragments
[1], 16),
407 self
.address_list
.sort()
409 def get_sym_file_line(self
, addr
):
411 if addr
in self
.addresses
.keys():
414 index
= bisect
.bisect_left(self
.address_list
, addr
)
418 key
= self
.address_list
[index
- 1]
419 sym_id
, size
, line_no
, file_no
= self
.addresses
[key
]
420 symbol
= self
.symbols
[sym_id
]
421 filename
= self
.files
[file_no
]
422 if addr
< key
+ size
:
423 return symbol
, filename
, line_no
427 def symbolize(self
, addr
, binary
, offset
):
428 if self
.binary
!= binary
:
430 res
= self
.get_sym_file_line(int(offset
, 16))
432 function_name
, file_name
, line_no
= res
433 result
= ["%s in %s %s:%d" % (addr
, function_name
, file_name
, line_no
)]
440 class SymbolizationLoop(object):
441 def __init__(self
, plugin_proxy
=None, dsym_hint_producer
=None):
442 self
.plugin_proxy
= plugin_proxy
443 if sys
.platform
== "win32":
444 # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
445 # even in sandboxed processes. Nothing needs to be done here.
446 self
.process_line
= self
.process_line_echo
448 # Used by clients who may want to supply a different binary name.
449 # E.g. in Chrome several binaries may share a single .dSYM.
450 self
.dsym_hint_producer
= dsym_hint_producer
451 self
.system
= os
.uname()[0]
452 if self
.system
not in ["Linux", "Darwin", "FreeBSD", "NetBSD", "SunOS"]:
453 raise Exception("Unknown system")
454 self
.llvm_symbolizers
= {}
455 self
.last_llvm_symbolizer
= None
456 self
.dsym_hints
= set([])
458 self
.process_line
= self
.process_line_posix
459 self
.using_module_map
= plugin_proxy
.has_plugin(ModuleMapPlugIn
.get_name())
461 def symbolize_address(self
, addr
, binary
, offset
, arch
):
462 # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
463 # a single symbolizer binary.
464 # On Darwin, if the dsym hint producer is present:
465 # 1. check whether we've seen this binary already; if so,
466 # use |llvm_symbolizers[binary]|, which has already loaded the debug
467 # info for this binary (might not be the case for
468 # |last_llvm_symbolizer|);
469 # 2. otherwise check if we've seen all the hints for this binary already;
470 # if so, reuse |last_llvm_symbolizer| which has the full set of hints;
471 # 3. otherwise create a new symbolizer and pass all currently known
474 if not force_system_symbolizer
:
475 if not binary
in self
.llvm_symbolizers
:
476 use_new_symbolizer
= True
477 if self
.system
== "Darwin" and self
.dsym_hint_producer
:
478 dsym_hints_for_binary
= set(self
.dsym_hint_producer(binary
))
479 use_new_symbolizer
= bool(dsym_hints_for_binary
- self
.dsym_hints
)
480 self
.dsym_hints |
= dsym_hints_for_binary
481 if self
.last_llvm_symbolizer
and not use_new_symbolizer
:
482 self
.llvm_symbolizers
[binary
] = self
.last_llvm_symbolizer
484 self
.last_llvm_symbolizer
= LLVMSymbolizerFactory(
485 self
.system
, arch
, self
.dsym_hints
487 self
.llvm_symbolizers
[binary
] = self
.last_llvm_symbolizer
488 # Use the chain of symbolizers:
489 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
490 # (fall back to next symbolizer if the previous one fails).
491 if not binary
in symbolizers
:
492 symbolizers
[binary
] = ChainSymbolizer(
493 [BreakpadSymbolizerFactory(binary
), self
.llvm_symbolizers
[binary
]]
495 result
= symbolizers
[binary
].symbolize(addr
, binary
, offset
)
497 symbolizers
[binary
] = ChainSymbolizer([])
499 if not allow_system_symbolizer
:
500 raise Exception("Failed to launch or use llvm-symbolizer.")
501 # Initialize system symbolizer only if other symbolizers failed.
502 symbolizers
[binary
].append_symbolizer(
503 SystemSymbolizerFactory(self
.system
, addr
, binary
, arch
)
505 result
= symbolizers
[binary
].symbolize(addr
, binary
, offset
)
506 # The system symbolizer must produce some result.
510 def get_symbolized_lines(self
, symbolized_lines
, inc_frame_counter
=True):
511 if not symbolized_lines
:
512 if inc_frame_counter
:
514 return [self
.current_line
]
516 assert inc_frame_counter
518 for symbolized_frame
in symbolized_lines
:
520 " #%s %s" % (str(self
.frame_no
), symbolized_frame
.rstrip())
525 def process_logfile(self
):
528 processed
= self
.process_line(line
)
529 print("\n".join(processed
))
def process_line_echo(self, line):
    """Return `line`, minus trailing whitespace, as a one-element list."""
    stripped = line.rstrip()
    return [stripped]
534 def process_line_posix(self
, line
):
535 self
.current_line
= line
.rstrip()
537 # #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
538 # Partially symbolicated:
539 # #0 0x7f6e35cf2e45 in foo (foo.so+0x11fe45)
540 # NOTE: We have to be very liberal with symbol
541 # names in the regex because it could be an
542 # Objective-C or C++ demangled name.
543 stack_trace_line_format
= (
544 r
"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
546 match
= re
.match(stack_trace_line_format
, line
)
548 logging
.debug('Line "{}" does not match regex'.format(line
))
549 # Not a frame line so don't increment the frame counter.
550 return self
.get_symbolized_lines(None, inc_frame_counter
=False)
552 _
, frameno_str
, addr
, binary
, offset
= match
.groups()
554 if not self
.using_module_map
and not os
.path
.isabs(binary
):
555 # Do not try to symbolicate if the binary is just the module file name
556 # and a module map is unavailable.
557 # FIXME(dliew): This is currently necessary for reports on Darwin that are
558 # partially symbolicated by `atos`.
559 return self
.get_symbolized_lines(None)
561 # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
562 colon_pos
= binary
.rfind(":")
564 maybe_arch
= binary
[colon_pos
+ 1 :]
565 if is_valid_arch(maybe_arch
):
567 binary
= binary
[0:colon_pos
]
569 arch
= guess_arch(addr
)
570 if frameno_str
== "0":
571 # Assume that frame #0 is the first frame of new stack trace.
573 original_binary
= binary
574 binary
= self
.plugin_proxy
.filter_binary_path(binary
)
576 # The binary filter has told us this binary can't be symbolized.
577 logging
.debug('Skipping symbolication of binary "%s"', original_binary
)
578 return self
.get_symbolized_lines(None)
579 symbolized_line
= self
.symbolize_address(addr
, binary
, offset
, arch
)
580 if not symbolized_line
:
581 if original_binary
!= binary
:
582 symbolized_line
= self
.symbolize_address(
583 addr
, original_binary
, offset
, arch
585 return self
.get_symbolized_lines(symbolized_line
)
588 class AsanSymbolizerPlugInProxy(object):
590 Serves several purposes:
591 - Manages the lifetime of plugins (must be used in a `with` statement).
592 - Provides interface for calling into plugins from within this script.
597 self
._plugin
_names
= set()
599 def _load_plugin_from_file_impl_py_gt_2(self
, file_path
, globals_space
):
600 with
open(file_path
, "r") as f
:
601 exec(f
.read(), globals_space
, None)
603 def load_plugin_from_file(self
, file_path
):
604 logging
.info('Loading plugins from "{}"'.format(file_path
))
605 globals_space
= dict(globals())
606 # Provide function to register plugins
607 def register_plugin(plugin
):
608 logging
.info("Registering plugin %s", plugin
.get_name())
609 self
.add_plugin(plugin
)
611 globals_space
["register_plugin"] = register_plugin
612 if sys
.version_info
.major
< 3:
613 execfile(file_path
, globals_space
, None)
615 # Indirection here is to avoid a bug in older Python 2 versions:
616 # `SyntaxError: unqualified exec is not allowed in function ...`
617 self
._load
_plugin
_from
_file
_impl
_py
_gt
_2(file_path
, globals_space
)
def add_plugin(self, plugin):
    """Start managing `plugin`.

    `plugin` must be an AsanSymbolizerPlugIn instance. It is appended to the
    managed plug-in list, its name is recorded, and it is then passed this
    proxy through `_receive_proxy()`.
    """
    assert isinstance(plugin, AsanSymbolizerPlugIn)
    self._plugins.append(plugin)
    plugin_name = plugin.get_name()
    self._plugin_names.add(plugin_name)
    plugin._receive_proxy(self)
625 def remove_plugin(self
, plugin
):
626 assert isinstance(plugin
, AsanSymbolizerPlugIn
)
627 self
._plugins
.remove(plugin
)
628 self
._plugin
_names
.remove(plugin
.get_name())
629 logging
.debug("Removing plugin %s", plugin
.get_name())
def has_plugin(self, name):
    """
    Tell whether a plugin called `name` is among the plugin names
    currently recorded by this proxy.
    """
    known_names = self._plugin_names
    return name in known_names
def register_cmdline_args(self, parser):
    """Let every managed plug-in register its command line arguments on `parser`."""
    # Iterate over a snapshot of the plug-in list.
    for managed in tuple(self._plugins):
        managed.register_cmdline_args(parser)
644 def process_cmdline_args(self
, pargs
):
645 # Use copy so we can remove items as we iterate.
646 plugins
= list(self
._plugins
)
647 for plugin
in plugins
:
648 keep
= plugin
.process_cmdline_args(pargs
)
649 assert isinstance(keep
, bool)
651 self
.remove_plugin(plugin
)
656 def __exit__(self
, exc_type
, exc_val
, exc_tb
):
657 for plugin
in self
._plugins
:
659 # Don't suppress raised exceptions
662 def _filter_single_value(self
, function_name
, input_value
):
664 Helper for filter style plugin functions.
666 new_value
= input_value
667 for plugin
in self
._plugins
:
668 result
= getattr(plugin
, function_name
)(new_value
)
674 def filter_binary_path(self
, binary_path
):
676 Consult available plugins to filter the path to a binary
677 to make it suitable for symbolication.
679 Returns `None` if symbolication should not be attempted for this
682 return self
._filter
_single
_value
("filter_binary_path", binary_path
)
684 def filter_module_desc(self
, module_desc
):
686 Consult available plugins to determine the module
687 description suitable for symbolication.
689 Returns `None` if symbolication should not be attempted for this module.
691 assert isinstance(module_desc
, ModuleDesc
)
692 return self
._filter
_single
_value
("filter_module_desc", module_desc
)
695 class AsanSymbolizerPlugIn(object):
697 This is the interface the `asan_symbolize.py` code uses to talk
704 Returns the name of the plugin.
708 def _receive_proxy(self
, proxy
):
709 assert isinstance(proxy
, AsanSymbolizerPlugInProxy
)
712 def register_cmdline_args(self
, parser
):
714 Hook for registering command line arguments to be
715 consumed in `process_cmdline_args()`.
717 `parser` - Instance of `argparse.ArgumentParser`.
721 def process_cmdline_args(self
, pargs
):
723 Hook for handling parsed arguments. Implementations
724 should not modify `pargs`.
726 `pargs` - Instance of `argparse.Namespace` containing
727 parsed command line arguments.
729 Return `True` if plug-in should be used, otherwise
736 Hook called when a plugin is about to be destroyed.
737 Implementations should free any allocated resources here.
741 # Symbolization hooks
742 def filter_binary_path(self
, binary_path
):
744 Given a binary path return a binary path suitable for symbolication.
746 Implementations should return `None` if symbolication of this binary
751 def filter_module_desc(self
, module_desc
):
753 Given a ModuleDesc object (`module_desc`) return
754 a ModuleDesc suitable for symbolication.
756 Implementations should return `None` if symbolication of this binary
762 class ModuleDesc(object):
763 def __init__(self
, name
, arch
, start_addr
, end_addr
, module_path
, uuid
):
766 self
.start_addr
= start_addr
767 self
.end_addr
= end_addr
768 # Module path from an ASan report.
769 self
.module_path
= module_path
770 # Module for performing symbolization, by default same as above.
771 self
.module_path_for_symbolization
= module_path
773 assert self
.is_valid()
776 assert self
.is_valid()
777 return "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}".format(
780 start_addr
=self
.start_addr
,
781 end_addr
=self
.end_addr
,
782 module_path
=self
.module_path
783 if self
.module_path
== self
.module_path_for_symbolization
784 else "{} ({})".format(self
.module_path_for_symbolization
, self
.module_path
),
789 if not isinstance(self
.name
, str):
791 if not isinstance(self
.arch
, str):
793 if not isinstance(self
.start_addr
, int):
795 if self
.start_addr
< 0:
797 if not isinstance(self
.end_addr
, int):
799 if self
.end_addr
<= self
.start_addr
:
801 if not isinstance(self
.module_path
, str):
803 if not os
.path
.isabs(self
.module_path
):
805 if not isinstance(self
.module_path_for_symbolization
, str):
807 if not os
.path
.isabs(self
.module_path_for_symbolization
):
809 if not isinstance(self
.uuid
, str):
class GetUUIDFromBinaryException(Exception):
    """Error raised when a UUID cannot be obtained from a binary."""

    def __init__(self, msg):
        # `msg` becomes the exception's sole argument.
        Exception.__init__(self, msg)
819 _get_uuid_from_binary_cache
= dict()
822 def get_uuid_from_binary(path_to_binary
, arch
=None):
823 cache_key
= (path_to_binary
, arch
)
824 cached_value
= _get_uuid_from_binary_cache
.get(cache_key
)
827 if not os
.path
.exists(path_to_binary
):
828 raise GetUUIDFromBinaryException(
829 'Binary "{}" does not exist'.format(path_to_binary
)
831 cmd
= ["/usr/bin/otool", "-l"]
833 cmd
.extend(["-arch", arch
])
834 cmd
.append(path_to_binary
)
835 output
= subprocess
.check_output(cmd
, stderr
=subprocess
.STDOUT
)
836 # Look for this output:
839 # uuid 4CA778FE-5BF9-3C45-AE59-7DF01B2BE83F
840 if isinstance(output
, str):
843 assert isinstance(output
, bytes
)
844 output_str
= output
.decode()
845 assert isinstance(output_str
, str)
846 lines
= output_str
.split("\n")
848 for index
, line
in enumerate(lines
):
849 stripped_line
= line
.strip()
850 if not stripped_line
.startswith("cmd LC_UUID"):
852 uuid_line
= lines
[index
+ 2].strip()
853 if not uuid_line
.startswith("uuid"):
854 raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line
))
855 split_uuid_line
= uuid_line
.split()
856 uuid
= split_uuid_line
[1]
859 logging
.error("Failed to retrieve UUID from binary {}".format(path_to_binary
))
860 logging
.error("otool output was:\n{}".format(output_str
))
861 raise GetUUIDFromBinaryException(
862 'Failed to retrieve UUID from binary "{}"'.format(path_to_binary
)
866 _get_uuid_from_binary_cache
[cache_key
] = uuid
870 class ModuleMap(object):
872 self
._module
_name
_to
_description
_map
= dict()
def add_module(self, desc):
    """Record the ModuleDesc `desc` under its name.

    Asserts that `desc` is a ModuleDesc and that no module with the same
    name has been recorded already.
    """
    registry = self._module_name_to_description_map
    assert isinstance(desc, ModuleDesc)
    assert desc.name not in registry
    registry[desc.name] = desc
def find_module_by_name(self, name):
    """Return the description recorded for module `name`, or None if there is none."""
    registry = self._module_name_to_description_map
    return registry.get(name)
883 s
= "{} modules:\n".format(self
.num_modules
)
884 for module_desc
in sorted(
885 self
._module
_name
_to
_description
_map
.values(), key
=lambda v
: v
.start_addr
887 s
+= str(module_desc
) + "\n"
891 def num_modules(self
):
892 return len(self
._module
_name
_to
_description
_map
)
896 return set(self
._module
_name
_to
_description
_map
.values())
898 def get_module_path_for_symbolication(self
, module_name
, proxy
, validate_uuid
):
899 module_desc
= self
.find_module_by_name(module_name
)
900 if module_desc
is None:
902 # Allow a plug-in to change the module description to make it
903 # suitable for symbolication or avoid symbolication altogether.
904 module_desc
= proxy
.filter_module_desc(module_desc
)
905 if module_desc
is None:
909 "Validating UUID of {}".format(
910 module_desc
.module_path_for_symbolization
914 uuid
= get_uuid_from_binary(
915 module_desc
.module_path_for_symbolization
, arch
=module_desc
.arch
917 if uuid
!= module_desc
.uuid
:
919 "Detected UUID mismatch {} != {}".format(uuid
, module_desc
.uuid
)
921 # UUIDs don't match. Tell client to not symbolize this.
923 except GetUUIDFromBinaryException
as e
:
924 logging
.error("Failed to get binary from UUID: %s", str(e
))
928 "Skipping validation of UUID of {}".format(
929 module_desc
.module_path_for_symbolization
932 return module_desc
.module_path_for_symbolization
935 def parse_from_file(module_map_path
):
936 if not os
.path
.exists(module_map_path
):
937 raise Exception('module map "{}" does not exist'.format(module_map_path
))
938 with
open(module_map_path
, "r") as f
:
941 # 0x2db4000-0x102ddc000 /path/to (arm64) <0D6BBDE0-FF90-3680-899D-8E6F9528E04C>
942 hex_regex
= lambda name
: r
"0x(?P<" + name
+ r
">[0-9a-f]+)"
943 module_path_regex
= r
"(?P<path>.+)"
944 arch_regex
= r
"\((?P<arch>.+)\)"
945 uuid_regex
= r
"<(?P<uuid>[0-9A-Z-]+)>"
946 line_regex
= r
"^{}-{}\s+{}\s+{}\s+{}".format(
947 hex_regex("start_addr"),
948 hex_regex("end_addr"),
953 matcher
= re
.compile(line_regex
)
960 if line
.startswith("Process module map:"):
963 if line
.startswith("End of module map"):
965 m_obj
= matcher
.match(line
)
968 'Failed to parse line {} "{}"'.format(line_num
, line
)
970 arch
= m_obj
.group("arch")
971 start_addr
= int(m_obj
.group("start_addr"), base
=16)
972 end_addr
= int(m_obj
.group("end_addr"), base
=16)
973 module_path
= m_obj
.group("path")
974 uuid
= m_obj
.group("uuid")
975 module_desc
= ModuleDesc(
976 name
=os
.path
.basename(module_path
),
978 start_addr
=start_addr
,
980 module_path
=module_path
,
983 mm
.add_module(module_desc
)
986 'Loaded Module map from "{}":\n{}'.format(f
.name
, str(mm
))
991 class SysRootFilterPlugIn(AsanSymbolizerPlugIn
):
993 Simple plug-in to add sys root prefix to all binary paths
994 used for symbolication.
998 self
.sysroot_path
= ""
1000 def register_cmdline_args(self
, parser
):
1001 parser
.add_argument(
1005 help="set path to sysroot for sanitized binaries",
1008 def process_cmdline_args(self
, pargs
):
1009 if pargs
.sys_root
is None:
1010 # Not being used so remove ourselves.
1012 self
.sysroot_path
= pargs
.sys_root
def filter_binary_path(self, path):
    """Return `path` with the configured sysroot path prepended."""
    prefixed = self.sysroot_path + path
    return prefixed
1019 class ModuleMapPlugIn(AsanSymbolizerPlugIn
):
1021 self
._module
_map
= None
1022 self
._uuid
_validation
= True
1024 def register_cmdline_args(self
, parser
):
1025 parser
.add_argument(
1027 help="Path to text file containing module map"
1028 "output. See print_module_map ASan option.",
1030 parser
.add_argument(
1031 "--skip-uuid-validation",
1033 action
="store_true",
1034 help="Skips validating UUID of modules using otool.",
def process_cmdline_args(self, pargs):
    """Load the module map named on the command line, if any.

    `pargs` - parsed command line arguments; `module_map` and
              `skip_uuid_validation` are read from it.

    Returns `False` when no module map was given (the plug-in is not used)
    and `True` once the module map has been loaded. Raises `Exception` when
    parsing produces no module map.
    """
    if not pargs.module_map:
        return False
    # Use the namespace passed to this hook rather than the script-level
    # `args` global, which only exists when the file runs as `__main__`.
    self._module_map = ModuleMap.parse_from_file(pargs.module_map)
    if self._module_map is None:
        msg = "Failed to find module map"
        logging.error(msg)
        raise Exception(msg)
    self._uuid_validation = not pargs.skip_uuid_validation
    return True
1048 def filter_binary_path(self
, binary_path
):
1049 if os
.path
.isabs(binary_path
):
1050 # This is a binary path so transform into
1052 module_name
= os
.path
.basename(binary_path
)
1054 module_name
= binary_path
1055 return self
._module
_map
.get_module_path_for_symbolication(
1056 module_name
, self
.proxy
, self
._uuid
_validation
1060 def add_logging_args(parser
):
1061 parser
.add_argument(
1064 help="Destination path for script logging (default stderr).",
1066 parser
.add_argument(
1068 choices
=["debug", "info", "warning", "error", "critical"],
1070 help="Log level for script (default: %(default)s).",
1074 def setup_logging():
1075 # Set up a parser just for parsing the logging arguments.
1076 # This is necessary because logging should be configured before we
1077 # perform the main argument parsing.
1078 parser
= argparse
.ArgumentParser(add_help
=False)
1079 add_logging_args(parser
)
1080 pargs
, unparsed_args
= parser
.parse_known_args()
1082 log_level
= getattr(logging
, pargs
.log_level
.upper())
1083 if log_level
== logging
.DEBUG
:
1085 "%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s"
1088 log_format
= "%(levelname)s: %(message)s"
1089 basic_config
= {"level": log_level
, "format": log_format
}
1090 log_dest
= pargs
.log_dest
1092 basic_config
["filename"] = log_dest
1093 logging
.basicConfig(**basic_config
)
1095 'Logging level set to "{}" and directing output to "{}"'.format(
1096 pargs
.log_level
, "stderr" if log_dest
is None else log_dest
1099 return unparsed_args
def add_load_plugin_args(parser):
    """Register the `-p` / `--plugins` option (one or more values) on `parser`."""
    option_settings = {
        "help": "Load plug-in",
        "nargs": "+",
        "default": [],
    }
    parser.add_argument("-p", "--plugins", **option_settings)
def setup_plugins(plugin_proxy, args):
    """Load the requested plug-ins and the built-in ones into `plugin_proxy`.

    plugin_proxy: proxy that receives the plug-ins.
    args: command line arguments still to be parsed; `None` makes argparse
        fall back to `sys.argv`.

    Returns the arguments that the plug-in options did not consume.
    """
    parser = argparse.ArgumentParser(add_help=False)
    add_load_plugin_args(parser)
    # Parse the list we were handed instead of silently re-reading sys.argv.
    pargs, unparsed_args = parser.parse_known_args(args)
    for plugin_path in pargs.plugins:
        plugin_proxy.load_plugin_from_file(plugin_path)
    # Add built-in plugins.
    plugin_proxy.add_plugin(ModuleMapPlugIn())
    plugin_proxy.add_plugin(SysRootFilterPlugIn())
    return unparsed_args
1118 if __name__
== "__main__":
1119 remaining_args
= setup_logging()
1120 with
AsanSymbolizerPlugInProxy() as plugin_proxy
:
1121 remaining_args
= setup_plugins(plugin_proxy
, remaining_args
)
1122 parser
= argparse
.ArgumentParser(
1123 formatter_class
=argparse
.RawDescriptionHelpFormatter
,
1124 description
="ASan symbolization script",
1127 parser
.add_argument(
1130 help="pattern to be cut from the result file path ",
1132 parser
.add_argument(
1133 "-d", "--demangle", action
="store_true", help="demangle function names"
1135 parser
.add_argument(
1136 "-c", metavar
="CROSS_COMPILE", help="set prefix for binutils"
1138 parser
.add_argument(
1142 type=argparse
.FileType("r"),
1143 help="set log file name to parse, default is stdin",
1145 parser
.add_argument(
1146 "--force-system-symbolizer",
1147 action
="store_true",
1148 help="don't use llvm-symbolizer",
1150 # Add logging arguments so that `--help` shows them.
1151 add_logging_args(parser
)
1152 # Add load plugin arguments so that `--help` shows them.
1153 add_load_plugin_args(parser
)
1154 plugin_proxy
.register_cmdline_args(parser
)
1155 args
= parser
.parse_args(remaining_args
)
1156 plugin_proxy
.process_cmdline_args(args
)
1157 if args
.path_to_cut
:
1158 fix_filename_patterns
= args
.path_to_cut
1162 binutils_prefix
= args
.c
1164 logfile
= args
.logfile
1167 if args
.force_system_symbolizer
:
1168 force_system_symbolizer
= True
1169 if force_system_symbolizer
:
1170 assert allow_system_symbolizer
1171 loop
= SymbolizationLoop(plugin_proxy
)
1172 loop
.process_logfile()