Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / compiler-rt / lib / asan / scripts / asan_symbolize.py
blobb08769614aeb18f102989bf9af8e55c5f02b2f06
1 #!/usr/bin/env python
2 # ===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 # ===------------------------------------------------------------------------===#
9 """
10 Example of use:
11 asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log
13 PLUGINS
15 This script provides a way for external plug-ins to hook into the behaviour of
16 various parts of this script (see `--plugins`). This is useful for situations
17 where it is necessary to handle site-specific quirks (e.g. binaries with debug
18 symbols only accessible via a remote service) without having to modify the
19 script itself.
21 """
22 import argparse
23 import bisect
24 import errno
25 import getopt
26 import logging
27 import os
28 import re
29 import shutil
30 import subprocess
31 import sys
# Per-binary symbolizer chains, keyed by the binary path being symbolized.
symbolizers = {}
# When true, the external symbolizer tools are asked to demangle names.
demangle = False
# Optional prefix prepended to the `addr2line` tool name (cross toolchains).
binutils_prefix = None
# Optional list of regex fragments; fix_filename() cuts everything up to and
# including each of them out of reported file names.
fix_filename_patterns = None
# Stream the report is read from.
logfile = sys.stdin
# Whether falling back to the system symbolizer (addr2line/atos) is permitted.
allow_system_symbolizer = True
# When true, llvm-symbolizer/Breakpad are skipped and only the system
# symbolizer is used.
force_system_symbolizer = False
41 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalise a `file:line` string reported by a symbolizer.

    First strips every user supplied prefix pattern (everything up to and
    including the pattern is removed), then collapses ASan runtime sources
    and the `crtstuff.c:0` placeholder into short fixed markers.
    """
    for path_to_cut in fix_filename_patterns or ():
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    # Applied in order; each entry is (pattern, replacement).
    builtin_rewrites = (
        (".*asan_[a-z_]*.(cc|cpp):[0-9]*", "_asan_rtl_"),
        (".*crtstuff.c:0", "???:0"),
    )
    for pattern, replacement in builtin_rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
def is_valid_arch(s):
    """Return True iff `s` is one of the architecture names this script accepts."""
    known_arches = (
        "i386",
        "x86_64",
        "x86_64h",
        "arm",
        "armv6",
        "armv7",
        "armv7s",
        "armv7k",
        "arm64",
        "powerpc64",
        "powerpc64le",
        "s390x",
        "s390",
        "riscv64",
        "loongarch64",
    )
    return s in known_arches
def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    An address longer than 10 characters (len('0x') + 8 hex digits) cannot
    be 32-bit, so it is reported as "x86_64"; anything else as "i386".
    """
    return "x86_64" if len(addr) > 10 else "i386"
class Symbolizer(object):
    """Common interface implemented by every symbolizer backend."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Subclasses override this method; the base implementation does not
        symbolize anything.

        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name, file
            name, line and column numbers), or None when nothing is known.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a long-running `llvm-symbolizer` child process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Start the llvm-symbolizer process.

        Args:
            symbolizer_path: executable name or path of llvm-symbolizer.
            default_arch: value passed as `--default-arch`.
            system: platform name; `--dsym-hint` is only emitted for "Darwin".
            dsym_hints: optional iterable of .dSYM hint paths.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # `None` replaces the former mutable `[]` default so that no list
        # object is shared between instances.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Spawn llvm-symbolizer; return the Popen object, or None on OSError."""
        cmd = [
            self.symbolizer_path,
            ("--demangle" if demangle else "--no-demangle"),
            "--functions=linkage",
            "--inlines",
            "--default-arch=%s" % self.default_arch,
        ]
        if self.system == "Darwin":
            for hint in self.dsym_hints:
                cmd.append("--dsym-hint=%s" % hint)
        logging.debug(" ".join(cmd))
        try:
            result = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                bufsize=0,
                universal_newlines=True,
            )
        except OSError:
            # The tool could not be launched; callers fall back to the next
            # symbolizer when `self.pipe` is None.
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns a list with one "<addr> in <function> <file>" string per
        (inlined) frame, or None if the process is unavailable, the exchange
        failed, or only unknown ("??") frames were reported.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            logging.debug(symbolizer_input)
            self.pipe.stdin.write("%s\n" % symbolizer_input)
            # The reply is a sequence of (function, file) line pairs that is
            # terminated by an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if not function_name.startswith("??") or not file_name.startswith("??"):
                    # Append only non-trivial frames.
                    result.append("%s in %s %s" % (addr, function_name, file_name))
        except Exception as e:
            # Best effort: report "no result" so the caller can fall back,
            # but leave a trace of what went wrong instead of hiding it.
            logging.debug(
                "exception while communicating with llvm-symbolizer", exc_info=e
            )
            result = []
        if not result:
            result = None
        return result
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=[]):
    """Create an LLVMSymbolizer, locating the tool via the environment.

    `LLVM_SYMBOLIZER_PATH` takes precedence over `ASAN_SYMBOLIZER_PATH`; when
    neither is set (or both are empty) `llvm-symbolizer` is assumed to be in
    PATH.
    """
    symbolizer_path = (
        os.getenv("LLVM_SYMBOLIZER_PATH")
        or os.getenv("ASAN_SYMBOLIZER_PATH")
        or "llvm-symbolizer"
    )
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an `addr2line` child process bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Sentinel "address" written after every real query; its reply marks
        # the end of the (variable length) answer to the real query.
        self.output_terminator = -1

    def open_addr2line(self):
        """Spawn `addr2line -fi [--demangle] -e <binary>` and return the Popen."""
        addr2line_tool = "addr2line"
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        logging.debug("addr2line binary is %s" % shutil.which(addr2line_tool))
        cmd = [addr2line_tool, "-fi"]
        if demangle:
            cmd += ["--demangle"]
        cmd += ["-e", self.binary]
        logging.debug(" ".join(cmd))
        return subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            bufsize=0,
            universal_newlines=True,
        )

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None when `binary` is not the binary this instance was
        created for. Otherwise returns a list of "<addr> in <function> <file>"
        strings, one per frame read; on any communication error a single
        "?? ??:0" frame is appended so the list is never empty in that case.
        """
        if self.binary != binary:
            return None
        lines = []
        try:
            # Send the real query followed by the terminator query.
            self.pipe.stdin.write("%s\n" % offset)
            self.pipe.stdin.write("%s\n" % self.output_terminator)
            is_first_frame = True
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                logging.debug("read function_name='%s' from addr2line" % function_name)
                # If llvm-symbolizer is installed as addr2line, older versions of
                # llvm-symbolizer will print -1 when presented with -1 and not print
                # a second line. In that case we will block forever trying to read the
                # file name. This also happens for non-existent files, in which case GNU
                # addr2line exits immediately, but llvm-symbolizer does not (see
                # https://llvm.org/PR42754).
                if function_name == "-1":
                    logging.debug("got function '-1' -> no more input")
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                logging.debug("read file_name='%s' from addr2line" % file_name)
                if is_first_frame:
                    # The first pair always belongs to the real query, even if
                    # it looks like a terminator reply.
                    is_first_frame = False
                elif function_name == "??":
                    assert file_name == "??:0", file_name
                    logging.debug("got function '??' -> no more input")
                    break
                elif not function_name:
                    assert not file_name, file_name
                    logging.debug("got empty function name -> no more input")
                    break
                if not function_name and not file_name:
                    logging.debug(
                        "got empty function and file name -> unknown function"
                    )
                    function_name = "??"
                    file_name = "??:0"
                lines.append((function_name, file_name))
        except IOError as e:
            # EPIPE happens if addr2line exits early (which some implementations do
            # if an invalid file is passed).
            if e.errno == errno.EPIPE:
                logging.debug(
                    f"addr2line exited early (broken pipe) returncode={self.pipe.poll()}"
                )
            else:
                logging.debug(
                    "unexpected I/O exception communicating with addr2line", exc_info=e
                )
            lines.append(("??", "??:0"))
        except Exception as e:
            logging.debug(
                "got unknown exception communicating with addr2line", exc_info=e
            )
            lines.append(("??", "??:0"))
        return [
            "%s in %s %s" % (addr, function, fix_filename(file))
            for (function, file) in lines
        ]
class UnbufferedLineConverter(object):
    """
    Drives a child process that answers every input line with exactly one
    output line. The child is run on a pty so that it does not buffer its
    output.
    """

    def __init__(self, args, close_stderr=False):
        # Imported here rather than at module scope so that the script can
        # still start on Windows.
        import pty
        import termios

        child_pid, master_fd = pty.fork()
        if child_pid == 0:
            # Child side: optionally silence stderr, then become the command.
            if close_stderr:
                os.dup2(os.open("/dev/null", 0), 2)
            os.execvp(args[0], args)
        else:
            # Parent side: turn off echo on the pty.
            tty_attrs = termios.tcgetattr(master_fd)
            tty_attrs[3] &= ~termios.ECHO
            termios.tcsetattr(master_fd, termios.TCSANOW, tty_attrs)
            # Line-buffered file objects for talking to the child.
            self.r = os.fdopen(master_fd, "r", 1)
            self.w = os.fdopen(os.dup(master_fd), "w", 1)

    def convert(self, line):
        """Send `line` to the child and return its one-line reply."""
        self.w.write("%s\n" % line)
        return self.readline()

    def readline(self):
        """Read one line from the child with trailing whitespace removed."""
        reply = self.r.readline()
        return reply.rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an `atos` child process bound to one binary/arch."""

    def __init__(self, addr, binary, arch):
        """Start `atos` for `binary`/`arch`. `addr` is accepted but not used."""
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = arch
        self.open_atos()

    def open_atos(self):
        """Launch `atos -o <binary> -arch <arch>` with its stderr discarded."""
        logging.debug("atos -o %s -arch %s", self.binary, self.arch)
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this instance serves,
        otherwise a single-element list describing the frame.
        """
        if self.binary != binary:
            return None
        if not os.path.exists(binary):
            # If the binary doesn't exist atos will exit which will lead to IOError
            # exceptions being raised later on so just don't try to symbolize.
            return ["{} ({}:{}+{})".format(addr, binary, self.arch, offset)]
        atos_line = self.atos.convert("0x%x" % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        # NOTE:
        #   * For C functions atos omits parentheses and argument types.
        #   * For C++ functions the function name (i.e., `foo` above) may contain
        #     templates which may contain parentheses.
        # Raw string: `\(` and `\d` are regex escapes, not string escapes.
        match = re.match(r"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
        logging.debug("atos_line: %s", atos_line)
        if match:
            function_name = match.group(1)
            file_name = fix_filename(match.group(3))
            return ["%s in %s %s" % (addr, function_name, file_name)]
        else:
            return ["%s in %s" % (addr, atos_line)]
329 # Chain several symbolizers so that if one symbolizer fails, we fall back
330 # to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first real answer."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for candidate in self.symbolizer_list:
            # Entries may be None/falsy (e.g. a factory that produced nothing).
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add `symbolizer` as the last fallback of the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary`, or None if unavailable.

    A symbolizer is only created when the `BREAKPAD_SUFFIX` environment
    variable is set and the file `binary + suffix` exists.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if not suffix:
        return None
    symbol_file = binary + suffix
    if not os.access(symbol_file, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_file)
def SystemSymbolizerFactory(system, addr, binary, arch):
    """Create the platform's native symbolizer.

    "Darwin" gets a DarwinSymbolizer, the listed ELF platforms get an
    Addr2LineSymbolizer; any other `system` yields None.
    """
    if system == "Darwin":
        return DarwinSymbolizer(addr, binary, arch)
    if system in ("Linux", "FreeBSD", "NetBSD", "SunOS"):
        return Addr2LineSymbolizer(binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the Breakpad symbol file at `filename`."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # `file()` only exists on Python 2; `open()` in a `with` block works
        # everywhere and also closes the handle.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = " ".join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index FILE/PUBLIC/FUNC records and the line records that follow FUNC."""
        cur_function_addr = ""
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index (previously an IndexError).
                continue
            if fragments[0] == "FILE":
                assert int(fragments[1]) == len(self.files)
                self.files.append(" ".join(fragments[2:]))
            elif fragments[0] == "PUBLIC":
                self.symbols[int(fragments[1], 16)] = " ".join(fragments[3:])
            elif fragments[0] in ["CFI", "STACK"]:
                pass
            elif fragments[0] == "FUNC":
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC record for the same address takes precedence.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = " ".join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (
                    cur_function_addr,
                    int(fragments[1], 16),
                    int(fragments[2]),
                    int(fragments[3]),
                )
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line_no) covering `addr`, or None."""
        key = None
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts below `addr`.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            else:
                key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ["%s in %s %s:%d" % (addr, function_name, file_name, line_no)]
            # Was a stray print() that injected a list repr into the
            # symbolized report on stdout; keep it as a debug trace instead.
            logging.debug(result)
            return result
        else:
            return None
class SymbolizationLoop(object):
    """Reads report lines and replaces stack frames with symbolized frames."""

    def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
        """Set up the loop for the current platform.

        Args:
            plugin_proxy: optional AsanSymbolizerPlugInProxy consulted to
                filter binary paths. May be None (no plugins).
            dsym_hint_producer: optional callable mapping a binary path to
                an iterable of .dSYM hints (only consulted on Darwin).
        Raises:
            Exception: on an unsupported (non-Windows) system.
        """
        self.plugin_proxy = plugin_proxy
        if sys.platform == "win32":
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
            # even in sandboxed processes. Nothing needs to be done here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ["Linux", "Darwin", "FreeBSD", "NetBSD", "SunOS"]:
                raise Exception("Unknown system")
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set([])
            self.frame_no = 0
            self.process_line = self.process_line_posix
            # `plugin_proxy` defaults to None, so it must be checked before
            # it is queried.
            self.using_module_map = (
                plugin_proxy is not None
                and plugin_proxy.has_plugin(ModuleMapPlugIn.get_name())
            )

    def symbolize_address(self, addr, binary, offset, arch):
        """Symbolize one address, falling back through the symbolizer chain.

        Raises:
            Exception: if the non-system symbolizers produced nothing and
                the system symbolizer is not allowed.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
        # a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the debug
        #     info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary already;
        #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        result = None
        if not force_system_symbolizer:
            if binary not in self.llvm_symbolizers:
                use_new_symbolizer = True
                if self.system == "Darwin" and self.dsym_hint_producer:
                    dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                    use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
                    self.dsym_hints |= dsym_hints_for_binary
                if self.last_llvm_symbolizer and not use_new_symbolizer:
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
                else:
                    self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                        self.system, arch, self.dsym_hints
                    )
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            # Use the chain of symbolizers:
            # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
            # (fall back to next symbolizer if the previous one fails).
            if binary not in symbolizers:
                symbolizers[binary] = ChainSymbolizer(
                    [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]]
                )
            result = symbolizers[binary].symbolize(addr, binary, offset)
        else:
            symbolizers[binary] = ChainSymbolizer([])
        if result is None:
            if not allow_system_symbolizer:
                raise Exception("Failed to launch or use llvm-symbolizer.")
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary, arch)
            )
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines, inc_frame_counter=True):
        """Turn symbolizer output into the lines to print.

        With no symbolized frames the current input line is returned
        unchanged; otherwise one renumbered line is produced per frame.
        """
        if not symbolized_lines:
            if inc_frame_counter:
                self.frame_no += 1
            return [self.current_line]
        else:
            assert inc_frame_counter
            result = []
            for symbolized_frame in symbolized_lines:
                result.append(
                    " #%s %s" % (str(self.frame_no), symbolized_frame.rstrip())
                )
                self.frame_no += 1
            return result

    def process_logfile(self):
        """Process every line of the global `logfile` and print the result."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print("\n".join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize `line` if it is a stack frame, otherwise echo it."""
        self.current_line = line.rstrip()
        # Unsymbolicated:
        # #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        # Partially symbolicated:
        # #0 0x7f6e35cf2e45 in foo (foo.so+0x11fe45)
        # NOTE: We have to be very liberal with symbol
        # names in the regex because it could be an
        # Objective-C or C++ demangled name.
        # Raw string: `\(` and `\+` are regex escapes, not string escapes.
        stack_trace_line_format = (
            r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
        )
        match = re.match(stack_trace_line_format, line)
        if not match:
            logging.debug('Line "{}" does not match regex'.format(line))
            # Not a frame line so don't increment the frame counter.
            return self.get_symbolized_lines(None, inc_frame_counter=False)
        logging.debug(line)
        _, frameno_str, addr, binary, offset = match.groups()

        if not self.using_module_map and not os.path.isabs(binary):
            # Do not try to symbolicate if the binary is just the module file name
            # and a module map is unavailable.
            # FIXME(dliew): This is currently necessary for reports on Darwin that are
            # partially symbolicated by `atos`.
            return self.get_symbolized_lines(None)
        arch = ""
        # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
        colon_pos = binary.rfind(":")
        if colon_pos != -1:
            maybe_arch = binary[colon_pos + 1 :]
            if is_valid_arch(maybe_arch):
                arch = maybe_arch
                binary = binary[0:colon_pos]
        if arch == "":
            arch = guess_arch(addr)
        if frameno_str == "0":
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        # Without a plugin proxy the path is used exactly as reported.
        if self.plugin_proxy is not None:
            binary = self.plugin_proxy.filter_binary_path(binary)
        if binary is None:
            # The binary filter has told us this binary can't be symbolized.
            logging.debug('Skipping symbolication of binary "%s"', original_binary)
            return self.get_symbolized_lines(None)
        symbolized_line = self.symbolize_address(addr, binary, offset, arch)
        if not symbolized_line:
            # Retry with the unfiltered path if a plugin rewrote it.
            if original_binary != binary:
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset, arch
                )
        return self.get_symbolized_lines(symbolized_line)
class AsanSymbolizerPlugInProxy(object):
    """
    Serves several purposes:
    - Manages the lifetime of plugins (must be used in a `with` statement).
    - Provides interface for calling into plugins from within this script.
    """

    def __init__(self):
        self._plugins = []
        self._plugin_names = set()

    def _load_plugin_from_file_impl_py_gt_2(self, file_path, globals_space):
        with open(file_path, "r") as plugin_source:
            exec(plugin_source.read(), globals_space, None)

    def load_plugin_from_file(self, file_path):
        """
        Execute the Python file at `file_path` with a copy of this module's
        globals plus a `register_plugin` function it can call.
        """
        logging.info('Loading plugins from "{}"'.format(file_path))

        # Provide function to register plugins
        def register_plugin(plugin):
            logging.info("Registering plugin %s", plugin.get_name())
            self.add_plugin(plugin)

        globals_space = dict(globals())
        globals_space["register_plugin"] = register_plugin
        if sys.version_info.major >= 3:
            # Indirection here is to avoid a bug in older Python 2 versions:
            # `SyntaxError: unqualified exec is not allowed in function ...`
            self._load_plugin_from_file_impl_py_gt_2(file_path, globals_space)
        else:
            execfile(file_path, globals_space, None)

    def add_plugin(self, plugin):
        """
        Start managing `plugin` and hand it a reference to this proxy.
        """
        assert isinstance(plugin, AsanSymbolizerPlugIn)
        self._plugins.append(plugin)
        self._plugin_names.add(plugin.get_name())
        plugin._receive_proxy(self)

    def remove_plugin(self, plugin):
        """
        Stop managing `plugin` and destroy it.
        """
        assert isinstance(plugin, AsanSymbolizerPlugIn)
        self._plugins.remove(plugin)
        self._plugin_names.remove(plugin.get_name())
        logging.debug("Removing plugin %s", plugin.get_name())
        plugin.destroy()

    def has_plugin(self, name):
        """
        Returns true iff the plugin name is currently
        being managed by AsanSymbolizerPlugInProxy.
        """
        return name in self._plugin_names

    def register_cmdline_args(self, parser):
        """
        Let every managed plugin add its options to `parser`.
        """
        for plugin in list(self._plugins):
            plugin.register_cmdline_args(parser)

    def process_cmdline_args(self, pargs):
        """
        Pass the parsed arguments to every plugin and drop the plugins that
        report they should not be used.
        """
        # Iterate over a snapshot so plugins can be removed along the way.
        for plugin in list(self._plugins):
            keep = plugin.process_cmdline_args(pargs)
            assert isinstance(keep, bool)
            if keep:
                continue
            self.remove_plugin(plugin)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for plugin in self._plugins:
            plugin.destroy()
        # Don't suppress raised exceptions
        return False

    def _filter_single_value(self, function_name, input_value):
        """
        Helper for filter style plugin functions.
        """
        current = input_value
        for plugin in self._plugins:
            plugin_filter = getattr(plugin, function_name)
            current = plugin_filter(current)
            if current is None:
                # A plugin vetoed the value; stop consulting the others.
                return None
        return current

    def filter_binary_path(self, binary_path):
        """
        Consult available plugins to filter the path to a binary
        to make it suitable for symbolication.

        Returns `None` if symbolication should not be attempted for this
        binary.
        """
        return self._filter_single_value("filter_binary_path", binary_path)

    def filter_module_desc(self, module_desc):
        """
        Consult available plugins to determine the module
        description suitable for symbolication.

        Returns `None` if symbolication should not be attempted for this module.
        """
        assert isinstance(module_desc, ModuleDesc)
        return self._filter_single_value("filter_module_desc", module_desc)
class AsanSymbolizerPlugIn(object):
    """
    Base class for plugins; `asan_symbolize.py` only talks to plugins
    through the methods defined here.
    """

    @classmethod
    def get_name(cls):
        """
        Name of the plugin, which is the name of its class.
        """
        return cls.__name__

    def _receive_proxy(self, proxy):
        assert isinstance(proxy, AsanSymbolizerPlugInProxy)
        self.proxy = proxy

    def register_cmdline_args(self, parser):
        """
        Override to add command line options that will later be read in
        `process_cmdline_args()`.

        `parser` - Instance of `argparse.ArgumentParser`.
        """
        return None

    def process_cmdline_args(self, pargs):
        """
        Override to react to the parsed command line. `pargs` must not be
        modified.

        `pargs` - Instance of `argparse.Namespace` containing
        parsed command line arguments.

        Return `True` to keep the plug-in in use, `False` to have it
        removed.
        """
        return True

    def destroy(self):
        """
        Called when the plugin is about to be destroyed; override to
        release any resources the plugin holds.
        """
        return None

    # Symbolization hooks
    def filter_binary_path(self, binary_path):
        """
        Map a binary path to the path that should be used for
        symbolication. The default is the identity mapping.

        Return `None` to skip symbolication of this binary.
        """
        return binary_path

    def filter_module_desc(self, module_desc):
        """
        Map a ModuleDesc object (`module_desc`) to the ModuleDesc that
        should be used for symbolication. The default is the identity
        mapping.

        Return `None` to skip symbolication of this binary.
        """
        return module_desc
class ModuleDesc(object):
    """Description of one loaded module taken from an ASan module map."""

    def __init__(self, name, arch, start_addr, end_addr, module_path, uuid):
        self.name = name
        self.arch = arch
        self.start_addr = start_addr
        self.end_addr = end_addr
        # Module path from an ASan report.
        self.module_path = module_path
        # Module for performing symbolization, by default same as above.
        self.module_path_for_symbolization = module_path
        self.uuid = uuid
        assert self.is_valid()

    def __str__(self):
        assert self.is_valid()
        if self.module_path == self.module_path_for_symbolization:
            shown_path = self.module_path
        else:
            # Show the symbolization path first, the reported one in parens.
            shown_path = "{} ({})".format(
                self.module_path_for_symbolization, self.module_path
            )
        template = (
            "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}"
        )
        return template.format(
            name=self.name,
            arch=self.arch,
            start_addr=self.start_addr,
            end_addr=self.end_addr,
            module_path=shown_path,
            uuid=self.uuid,
        )

    def is_valid(self):
        """Check field types, the address range and that both paths are absolute."""
        # The `and` chain short-circuits, so value checks only run once the
        # corresponding type check has passed.
        return (
            isinstance(self.name, str)
            and isinstance(self.arch, str)
            and isinstance(self.start_addr, int)
            and not self.start_addr < 0
            and isinstance(self.end_addr, int)
            and not self.end_addr <= self.start_addr
            and isinstance(self.module_path, str)
            and os.path.isabs(self.module_path)
            and isinstance(self.module_path_for_symbolization, str)
            and os.path.isabs(self.module_path_for_symbolization)
            and isinstance(self.uuid, str)
        )
class GetUUIDFromBinaryException(Exception):
    """Raised when the UUID of a binary cannot be determined."""

    def __init__(self, msg):
        super().__init__(msg)
# Maps (path_to_binary, arch) to the UUID already read for that pair.
_get_uuid_from_binary_cache = dict()


def get_uuid_from_binary(path_to_binary, arch=None):
    """Return the LC_UUID of a binary as reported by `/usr/bin/otool -l`.

    Results are cached per (path, arch).

    Args:
        path_to_binary: path of the binary to inspect.
        arch: optional architecture passed to otool via `-arch`.
    Raises:
        GetUUIDFromBinaryException: if the binary does not exist, otool
            cannot be run or fails, or its output has no usable UUID.
    """
    cache_key = (path_to_binary, arch)
    cached_value = _get_uuid_from_binary_cache.get(cache_key)
    if cached_value:
        return cached_value
    if not os.path.exists(path_to_binary):
        raise GetUUIDFromBinaryException(
            'Binary "{}" does not exist'.format(path_to_binary)
        )
    cmd = ["/usr/bin/otool", "-l"]
    if arch:
        cmd.extend(["-arch", arch])
    cmd.append(path_to_binary)
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except (OSError, subprocess.CalledProcessError) as e:
        # Surface tool failures as the exception type callers handle instead
        # of letting an unrelated exception abort symbolization.
        raise GetUUIDFromBinaryException(
            'Failed to run otool on "{}": {}'.format(path_to_binary, e)
        )
    # Look for this output:
    # cmd LC_UUID
    # cmdsize 24
    # uuid 4CA778FE-5BF9-3C45-AE59-7DF01B2BE83F
    if isinstance(output, str):
        output_str = output
    else:
        assert isinstance(output, bytes)
        output_str = output.decode()
    assert isinstance(output_str, str)
    lines = output_str.split("\n")
    uuid = None
    for index, line in enumerate(lines):
        stripped_line = line.strip()
        if not stripped_line.startswith("cmd LC_UUID"):
            continue
        # The uuid is expected two lines below the `cmd LC_UUID` line; the
        # output may be truncated, so do not index past the end.
        uuid_line = lines[index + 2].strip() if index + 2 < len(lines) else ""
        split_uuid_line = uuid_line.split()
        if not uuid_line.startswith("uuid") or len(split_uuid_line) < 2:
            raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line))
        uuid = split_uuid_line[1]
        break
    if uuid is None:
        logging.error("Failed to retrieve UUID from binary {}".format(path_to_binary))
        logging.error("otool output was:\n{}".format(output_str))
        raise GetUUIDFromBinaryException(
            'Failed to retrieve UUID from binary "{}"'.format(path_to_binary)
        )
    else:
        # Update cache
        _get_uuid_from_binary_cache[cache_key] = uuid
    return uuid
class ModuleMap(object):
    """Collection of ModuleDesc objects, looked up by module name."""

    def __init__(self):
        self._module_name_to_description_map = dict()

    def add_module(self, desc):
        """Register `desc`; its name must not already be present."""
        assert isinstance(desc, ModuleDesc)
        assert desc.name not in self._module_name_to_description_map
        self._module_name_to_description_map[desc.name] = desc

    def find_module_by_name(self, name):
        """Return the ModuleDesc registered under `name`, or None."""
        return self._module_name_to_description_map.get(name, None)

    def __str__(self):
        # One line per module, ordered by start address.
        by_start_addr = sorted(
            self._module_name_to_description_map.values(), key=lambda v: v.start_addr
        )
        header = "{} modules:\n".format(self.num_modules)
        return header + "".join(str(desc) + "\n" for desc in by_start_addr)

    @property
    def num_modules(self):
        return len(self._module_name_to_description_map)

    @property
    def modules(self):
        return set(self._module_name_to_description_map.values())

    def get_module_path_for_symbolication(self, module_name, proxy, validate_uuid):
        """Return the path to symbolize `module_name` with, or None.

        None is returned when the module is unknown, a plugin vetoes it, or
        (with `validate_uuid`) the binary's UUID cannot be read or differs
        from the one recorded in the module map.
        """
        module_desc = self.find_module_by_name(module_name)
        if module_desc is None:
            return None
        # Allow a plug-in to change the module description to make it
        # suitable for symbolication or avoid symbolication altogether.
        module_desc = proxy.filter_module_desc(module_desc)
        if module_desc is None:
            return None
        symbolization_path = module_desc.module_path_for_symbolization
        if not validate_uuid:
            logging.warning(
                "Skipping validation of UUID of {}".format(symbolization_path)
            )
            return symbolization_path
        logging.debug("Validating UUID of {}".format(symbolization_path))
        try:
            uuid = get_uuid_from_binary(symbolization_path, arch=module_desc.arch)
            if uuid != module_desc.uuid:
                logging.warning(
                    "Detected UUID mismatch {} != {}".format(uuid, module_desc.uuid)
                )
                # UUIDs don't match. Tell client to not symbolize this.
                return None
        except GetUUIDFromBinaryException as e:
            logging.error("Failed to get binary from UUID: %s", str(e))
            return None
        return module_desc.module_path_for_symbolization

    @staticmethod
    def parse_from_file(module_map_path):
        """Parse the module map contained in the text file `module_map_path`.

        Lines before "Process module map:" are ignored and parsing stops at
        "End of module map". Returns None if no module map header is found.

        Raises:
            Exception: if the file does not exist or a line inside the
                module map cannot be parsed.
        """
        if not os.path.exists(module_map_path):
            raise Exception('module map "{}" does not exist'.format(module_map_path))

        def hex_regex(name):
            return r"0x(?P<" + name + r">[0-9a-f]+)"

        # E.g.
        # 0x2db4000-0x102ddc000 /path/to (arm64) <0D6BBDE0-FF90-3680-899D-8E6F9528E04C>
        matcher = re.compile(
            r"^{}-{}\s+{}\s+{}\s+{}".format(
                hex_regex("start_addr"),
                hex_regex("end_addr"),
                r"(?P<path>.+)",
                r"\((?P<arch>.+)\)",
                r"<(?P<uuid>[0-9A-Z-]+)>",
            )
        )
        with open(module_map_path, "r") as f:
            mm = None
            line_num = 0
            while True:
                line = f.readline()
                line_num += 1
                if mm is None:
                    # Still searching for the header.
                    if line.startswith("Process module map:"):
                        mm = ModuleMap()
                    if line == "":
                        # End of file reached without a header.
                        break
                    continue
                if line.startswith("End of module map"):
                    break
                m_obj = matcher.match(line)
                if not m_obj:
                    raise Exception(
                        'Failed to parse line {} "{}"'.format(line_num, line)
                    )
                module_path = m_obj.group("path")
                mm.add_module(
                    ModuleDesc(
                        name=os.path.basename(module_path),
                        arch=m_obj.group("arch"),
                        start_addr=int(m_obj.group("start_addr"), base=16),
                        end_addr=int(m_obj.group("end_addr"), base=16),
                        module_path=module_path,
                        uuid=m_obj.group("uuid"),
                    )
                )
            if mm is not None:
                logging.debug(
                    'Loaded Module map from "{}":\n{}'.format(f.name, str(mm))
                )
            return mm
class SysRootFilterPlugIn(AsanSymbolizerPlugIn):
    """
    Plug-in that prepends a sys root directory to every binary path
    used for symbolication.
    """

    def __init__(self):
        self.sysroot_path = ""

    def register_cmdline_args(self, parser):
        parser.add_argument(
            "-s",
            dest="sys_root",
            metavar="SYSROOT",
            help="set path to sysroot for sanitized binaries",
        )

    def process_cmdline_args(self, pargs):
        sys_root = pargs.sys_root
        if sys_root is not None:
            self.sysroot_path = sys_root
            return True
        # Not being used so remove ourselves.
        return False

    def filter_binary_path(self, path):
        return "".join((self.sysroot_path, path))
class ModuleMapPlugIn(AsanSymbolizerPlugIn):
    """
    Built-in plug-in that resolves binary paths through a module map
    loaded from the file given with `--module-map`.
    """

    def __init__(self):
        # Set by process_cmdline_args() once `--module-map` has been read.
        self._module_map = None
        # Turned off by `--skip-uuid-validation`.
        self._uuid_validation = True

    def register_cmdline_args(self, parser):
        """Register `--module-map` and `--skip-uuid-validation` on `parser`."""
        parser.add_argument(
            "--module-map",
            # The trailing space keeps the two adjacent literals from being
            # joined as "module mapoutput" in the help text.
            help="Path to text file containing module map "
            "output. See print_module_map ASan option.",
        )
        parser.add_argument(
            "--skip-uuid-validation",
            default=False,
            action="store_true",
            help="Skips validating UUID of modules using otool.",
        )

    def process_cmdline_args(self, pargs):
        """
        Load the module map named in the parsed arguments.

        Returns False when `--module-map` was not supplied and True once
        a module map has been loaded.

        Raises Exception when `ModuleMap.parse_from_file` returns None.
        """
        if not pargs.module_map:
            return False
        # Use the namespace passed in, not the module-level `args` global,
        # which only exists when this file is run as a script.
        self._module_map = ModuleMap.parse_from_file(pargs.module_map)
        if self._module_map is None:
            msg = "Failed to find module map"
            logging.error(msg)
            raise Exception(msg)
        self._uuid_validation = not pargs.skip_uuid_validation
        return True

    def filter_binary_path(self, binary_path):
        """
        Look up the path to use for symbolicating `binary_path` in the
        loaded module map.
        """
        if os.path.isabs(binary_path):
            # This is a binary path so transform into
            # a module name
            module_name = os.path.basename(binary_path)
        else:
            module_name = binary_path
        # NOTE(review): `self.proxy` is not assigned in this class;
        # presumably the plug-in proxy or base class sets it - confirm.
        return self._module_map.get_module_path_for_symbolication(
            module_name, self.proxy, self._uuid_validation
        )
def add_logging_args(parser):
    """Register the `--log-dest` and `--log-level` options on `parser`."""
    option_specs = (
        (
            "--log-dest",
            {
                "default": None,
                "help": "Destination path for script logging (default stderr).",
            },
        ),
        (
            "--log-level",
            {
                "choices": ["debug", "info", "warning", "error", "critical"],
                "default": "info",
                "help": "Log level for script (default: %(default)s).",
            },
        ),
    )
    # Registered in declaration order so `--help` lists them the same way.
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
def setup_logging():
    """
    Configure the `logging` module from the command line.

    Only the logging options are parsed here, because logging has to be
    configured before the main argument parsing runs.

    Returns the command line arguments that were not recognised.
    """
    pre_parser = argparse.ArgumentParser(add_help=False)
    add_logging_args(pre_parser)
    known, leftover = pre_parser.parse_known_args()

    level = getattr(logging, known.log_level.upper())
    # Debug output additionally records where each message came from.
    fmt = "%(levelname)s: %(message)s"
    if level == logging.DEBUG:
        fmt = "%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s"

    config = dict(level=level, format=fmt)
    destination = known.log_dest
    if destination:
        config["filename"] = destination
    logging.basicConfig(**config)

    shown_destination = "stderr" if destination is None else destination
    logging.debug(
        'Logging level set to "{}" and directing output to "{}"'.format(
            known.log_level, shown_destination
        )
    )
    return leftover
def add_load_plugin_args(parser):
    """Register the `-p/--plugins` option (one or more paths) on `parser`."""
    parser.add_argument(
        "-p",
        "--plugins",
        nargs="+",
        default=[],
        help="Load plug-in",
    )
def setup_plugins(plugin_proxy, args):
    """
    Load user supplied plug-ins, then add the built-in ones.

    `plugin_proxy` receives every plug-in. `args` is the argument list
    left over from earlier parsing stages; only `-p/--plugins` is
    consumed here. Passing None makes argparse fall back to
    `sys.argv[1:]`.

    Returns the arguments this stage did not recognise.
    """
    parser = argparse.ArgumentParser(add_help=False)
    add_load_plugin_args(parser)
    # Parse the list handed in rather than re-reading sys.argv, so options
    # already consumed by an earlier stage are not seen a second time.
    pargs, unparsed_args = parser.parse_known_args(args)
    for plugin_path in pargs.plugins:
        plugin_proxy.load_plugin_from_file(plugin_path)
    # Add built-in plugins.
    plugin_proxy.add_plugin(ModuleMapPlugIn())
    plugin_proxy.add_plugin(SysRootFilterPlugIn())
    return unparsed_args
if __name__ == "__main__":
    # Logging is configured before anything else; each setup stage hands
    # on the arguments it did not consume.
    remaining_args = setup_logging()
    with AsanSymbolizerPlugInProxy() as plugin_proxy:
        remaining_args = setup_plugins(plugin_proxy, remaining_args)

        parser = argparse.ArgumentParser(
            description="ASan symbolization script",
            epilog=__doc__,
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        parser.add_argument(
            "path_to_cut",
            help="pattern to be cut from the result file path ",
            nargs="*",
        )
        parser.add_argument(
            "-d",
            "--demangle",
            help="demangle function names",
            action="store_true",
        )
        parser.add_argument(
            "-c",
            help="set prefix for binutils",
            metavar="CROSS_COMPILE",
        )
        parser.add_argument(
            "-l",
            "--logfile",
            type=argparse.FileType("r"),
            default=sys.stdin,
            help="set log file name to parse, default is stdin",
        )
        parser.add_argument(
            "--force-system-symbolizer",
            help="don't use llvm-symbolizer",
            action="store_true",
        )
        # The logging and plug-in loading options are registered again
        # here so that `--help` shows them.
        add_logging_args(parser)
        add_load_plugin_args(parser)
        # Plug-ins add their own options before the final parse.
        plugin_proxy.register_cmdline_args(parser)
        args = parser.parse_args(remaining_args)
        plugin_proxy.process_cmdline_args(args)

        # Copy the parsed options into the module-level settings.
        if args.path_to_cut:
            fix_filename_patterns = args.path_to_cut
        if args.demangle:
            demangle = True
        if args.c:
            binutils_prefix = args.c
        logfile = args.logfile if args.logfile else sys.stdin
        if args.force_system_symbolizer:
            force_system_symbolizer = True
        if force_system_symbolizer:
            assert allow_system_symbolizer

        loop = SymbolizationLoop(plugin_proxy)
        loop.process_logfile()