update credits
[librepilot.git] / make / scripts / asan_symbolize.py
blob8e6fb61f7bf7ed8f18bcabaed5c5de0ea1ee5c51
1 #!/usr/bin/env python
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
10 import argparse
11 import bisect
12 import getopt
13 import os
14 import re
15 import subprocess
16 import sys
# Cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = {}
# When true, symbolizer command lines and inputs are echoed to stdout.
DEBUG = False
# Ask the underlying tools to demangle function names (-d/--demangle).
demangle = False
# Prefix prepended to 'addr2line', e.g. for a cross toolchain (-c).
binutils_prefix = None
# Path prepended to binary names by sysroot_path_filter() (-s).
sysroot_path = None
# Optional callable that maps a binary name from the log to another name.
binary_name_filter = None
# Regex fragments cut from reported file paths (positional arguments).
fix_filename_patterns = None
# Stream the log is read from (-l/--logfile); stdin by default.
logfile = sys.stdin
# If false, falling back to the system symbolizer raises instead.
allow_system_symbolizer = True
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a 'file:line' string reported by a symbolizer.

    Every pattern in the global fix_filename_patterns (if any) is removed
    together with whatever precedes it.  Locations inside the ASan runtime
    are then collapsed to '_asan_rtl_' and crtstuff.c:0 becomes '???:0'.

    Args:
      file_name: location string to clean up.
    Returns:
      the rewritten location string.
    """
    for prefix_pattern in fix_filename_patterns or ():
        file_name = re.sub('.*' + prefix_pattern, '', file_name)
    # Fixed rewrites, applied in this order after the user-supplied cuts.
    fixed_rewrites = (
        ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
        ('.*crtstuff.c:0', '???:0'),
    )
    for pattern, replacement in fixed_rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
def sysroot_path_filter(binary_name):
    """Return binary_name prefixed with the global sysroot_path.

    Plain string concatenation is used; no path separator is inserted.
    """
    rerooted_name = sysroot_path + binary_name
    return rerooted_name
def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    Args:
      addr: address string such as '0x7f6e35cf2e45'.
    Returns:
      'x86_64' if the string is longer than a 32-bit literal, else 'i386'.
    """
    # 10 = len('0x') + 8 hex digits, the widest 32-bit address literal.
    widest_32bit_literal = 10
    return 'x86_64' if len(addr) > widest_32bit_literal else 'i386'
class Symbolizer(object):
    """Common interface of all symbolizers in this script."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this
            instruction.
          offset: instruction offset in the @binary.
        Returns:
          list of strings (one string for each inlined frame) describing
          the code locations for this instruction (that is, function name,
          file name, line and column numbers).  The base implementation
          always returns None.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to an llvm-symbolizer child process over pipes."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Store the configuration and try to start the child process.

        Args:
          symbolizer_path: executable to launch.
          default_arch: value passed as --default-arch.
          system: platform name; .dSYM hints are only passed on 'Darwin'.
          dsym_hints: iterable of .dSYM hint paths (defaults to none).
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A fresh list per instance instead of a shared mutable default.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the symbolizer.

        Returns:
          the subprocess.Popen object, or None if the launch raised OSError.
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=linkage',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            cmd.extend('--dsym-hint=%s' % hint for hint in self.dsym_hints)
        if DEBUG:
            print(' '.join(cmd))
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except OSError:
            # Callers treat a missing pipe as "this symbolizer is unusable".
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          list of '<addr> in <function> <file>' strings, or None when the
          child is unavailable, the exchange fails, or only '??' frames come
          back.
        """
        if not self.pipe:
            return None
        frames = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # The reply is (function, file) line pairs ended by an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = fix_filename(self.pipe.stdout.readline().rstrip())
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    frames.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure means "no result", so that the next
            # symbolizer in the chain gets a chance.
            frames = []
        return frames or None
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    if neither is set (or both are empty) 'llvm-symbolizer' is assumed to be
    in PATH.

    Args:
      system: platform name forwarded to LLVMSymbolizer.
      default_arch: architecture forwarded to LLVMSymbolizer.
      dsym_hints: iterable of .dSYM hints; a new empty list when omitted.
    Returns:
      the constructed LLVMSymbolizer.
    """
    if dsym_hints is None:
        # Avoid a mutable default argument shared between calls.
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one addr2line child process for one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Written after every query so the end of the reply can be detected.
        self.output_terminator = -1

    def open_addr2line(self):
        """Start '<binutils_prefix>addr2line -fi [--demangle] -e <binary>'."""
        tool = 'addr2line'
        if binutils_prefix:
            tool = binutils_prefix + tool
        argv = [tool, '-fi']
        if demangle:
            argv.append('--demangle')
        argv.extend(['-e', self.binary])
        if DEBUG:
            print(' '.join(argv))
        return subprocess.Popen(argv,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None when asked about a different binary; otherwise a list with
          one '<addr> in <function> <file>' string per reported frame.
        """
        if self.binary != binary:
            return None
        frames = []
        try:
            self.pipe.stdin.write('%s\n' % offset)
            self.pipe.stdin.write('%s\n' % self.output_terminator)
            seen_first_frame = False
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                # The first pair is always kept; afterwards an empty or '??'
                # function name ends the reply.
                if seen_first_frame and function_name in ('', '??'):
                    assert file_name == function_name
                    break
                seen_first_frame = True
                frames.append((function_name, file_name))
        except Exception:
            frames.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function_name, fix_filename(file_name))
                for (function_name, file_name) in frames]
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a new pty and exec args in it.

        Args:
          args: argv list; args[0] is looked up via os.execvp.
          close_stderr: if true, the child's fd 2 is redirected to /dev/null.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                # Flags value 0 -- presumably os.O_RDONLY; TODO confirm.
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            # (buffer size 1 requests line buffering; the write side wraps a
            # duplicate of the pty descriptor).
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send line (a newline is appended) and return one reply line."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, stripped of trailing whitespace."""
        return self.r.readline().rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an 'atos' child process for one binary."""

    def __init__(self, addr, binary):
        """Start atos for binary; the arch is guessed from addr's width."""
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch 'atos -o <binary> -arch <arch>' behind a pty wrapper."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None when asked about a different binary; otherwise a one-element
          list, either '<addr> in <function> <file>' for a well-formed atos
          reply or '<addr> in <raw reply>' when the reply does not parse.
        """
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        # Raw strings keep the backslashes without invalid-escape warnings.
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if not match:
            return ['%s in %s' % (addr, atos_line)]
        # Drop parenthesized groups (argument lists) from the function name.
        function_name = re.sub(r'\(.*?\)', '', match.group(1))
        file_name = fix_filename(match.group(3))
        return ['%s in %s %s' % (addr, function_name, file_name)]
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order until one yields a result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Falsy entries (e.g. None) are skipped.  Returns the first truthy
        result, or None if no symbolizer produced one.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add symbolizer as the last fallback in the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary if a symbol file exists.

    The symbol file name is binary plus the BREAKPAD_SUFFIX environment
    variable.  Returns None when the variable is unset or empty, or when
    that file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
def SystemSymbolizerFactory(system, addr, binary):
    """Create the platform's native symbolizer for binary.

    Returns a DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on
    'Linux' or 'FreeBSD', and None for any other system name.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    if system in ('Linux', 'FreeBSD'):
        return Addr2LineSymbolizer(binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file.

        Args:
          filename: path of the symbol file; its first line must be a
            MODULE record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # 'with' closes the handle once the lines are read.
        with open(filename) as sym_file:
            lines = sym_file.readlines()
        self.files = []          # FILE records, indexed by file number.
        self.symbols = {}        # symbol address -> name.
        self.address_list = []   # sorted start addresses of line records.
        self.addresses = {}      # start address -> line record tuple.
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the FILE, PUBLIC, FUNC and line records found in lines."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # An existing entry (e.g. from PUBLIC) keeps its name.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering addr.

        Returns:
          (symbol, filename, line_no), or None if no record covers addr.
        """
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                # addr lies before the first record (or there are none).
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          ['<addr> in <function> <file>:<line>'], or None when binary is not
          the module of this symbol file or the offset is not covered.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        if DEBUG:
            # Diagnostic only; the caller prints the frame itself.
            print(result)
        return result
class SymbolizationLoop(object):
    """Reads a log line by line and replaces stack frames with symbols."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Select the per-line handler for the current platform.

        Args:
          binary_name_filter: optional callable mapping a binary name from
            the log to the name that should be symbolized.
          dsym_hint_producer: optional callable returning .dSYM hints for a
            binary (only consulted on Darwin).
        Raises:
          Exception: on a non-Windows system other than Linux, Darwin or
            FreeBSD.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set([])
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame through the chain of symbolizers.

        Returns:
          list of frame description strings.
        Raises:
          Exception: if the first symbolizers fail and the system symbolizer
            is not allowed.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if self.last_llvm_symbolizer and not use_new_symbolizer:
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            else:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames, numbering them from self.frame_no.

        Returns:
          [self.current_line] when nothing was symbolized, otherwise one
          output line per frame.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            # NOTE(review): the spacing of this prefix may have been
            # collapsed in the reviewed copy -- confirm against upstream.
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the global logfile and print the result."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize line if it is a stack frame, else pass it through.

        Returns:
          list of output lines (several when inlined frames are reported).
        """
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing: retry with the name exactly as
            # it appears in the log.
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
if __name__ == '__main__':
    # Command-line entry point: parse the options into the module-level
    # settings, then symbolize the selected log.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='Example of use:\n'
               'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
               '-s "$HOME/SymbolFiles" < asan.log')
    parser.add_argument(
        'path_to_cut', nargs='*',
        help='pattern to be cut from the result file path ')
    parser.add_argument(
        '-d', '--demangle', action='store_true',
        help='demangle function names')
    parser.add_argument(
        '-s', metavar='SYSROOT',
        help='set path to sysroot for sanitized binaries')
    parser.add_argument(
        '-c', metavar='CROSS_COMPILE',
        help='set prefix for binutils')
    parser.add_argument(
        '-l', '--logfile', default=sys.stdin,
        type=argparse.FileType('r'),
        help='set log file name to parse, default is stdin')
    args = parser.parse_args()

    # Only options that were actually given override the defaults.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    logfile = args.logfile if args.logfile else sys.stdin

    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()