Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / valgrind / asan / third_party / asan_symbolize.py
blob59fceaaed814c031fc08ea301f146cfe862a3e8a
1 #!/usr/bin/env python
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
10 import argparse
11 import bisect
12 import getopt
13 import os
14 import re
15 import subprocess
16 import sys
# Module-level state shared by the symbolizers below. Several of these are
# overwritten by the command-line entry point at the bottom of the file.

# Cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = {}

# DEBUG: echo the commands and queries sent to helper tools.
# demangle: ask the helper tools to demangle function names.
DEBUG = demangle = False

# binutils_prefix: string prepended to 'addr2line' when set.
# sysroot_path: string prepended to binary names by sysroot_path_filter().
# binary_name_filter: optional callable applied to each binary name.
# fix_filename_patterns: regex fragments cut from file names by fix_filename().
binutils_prefix = sysroot_path = None
binary_name_filter = fix_filename_patterns = None

# Stream the log is read from.
logfile = sys.stdin
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a 'file:line' string reported by a symbolizer.

    Args:
      file_name: location string as produced by the symbolizer tool.
    Returns:
      |file_name| after, in order:
        1. cutting everything up to and including each pattern listed in the
           module-level |fix_filename_patterns| (patterns are used as regular
           expressions, exactly as given);
        2. replacing a location inside an asan_*.cc file with '_asan_rtl_';
        3. replacing a 'crtstuff.c:0' location with '???:0'.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    # The dots are escaped so that only a literal '.cc' / '.c' extension
    # matches; an unescaped '.' would accept any character in that position.
    file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
    return file_name
def sysroot_path_filter(binary_name):
    """Return |binary_name| with the module-level |sysroot_path| prepended."""
    rooted_name = sysroot_path + binary_name
    return rooted_name
def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    Args:
      addr: address string such as '0x7f6e35cf2e45'.
    Returns:
      'x86_64' when |addr| is longer than a 32-bit address can be written,
      'i386' otherwise.
    """
    # 10 = len('0x') + 8 hex digits.
    widest_32bit_addr = 10
    return 'x86_64' if len(addr) > widest_32bit_addr else 'i386'
class Symbolizer(object):
    """Common interface of all symbolizers in this file."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Subclasses override this; the base implementation symbolizes nothing.

        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this
              instruction.
          offset: instruction offset in the @binary.
        Returns:
          list of strings (one string for each inlined frame) describing
          the code locations for this instruction (that is, function name,
          file name, line and column numbers). The base class always
          returns None.
        """
        return
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to an llvm-symbolizer child process over pipes."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Start the symbolizer process.

        Args:
          symbolizer_path: executable to launch.
          default_arch: value passed as --default-arch.
          system: OS name; .dSYM hints are only passed when it is 'Darwin'.
          dsym_hints: optional iterable of paths passed as --dsym-hint.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # None instead of a shared mutable [] default; a caller-supplied
        # container is stored as-is.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the tool; returns the Popen object, or None on OSError."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=short',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            cmd.extend('--dsym-hint=%s' % hint for hint in self.dsym_hints)
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Text-mode pipes so that the lines read back are str on both
            # Python 2 and Python 3.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            # The tool could not be started; symbolize() then returns None so
            # that a chain can fall back to another symbolizer.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          list of '<addr> in <function> <file>' strings, one per frame
          reported by the tool, or None when the tool is unavailable, fails,
          or reports only unknown ('??') frames.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # Without a flush a buffered pipe would keep the query and the
            # readline() below would wait forever.
            self.pipe.stdin.flush()
            # The answer is read as (function, file) line pairs until an
            # empty line is returned.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure while talking to the child
            # is reported as "no result".
            result = []
        return result or None
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the tool through the environment.

    The executable is taken from $LLVM_SYMBOLIZER_PATH, then from
    $ASAN_SYMBOLIZER_PATH; when neither is set (or both are empty)
    'llvm-symbolizer' is assumed to be in PATH.

    Args:
      system: OS name forwarded to LLVMSymbolizer.
      default_arch: architecture name forwarded to LLVMSymbolizer.
      dsym_hints: optional iterable of .dSYM hints forwarded as-is.
    Returns:
      the new LLVMSymbolizer.
    """
    if dsym_hints is None:
        # A fresh list per call rather than one mutable default shared by
        # every call.
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one addr2line process for a single binary."""

    def __init__(self, binary):
        """Start addr2line for |binary|."""
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Launch addr2line (with |binutils_prefix| prepended when set)."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text-mode pipes so that the lines read back are str on both
        # Python 2 and Python 3.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None when |binary| is not the binary this object was created for;
          otherwise a one-element list '<addr> in <function> <file>'. When
          talking to addr2line fails, function and file are left empty.
        """
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            # Without a flush a buffered pipe would keep the query and the
            # readline() below would wait forever.
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Deliberately best-effort: still return a frame line.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork |args| under a pty; optionally point its stderr at /dev/null."""
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        child_pid, master_fd = pty.fork()
        if child_pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                null_fd = os.open('/dev/null', 0)
                os.dup2(null_fd, 2)
            # execvp() either replaces this process or raises, so nothing
            # below runs in the child.
            os.execvp(args[0], args)
        # Parent: disable echoing.
        tty_attrs = termios.tcgetattr(master_fd)
        tty_attrs[3] &= ~termios.ECHO
        termios.tcsetattr(master_fd, termios.TCSANOW, tty_attrs)
        # Set up a file()-like interface to the child process
        self.r = os.fdopen(master_fd, "r", 1)
        self.w = os.fdopen(os.dup(master_fd), "w", 1)

    def convert(self, line):
        """Send |line| to the child and return its next line of output."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, trailing whitespace removed."""
        raw_line = self.r.readline()
        return raw_line.rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an 'atos' process for a single binary."""

    def __init__(self, addr, binary):
        """Start atos for |binary|; the arch is guessed from |addr|."""
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch atos behind an UnbufferedLineConverter (stderr discarded)."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip lines containing "got symbolicator for".
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        parsed = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('%s %s' % ('atos_line: ', atos_line))
        if not parsed:
            # Unrecognized response: pass the raw line through.
            return ['%s in %s' % (addr, atos_line)]
        # Drop the parenthesized argument list from the function name.
        function_name = re.sub(r'\(.*?\)', '', parsed.group(1))
        file_name = fix_filename(parsed.group(3))
        return ['%s in %s %s' % (addr, function_name, file_name)]
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries each symbolizer of a list in order until one yields a result."""

    def __init__(self, symbolizer_list):
        """Keep |symbolizer_list|; falsy entries are skipped when symbolizing."""
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns the first truthy result produced by a chained symbolizer, or
        None when none of them produces one.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add |symbolizer| to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Create a BreakpadSymbolizer for |binary| when a symbol file exists.

    The symbol file name is |binary| followed by $BREAKPAD_SUFFIX.

    Returns:
      a BreakpadSymbolizer for that file, or None when the variable is unset
      or empty, or the file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    symbol_file = binary + suffix
    if not os.access(symbol_file, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_file)
def SystemSymbolizerFactory(system, addr, binary):
    """Create the platform's native symbolizer for |binary|.

    Args:
      system: OS name, as found in os.uname()[0].
      addr: address string; used on Darwin to guess the architecture.
      binary: path of the binary to symbolize.
    Returns:
      a DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux' and
      'FreeBSD', None for any other system.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    # SymbolizationLoop accepts 'FreeBSD' as well; without a symbolizer for
    # it the chain is handed None and its "must produce some result"
    # assertion fails.
    if system in ('Linux', 'FreeBSD'):
        return Addr2LineSymbolizer(binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file |filename|.

        The first line is the MODULE record; its third and fourth fields are
        kept as |arch| and |debug_id| and the remaining fields as |binary|.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # 'with' closes the handle; it was previously left open.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # FILE records, indexed by file number.
        self.symbols = {}        # function/public address -> name.
        self.address_list = []   # sorted start addresses of line records.
        self.addresses = {}      # start address -> (func addr, size, line, file).
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the FILE, PUBLIC, FUNC and line records in |lines|."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # A blank line carries no record; indexing it would raise.
                continue
            record_type = fragments[0]
            if record_type == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record_type == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record_type in ['CFI', 'STACK']:
                pass
            elif record_type == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering |addr|.

        Returns:
          (symbol, filename, line_no), or None when no record starts at or
          before |addr| or the closest one ends before it.
        """
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            # Closest record starting below |addr|.
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns a one-element list '<addr> in <function> <file>:<line>', or
        None when |binary| is not this module or the offset is not covered.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        if DEBUG:
            # Used to be printed unconditionally, which put the list repr
            # into the symbolized output.
            print(result)
        return result
class SymbolizationLoop(object):
    """Reads a log line by line and replaces stack frames with symbol names."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Select the per-line handler for the current platform.

        Args:
          binary_name_filter: optional callable mapping a binary name from
              the log to the name that should be symbolized.
          dsym_hint_producer: optional callable returning the .dSYM hints for
              a binary (only consulted on Darwin).
        Raises:
          Exception: when the system is not win32, Linux, Darwin or FreeBSD.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set([])
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame through the chain of symbolizers for |binary|.

        Returns:
          list of symbolized frame strings.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        # NOTE(review): as written, a new LLVM symbolizer is created for every
        # new binary whenever the Darwin hint path is not taken; confirm this
        # against the first sentence of the comment above.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if not self.last_llvm_symbolizer or use_new_symbolizer:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
            self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames as numbered ' #N frame' lines.

        Returns [self.current_line] unchanged when |symbolized_lines| is
        empty or None; otherwise numbers each frame with |frame_no|, which is
        advanced once per frame.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level |logfile| and print it."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return |line| unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize |line| when it is a stack frame; otherwise echo it.

        Returns:
          list of output lines for |line|.
        """
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing: fall back to the name found in
            # the log (this retry used to repeat the filtered name).
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
if __name__ == '__main__':
    # Command-line entry point: parse the options, store them in the
    # module-level settings and symbolize the log.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog=('Example of use:\n'
                'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
                '-s "$HOME/SymbolFiles" < asan.log'))
    parser.add_argument(
        'path_to_cut', nargs='*',
        help='pattern to be cut from the result file path ')
    parser.add_argument(
        '-d', '--demangle', action='store_true',
        help='demangle function names')
    parser.add_argument(
        '-s', metavar='SYSROOT',
        help='set path to sysroot for sanitized binaries')
    parser.add_argument(
        '-c', metavar='CROSS_COMPILE',
        help='set prefix for binutils')
    parser.add_argument(
        '-l', '--logfile', default=sys.stdin,
        type=argparse.FileType('r'),
        help='set log file name to parse, default is stdin')
    args = parser.parse_args()

    # Each setting is only overridden when its option was actually given.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        # Binary names from the log are looked up below the sysroot.
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    logfile = args.logfile if args.logfile else sys.stdin

    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()