Revert of Remove OneClickSigninHelper since it is no longer used. (patchset #5 id...
[chromium-blink-merge.git] / tools / valgrind / asan / third_party / asan_symbolize.py
blob b9d3ad3ad2fe6b7b9c3b96d9b366149387d16291
1 #!/usr/bin/env python
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
10 import argparse
11 import bisect
12 import getopt
13 import os
14 import re
15 import subprocess
16 import sys
# Cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = dict()

# Both flags start switched off.
DEBUG = demangle = False

# Optional settings; the command-line handling at the bottom of the file
# assigns them when the matching option is given.
binutils_prefix = sysroot_path = None
binary_name_filter = fix_filename_patterns = None

# Log source read by SymbolizationLoop.process_logfile().
logfile = sys.stdin
# When False, symbolize_address() raises instead of creating a system
# symbolizer after the other symbolizers returned nothing.
allow_system_symbolizer = True
28 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a source location string produced by a symbolizer.

    Args:
      file_name: location text, e.g. '/path/to/file.cc:12'.
    Returns:
      The location after these rewrites, applied in order:
        * for every entry of the module-level fix_filename_patterns, the
          entry and everything in front of it is removed;
        * a location inside an 'asan_*.cc' file becomes '_asan_rtl_';
        * 'crtstuff.c:0' becomes '???:0'.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            # Entries are spliced into the regular expression unescaped, so
            # they act as regex fragments rather than literal text.
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    # The dots are escaped so only a literal '.cc' / '.c' extension matches;
    # an unescaped '.' would also accept names such as 'asan_rtlXcc:1'.
    file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
    return file_name
def sysroot_path_filter(binary_name):
    """Return binary_name with the module-level sysroot_path put in front."""
    rooted_name = sysroot_path + binary_name
    return rooted_name
def guess_arch(addr):
    """Guess the architecture from the textual length of an address.

    Args:
      addr: address string such as '0x7f6e35cf2e45'.
    Returns:
      'x86_64' when the string is longer than ten characters, else 'i386'.
    """
    # Ten characters is '0x' followed by eight hex digits.
    return 'x86_64' if len(addr) > 10 else 'i386'
class Symbolizer(object):
    """Common interface of every symbolizer in this file."""

    def __init__(self):
        """Nothing to set up; present so subclasses can chain to it."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this
              instruction.
          offset: instruction offset in the @binary.
        Returns:
          list of strings (one string for each inlined frame) describing
          the code locations for this instruction (that is, function name,
          file name, line and column numbers).  This base implementation
          always returns None.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to an llvm-symbolizer child process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Store the configuration and try to start the child process.

        Args:
          symbolizer_path: executable to launch.
          default_arch: value passed as --default-arch.
          system: platform name; hints are only passed when it is 'Darwin'.
          dsym_hints: optional iterable of values for --dsym-hint.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A literal [] default would be one list shared by every instance
        # created without hints, so the empty list is built per instance.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the symbolizer.

        Returns:
          The subprocess.Popen object, or None when launching raised OSError.
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=short',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        if DEBUG:
            print(' '.join(cmd))
        try:
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE)
        except OSError:
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          A list of '<addr> in <function> <location>' strings, or None when
          the child is not running, an exception occurred while talking to
          it, or every frame it reported was unknown ('??').
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write('%s\n' % symbolizer_input)
            # The reply is read as (function, location) line pairs until an
            # empty line is seen.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: a failure here yields None so that a
            # ChainSymbolizer can move on to its next symbolizer.
            result = []
        return result or None
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    The path comes from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH; when
    both are unset or empty, 'llvm-symbolizer' is assumed to be in PATH.

    Args:
      system: platform name forwarded to LLVMSymbolizer.
      default_arch: architecture forwarded to LLVMSymbolizer.
      dsym_hints: optional iterable of .dSYM hints; defaults to none.
    Returns:
      A new LLVMSymbolizer.
    """
    # A fresh list per call replaces the shared mutable [] default.
    if dsym_hints is None:
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that queries an addr2line child bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start addr2line for self.binary and return the Popen object."""
        # The module-level binutils_prefix, when set, goes before the tool
        # name.
        tool = (binutils_prefix or '') + 'addr2line'
        cmd = [tool, '-f']
        if demangle:
            cmd.append('--demangle')
        cmd.extend(['-e', self.binary])
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for any binary other than the one this object was made
        for; otherwise a one-element list.  The function and file parts are
        empty when talking to the child raised an exception.
        """
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            # Two lines come back per query: function name, then location.
            reply = [self.pipe.stdout.readline().rstrip() for _ in range(2)]
        except Exception:
            reply = ['', '']
        function_name, file_name = reply
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a pty and run args in it.

        Args:
          args: command line; args[0] is the program to execute.
          close_stderr: when true, the child's fd 2 is pointed at /dev/null.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid != 0:
            # Parent side: disable echoing on the pty.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)
        else:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)

    def convert(self, line):
        """Send one line to the child and return its one-line answer."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, without trailing whitespace."""
        answer = self.r.readline()
        return answer.rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an 'atos' child bound to one binary."""

    def __init__(self, addr, binary):
        """Start atos for binary; the arch is guessed from addr's length."""
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch atos behind an UnbufferedLineConverter (self.atos)."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for any binary other than the one this object was made
        for; otherwise a one-element list.  When the atos answer does not
        have the expected shape, the raw answer is appended to addr instead.
        """
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip lines containing "got symbolicator for"; the answer follows.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        # Raw strings: '\(' and '\d' are invalid escapes in a plain literal.
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            # Same text as the Python 2 statement "print 'atos_line: ', x".
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop every parenthesized group, e.g. the argument list.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]
# Tries a sequence of symbolizers in order: whenever one of them fails to
# produce a result, the next one in the chain is asked.
class ChainSymbolizer(Symbolizer):
    """Symbolizer that delegates to an ordered list of symbolizers."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns the first truthy result produced by a chain member, or None
        when no member produced one.  Falsy entries (e.g. None) are skipped.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Create a BreakpadSymbolizer for binary when a symbol file exists.

    The symbol file name is binary plus the value of the BREAKPAD_SUFFIX
    environment variable.

    Returns:
      A BreakpadSymbolizer, or None when the variable is unset/empty or the
      file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
def SystemSymbolizerFactory(system, addr, binary):
    """Create the platform's own symbolizer for binary.

    Returns:
      An Addr2LineSymbolizer on 'Linux', a DarwinSymbolizer on 'Darwin',
      and None for any other system name.
    """
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that looks offsets up in a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file.

        Args:
          filename: path of the symbol file; its first line is the MODULE
              record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # 'with' closes the handle, which file(filename).readlines() leaked.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # FILE records, indexed by file number
        self.symbols = {}        # function address -> function name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # start address -> line record tuple
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records after the MODULE line."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # A blank line would otherwise raise IndexError below.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # Direct dict membership instead of scanning .keys(); a name
                # that is already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Find the line record covering addr.

        Returns:
          (symbol, filename, line_no), or None when no record starts at or
          before addr or the closest one ends before addr.
        """
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            # Closest record that starts before addr.
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns a one-element list, or None when binary is not the module
        named in the symbol file or the offset is not covered.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        if DEBUG:
            # Printing unconditionally put the list's repr into the
            # symbolized log, so it is now limited to debug runs.
            print(result)
        return result
class SymbolizationLoop(object):
    """Reads a log and replaces raw stack frame lines by symbolized ones."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Pick the per-line handler for the current platform.

        Args:
          binary_name_filter: optional callable mapping the binary name found
              in the log to the name that should be symbolized.
          dsym_hint_producer: optional callable returning the .dSYM hints for
              a binary; only consulted on Darwin.
        Raises:
          Exception: on a non-Windows system other than Linux, Darwin or
              FreeBSD.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame with the symbolizer chain for binary.

        Returns:
          The list of frame strings produced by the chain.
        Raises:
          Exception: when the chain produced nothing and the module-level
              allow_system_symbolizer is false.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if self.last_llvm_symbolizer and not use_new_symbolizer:
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            else:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Number the symbolized frames for output.

        Returns:
          One ' #<n> <frame>' string per frame, numbered from self.frame_no
          (which is advanced), or the unchanged current log line when
          symbolized_lines is empty or None.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level logfile and print it."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize line if it is a stack frame, else pass it through.

        Returns:
          A list of output lines for this input line.
        """
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line:
            if original_binary != binary:
                # Retry with the name exactly as it appeared in the log; the
                # previous code repeated the call with the filtered name.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
if __name__ == '__main__':
    # Command-line entry point: parse the options, copy them into the
    # module-level settings and symbolize the log.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='Example of use:\n'
               'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
               '-s "$HOME/SymbolFiles" < asan.log')
    parser.add_argument(
        'path_to_cut', nargs='*',
        help='pattern to be cut from the result file path ')
    parser.add_argument(
        '-d', '--demangle', action='store_true',
        help='demangle function names')
    parser.add_argument(
        '-s', metavar='SYSROOT',
        help='set path to sysroot for sanitized binaries')
    parser.add_argument(
        '-c', metavar='CROSS_COMPILE',
        help='set prefix for binutils')
    parser.add_argument(
        '-l', '--logfile', default=sys.stdin,
        type=argparse.FileType('r'),
        help='set log file name to parse, default is stdin')
    args = parser.parse_args()

    # Each setting keeps its module default unless the option was given.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        # With a sysroot, binary names from the log get it as a prefix.
        sysroot_path = args.s
        binary_name_filter = sysroot_path_filter
    if args.c:
        binutils_prefix = args.c
    logfile = args.logfile or sys.stdin

    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()