2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
# Module-level settings, overridden by the command-line entry point.
#   binutils_prefix:       prefix prepended to the addr2line tool name.
#   binary_name_filter:    optional callable applied to a binary path before
#                          it is symbolized.
#   fix_filename_patterns: path patterns that fix_filename() strips from
#                          reported file names.
binutils_prefix = binary_name_filter = fix_filename_patterns = None
# When False, a failure of the non-system symbolizers raises instead of
# falling back to the system symbolizer.
allow_system_symbolizer = True
28 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a source location string reported by a symbolizer.

    Every pattern in the module-level fix_filename_patterns is treated as a
    regular expression; everything up to and including its match is removed.
    Locations inside ASan runtime sources are then collapsed to '_asan_rtl_'
    and the 'crtstuff.c:0' placeholder is rewritten to '???:0'.

    Args:
        file_name: location string as printed by the symbolizer.
    Returns:
        The rewritten location string.
    """
    # fix_filename_patterns is None until the command line sets it.
    for path_to_cut in fix_filename_patterns or ():
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    # Callers assign the result back, so the rewritten name must be returned.
    return file_name
def sysroot_path_filter(binary_name):
    """Return binary_name with the module-level sysroot_path prepended."""
    prefixed_name = sysroot_path + binary_name
    return prefixed_name
41 # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
class Symbolizer(object):
    """Common interface implemented by every symbolizer backend."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.

        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers). The base implementation
            symbolizes nothing and returns None.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to an llvm-symbolizer child process over pipes."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Store the configuration and start the llvm-symbolizer process.

        Args:
            symbolizer_path: executable to launch.
            default_arch: value passed as --default-arch.
            system: OS name; '--dsym-hint' options are only added on 'Darwin'.
            dsym_hints: iterable of hints passed as --dsym-hint options.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        # open_llvm_symbolizer() reads self.system, so it must be set first.
        self.system = system
        # Avoid a shared mutable default; keep the caller's object otherwise.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch llvm-symbolizer.

        Returns:
            The subprocess.Popen object, or None if the executable could not
            be started (so that a symbolizer chain can fall back to another
            backend).
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=linkage',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            cmd.extend('--dsym-hint=%s' % hint for hint in self.dsym_hints)
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            # NOTE(review): the guard for this debug echo is not visible in
            # this excerpt; assumes an optional module-level DEBUG flag.
            if globals().get('DEBUG'):
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            self.pipe.stdin.flush()
            # Each frame is a (function, location) pair of lines; an empty
            # line ends the response for this query.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except (IOError, OSError):
            # The child went away; report failure so callers can fall back.
            result = []
        return result or None
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    if neither is set, 'llvm-symbolizer' is assumed to be in PATH.

    Args:
        system: OS name forwarded to LLVMSymbolizer.
        default_arch: architecture forwarded to LLVMSymbolizer.
        dsym_hints: iterable of dSYM hints; defaults to an empty list.
    Returns:
        A new LLVMSymbolizer instance.
    """
    if dsym_hints is None:
        # A fresh list per call instead of a shared mutable default.
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that talks to an addr2line child process for one binary."""

    def __init__(self, binary):
        """Start addr2line for the given binary.

        Args:
            binary: path of the binary this symbolizer answers queries for.
        """
        super(Addr2LineSymbolizer, self).__init__()
        # open_addr2line() and symbolize() both read self.binary.
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Extra address sent after every query; its answer marks the end of
        # the (possibly multi-frame) answer to the real query.
        self.output_terminator = -1

    def open_addr2line(self):
        """Launch addr2line (optionally prefixed) and return the Popen object."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            # The prefix is None unless set on the command line.
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        lines = []
        try:
            self.pipe.stdin.write('%s\n%s\n' % (offset, self.output_terminator))
            self.pipe.stdin.flush()
            is_first_frame = True
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                if is_first_frame:
                    # The first pair always belongs to the real query, even
                    # when it is unknown.
                    is_first_frame = False
                elif function_name in ('', '??'):
                    # Answer to the terminator address (or end of output).
                    # addr2line prints '??' with a '??:0' location here, so
                    # the two fields must not be required to be equal.
                    break
                lines.append((function_name, file_name))
        except (IOError, OSError):
            # Best effort: callers expect the system symbolizer to produce
            # some result, so emit an "unknown" frame.
            lines.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function, fix_filename(location))
                for (function, location) in lines]
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a pseudo-terminal and exec the command in it.

        Args:
            args: command line as a list; args[0] is the program to exec.
            close_stderr: if True, the child's stderr is redirected to
                /dev/null.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        # NOTE(review): the fork and stderr-redirect statements were not
        # visible in this excerpt; restored from the docstring and from the
        # use of fd/dev_null below -- confirm against the full file.
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing so written input is not read back as output.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its one-line answer."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, without trailing whitespace."""
        return self.r.readline().rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an `atos` child process for one binary."""

    def __init__(self, addr, binary):
        """Start atos for the given binary.

        Args:
            addr: a virtual address, used only to guess the architecture.
            binary: path of the binary this symbolizer answers queries for.
        """
        super(DarwinSymbolizer, self).__init__()
        # symbolize() and open_atos() both read self.binary.
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch atos behind an UnbufferedLineConverter."""
        # NOTE(review): the guard for the debug prints is not visible in this
        # excerpt; assumes an optional module-level DEBUG flag.
        if globals().get('DEBUG'):
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if globals().get('DEBUG'):
            print('atos_line:  %s' % atos_line)
        if not match:
            # Unrecognized format: pass the raw atos output through.
            return ['%s in %s' % (addr, atos_line)]
        # Drop the parenthesized argument list from the function name.
        function_name = re.sub(r'\(.*?\)', '', match.group(1))
        file_name = fix_filename(match.group(3))
        return ['%s in %s %s' % (addr, function_name, file_name)]
242 # Chain several symbolizers so that if one symbolizer fails, we fall back
243 # to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try a list of symbolizers in order and use the first that succeeds."""

    def __init__(self, symbolizer_list):
        """Args:
            symbolizer_list: symbolizers to try, in order. Entries may be
                None (factories return None when a backend is unavailable);
                such entries are skipped.
        """
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            The first non-empty result produced by a symbolizer in the chain,
            or None if every symbolizer failed.
        """
        for symbolizer in self.symbolizer_list:
            if not symbolizer:
                continue
            result = symbolizer.symbolize(addr, binary, offset)
            if result:
                return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add a symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Create a BreakpadSymbolizer for `binary` if a symbol file exists.

    The symbol file name is the binary path with the value of the
    BREAKPAD_SUFFIX environment variable appended.

    Args:
        binary: path of the binary to symbolize.
    Returns:
        A BreakpadSymbolizer, or None if BREAKPAD_SUFFIX is unset/empty or
        the symbol file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        # Without this guard, binary + None would raise TypeError.
        return None
    filename = binary + suffix
    if os.access(filename, os.F_OK):
        return BreakpadSymbolizer(filename)
    return None
def SystemSymbolizerFactory(system, addr, binary):
    """Return the system symbolizer for `system`, or None if there is none.

    'Darwin' gets a DarwinSymbolizer; 'Linux' and 'FreeBSD' get an
    Addr2LineSymbolizer.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    if system in ('Linux', 'FreeBSD'):
        return Addr2LineSymbolizer(binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a Breakpad text symbol file."""

    # Record types that carry no information used here.
    _IGNORED_RECORDS = ('CFI', 'STACK', 'INFO', 'INLINE', 'INLINE_ORIGIN')

    def __init__(self, filename):
        """Load and index the symbol file.

        Args:
            filename: path of the Breakpad symbol file.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []         # file number -> file name
        self.symbols = {}       # symbol address -> symbol name
        self.address_list = []  # sorted start addresses of line records
        self.addresses = {}     # start address -> (symbol addr, size, line, file)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the FILE, PUBLIC, FUNC and line records of a symbol file.

        Args:
            lines: all lines of the symbol file after the MODULE header.
        Raises:
            ValueError: if FILE records are not numbered consecutively.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines.
                continue
            record = fragments[0]
            if record == 'FILE':
                if int(fragments[1]) != len(self.files):
                    raise ValueError(
                        'unexpected FILE record number: %s' % fragments[1])
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in self._IGNORED_RECORDS:
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name recorded for the same address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering `addr`.

        Args:
            addr: integer offset inside the binary.
        Returns:
            (symbol name, file name, line number), or None if no line record
            covers the address.
        """
        # Find the last record starting at or before addr.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            return None
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        return ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
class SymbolizationLoop(object):
    """Read a log line by line and replace stack frames with symbolized ones."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Select the per-line handler for the current platform.

        Args:
            binary_name_filter: optional callable mapping a binary path from
                the log to the path that should be symbolized.
            dsym_hint_producer: optional callable returning dSYM hints for a
                binary (only consulted on Darwin).
        Raises:
            Exception: on a non-Windows system other than Linux, Darwin or
                FreeBSD.
        """
        # Number assigned to the next emitted frame.
        self.frame_no = 0
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set([])
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame, creating symbolizers for `binary` on demand.

        Returns:
            A non-empty list of symbolized frame strings.
        Raises:
            Exception: if symbolization failed and the system symbolizer is
                not allowed.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
        # a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     hints to it.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if not (self.last_llvm_symbolizer and not use_new_symbolizer):
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
            self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames as numbered stack-trace lines.

        Args:
            symbolized_lines: list of frame strings, or a false value if the
                frame could not be symbolized.
        Returns:
            One output line per frame, numbered from self.frame_no (which is
            advanced), or the current input line unchanged when there is
            nothing to emit.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            # Inlined frames each get their own number.
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level logfile and print the result."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize `line` if it is a stack frame; otherwise echo it.

        Returns:
            List of output lines replacing the input line.
        """
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered path failed: retry with the path from the log.
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
if __name__ == '__main__':
    # Parse the command line, publish the options through the module-level
    # settings read by the symbolizers, then process the log.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='Example of use:\n'
               'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
               '-s "$HOME/SymbolFiles" < asan.log')
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    args = parser.parse_args()
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    # Without this assignment the -d flag would be parsed but have no effect.
    demangle = args.demangle
    if args.s:
        # Install the sysroot filter only when a sysroot was given; otherwise
        # the filter would try to concatenate None with the binary name.
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    logfile = args.logfile if args.logfile else sys.stdin
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()