2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
# Prefix prepended to the addr2line tool name; filled in from the -c option.
binutils_prefix = None
# Optional callable that maps a binary path found in the log to the path
# that should actually be opened; handed to SymbolizationLoop.
binary_name_filter = None
# Patterns that fix_filename() cuts (together with everything before them)
# from reported file names; filled in from the positional arguments.
fix_filename_patterns = None
27 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Clean up a source location string reported by a symbolizer.

  Args:
    file_name: location string to normalize (e.g. '/path/to/file.cc:12').

  Returns:
    The normalized string. Every pattern in the module-level
    fix_filename_patterns is removed together with everything preceding it,
    locations inside asan_*.cc files collapse to '_asan_rtl_', and
    'crtstuff.c:0' locations become '???:0'.
  """
  # fix_filename_patterns may be None or empty; both mean "nothing to cut".
  for path_to_cut in fix_filename_patterns or ():
    # The pattern is interpreted as a regular expression, not a literal.
    file_name = re.sub('.*' + path_to_cut, '', file_name)
  file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
  file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
  # Callers use the cleaned-up value, so it has to be handed back.
  return file_name
def sysroot_path_filter(binary_name):
  """Return binary_name prefixed with the module-level sysroot_path."""
  path_in_sysroot = sysroot_path + binary_name
  return path_in_sysroot
40 # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
class Symbolizer(object):
  """Common interface implemented by every symbolizer in this script."""

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses.

    Args:
      addr: virtual address of an instruction.
      binary: path to executable/shared object containing this instruction.
      offset: instruction offset in the @binary.

    Returns:
      list of strings (one string for each inlined frame) describing
      the code locations for this instruction (that is, function name, file
      name, line and column numbers). This base implementation symbolizes
      nothing and returns None.
    """
    return None
66 class LLVMSymbolizer(Symbolizer
):
67 def __init__(self
, symbolizer_path
, default_arch
, system
, dsym_hints
=[]):
68 super(LLVMSymbolizer
, self
).__init
__()
69 self
.symbolizer_path
= symbolizer_path
70 self
.default_arch
= default_arch
72 self
.dsym_hints
= dsym_hints
73 self
.pipe
= self
.open_llvm_symbolizer()
75 def open_llvm_symbolizer(self
):
76 cmd
= [self
.symbolizer_path
,
77 '--use-symbol-table=true',
78 '--demangle=%s' % demangle
,
81 '--default-arch=%s' % self
.default_arch
]
82 if self
.system
== 'Darwin':
83 for hint
in self
.dsym_hints
:
84 cmd
.append('--dsym-hint=%s' % hint
)
88 result
= subprocess
.Popen(cmd
, stdin
=subprocess
.PIPE
,
89 stdout
=subprocess
.PIPE
)
94 def symbolize(self
, addr
, binary
, offset
):
95 """Overrides Symbolizer.symbolize."""
100 symbolizer_input
= '"%s" %s' % (binary
, offset
)
102 print symbolizer_input
103 print >> self
.pipe
.stdin
, symbolizer_input
105 function_name
= self
.pipe
.stdout
.readline().rstrip()
106 if not function_name
:
108 file_name
= self
.pipe
.stdout
.readline().rstrip()
109 file_name
= fix_filename(file_name
)
110 if (not function_name
.startswith('??') or
111 not file_name
.startswith('??')):
112 # Append only non-trivial frames.
113 result
.append('%s in %s %s' % (addr
, function_name
,
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
  """Create an LLVMSymbolizer after locating the llvm-symbolizer binary.

  The binary path is taken from $LLVM_SYMBOLIZER_PATH, then from
  $ASAN_SYMBOLIZER_PATH; when neither is set (or both are empty) the bare
  name 'llvm-symbolizer' is used, assuming it can be found in PATH.

  Args:
    system: OS name, passed through to LLVMSymbolizer.
    default_arch: architecture name, passed through to LLVMSymbolizer.
    dsym_hints: optional collection of dSYM hints; a fresh empty list is
      used when omitted so that no list object is shared between calls.

  Returns:
    The newly constructed LLVMSymbolizer.
  """
  if dsym_hints is None:
    dsym_hints = []
  # `or` short-circuits, so later sources are consulted only when the
  # earlier ones are unset or empty.
  symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                     os.getenv('ASAN_SYMBOLIZER_PATH') or
                     'llvm-symbolizer')
  return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
132 class Addr2LineSymbolizer(Symbolizer
):
133 def __init__(self
, binary
):
134 super(Addr2LineSymbolizer
, self
).__init
__()
136 self
.pipe
= self
.open_addr2line()
138 def open_addr2line(self
):
139 addr2line_tool
= 'addr2line'
141 addr2line_tool
= binutils_prefix
+ addr2line_tool
142 cmd
= [addr2line_tool
, '-f']
144 cmd
+= ['--demangle']
145 cmd
+= ['-e', self
.binary
]
148 return subprocess
.Popen(cmd
,
149 stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
)
151 def symbolize(self
, addr
, binary
, offset
):
152 """Overrides Symbolizer.symbolize."""
153 if self
.binary
!= binary
:
156 print >> self
.pipe
.stdin
, offset
157 function_name
= self
.pipe
.stdout
.readline().rstrip()
158 file_name
= self
.pipe
.stdout
.readline().rstrip()
162 file_name
= fix_filename(file_name
)
163 return ['%s in %s %s' % (addr
, function_name
, file_name
)]
166 class UnbufferedLineConverter(object):
168 Wrap a child process that responds to each line of input with one line of
169 output. Uses pty to trick the child into providing unbuffered output.
171 def __init__(self
, args
, close_stderr
=False):
172 # Local imports so that the script can start on Windows.
177 # We're the child. Transfer control to command.
179 dev_null
= os
.open('/dev/null', 0)
181 os
.execvp(args
[0], args
)
184 attr
= termios
.tcgetattr(fd
)
185 attr
[3] = attr
[3] & ~termios
.ECHO
186 termios
.tcsetattr(fd
, termios
.TCSANOW
, attr
)
187 # Set up a file()-like interface to the child process
188 self
.r
= os
.fdopen(fd
, "r", 1)
189 self
.w
= os
.fdopen(os
.dup(fd
), "w", 1)
191 def convert(self
, line
):
192 self
.w
.write(line
+ "\n")
193 return self
.readline()
196 return self
.r
.readline().rstrip()
199 class DarwinSymbolizer(Symbolizer
):
200 def __init__(self
, addr
, binary
):
201 super(DarwinSymbolizer
, self
).__init
__()
203 self
.arch
= guess_arch(addr
)
208 print 'atos -o %s -arch %s' % (self
.binary
, self
.arch
)
209 cmdline
= ['atos', '-o', self
.binary
, '-arch', self
.arch
]
210 self
.atos
= UnbufferedLineConverter(cmdline
, close_stderr
=True)
212 def symbolize(self
, addr
, binary
, offset
):
213 """Overrides Symbolizer.symbolize."""
214 if self
.binary
!= binary
:
216 atos_line
= self
.atos
.convert('0x%x' % int(offset
, 16))
217 while "got symbolicator for" in atos_line
:
218 atos_line
= self
.atos
.readline()
219 # A well-formed atos response looks like this:
220 # foo(type1, type2) (in object.name) (filename.cc:80)
221 match
= re
.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line
)
223 print 'atos_line: ', atos_line
225 function_name
= match
.group(1)
226 function_name
= re
.sub('\(.*?\)', '', function_name
)
227 file_name
= fix_filename(match
.group(3))
228 return ['%s in %s %s' % (addr
, function_name
, file_name
)]
230 return ['%s in %s' % (addr
, atos_line
)]
233 # Chain several symbolizers so that if one symbolizer fails, we fall back
234 # to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries several symbolizers in order until one of them succeeds."""

  def __init__(self, symbolizer_list):
    """Store the ordered list of symbolizers to try.

    Args:
      symbolizer_list: list of symbolizers; entries may be None (for
        example when a factory could not create its symbolizer) and are
        then skipped.
    """
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      The first non-empty result produced by a symbolizer in the chain, or
      None if every symbolizer failed.
    """
    for symbolizer in self.symbolizer_list:
      # Factories may contribute None placeholders; skip them.
      if not symbolizer:
        continue
      result = symbolizer.symbolize(addr, binary, offset)
      if result:
        return result
    return None

  def append_symbolizer(self, symbolizer):
    """Add another fallback symbolizer at the end of the chain."""
    self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
  """Create a BreakpadSymbolizer for `binary` if a symbol file exists.

  The symbol file name is `binary` followed by the value of the
  BREAKPAD_SUFFIX environment variable.

  Args:
    binary: path to the binary being symbolized.

  Returns:
    A BreakpadSymbolizer for the symbol file, or None when BREAKPAD_SUFFIX
    is unset/empty or the symbol file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  # Without a suffix there is no symbol file to look for (and concatenating
  # None to the path would raise TypeError).
  if not suffix:
    return None
  filename = binary + suffix
  if os.access(filename, os.F_OK):
    return BreakpadSymbolizer(filename)
  return None
def SystemSymbolizerFactory(system, addr, binary):
  """Create the symbolizer backed by the platform's native tool.

  Args:
    system: OS name ('Darwin', 'Linux' or 'FreeBSD').
    addr: address string; passed to DarwinSymbolizer.
    binary: path to the binary to symbolize.

  Returns:
    A DarwinSymbolizer on Darwin, an Addr2LineSymbolizer on Linux and
    FreeBSD, or None for any other system.
  """
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  # SymbolizationLoop accepts FreeBSD as a supported system, so it needs a
  # system symbolizer as well.
  if system in ('Linux', 'FreeBSD'):
    return Addr2LineSymbolizer(binary)
  return None
269 class BreakpadSymbolizer(Symbolizer
):
270 def __init__(self
, filename
):
271 super(BreakpadSymbolizer
, self
).__init
__()
272 self
.filename
= filename
273 lines
= file(filename
).readlines()
276 self
.address_list
= []
278 # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
279 fragments
= lines
[0].rstrip().split()
280 self
.arch
= fragments
[2]
281 self
.debug_id
= fragments
[3]
282 self
.binary
= ' '.join(fragments
[4:])
283 self
.parse_lines(lines
[1:])
285 def parse_lines(self
, lines
):
286 cur_function_addr
= ''
288 fragments
= line
.split()
289 if fragments
[0] == 'FILE':
290 assert int(fragments
[1]) == len(self
.files
)
291 self
.files
.append(' '.join(fragments
[2:]))
292 elif fragments
[0] == 'PUBLIC':
293 self
.symbols
[int(fragments
[1], 16)] = ' '.join(fragments
[3:])
294 elif fragments
[0] in ['CFI', 'STACK']:
296 elif fragments
[0] == 'FUNC':
297 cur_function_addr
= int(fragments
[1], 16)
298 if not cur_function_addr
in self
.symbols
.keys():
299 self
.symbols
[cur_function_addr
] = ' '.join(fragments
[4:])
301 # Line starting with an address.
302 addr
= int(fragments
[0], 16)
303 self
.address_list
.append(addr
)
304 # Tuple of symbol address, size, line, file number.
305 self
.addresses
[addr
] = (cur_function_addr
,
306 int(fragments
[1], 16),
309 self
.address_list
.sort()
311 def get_sym_file_line(self
, addr
):
313 if addr
in self
.addresses
.keys():
316 index
= bisect
.bisect_left(self
.address_list
, addr
)
320 key
= self
.address_list
[index
- 1]
321 sym_id
, size
, line_no
, file_no
= self
.addresses
[key
]
322 symbol
= self
.symbols
[sym_id
]
323 filename
= self
.files
[file_no
]
324 if addr
< key
+ size
:
325 return symbol
, filename
, line_no
329 def symbolize(self
, addr
, binary
, offset
):
330 if self
.binary
!= binary
:
332 res
= self
.get_sym_file_line(int(offset
, 16))
334 function_name
, file_name
, line_no
= res
335 result
= ['%s in %s %s:%d' % (
336 addr
, function_name
, file_name
, line_no
)]
343 class SymbolizationLoop(object):
344 def __init__(self
, binary_name_filter
=None, dsym_hint_producer
=None):
345 if sys
.platform
== 'win32':
346 # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
347 # even in sandboxed processes. Nothing needs to be done here.
348 self
.process_line
= self
.process_line_echo
350 # Used by clients who may want to supply a different binary name.
351 # E.g. in Chrome several binaries may share a single .dSYM.
352 self
.binary_name_filter
= binary_name_filter
353 self
.dsym_hint_producer
= dsym_hint_producer
354 self
.system
= os
.uname()[0]
355 if self
.system
not in ['Linux', 'Darwin', 'FreeBSD']:
356 raise Exception('Unknown system')
357 self
.llvm_symbolizers
= {}
358 self
.last_llvm_symbolizer
= None
359 self
.dsym_hints
= set([])
361 self
.process_line
= self
.process_line_posix
363 def symbolize_address(self
, addr
, binary
, offset
):
364 # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
365 # a single symbolizer binary.
366 # On Darwin, if the dsym hint producer is present:
367 # 1. check whether we've seen this binary already; if so,
368 # use |llvm_symbolizers[binary]|, which has already loaded the debug
369 # info for this binary (might not be the case for
370 # |last_llvm_symbolizer|);
371 # 2. otherwise check if we've seen all the hints for this binary already;
372 # if so, reuse |last_llvm_symbolizer| which has the full set of hints;
373 # 3. otherwise create a new symbolizer and pass all currently known
375 if not binary
in self
.llvm_symbolizers
:
376 use_new_symbolizer
= True
377 if self
.system
== 'Darwin' and self
.dsym_hint_producer
:
378 dsym_hints_for_binary
= set(self
.dsym_hint_producer(binary
))
379 use_new_symbolizer
= bool(dsym_hints_for_binary
- self
.dsym_hints
)
380 self
.dsym_hints |
= dsym_hints_for_binary
381 if self
.last_llvm_symbolizer
and not use_new_symbolizer
:
382 self
.llvm_symbolizers
[binary
] = self
.last_llvm_symbolizer
384 self
.last_llvm_symbolizer
= LLVMSymbolizerFactory(
385 self
.system
, guess_arch(addr
), self
.dsym_hints
)
386 self
.llvm_symbolizers
[binary
] = self
.last_llvm_symbolizer
387 # Use the chain of symbolizers:
388 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
389 # (fall back to next symbolizer if the previous one fails).
390 if not binary
in symbolizers
:
391 symbolizers
[binary
] = ChainSymbolizer(
392 [BreakpadSymbolizerFactory(binary
), self
.llvm_symbolizers
[binary
]])
393 result
= symbolizers
[binary
].symbolize(addr
, binary
, offset
)
395 # Initialize system symbolizer only if other symbolizers failed.
396 symbolizers
[binary
].append_symbolizer(
397 SystemSymbolizerFactory(self
.system
, addr
, binary
))
398 result
= symbolizers
[binary
].symbolize(addr
, binary
, offset
)
399 # The system symbolizer must produce some result.
403 def get_symbolized_lines(self
, symbolized_lines
):
404 if not symbolized_lines
:
405 return [self
.current_line
]
408 for symbolized_frame
in symbolized_lines
:
409 result
.append(' #%s %s' % (str(self
.frame_no
), symbolized_frame
.rstrip()))
413 def process_logfile(self
):
416 processed
= self
.process_line(line
)
417 print '\n'.join(processed
)
419 def process_line_echo(self
, line
):
420 return [line
.rstrip()]
422 def process_line_posix(self
, line
):
423 self
.current_line
= line
.rstrip()
424 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
425 stack_trace_line_format
= (
426 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
427 match
= re
.match(stack_trace_line_format
, line
)
429 return [self
.current_line
]
432 _
, frameno_str
, addr
, binary
, offset
= match
.groups()
433 if frameno_str
== '0':
434 # Assume that frame #0 is the first frame of new stack trace.
436 original_binary
= binary
437 if self
.binary_name_filter
:
438 binary
= self
.binary_name_filter(binary
)
439 symbolized_line
= self
.symbolize_address(addr
, binary
, offset
)
440 if not symbolized_line
:
441 if original_binary
!= binary
:
442 symbolized_line
= self
.symbolize_address(addr
, binary
, offset
)
443 return self
.get_symbolized_lines(symbolized_line
)
# Script entry point: parse the command line, publish the options through the
# module-level configuration globals, then symbolize the log.
if __name__ == '__main__':
  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description='ASan symbolization script',
      epilog='Example of use:\n'
             'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
             '-s "$HOME/SymbolFiles" < asan.log')
  parser.add_argument('path_to_cut', nargs='*',
                      help='pattern to be cut from the result file path ')
  parser.add_argument('-d','--demangle', action='store_true',
                      help='demangle function names')
  parser.add_argument('-s', metavar='SYSROOT',
                      help='set path to sysroot for sanitized binaries')
  parser.add_argument('-c', metavar='CROSS_COMPILE',
                      help='set prefix for binutils')
  parser.add_argument('-l','--logfile', default=sys.stdin,
                      type=argparse.FileType('r'),
                      help='set log file name to parse, default is stdin')
  args = parser.parse_args()

  fix_filename_patterns = args.path_to_cut
  # 'store_true' yields False when the flag is absent, so this always
  # defines the global that the symbolizer command lines read.
  demangle = args.demangle
  # Install the sysroot filter only when a sysroot was actually given;
  # otherwise sysroot_path_filter would try to concatenate None and a path.
  if args.s:
    binary_name_filter = sysroot_path_filter
    sysroot_path = args.s
  binutils_prefix = args.c
  # args.logfile already defaults to sys.stdin.
  logfile = args.logfile

  loop = SymbolizationLoop(binary_name_filter)
  loop.process_logfile()