2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
7 This script can take an Apple-style CrashReporter log and symbolicate it. This
8 is useful for when a user's reports aren't being uploaded, for example.
10 Only versions 6, 7, 8, and 9 reports are supported. For more information on the
11 file format, reference this document:
12 TN2123 <http://developer.apple.com/library/mac/#technotes/tn2004/tn2123.html>
14 Information on symbolication was gleaned from:
15 <http://developer.apple.com/tools/xcode/symbolizingcrashdumps.html>
# Maps binary image identifiers to binary names (minus the .dSYM portion) found
# in the archive. These are the only objects that will be looked up.
# Keys are the identifiers that appear in a report's binary images section;
# values are the bundle file names that the dSYM path is built from.
SYMBOL_IMAGE_MAP = {
  'com.google.Chrome': 'Google Chrome.app',
  'com.google.Chrome.framework': 'Google Chrome Framework.framework',
  'com.google.Chrome.helper': 'Google Chrome Helper.app',
}
32 class CrashReport(object):
33 """A parsed representation of an Apple CrashReport text file."""
34 def __init__(self
, file_name
):
35 super(CrashReport
, self
).__init
__()
38 self
._binary
_images
= {}
40 fd
= open(file_name
, 'r')
43 # Try and get the report version. If it's not a version we handle, abort.
44 self
.report_version
= int(self
.report_info
['Report Version'])
45 # Version 6: 10.5 and 10.6 crash report
46 # Version 7: 10.6 spindump report
47 # Version 8: 10.7 spindump report
48 # Version 9: 10.7 crash report
49 valid_versions
= (6, 7, 8, 9)
50 if self
.report_version
not in valid_versions
:
51 raise Exception("Only crash reports of versions %s are accepted." %
54 # If this is a spindump (version 7 or 8 report), use a special parser. The
55 # format is undocumented, but is similar to version 6. However, the spindump
56 # report contains user and kernel stacks for every process on the system.
57 if self
.report_version
== 7 or self
.report_version
== 8:
58 self
._ParseSpindumpStack
(fd
)
62 self
._ParseBinaryImages
(fd
)
65 def Symbolicate(self
, symbol_path
):
66 """Symbolicates a crash report stack trace."""
67 # In order to be efficient, collect all the offsets that will be passed to
68 # atos by the image name.
69 offsets_by_image
= self
._CollectAddressesForImages
(SYMBOL_IMAGE_MAP
.keys())
71 # For each image, run atos with the list of addresses.
72 for image_name
, addresses
in offsets_by_image
.items():
73 # If this image was not loaded or is in no stacks, skip.
74 if image_name
not in self
._binary
_images
or not len(addresses
):
77 # Combine the |image_name| and |symbol_path| into the path of the dSYM.
78 dsym_file
= self
._GetDSymPath
(symbol_path
, image_name
)
80 # From the list of 2-Tuples of (frame, address), create a list of just
82 address_list
= map(lambda x
: x
[1], addresses
)
84 # Look up the load address of the image.
85 binary_base
= self
._binary
_images
[image_name
][0]
87 # This returns a list of just symbols. The indices will match up with the
88 # list of |addresses|.
89 symbol_names
= self
._RunAtos
(binary_base
, dsym_file
, address_list
)
91 print 'Error loading symbols for ' + image_name
94 # Attaches a list of symbol names to stack frames. This assumes that the
95 # order of |addresses| has stayed the same as |symbol_names|.
96 self
._AddSymbolsToFrames
(symbol_names
, addresses
)
98 def _ParseHeader(self
, fd
):
99 """Parses the header section of a crash report, which contains the OS and
100 application version information."""
101 # The header is made up of different sections, depending on the type of
102 # report and the report version. Almost all have a format of a key and
103 # value separated by a colon. Accumulate all of these artifacts into a
104 # dictionary until the first thread stack is reached.
105 thread_re
= re
.compile('^[ \t]*Thread ([a-f0-9]+)')
107 while not thread_re
.match(line
):
108 # Skip blank lines. There are typically three or four sections separated
109 # by newlines in the header.
112 parts
= line
.split(':', 1)
113 # Certain lines in different report versions don't follow the key-value
114 # format, so skip them.
116 # There's a varying amount of space padding after the ':' to align all
117 # the values; strip that.
118 self
.report_info
[parts
[0]] = parts
[1].lstrip()
121 # When this loop exits, the header has been read in full. However, the first
122 # thread stack heading has been read past. Seek backwards from the current
123 # position by the length of the line so that it is re-read when
124 # _ParseStack() is entered.
125 fd
.seek(-len(line
), os
.SEEK_CUR
)
127 def _ParseStack(self
, fd
):
128 """Parses the stack dump of a crash report and creates a list of threads
129 and their stack traces."""
130 # Compile a regex that matches the start of a thread stack. Note that this
131 # must be specific to not include the thread state section, which comes
132 # right after all the stack traces.
133 line_re
= re
.compile('^Thread ([0-9]+)( Crashed)?:(.*)')
135 # On entry into this function, the fd has been walked up to the "Thread 0"
137 line
= fd
.readline().rstrip()
140 while line_re
.match(line
) or in_stack
:
141 # Check for start of the thread stack.
142 matches
= line_re
.match(line
)
145 # A blank line indicates a break in the thread stack.
148 # If this is the start of a thread stack, create the CrashThread.
150 thread
= CrashThread(matches
.group(1))
151 thread
.name
= matches
.group(3)
152 thread
.did_crash
= matches
.group(2) != None
153 self
.threads
.append(thread
)
155 # All other lines are stack frames.
156 thread
.stack
.append(self
._ParseStackFrame
(line
))
157 # Read the next line.
160 def _ParseStackFrame(self
, line
):
161 """Takes in a single line of text and transforms it into a StackFrame."""
162 frame
= StackFrame(line
)
164 # A stack frame is in the format of:
165 # |<frame-number> <binary-image> 0x<address> <symbol> <offset>|.
166 regex
= '^([0-9]+) +(.+)[ \t]+(0x[0-9a-f]+) (.*) \+ ([0-9]+)$'
167 matches
= re
.match(regex
, line
)
171 # Create a stack frame with the information extracted from the regex.
172 frame
.frame_id
= matches
.group(1)
173 frame
.image
= matches
.group(2)
174 frame
.address
= int(matches
.group(3), 0) # Convert HEX to an int.
175 frame
.original_symbol
= matches
.group(4)
176 frame
.offset
= matches
.group(5)
180 def _ParseSpindumpStack(self
, fd
):
181 """Parses a spindump stack report. In this format, each thread stack has
182 both a user and kernel trace. Only the user traces are symbolicated."""
184 # The stack trace begins with the thread header, which is identified by a
185 # HEX number. The thread names appear to be incorrect in spindumps.
186 user_thread_re
= re
.compile('^ Thread ([0-9a-fx]+)')
188 # When this method is called, the fd has been walked right up to the first
191 in_user_stack
= False
192 in_kernel_stack
= False
195 while user_thread_re
.match(line
) or in_user_stack
or in_kernel_stack
:
196 # Check for the start of a thread.
197 matches
= user_thread_re
.match(line
)
200 # A blank line indicates the start of a new thread. The blank line comes
201 # after the kernel stack before a new thread header.
202 in_kernel_stack
= False
204 # This is the start of a thread header. The next line is the heading for
205 # the user stack, followed by the actual trace.
206 thread
= CrashThread(matches
.group(1))
208 self
.threads
.append(thread
)
210 line
= fd
.readline() # Read past the 'User stack:' header.
211 elif line
.startswith(' Kernel stack:'):
212 # The kernel stack header comes immediately after the last frame (really
213 # the top frame) in the user stack, without a blank line.
214 in_user_stack
= False
215 in_kernel_stack
= True
217 # If this is a line while in the user stack, parse it as a stack frame.
218 thread
.stack
.append(self
._ParseSpindumpStackFrame
(line
))
219 # Loop with the next line.
222 # When the loop exits, the file has been read through the 'Binary images:'
223 # header. Seek backwards so that _ParseBinaryImages() does the right thing.
224 fd
.seek(-len(line
), os
.SEEK_CUR
)
226 def _ParseSpindumpStackFrame(self
, line
):
227 """Parses a spindump-style stackframe."""
228 frame
= StackFrame(line
)
230 # The format of the frame is either:
231 # A: |<space><steps> <symbol> + <offset> (in <image-name>) [<address>]|
232 # B: |<space><steps> ??? (in <image-name> + <offset>) [<address>]|
233 regex_a
= '^([ ]+[0-9]+) (.*) \+ ([0-9]+) \(in (.*)\) \[(0x[0-9a-f]+)\]'
234 regex_b
= '^([ ]+[0-9]+) \?\?\?( \(in (.*) \+ ([0-9]+)\))? \[(0x[0-9a-f]+)\]'
236 # Create the stack frame with the information extracted from the regex.
237 matches
= re
.match(regex_a
, line
)
239 frame
.frame_id
= matches
.group(1)[4:] # Remove some leading spaces.
240 frame
.original_symbol
= matches
.group(2)
241 frame
.offset
= matches
.group(3)
242 frame
.image
= matches
.group(4)
243 frame
.address
= int(matches
.group(5), 0)
247 # If pattern A didn't match (which it will most of the time), try B.
248 matches
= re
.match(regex_b
, line
)
250 frame
.frame_id
= matches
.group(1)[4:] # Remove some leading spaces.
251 frame
.image
= matches
.group(3)
252 frame
.offset
= matches
.group(4)
253 frame
.address
= int(matches
.group(5), 0)
257 # Otherwise, this frame could not be matched and just use the raw input.
258 frame
.line
= frame
.line
.strip()
261 def _ParseBinaryImages(self
, fd
):
262 """Parses out the binary images section in order to get the load offset."""
263 # The parser skips some sections, so advance until the "Binary Images"
265 while not fd
.readline().lstrip().startswith("Binary Images:"): pass
267 # Create a regex to match the lines of format:
268 # |0x<start> - 0x<end> <binary-image> <version> (<version>) <<UUID>> <path>|
269 image_re
= re
.compile(
270 '[ ]*(0x[0-9a-f]+) -[ \t]+(0x[0-9a-f]+) [+ ]([a-zA-Z0-9._\-]+)')
272 # This section is in this format:
273 # |<start address> - <end address> <image name>|.
277 # End when a blank line is hit.
279 # Match the line to the regex.
280 match
= image_re
.match(line
)
282 # Store the offsets by image name so it can be referenced during
283 # symbolication. These are hex numbers with leading '0x', so int() can
284 # convert them to decimal if base=0.
285 address_range
= (int(match
.group(1), 0), int(match
.group(2), 0))
286 self
._binary
_images
[match
.group(3)] = address_range
def _CollectAddressesForImages(self, images):
  """Groups the stack frames of every thread by the binary image they fall in.

  Each frame's address is resolved with _ImageForAddress(). Frames that land in
  one of |images| are recorded, and their |image| attribute is overwritten with
  the resolved name in case the name in the report was elided.

  Args:
    images: An iterable of binary image names to collect frames for.

  Returns:
    A dictionary with one key per entry in |images|. Each value is a list of
    2-Tuples of (stack_frame, address), ordered by thread and then by position
    in the stack. The list is empty for an image that no frame falls in.
  """
  # Create the collection and initialize it with empty lists for each image.
  collection = {}
  for image in images:
    collection[image] = []

  # Perform the iteration.
  for thread in self.threads:
    for frame in thread.stack:
      image_name = self._ImageForAddress(frame.address)
      # The keys of |collection| are exactly the requested images, so testing
      # against it is equivalent to testing against |images| and still works
      # if |images| was an iterator that the loop above consumed.
      if image_name in collection:
        # Replace the image name in the frame in case it was elided.
        frame.image = image_name
        collection[image_name].append((frame, frame.address))

  return collection
def _ImageForAddress(self, address):
  """Given a PC address, returns the bundle identifier of the image in which
  the address resides.

  Args:
    address: The address to look up, as an integer.

  Returns:
    The key of |self._binary_images| whose (start, end) range contains
    |address|, with both ends inclusive, or None if no loaded image covers it.
  """
  for image_name, address_range in self._binary_images.items():
    start, end = address_range[0], address_range[1]
    if start <= address <= end:
      return image_name
  # The address is outside every image listed in the report.
  return None
def _GetDSymPath(self, base_path, image_name):
  """Builds the path of the DWARF file inside the dSYM bundle for an image.

  |image_name| is looked up in SYMBOL_IMAGE_MAP to get the bundle file name
  (a KeyError is raised if it is not listed there). The result has the form
  |base_path|/<bundle>.dSYM/Contents/Resources/DWARF/<bundle minus extension>.
  """
  bundle_name = SYMBOL_IMAGE_MAP[image_name]
  dsym_bundle = bundle_name + '.dSYM'
  # The DWARF file is named after the bundle with its extension chopped off.
  binary_name, _ = os.path.splitext(bundle_name)
  return os.path.join(base_path, dsym_bundle, 'Contents', 'Resources', 'DWARF',
                      binary_name)
326 def _RunAtos(self
, load_address
, dsym_file
, addresses
):
327 """Runs the atos with the provided arguments. |addresses| is used as stdin.
328 Returns a list of symbol information in the same order as |addresses|."""
329 args
= ['atos', '-l', str(load_address
), '-o', dsym_file
]
331 # Get the arch type. This is of the format |X86 (Native)|.
332 if 'Code Type' in self
.report_info
:
333 arch
= self
.report_info
['Code Type'].lower().split(' ')
337 # The crash report refers to i386 as x86, but atos doesn't know what
340 args
.extend(['-arch', arch
])
342 proc
= subprocess
.Popen(args
, stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
)
343 addresses
= map(hex, addresses
)
344 (stdout
, stderr
) = proc
.communicate(' '.join(addresses
))
347 return stdout
.rstrip().split('\n')
349 def _AddSymbolsToFrames(self
, symbols
, address_tuples
):
350 """Takes a single value (the list) from _CollectAddressesForImages and does
351 a smart-zip with the data returned by atos in |symbols|. Note that the
352 indices must match for this to succeed."""
353 if len(symbols
) != len(address_tuples
):
354 print 'symbols do not match'
356 # Each line of output from atos is in this format:
357 # |<symbol> (in <image>) (<file>:<line>)|.
358 line_regex
= re
.compile('(.+) \(in (.+)\) (\((.+):([0-9]+)\))?')
360 # Zip the two data sets together.
361 for i
in range(len(symbols
)):
362 symbol_parts
= line_regex
.match(symbols
[i
])
365 frame
= address_tuples
[i
][0]
366 frame
.symbol
= symbol_parts
.group(1)
367 frame
.image
= symbol_parts
.group(2)
368 frame
.file_name
= symbol_parts
.group(4)
369 frame
.line_number
= symbol_parts
.group(5)
372 class CrashThread(object):
373 """A CrashThread represents a stacktrace of a single thread """
374 def __init__(self
, thread_id
):
375 super(CrashThread
, self
).__init
__()
376 self
.thread_id
= thread_id
378 self
.did_crash
= False
384 name
= ': ' + self
.name
385 return 'Thread ' + self
.thread_id
+ name
+ '\n' + \
386 '\n'.join(map(str, self
.stack
))
389 class StackFrame(object):
390 """A StackFrame is owned by a CrashThread."""
391 def __init__(self
, line
):
392 super(StackFrame
, self
).__init
__()
393 # The original line. This will be set to None if symbolication was
400 self
.original_symbol
= None
402 # The following members are set after symbolication.
404 self
.file_name
= None
408 # If symbolication failed, just use the original line.
410 return ' %s' % self
.line
412 # Use different location information depending on symbolicated data.
415 location
= ' - %s:%s' % (self
.file_name
, self
.line_number
)
417 location
= ' + %s' % self
.offset
419 # Same with the symbol information.
420 symbol
= self
.original_symbol
424 return ' %s\t0x%x\t[%s\t%s]\t%s' % (self
.frame_id
, self
.address
,
425 self
.image
, location
, symbol
)
428 def PrettyPrintReport(report
):
429 """Takes a crash report and prints it like the crash server would."""
430 print 'Process : ' + report
.report_info
['Process']
431 print 'Version : ' + report
.report_info
['Version']
432 print 'Date : ' + report
.report_info
['Date/Time']
433 print 'OS Version : ' + report
.report_info
['OS Version']
435 if 'Crashed Thread' in report
.report_info
:
436 print 'Crashed Thread : ' + report
.report_info
['Crashed Thread']
438 if 'Event' in report
.report_info
:
439 print 'Event : ' + report
.report_info
['Event']
442 for thread
in report
.threads
:
445 exc_type
= report
.report_info
['Exception Type'].split(' ')[0]
446 exc_code
= report
.report_info
['Exception Codes'].replace('at', '@')
447 print '*CRASHED* ( ' + exc_type
+ ' / ' + exc_code
+ ' )'
448 # Version 7 reports have spindump-style output (with a stepped stack trace),
449 # so remove the first tab to get better alignment.
450 if report
.report_version
== 7:
451 for line
in repr(thread
).split('\n'):
452 print line
.replace('\t', ' ', 1)
459 parser
= optparse
.OptionParser(
460 usage
='%prog [options] symbol_path crash_report',
461 description
='This will parse and symbolicate an Apple CrashReporter v6-9 '
463 parser
.add_option('-s', '--std-path', action
='store_true', dest
='std_path',
464 help='With this flag, the symbol_path is a containing '
465 'directory, in which a dSYM files are stored in a '
466 'directory named by the version. Example: '
467 '[symbolicate_crash.py -s ./symbols/ report.crash] will '
468 'look for dSYMs in ./symbols/15.0.666.0/ if the report is '
469 'from that verison.')
470 (options
, args
) = parser
.parse_args(args
[1:])
472 # Check that we have something to symbolicate.
477 report
= CrashReport(args
[1])
480 # If not using the standard layout, this is a full path to the symbols.
481 if not options
.std_path
:
482 symbol_path
= args
[0]
483 # Otherwise, use the report version to locate symbols in a directory.
485 # This is in the format of |M.N.B.P (B.P)|. Get just the part before the
487 chrome_version
= report
.report_info
['Version'].split(' ')[0]
488 symbol_path
= os
.path
.join(args
[0], chrome_version
)
490 # Check that the symbols exist.
491 if not os
.path
.isdir(symbol_path
):
492 print >>sys
.stderr
, 'Symbol path %s is not a directory' % symbol_path
495 print >>sys
.stderr
, 'Using symbols from ' + symbol_path
496 print >>sys
.stderr
, '=' * 80
498 report
.Symbolicate(symbol_path
)
499 PrettyPrintReport(report
)
# When run as a script, hand the full argv to Main() and use its return value
# as the process exit status.
if __name__ == '__main__':
  exit_status = Main(sys.argv)
  sys.exit(exit_status)