Add ICU message format support
[chromium-blink-merge.git] / tools / valgrind / asan / asan_symbolize.py
blob010740671aae94880bfaa87f659a55054ff983ff
1 #!/usr/bin/env python
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 from third_party import asan_symbolize
9 import argparse
10 import base64
11 import json
12 import os
13 import platform
14 import re
15 import subprocess
16 import sys
class LineBuffered(object):
  """Stream wrapper that flushes the wrapped stream after newline writes.

  Only write() is intercepted; every other attribute lookup is forwarded to
  the wrapped stream, so the wrapper can stand in for the original object.
  """

  def __init__(self, stream):
    self.stream = stream

  def __getattr__(self, attr):
    # Invoked only for names that are not found on the wrapper itself.
    return getattr(self.stream, attr)

  def write(self, data):
    """Write |data| to the stream, flushing when it contains a newline."""
    wrapped = self.stream
    wrapped.write(data)
    if '\n' not in data:
      return
    wrapped.flush()
def disable_buffering():
  """Line-buffer this process's stdout and set PYTHONUNBUFFERED for children.

  Does nothing when PYTHONUNBUFFERED is already set to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # sys.stdout.write cannot simply be reassigned on the built-in file object,
  # so the whole stream is replaced with a flushing wrapper instead.
  sys.stdout = LineBuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
def set_symbolizer_path():
  """Point LLVM_SYMBOLIZER_PATH at llvm-symbolizer in the Chromium source tree.

  An existing non-empty LLVM_SYMBOLIZER_PATH is left untouched. Otherwise the
  binary is expected at
  src/third_party/llvm-build/Release+Asserts/bin/llvm-symbolizer and an
  AssertionError is raised if no file exists there.
  """
  if os.environ.get('LLVM_SYMBOLIZER_PATH'):
    return
  # This script is assumed to live three levels below src/ (i.e. in
  # src/tools/valgrind/asan/).
  this_dir = os.path.dirname(os.path.abspath(__file__))
  src_root = os.path.join(this_dir, "..", "..", "..")
  relative_parts = ('third_party', 'llvm-build', 'Release+Asserts', 'bin',
                    'llvm-symbolizer')
  symbolizer_path = os.path.join(src_root, *relative_parts)
  assert os.path.isfile(symbolizer_path)
  os.environ['LLVM_SYMBOLIZER_PATH'] = os.path.abspath(symbolizer_path)
def is_hash_name(name):
  """Return True if |name| consists solely of lowercase hexadecimal digits.

  Used to recognise cache files that are named after a hash (e.g. SHA1).
  """
  # '\Z' anchors at the very end of the string. '$' would also match just
  # before a trailing newline and thus accept names such as 'abc\n'.
  return re.match(r'[0-9a-f]+\Z', name) is not None
def split_path(path):
  """Split |path| into a list of its components.

  os.path.split() is applied repeatedly until it stops making progress. The
  first element of the result is whatever remains at that point: the root for
  an absolute path, or an empty string for a relative one.
  """
  reversed_names = []
  remaining = path
  while True:
    parent, name = os.path.split(remaining)
    if parent == remaining:
      # Nothing more to strip off; |remaining| is the leading component.
      break
    reversed_names.append(name)
    remaining = parent
  reversed_names.append(remaining)
  reversed_names.reverse()
  return reversed_names
def chrome_product_dir_path(exe_path):
  """Return the product directory that contains the executable |exe_path|.

  If a component of |exe_path| ends in '.app', the product dir is everything
  before the first such component. Otherwise the executable is taken to be a
  command line binary residing right in the product dir.

  Args:
    exe_path: path to the program executable, or None.

  Returns:
    None if |exe_path| is None, otherwise a non-empty directory path. '.' is
    returned when |exe_path| has no directory part.
  """
  if exe_path is None:
    return None
  path_parts = split_path(exe_path)
  # By default the binary resides right in the product dir.
  product_dir_parts = path_parts[:-1]
  for index, part in enumerate(path_parts):
    if part.endswith('.app'):
      product_dir_parts = path_parts[:index]
      break
  product_dir = os.path.join(*product_dir_parts) if product_dir_parts else ''
  # split_path() yields ['', 'name'] for a bare relative name, so the joined
  # result may be an empty string. Make sure the product dir path isn't empty
  # in that case: an empty path would silently disable the binary name filter.
  return product_dir or '.'
# Maps an inode number to the path find_inode_at_path() resolved for it (or
# None if nothing was found). Keyed on the inode only, not the search path.
inode_path_cache = {}


def find_inode_at_path(inode, path):
  """Return a path below |path| that refers to |inode|, or None.

  The lookup shells out to `find <path> -inum <inode>`; results (including
  misses) are memoized in |inode_path_cache|.

  Args:
    inode: inode number to look for.
    path: directory to search.

  Returns:
    The first path printed by `find`, or None if it printed nothing.

  Raises:
    subprocess.CalledProcessError: if `find` exits with a non-zero status.
  """
  if inode in inode_path_cache:
    return inode_path_cache[inode]
  cmd = ['find', path, '-inum', str(inode)]
  # universal_newlines=True makes check_output() return text rather than bytes
  # on Python 3, so the output can be split on '\n' on either Python version.
  output = subprocess.check_output(cmd, universal_newlines=True)
  # `find` may give us several paths (e.g. 'Chromium Framework' in the
  # product dir and 'Chromium Framework' inside 'Chromium.app');
  # chrome_dsym_hints() will produce the correct .dSYM path for any of them.
  first_line = output.split('\n', 1)[0]
  # Empty output means no match; report that as None rather than ''.
  ret = first_line or None
  inode_path_cache[inode] = ret
  return ret
# Build a binary name filter that works around https://crbug.com/444835.
# On OSX swarming servers ASan sometimes prints paths to files in the cache
# (whose names are SHA1 hashes) rather than paths to the hardlinks to those
# files in the product dir.
# Given such a |binary_path|, chrome_osx_binary_name_filter() returns one of
# the hardlinks to the same inode found under |product_dir_path|.
def make_chrome_osx_binary_name_filter(product_dir_path=''):
  """Return a filter mapping hash-named binaries to hardlinks in the product dir.

  With an empty |product_dir_path| the returned filter is the identity.
  """
  def chrome_osx_binary_name_filter(binary_path):
    looks_like_hash = is_hash_name(os.path.basename(binary_path))
    if not looks_like_hash or not product_dir_path:
      return binary_path
    hardlink_path = find_inode_at_path(os.stat(binary_path).st_ino,
                                       product_dir_path)
    # Fall back to the original path when no hardlink was found.
    return hardlink_path if hardlink_path else binary_path
  return chrome_osx_binary_name_filter
# Build the path to the .dSYM bundle for a given binary.
# A binary in Chromium can live in one of three kinds of places:
#  1. It is a standalone executable or dynamic library in the product dir;
#     the debug info is in "binary.dSYM" in the product dir.
#  2. It is inside a standalone framework or .app bundle; the debug info is in
#     "Framework.framework.dSYM" or "App.app.dSYM" in the product dir.
#  3. It is inside a framework or .app bundle nested in another .app bundle
#     (e.g. Outer.app/Contents/Versions/1.2.3.4/Inner.app); the debug info is
#     in Inner.app.dSYM in the product dir.
# llvm-symbolizer deals with the first case itself, so .dSYM paths only need
# to be produced for .app bundles and frameworks.
# The binary path is assumed to contain at most two nested bundles. Only one
# of them may be a framework, and a framework cannot contain other bundles.
def chrome_dsym_hints(binary):
  """Return a list with the .dSYM path for |binary|, or [] for case 1."""
  parts = split_path(binary)
  app_positions = [
      i for i, part in enumerate(parts) if part.endswith('.app')]
  framework_positions = [
      i for i, part in enumerate(parts) if part.endswith('.framework')]
  bundle_positions = sorted(app_positions + framework_positions)
  assert len(bundle_positions) <= 2, \
      "The path contains more than two nested bundles: %s" % binary
  if not bundle_positions:
    # Case 1: a standalone executable or dylib.
    return []
  assert (len(app_positions) != 1 or
          len(framework_positions) != 1 or
          app_positions[0] <= framework_positions[0]), \
      "The path contains an app bundle inside a framework: %s" % binary
  # Cases 2 and 3. The outermost bundle sits in the product dir; in case 2
  # the innermost bundle is that very same bundle.
  outermost, innermost = bundle_positions[0], bundle_positions[-1]
  dsym_parts = parts[:outermost] + [parts[innermost]]
  return ['%s.dSYM' % os.path.join(*dsym_parts)]
# We want our output to match base::EscapeJSONString(), which produces
# doubly-escaped strings. The first escaping pass is handled by this class. The
# second pass happens when JSON data is dumped to file.
class StringEncoder(json.JSONEncoder):
  """JSON encoder that returns the escaped body of a string, without quotes."""

  # basestring only exists on Python 2; fall back to str elsewhere.
  try:
    _STRING_TYPES = basestring  # noqa: F821
  except NameError:
    _STRING_TYPES = str

  def __init__(self):
    json.JSONEncoder.__init__(self)

  def encode(self, s):
    """Return |s| JSON-escaped, with the surrounding double quotes removed.

    In addition, every '<' is replaced with the six characters \\u003C.

    Raises:
      AssertionError: if |s| is not a string.
    """
    assert isinstance(s, self._STRING_TYPES)
    encoded = json.JSONEncoder.encode(self, s)
    assert len(encoded) >= 2
    assert encoded[0] == '"'
    assert encoded[-1] == '"'
    encoded = encoded[1:-1]
    # Special case from base::EscapeJSONString(). The backslash is escaped
    # explicitly: a bare '\u003C' only stays a six-character escape sequence in
    # a Python 2 byte string and collapses to '<' in a unicode literal.
    return encoded.replace('<', '\\u003C')
class JSONTestRunSymbolizer(object):
  """Symbolizes the output snippet stored in a test run dictionary."""

  def __init__(self, symbolization_loop):
    """Store |symbolization_loop|, whose process_line() does the symbolizing."""
    self.string_encoder = StringEncoder()
    self.symbolization_loop = symbolization_loop

  def symbolize_snippet(self, snippet):
    """Return |snippet| with every line passed through the symbolization loop.

    process_line() returns a sequence of output lines for each input line;
    all of them are joined with newlines.
    """
    symbolized_lines = []
    for line in snippet.split('\n'):
      symbolized_lines.extend(self.symbolization_loop.process_line(line))
    return '\n'.join(symbolized_lines)

  def symbolize(self, test_run):
    """Symbolize the snippet of |test_run| in place.

    |test_run| is left untouched if symbolization does not change the
    snippet. Otherwise the original values are preserved under the
    'original_output_snippet*' keys and the snippet keys are overwritten.
    """
    original_snippet = base64.b64decode(test_run['output_snippet_base64'])
    symbolized_snippet = self.symbolize_snippet(original_snippet)
    if symbolized_snippet == original_snippet:
      # No sanitizer reports in snippet.
      return

    test_run['original_output_snippet'] = test_run['output_snippet']
    test_run['original_output_snippet_base64'] = \
        test_run['output_snippet_base64']

    # Reuse the encoder created in __init__ instead of building a new one for
    # every test run.
    escaped_snippet = self.string_encoder.encode(symbolized_snippet)
    test_run['output_snippet'] = escaped_snippet
    test_run['output_snippet_base64'] = \
        base64.b64encode(symbolized_snippet)
    test_run['snippet_processed_by'] = 'asan_symbolize.py'
    # Originally, "lossless" refers to "no Unicode data lost while encoding the
    # string". However, since we're applying another kind of transformation
    # (symbolization), it doesn't seem right to consider the snippet lossless.
    # NOTE(review): the key spelling 'losless_snippet' is kept as is; it
    # presumably matches the key written by the test launcher - confirm.
    test_run['losless_snippet'] = False
def symbolize_snippets_in_json(filename, symbolization_loop):
  """Symbolize every test run snippet in the JSON file |filename| in place.

  The file is read, each test run found under 'per_iteration_data' is passed
  to JSONTestRunSymbolizer.symbolize(), and the result is written back to the
  same file.

  Args:
    filename: path to the JSON file to rewrite.
    symbolization_loop: object handed to JSONTestRunSymbolizer.
  """
  with open(filename, 'r') as f:
    json_data = json.load(f)

  test_run_symbolizer = JSONTestRunSymbolizer(symbolization_loop)
  for iteration_data in json_data['per_iteration_data']:
    # Each iteration maps a test name to its list of runs; only the runs are
    # needed here.
    for test_runs in iteration_data.values():
      for test_run in test_runs:
        test_run_symbolizer.symbolize(test_run)

  with open(filename, 'w') as f:
    json.dump(json_data, f, indent=3, sort_keys=True)
def main():
  """Parse the command line and symbolize either a JSON summary or stdin."""
  parser = argparse.ArgumentParser(description='Symbolize sanitizer reports.')
  parser.add_argument('--test-summary-json-file',
      help='Path to a JSON file produced by the test launcher. The script will '
           'ignore standard input and instead symbolize the output snippets '
           'inside the JSON file. The result will be written back to the JSON '
           'file.')
  parser.add_argument('strip_path_prefix', nargs='*',
      help='When printing source file names, the longest prefix ending in one '
           'of these substrings will be stripped. E.g.: "Release/../../".')
  parser.add_argument('--executable-path',
      help='Path to program executable. Used on OSX swarming bots to locate '
           'dSYM bundles for associated frameworks and bundles.')
  args = parser.parse_args()

  disable_buffering()
  set_symbolizer_path()
  asan_symbolize.demangle = True
  asan_symbolize.fix_filename_patterns = args.strip_path_prefix
  # Most source paths for Chromium binaries start with
  # /path/to/src/out/Release/../../
  asan_symbolize.fix_filename_patterns.append('Release/../../')
  # The hash-name workaround is only applied on OSX.
  binary_name_filter = None
  if platform.uname()[0] == 'Darwin':
    binary_name_filter = make_chrome_osx_binary_name_filter(
        chrome_product_dir_path(args.executable_path))
  loop = asan_symbolize.SymbolizationLoop(
      binary_name_filter=binary_name_filter,
      dsym_hint_producer=chrome_dsym_hints)

  if args.test_summary_json_file:
    symbolize_snippets_in_json(args.test_summary_json_file, loop)
  else:
    # Process stdin.
    asan_symbolize.logfile = sys.stdin
    loop.process_logfile()


if __name__ == '__main__':
  main()