3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 from third_party
import asan_symbolize
class LineBuffered(object):
  """Wrap a file-like object so that completed lines are flushed at once.

  Writes are forwarded to the wrapped stream, which is flushed whenever the
  written data contains a newline. Any attribute not defined on the wrapper
  is looked up on the wrapped stream.
  """

  def __init__(self, stream):
    """Remember |stream|, the file-like object being wrapped."""
    self.stream = stream

  def write(self, data):
    """Write |data| to the wrapped stream, flushing if it contains '\\n'."""
    self.stream.write(data)
    if '\n' in data:
      self.stream.flush()

  def __getattr__(self, attr):
    """Delegate attributes missing on the wrapper to the wrapped stream."""
    if attr == 'stream':
      # Only reached when |stream| was never assigned (__init__ did not run);
      # fail cleanly instead of recursing through this method forever.
      raise AttributeError(attr)
    return getattr(self.stream, attr)
def disable_buffering():
  """Make stdout of this process and of its child processes unbuffered.

  Does nothing when PYTHONUNBUFFERED is already set to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # sys.stdout.write cannot be reassigned on the stream object itself, so the
  # whole stream is swapped for a wrapper instead.
  wrapped_stdout = LineBuffered(sys.stdout)
  sys.stdout = wrapped_stdout
  os.environ['PYTHONUNBUFFERED'] = 'x'
def set_symbolizer_path():
  """Set the path to the llvm-symbolizer binary in the Chromium source tree.

  Does nothing when LLVM_SYMBOLIZER_PATH is already set to a non-empty value.
  Otherwise the variable is set to the absolute path of the binary located
  relative to this script.

  Raises:
    AssertionError: if the expected llvm-symbolizer binary does not exist.
  """
  if os.environ.get('LLVM_SYMBOLIZER_PATH'):
    return
  script_dir = os.path.dirname(os.path.abspath(__file__))
  # Assume this script resides three levels below src/ (i.e.
  # src/tools/valgrind/asan/).
  src_root = os.path.join(script_dir, '..', '..', '..')
  symbolizer_path = os.path.join(
      src_root, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
      'llvm-symbolizer')
  # Raised explicitly rather than via an `assert` statement so that the check
  # is still performed when Python runs with -O.
  if not os.path.isfile(symbolizer_path):
    raise AssertionError(
        'llvm-symbolizer not found at %s' % os.path.abspath(symbolizer_path))
  os.environ['LLVM_SYMBOLIZER_PATH'] = os.path.abspath(symbolizer_path)
def is_hash_name(name):
  """Return True if |name| consists only of lowercase hexadecimal digits.

  Args:
    name: the string to test, e.g. the basename of a file.

  Returns:
    True when |name| is a non-empty run of characters from [0-9a-f],
    False otherwise.
  """
  # The match result must be returned: callers use this function as a
  # predicate, and without a return value it would always be falsy.
  return bool(re.match(r'[0-9a-f]+$', name))
62 head
, tail
= os
.path
.split(path
)
65 ret
, path
= [tail
] + ret
, head
def chrome_product_dir_path(exe_path):
  """Return the product dir that contains the executable at |exe_path|.

  Args:
    exe_path: path to the program executable, or None when it is not known.

  Returns:
    The path made of the components preceding the first '.app' component of
    |exe_path|, or, when there is no such component, of all components but
    the last. None when |exe_path| is None.
  """
  if exe_path is None:
    # The executable path is optional for callers; with nothing to inspect
    # there is no product dir to report.
    return None
  path_parts = split_path(exe_path)
  # Make sure the product dir path isn't empty if |exe_path| consists of
  # a single component.
  if len(path_parts) == 1:
    path_parts = ['.'] + path_parts
  for index, part in enumerate(path_parts):
    if part.endswith('.app'):
      # os.path.join() needs at least one argument, so fall back to the
      # current directory if the bundle is the very first component.
      return os.path.join(*(path_parts[:index] or ['.']))
  # If the executable isn't an .app bundle, it's a commandline binary that
  # resides right in the product dir.
  return os.path.join(*path_parts[:-1])
def find_inode_at_path(inode, path):
  """Return a path below |path| that refers to the file with inode |inode|.

  The lookup runs `find <path> -inum <inode>` and is memoized per inode in
  the module-level |inode_path_cache|, including unsuccessful lookups.

  Args:
    inode: inode number to look for.
    path: directory in which to search.

  Returns:
    The first path printed by `find`, or None if it printed nothing.

  Raises:
    subprocess.CalledProcessError: if `find` exits with a non-zero status.
  """
  if inode in inode_path_cache:
    return inode_path_cache[inode]
  cmd = ['find', path, '-inum', str(inode)]
  # universal_newlines=True makes the output text on both Python 2 and 3, so
  # it can be split on newlines and used as a path.
  find_output = subprocess.check_output(cmd, universal_newlines=True).rstrip()
  ret = None
  if find_output:
    # `find` may give us several paths (e.g. 'Chromium Framework' in the
    # product dir and 'Chromium Framework' inside 'Chromium.app',
    # chrome_dsym_hints() will produce correct .dSYM path for any of them.
    ret = find_output.split('\n')[0]
  inode_path_cache[inode] = ret
  return ret
103 # Create a binary name filter that works around https://crbug.com/444835.
104 # When running tests on OSX swarming servers, ASan sometimes prints paths to
105 # files in cache (ending with SHA1 filenames) instead of paths to hardlinks to
106 # those files in the product dir.
107 # For a given |binary_path| chrome_osx_binary_name_filter() returns one of the
108 # hardlinks to the same inode in |product_dir_path|.
def make_chrome_osx_binary_name_filter(product_dir_path=''):
  """Create a binary name filter bound to |product_dir_path|.

  Args:
    product_dir_path: directory searched for hardlinks to hash-named
        binaries. When empty or None the returned filter passes every path
        through unchanged.

  Returns:
    A function taking a binary path and returning the path to use for it.
  """
  def chrome_osx_binary_name_filter(binary_path):
    """Map a hash-named |binary_path| to a hardlink in the product dir.

    Returns |binary_path| itself when its basename is not a hash name, when
    no product dir was given, or when no hardlink was found.
    """
    basename = os.path.basename(binary_path)
    # Test the cheap, already-known value first.
    if product_dir_path and is_hash_name(basename):
      inode = os.stat(binary_path).st_ino
      new_binary_path = find_inode_at_path(inode, product_dir_path)
      if new_binary_path:
        return new_binary_path
    # Always hand back a usable path; falling off the end would return None.
    return binary_path

  return chrome_osx_binary_name_filter
121 # Construct a path to the .dSYM bundle for the given binary.
122 # There are three possible cases for binary location in Chromium:
123 # 1. The binary is a standalone executable or dynamic library in the product
124 # dir, the debug info is in "binary.dSYM" in the product dir.
125 # 2. The binary is a standalone framework or .app bundle, the debug info is in
126 # "Framework.framework.dSYM" or "App.app.dSYM" in the product dir.
127 # 3. The binary is a framework or an .app bundle within another .app bundle
128 # (e.g. Outer.app/Contents/Versions/1.2.3.4/Inner.app), and the debug info
129 # is in Inner.app.dSYM in the product dir.
130 # The first case is handled by llvm-symbolizer, so we only need to construct
131 # .dSYM paths for .app bundles and frameworks.
132 # We're assuming that there're no more than two nested bundles in the binary
133 # path. Only one of these bundles may be a framework and frameworks cannot
134 # contain other bundles.
def chrome_dsym_hints(binary):
  """Return the .dSYM bundle path hints for |binary|.

  Args:
    binary: path to the binary being symbolized.

  Returns:
    An empty list when no component of |binary| is an .app bundle or a
    framework; otherwise a one-element list with the path of
    "<innermost bundle>.dSYM" placed in the directory that contains the
    outermost bundle.

  Raises:
    AssertionError: if |binary| contains more than two bundles, or an .app
        bundle nested inside a framework.
  """
  path_parts = split_path(binary)
  app_positions = []
  framework_positions = []
  for index, part in enumerate(path_parts):
    if part.endswith('.app'):
      app_positions.append(index)
    elif part.endswith('.framework'):
      framework_positions.append(index)
  bundle_positions = sorted(app_positions + framework_positions)
  assert len(bundle_positions) <= 2, \
      "The path contains more than two nested bundles: %s" % binary
  if not bundle_positions:
    # Case 1: this is a standalone executable or dylib.
    return []
  assert (not (len(app_positions) == 1 and
               len(framework_positions) == 1 and
               app_positions[0] > framework_positions[0])), \
      "The path contains an app bundle inside a framework: %s" % binary
  # Cases 2 and 3. The outermost bundle (which is the only bundle in the case 2)
  # is located in the product dir.
  outermost_bundle = bundle_positions[0]
  product_dir = path_parts[:outermost_bundle]
  # In case 2 this is the same as |outermost_bundle|.
  innermost_bundle = bundle_positions[-1]
  dsym_path = product_dir + [path_parts[innermost_bundle]]
  result = '%s.dSYM' % os.path.join(*dsym_path)
  return [result]
166 # We want our output to match base::EscapeJSONString(), which produces
167 # doubly-escaped strings. The first escaping pass is handled by this class. The
168 # second pass happens when JSON data is dumped to file.
class StringEncoder(json.JSONEncoder):
  """JSON-escapes a single string without the surrounding double quotes."""

  # basestring only exists on Python 2; str is the text type on Python 3.
  try:
    _STRING_TYPES = basestring
  except NameError:
    _STRING_TYPES = str

  def __init__(self):
    json.JSONEncoder.__init__(self)

  def encode(self, s):
    """Return |s| escaped as the contents of a JSON string literal.

    Args:
      s: the string to escape.

    Returns:
      The JSON encoding of |s| with the enclosing quotes removed and every
      '<' replaced by the six-character escape sequence \\u003C.
    """
    assert isinstance(s, self._STRING_TYPES)
    encoded = json.JSONEncoder.encode(self, s)
    assert len(encoded) >= 2
    assert encoded[0] == '"'
    assert encoded[-1] == '"'
    encoded = encoded[1:-1]
    # Special case from base::EscapeJSONString(). The backslash is doubled so
    # that the replacement is the literal escape sequence on every Python
    # version; a bare '\u003C' is just '<' itself on Python 3.
    encoded = encoded.replace('<', '\\u003C')
    return encoded
class JSONTestRunSymbolizer(object):
  """Symbolizes the output snippet stored in a test-run dictionary."""

  def __init__(self, symbolization_loop):
    """Args:
      symbolization_loop: object whose process_line(line) returns the list
          of output lines that replace |line|.
    """
    self.string_encoder = StringEncoder()
    self.symbolization_loop = symbolization_loop

  def symbolize_snippet(self, snippet):
    """Return |snippet| with every line passed through the symbolizer."""
    symbolized_lines = []
    for line in snippet.split('\n'):
      symbolized_lines += self.symbolization_loop.process_line(line)
    return '\n'.join(symbolized_lines)

  def symbolize(self, test_run):
    """Symbolize the snippet of |test_run| in place.

    If symbolization changes the snippet, the previous values are kept under
    the 'original_output_snippet*' keys and the 'output_snippet*' keys are
    overwritten. |test_run| is left untouched otherwise.
    """
    original_snippet = base64.b64decode(test_run['output_snippet_base64'])
    symbolized_snippet = self.symbolize_snippet(original_snippet)
    if symbolized_snippet == original_snippet:
      # No sanitizer reports in snippet.
      return

    test_run['original_output_snippet'] = test_run['output_snippet']
    test_run['original_output_snippet_base64'] = \
        test_run['output_snippet_base64']

    # Reuse the encoder created in __init__ rather than building a new one
    # for every test run.
    escaped_snippet = self.string_encoder.encode(symbolized_snippet)
    test_run['output_snippet'] = escaped_snippet
    test_run['output_snippet_base64'] = \
        base64.b64encode(symbolized_snippet)
    test_run['snippet_processed_by'] = 'asan_symbolize.py'
    # Originally, "lossless" refers to "no Unicode data lost while encoding the
    # string". However, since we're applying another kind of transformation
    # (symbolization), it doesn't seem right to consider the snippet lossless.
    # NOTE(review): the key spelling is kept exactly as found; presumably it
    # matches what the consumer of this JSON expects -- confirm before fixing.
    test_run['losless_snippet'] = False
def symbolize_snippets_in_json(filename, symbolization_loop):
  """Symbolize every test-run snippet in the JSON file at |filename|.

  The file is read, each test run listed under 'per_iteration_data' is passed
  to a JSONTestRunSymbolizer, and the result is written back to the same
  file.

  Args:
    filename: path to the JSON file; it is overwritten in place.
    symbolization_loop: passed on to JSONTestRunSymbolizer.
  """
  with open(filename, 'r') as f:
    json_data = json.load(f)

  test_run_symbolizer = JSONTestRunSymbolizer(symbolization_loop)
  for iteration_data in json_data['per_iteration_data']:
    # Only the lists of runs are needed, not the test names; values() is also
    # available on both Python 2 and Python 3, unlike iteritems().
    for test_runs in iteration_data.values():
      for test_run in test_runs:
        test_run_symbolizer.symbolize(test_run)

  with open(filename, 'w') as f:
    json.dump(json_data, f, indent=3, sort_keys=True)
233 parser
= argparse
.ArgumentParser(description
='Symbolize sanitizer reports.')
234 parser
.add_argument('--test-summary-json-file',
235 help='Path to a JSON file produced by the test launcher. The script will '
236 'ignore stdandard input and instead symbolize the output stnippets '
237 'inside the JSON file. The result will be written back to the JSON '
239 parser
.add_argument('strip_path_prefix', nargs
='*',
240 help='When printing source file names, the longest prefix ending in one '
241 'of these substrings will be stripped. E.g.: "Release/../../".')
242 parser
.add_argument('--executable-path',
243 help='Path to program executable. Used on OSX swarming bots to locate '
244 'dSYM bundles for associated frameworks and bundles.')
245 args
= parser
.parse_args()
248 set_symbolizer_path()
249 asan_symbolize
.demangle
= True
250 asan_symbolize
.fix_filename_patterns
= args
.strip_path_prefix
251 # Most source paths for Chromium binaries start with
252 # /path/to/src/out/Release/../../
253 asan_symbolize
.fix_filename_patterns
.append('Release/../../')
254 binary_name_filter
= None
255 if platform
.uname()[0] == 'Darwin':
256 binary_name_filter
= make_chrome_osx_binary_name_filter(
257 chrome_product_dir_path(args
.executable_path
))
258 loop
= asan_symbolize
.SymbolizationLoop(
259 binary_name_filter
=binary_name_filter
,
260 dsym_hint_producer
=chrome_dsym_hints
)
262 if args
.test_summary_json_file
:
263 symbolize_snippets_in_json(args
.test_summary_json_file
, loop
)
266 asan_symbolize
.logfile
= sys
.stdin
267 loop
.process_logfile()
269 if __name__
== '__main__':