Add ICU message format support
[chromium-blink-merge.git] / tools / cygprofile / cyglog_to_orderfile.py
blob6225574cb41e563560f51c70b4862dd7f273dfdf
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Symbolizes a log file produced by cygprofile instrumentation.
8 Given a log file and the binary being profiled, creates an orderfile.
9 """
11 import logging
12 import multiprocessing
13 import optparse
14 import os
15 import re
16 import string
17 import sys
18 import tempfile
20 import cygprofile_utils
21 import symbol_extractor
def _ParseLogLines(log_file_lines):
  """Extracts the callee offsets from a merged cyglog (see mergetraces.py).

  A small log file looks like this:
    5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
    secs       usecs      pid:threadid    func
    START
    1314897086 795828     3587:1074648168 0x509e105c
    1314897086 795874     3587:1074648168 0x509e0eb4
    1314897086 796326     3587:1074648168 0x509e0e3c
    1314897086 796552     3587:1074648168 0x509e07bc
    END

  Args:
    log_file_lines: list of the lines of the log file. The first line must be
        the executable ('r-xp') mapping; the two following lines are skipped
        and every later line is either a 4-field call record or 'END'.

  Returns:
    The list of callee offsets, in log order. An offset is the logged address
    minus the start of the mapping; addresses that are not above the start of
    the mapping are left out.
  """
  mapping_line = log_file_lines[0]
  assert 'r-xp' in mapping_line
  # The mapping line starts with "<start>-<end>" (hex); only <start> is needed.
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  # First pass: keep the call records, checking the shape of everything else.
  call_records = []
  for record in log_file_lines[3:]:
    fields = record.split()
    if len(fields) != 4:
      assert fields[0] == 'END'
      continue
    call_records.append(fields)

  # Second pass: the address is the last field of a call record.
  addresses = [int(fields[3], 16) for fields in call_records]
  return [address - vm_start for address in addresses if vm_start < address]
def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Reads the symbols of a library and groups them by offset.

  Args:
    lib_filename: path of the library handed to
        symbol_extractor.SymbolInfosFromBinary().

  Returns:
    A dict {offset: [SymbolInfo]}, as built by
    symbol_extractor.GroupSymbolInfosByOffset().
  """
  return symbol_extractor.GroupSymbolInfosByOffset(
      symbol_extractor.SymbolInfosFromBinary(lib_filename))
class SymbolNotFoundException(Exception):
  """Error signalling that a lookup did not match any symbol.

  Attributes:
    value: the object given to the constructor (in this file, the offset that
        could not be resolved).
  """

  def __init__(self, value):
    super(SymbolNotFoundException, self).__init__(value)
    self.value = value

  def __str__(self):
    # The message is the repr() of the offending value.
    return '%r' % (self.value,)
def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
  """Looks up the symbols located at an offset.

  Args:
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    offset: offset to look the symbols at

  Returns:
    The list of SymbolInfo stored for the offset or, for an odd offset without
    an entry of its own, the list stored for (offset - 1).

  Raises:
    SymbolNotFoundException if the offset doesn't match any symbol.
  """
  candidates = [offset]
  if offset % 2:
    # On ARM, odd addresses are used to signal thumb instruction. They are
    # generated by setting the LSB to 1 (see
    # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
    # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
    candidates.append(offset - 1)

  for candidate in candidates:
    if candidate in offset_to_symbol_infos:
      return offset_to_symbol_infos[candidate]
  raise SymbolNotFoundException(offset)
def _GetObjectFileNames(obj_dir):
  """Collects the object files found under a directory.

  Args:
    obj_dir: root of the directory tree to walk (subdirectories included).

  Returns:
    The list of paths (each prefixed with the directory it was found in) of
    all the files whose name ends with '.o'.
  """
  return [os.path.join(dirpath, file_name)
          for (dirpath, _, filenames) in os.walk(obj_dir)
          for file_name in filenames
          if file_name.endswith('.o')]
def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  The files are processed in parallel by a pool of worker processes, which is
  shut down before returning.

  Args:
    object_filenames: iterable of paths, each one handed to
        symbol_extractor.SymbolInfosFromBinary().

  Returns:
    A single flat list holding the symbols of all the files.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Release the worker processes instead of leaving them to the garbage
    # collector; this also runs when a worker raised.
    pool.close()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result += symbol_infos
  return result
def _SameCtorOrDtorNames(symbol1, symbol2):
  """Tells whether two symbols refer to the same constructor or destructor.

  The Itanium C++ ABI specifies dual constructor and destructor
  emission (section 5.1.4.3):
  https://refspecs.linuxbase.org/cxxabi-1.83.html#mangling-special
  To avoid fully parsing all mangled symbols, a heuristic is used with c++filt.

  Note: some compilers may name generated copies differently. If this becomes
  an issue this heuristic will need to be updated.

  Args:
    symbol1: mangled symbol name, searched for a constructor/destructor marker.
    symbol2: mangled symbol name to compare symbol1 with.

  Returns:
    A true value if symbol1 contains a constructor/destructor marker and both
    symbols demangle to the same name, a false value (None or False) otherwise.
  """
  # Check if this is the understood case of constructor/destructor
  # signatures. GCC emits up to three types of constructor/destructors:
  # complete, base, and allocating. If they're all the same they'll
  # get folded together.
  ctor_dtor_marker = re.search('(C[123]|D[012])E', symbol1)
  if not ctor_dtor_marker:
    return ctor_dtor_marker
  demangled1 = symbol_extractor.DemangleSymbol(symbol1)
  demangled2 = symbol_extractor.DemangleSymbol(symbol2)
  return demangled1 == demangled2
def GetSymbolToSectionsMapFromObjectFiles(obj_dir):
  """Scans object files to create a {symbol: linker section(s)} map.

  Symbols whose name starts with '.LTHUNK' are ignored. A symbol is first
  recorded only when its section starts with '.text.'; sections seen later for
  an already recorded symbol are appended to its list. Unexpected cases are
  reported through a cygprofile_utils.WarningCollector.

  Args:
    obj_dir: The root of the output object file directory, which will be
             scanned for .o files to form the mapping.

  Returns:
    A map {symbol_name: [section_name1, section_name2...]}
  """
  text_prefix = '.text.'
  object_files = _GetObjectFileNames(obj_dir)
  symbol_to_sections_map = {}
  symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_infos = _AllSymbolInfos(object_files)
  for symbol_info in symbol_infos:
    symbol = symbol_info.name
    if symbol.startswith('.LTHUNK'):
      continue
    section = symbol_info.section
    known_sections = symbol_to_sections_map.get(symbol)
    if known_sections is not None:
      if section in known_sections:
        # The same (symbol, section) pair was seen again. Nothing new to
        # record, and the sections gathered so far must not be thrown away.
        continue
      known_sections.append(section)
      # Remove the '.text.' prefix to get the symbol the first section is named
      # after. str.lstrip() is not suitable here: it strips a set of
      # characters and would also eat the beginning of the symbol name.
      first_section_symbol = known_sections[0]
      if first_section_symbol.startswith(text_prefix):
        first_section_symbol = first_section_symbol[len(text_prefix):]
      if not _SameCtorOrDtorNames(symbol, first_section_symbol):
        symbol_warnings.Write('Symbol ' + symbol +
                              ' unexpectedly in more than one section: ' +
                              ', '.join(known_sections))
    elif not section.startswith(text_prefix):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in incorrect section ' + section)
    else:
      # In most cases we expect just one item in this list, and maybe 4 or so in
      # the worst case.
      symbol_to_sections_map[symbol] = [section]
  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_sections_map
def _WarnAboutDuplicates(offsets):
  """Logs a warning for every repeated offset.

  Args:
    offsets: iterable of integer offsets to check for duplicates

  Returns:
    True if every offset appears only once, False otherwise.
  """
  already_seen = set()
  all_unique = True
  for offset in offsets:
    if offset in already_seen:
      all_unique = False
      logging.warning('Duplicate offset: ' + hex(offset))
      continue
    already_seen.add(offset)
  return all_unique
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_sections_map,
                     output_file):
  """Writes the orderfile, one section name per line, to output_file.

  Sections are written in the order their symbols are first reached through
  offsets, and each section is written only once.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_sections_map: {name: [section1, section2]}
    output_file: file-like object to write the results to

  Returns:
    True if every offset matched a symbol of the library, False otherwise. A
    symbol without a known section only produces a warning.
  """
  all_offsets_resolved = True
  unknown_symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_not_found_warnings = cygprofile_utils.WarningCollector(300)
  written_sections = set()
  for offset in offsets:
    try:
      symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
    except SymbolNotFoundException:
      symbol_not_found_warnings.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      all_offsets_resolved = False
      continue

    for symbol_info in symbol_infos:
      if symbol_info.name not in symbol_to_sections_map:
        unknown_symbol_warnings.Write(
            'No known section for symbol ' + symbol_info.name)
        continue
      for section in symbol_to_sections_map[symbol_info.name]:
        if section in written_sections:
          continue
        output_file.write(section + '\n')
        written_sections.add(section)

  unknown_symbol_warnings.WriteEnd('no known section for symbol.')
  symbol_not_found_warnings.WriteEnd('symbol not found in the binary.')
  return all_offsets_resolved
def main():
  """Creates an orderfile from a merged cyglog and the profiled library.

  Command line: [options] <merged_cyglog> <library> <output_filename>. The
  orderfile is first written to a temporary file located in the directory of
  <output_filename>, then renamed over it, so that an exception never leaves a
  partially written output file behind.

  Returns:
    The exit status: 0 if the orderfile was written and every offset of the
    log was matched to a symbol, 1 otherwise (including usage errors).
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <merged_cyglog> <library> <output_filename>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for libchrome.so')
  options, argv = parser.parse_args(sys.argv)
  # argv still holds the program name, hence 4 items for 3 arguments. Checked
  # before anything else so that a usage error does not trigger the
  # architecture detection.
  if len(argv) != 4:
    parser.print_help()
    return 1
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  (log_filename, lib_filename, output_filename) = argv[1:]
  symbol_extractor.SetArchitecture(options.arch)

  obj_dir = cygprofile_utils.GetObjDir(lib_filename)

  # Read the log with trailing whitespace removed from each line; the file is
  # closed as soon as it has been read.
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_sections_map = GetSymbolToSectionsMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  output_file = None
  try:
    (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
    output_file = os.fdopen(fd, 'w')
    ok = _OutputOrderfile(
        offsets, offset_to_symbol_infos, symbol_to_sections_map, output_file)
    output_file.close()
    os.rename(temp_filename, output_filename)
    # The temporary file is now the output file: nothing left to remove.
    temp_filename = None
    success = ok
  finally:
    if output_file:
      # Closing an already closed file is a no-op.
      output_file.close()
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1
# Script entry point: log at INFO level and above, and use the value returned
# by main() as the exit status of the process.
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  sys.exit(main())