Start unifying document-mode and tabbed-mode tab creation
[chromium-blink-merge.git] / tools / cygprofile / cyglog_to_orderfile.py
blobf64c077edc063535172dc9058901012ed1739e97
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Symbolizes a log file produced by cygprofile instrumentation.
8 Given a log file and the binary being profiled, creates an orderfile.
9 """
11 import logging
12 import multiprocessing
13 import optparse
14 import os
15 import tempfile
16 import string
17 import sys
19 import cygprofile_utils
20 import symbol_extractor
def _ParseLogLines(log_file_lines):
  """Parses a merged cyglog produced by mergetraces.py.

  The first line must be the executable ('r-xp') memory mapping of the
  profiled binary; the start address of that mapping is subtracted from every
  logged call address. The two following lines (column header and START
  marker) are skipped without being inspected.

  Below is an example of a small log file:
  5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
  secs       usecs      pid:threadid    func
  START
  1314897086 795828     3587:1074648168 0x509e105c
  1314897086 795874     3587:1074648168 0x509e0eb4
  1314897086 796326     3587:1074648168 0x509e0e3c
  1314897086 796552     3587:1074648168 0x509e07bc
  END

  Args:
    log_file_lines: array of lines in log file produced by profiled run

  Returns:
    An ordered list of callee offsets, relative to the start of the mapping.
    Addresses that are not strictly above the mapping start are dropped.

  Raises:
    AssertionError: if the log is empty, the first line is not an 'r-xp'
        mapping, or a non-blank line is neither a 4-field call record nor
        the END marker.
  """
  assert log_file_lines, 'Empty log file'
  mapping_line = log_file_lines[0]
  assert 'r-xp' in mapping_line, 'First line is not an r-xp mapping'
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  call_info = []
  # Skip the mapping line, the column header and the START marker.
  for line in log_file_lines[3:]:
    fields = line.split()
    if not fields:
      # Blank line (e.g. a trailing newline at the end of the log): ignore it
      # instead of failing with an IndexError on fields[0] below.
      continue
    if len(fields) != 4:
      assert fields[0] == 'END', 'Unexpected log line: ' + line
      continue
    addr = int(fields[3], 16)
    # Only addresses above the mapping start can be turned into offsets.
    if vm_start < addr:
      call_info.append(addr - vm_start)
  return call_info
def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Extracts the symbols of a library and groups them by offset.

  Args:
    lib_filename: path of the library handed to
        symbol_extractor.SymbolInfosFromBinary

  Returns:
    A dict {offset: [SymbolInfo]}, as built by
    symbol_extractor.GroupSymbolInfosByOffset.
  """
  return symbol_extractor.GroupSymbolInfosByOffset(
      symbol_extractor.SymbolInfosFromBinary(lib_filename))
class SymbolNotFoundException(Exception):
  """Signals that no symbol could be found for a lookup.

  Attributes:
    value: the object given to the constructor; its repr() is used as the
        string form of the exception.
  """

  def __init__(self, value):
    self.value = value
    super(SymbolNotFoundException, self).__init__(value)

  def __str__(self):
    return '%r' % (self.value,)
def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
  """Looks up every SymbolInfo located at an offset.

  An exact match is tried first. For an odd offset, the offset just below it
  is tried as a fallback.

  Args:
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    offset: offset to look the symbols at

  Returns:
    The list of SymbolInfo at the given offset

  Raises:
    SymbolNotFoundException if the offset doesn't match any symbol.
  """
  if offset in offset_to_symbol_infos:
    return offset_to_symbol_infos[offset]

  # On ARM, odd addresses are used to signal thumb instruction. They are
  # generated by setting the LSB to 1 (see
  # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
  # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
  lsb_set = offset % 2
  if lsb_set:
    even_offset = offset - 1
    if even_offset in offset_to_symbol_infos:
      return offset_to_symbol_infos[even_offset]

  raise SymbolNotFoundException(offset)
def _GetObjectFileNames(obj_dir):
  """Collects the object files found under a directory.

  Args:
    obj_dir: root directory, walked recursively with os.walk

  Returns:
    The list of paths (joined with their containing directory) of all files
    whose name ends with '.o', in os.walk order.
  """
  return [os.path.join(dirpath, name)
          for (dirpath, _, filenames) in os.walk(obj_dir)
          for name in filenames
          if name.endswith('.o')]
def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  The files are processed in parallel by a pool of worker processes, each
  file being handed to symbol_extractor.SymbolInfosFromBinary.

  Args:
    object_filenames: iterable of paths to extract symbols from

  Returns:
    A flat list with the SymbolInfo of all files, in the order of the input
    filenames.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Always release the worker processes, including when map() raises.
    # map() is synchronous, so no useful work is outstanding at this point.
    pool.terminate()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result.extend(symbol_infos)
  return result
def _GetSymbolToSectionMapFromObjectFiles(obj_dir):
  """Builds the symbol name -> linker section name mapping for a build.

  All the object files under obj_dir are scanned. Symbols whose name starts
  with '.LTHUNK' are ignored. A symbol seen again in a different section than
  the recorded one, or located in a section whose name does not start with
  '.text', is reported through a warning collector and does not update the
  mapping.

  Args:
    obj_dir: directory containing the object files

  Returns:
    A dict {symbol name: section name}.
  """
  object_files = _GetObjectFileNames(obj_dir)
  section_by_symbol = {}
  # NOTE(review): 300 is presumably the maximum number of warnings printed;
  # confirm against cygprofile_utils.WarningCollector.
  collector = cygprofile_utils.WarningCollector(300)
  for info in _AllSymbolInfos(object_files):
    name = info.name
    if name.startswith('.LTHUNK'):
      continue
    section = info.section
    if name in section_by_symbol and section_by_symbol[name] != section:
      collector.Write(''.join([
          'Symbol ', name,
          ' in conflicting sections ', section,
          ' and ', section_by_symbol[name]]))
      continue
    if not section.startswith('.text'):
      collector.Write(''.join([
          'Symbol ', name,
          ' in incorrect section ', section]))
      continue
    section_by_symbol[name] = section
  collector.WriteEnd('bad sections')
  return section_by_symbol
def _WarnAboutDuplicates(offsets):
  """Logs a warning for every repeated offset.

  Each occurrence after the first one of a given offset triggers one warning.

  Args:
    offsets: list of offsets to check for duplicates

  Returns:
    True if there are no duplicates, False otherwise.
  """
  already_seen = set()
  found_duplicate = False
  for offset in offsets:
    if offset in already_seen:
      found_duplicate = True
      logging.warning('Duplicate offset: ' + hex(offset))
      continue
    already_seen.add(offset)
  return not found_duplicate
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
                     output_file):
  """Writes the orderfile, one section name per line, to output_file.

  Offsets are processed in order. For each symbol found at an offset, its
  section is written the first time it is encountered. Symbols without a
  known section and offsets without a symbol are reported through warning
  collectors.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_section_map: {name: section}
    output_file: file-like object to write the results to

  Returns:
    True if every offset matched at least one symbol, False otherwise. A
    symbol without a known section does not affect the result.
  """
  all_offsets_found = True
  no_section_warnings = cygprofile_utils.WarningCollector(300)
  not_found_warnings = cygprofile_utils.WarningCollector(300)
  written_sections = set()
  for offset in offsets:
    try:
      infos_at_offset = _FindSymbolInfosAtOffset(
          offset_to_symbol_infos, offset)
    except SymbolNotFoundException:
      not_found_warnings.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      all_offsets_found = False
      continue
    for info in infos_at_offset:
      name = info.name
      if name not in symbol_to_section_map:
        no_section_warnings.Write('No known section for symbol ' + name)
        continue
      section = symbol_to_section_map[name]
      if section in written_sections:
        continue
      output_file.write(section + '\n')
      written_sections.add(section)
  no_section_warnings.WriteEnd('no known section for symbol.')
  not_found_warnings.WriteEnd('symbol not found in the binary.')
  return all_offsets_found
def main():
  """Converts a merged cyglog into an orderfile.

  Command line: [options] <merged_cyglog> <library> <output_filename>. The
  object files are looked up in the '../obj' directory relative to the
  directory of the library. The orderfile is first written to a temporary
  file created next to the destination, then renamed over it, so that a
  failure while writing does not leave a partial output file behind.

  Returns:
    The process exit code: 0 if every offset of the log was matched to a
    symbol, 1 otherwise (including on a command line usage error).
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <merged_cyglog> <library> <output_filename>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for libchrome.so')
  options, argv = parser.parse_args(sys.argv)
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  # argv still contains the program name, hence 4 and not 3.
  if len(argv) != 4:
    parser.print_help()
    return 1
  (log_filename, lib_filename, output_filename) = argv[1:]
  symbol_extractor.SetArchitecture(options.arch)

  obj_dir = os.path.abspath(os.path.join(
      os.path.dirname(lib_filename), '../obj'))

  # Read through a context manager so that the log file is closed, and build
  # a real list (it is indexed and sliced by _ParseLogLines).
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  output_file = None
  try:
    # The temporary file lives in the destination directory so that the
    # rename below stays on the same filesystem.
    (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
    output_file = os.fdopen(fd, 'w')
    ok = _OutputOrderfile(
        offsets, offset_to_symbol_infos, symbol_to_section_map, output_file)
    output_file.close()
    os.rename(temp_filename, output_filename)
    # The temporary file has been moved: nothing left to remove.
    temp_filename = None
    success = ok
  finally:
    if output_file:
      # Closing an already closed file is a no-op.
      output_file.close()
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1
# Script entry point: enable INFO-level logging and propagate the status
# returned by main() as the process exit code.
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  exit_code = main()
  sys.exit(exit_code)