# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Symbolizes a log file produced by cygprofile instrumentation.

Given a log file and the binary being profiled, creates an orderfile.
"""

import logging
import multiprocessing
import optparse
import os
import re
import string
import sys
import tempfile

import cygprofile_utils
import symbol_extractor


def _ParseLogLines(log_file_lines):
  """Parses a merged cyglog produced by mergetraces.py.

  Args:
    log_file_lines: iterable of lines in the log file produced by a profiled
        run. Below is an example of a small log file:

        5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
        secs       usecs      pid:threadid    func
        START
        1314897086 795828     3587:1074648168 0x509e105c
        1314897086 795874     3587:1074648168 0x509e0eb4
        1314897086 796326     3587:1074648168 0x509e0e3c
        1314897086 796552     3587:1074648168 0x509e07bc
        END

  Returns:
    An ordered list of callee offsets, i.e. the logged addresses made relative
    to the start address found on the first line. Addresses that are not above
    that start address are left out.

  Raises:
    AssertionError: if the first line is not an 'r-xp' mapping, or if a
        non-blank line that does not have 4 fields is not the 'END' marker.
  """
  # Materialize the input so that iterators can be indexed and sliced too.
  log_file_lines = list(log_file_lines)

  mapping_line = log_file_lines[0]
  assert 'r-xp' in mapping_line
  # The mapping line starts with "<start>-<end>", both in hexadecimal.
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  call_info = []
  # The first three lines (mapping, column header, start marker) hold no
  # call record.
  for line in log_file_lines[3:]:
    fields = line.split()
    if not fields:
      # Blank line: there is nothing to parse and no marker to check.
      continue
    if len(fields) != 4:
      assert fields[0] == 'END'
      continue
    # The last field is the callee address, in hexadecimal.
    addr = int(fields[3], 16)
    if vm_start < addr:
      call_info.append(addr - vm_start)
  return call_info


def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Builds the {offset: [SymbolInfo]} dict for a library.

  Args:
    lib_filename: name of the library, handed to
        symbol_extractor.SymbolInfosFromBinary().

  Returns:
    The result of symbol_extractor.GroupSymbolInfosByOffset() applied to the
    symbols extracted from the library.
  """
  return symbol_extractor.GroupSymbolInfosByOffset(
      symbol_extractor.SymbolInfosFromBinary(lib_filename))


class SymbolNotFoundException(Exception):
  """Raised when no symbol can be found.

  Attributes:
    value: the value given at construction; it is what str() reports.
  """

  def __init__(self, value):
    super(SymbolNotFoundException, self).__init__(value)
    # Kept on the instance because __str__ reads it back.
    self.value = value

  def __str__(self):
    return repr(self.value)


def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
  """Looks up every SymbolInfo located at an offset.

  Args:
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    offset: offset to look the symbols at

  Returns:
    The list of SymbolInfo at the given offset

  Raises:
    SymbolNotFoundException if the offset doesn't match any symbol.
  """
  # Exact match first.
  if offset in offset_to_symbol_infos:
    return offset_to_symbol_infos[offset]

  # Only an odd offset gets a second chance, see below.
  if not offset % 2:
    raise SymbolNotFoundException(offset)

  # On ARM, odd addresses are used to signal thumb instruction. They are
  # generated by setting the LSB to 1 (see
  # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
  # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
  even_offset = offset - 1
  if even_offset not in offset_to_symbol_infos:
    raise SymbolNotFoundException(offset)
  return offset_to_symbol_infos[even_offset]


def _GetObjectFileNames(obj_dir):
  """Returns the list of object files in a directory.

  Args:
    obj_dir: directory to walk; its subdirectories are visited as well.

  Returns:
    A list with the path (directory it was found in joined with its name) of
    every file whose name ends with '.o'. The list is empty when there is
    none.
  """
  obj_files = []
  for (dirpath, _, filenames) in os.walk(obj_dir):
    obj_files.extend(
        os.path.join(dirpath, file_name)
        for file_name in filenames if file_name.endswith('.o'))
  return obj_files


def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  Args:
    object_filenames: iterable of filenames, each one is handed to
        symbol_extractor.SymbolInfosFromBinary() in a worker process.

  Returns:
    A single flat list holding the symbol infos of all the files, in the
    order of the filenames.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Release the worker processes instead of leaving them to the garbage
    # collector.
    pool.close()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result.extend(symbol_infos)
  return result


def _SameCtorOrDtorNames(symbol1, symbol2):
  """Tells whether two symbols refer to the same constructor or destructor.

  The Itanium C++ ABI specifies dual constructor and destructor
  emission (section 5.1.4.3):
  https://refspecs.linuxbase.org/cxxabi-1.83.html#mangling-special
  To avoid fully parsing all mangled symbols, a heuristic is used with c++filt.

  Note: some compilers may name generated copies differently.  If this becomes
  an issue this heuristic will need to be updated.

  Args:
    symbol1: mangled name, checked for a constructor/destructor marker.
    symbol2: mangled name to compare symbol1 with.

  Returns:
    A true value if symbol1 carries a constructor/destructor marker and both
    symbols demangle to the same name, a false value (None or False)
    otherwise.
  """
  # Check if this is the understood case of constructor/destructor
  # signatures. GCC emits up to three types of constructor/destructors:
  # complete, base, and allocating.  If they're all the same they'll
  # get folded together.
  ctor_or_dtor_marker = re.search('(C[123]|D[012])E', symbol1)
  if not ctor_or_dtor_marker:
    return ctor_or_dtor_marker
  demangled1 = symbol_extractor.DemangleSymbol(symbol1)
  demangled2 = symbol_extractor.DemangleSymbol(symbol2)
  return demangled1 == demangled2


def GetSymbolToSectionsMapFromObjectFiles(obj_dir):
  """Scans object files to create a {symbol: linker section(s)} map.

  Args:
    obj_dir: The root of the output object file directory, which will be
             scanned for .o files to form the mapping.

  Returns:
    A map {symbol_name: [section_name1, section_name2...]}
  """
  text_prefix = '.text.'
  object_files = _GetObjectFileNames(obj_dir)
  symbol_to_sections_map = {}
  symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_infos = _AllSymbolInfos(object_files)
  for symbol_info in symbol_infos:
    symbol = symbol_info.name
    if symbol.startswith('.LTHUNK'):
      continue
    section = symbol_info.section
    if ((symbol in symbol_to_sections_map) and
        (section not in symbol_to_sections_map[symbol])):
      symbol_to_sections_map[symbol].append(section)

      # Entries are only ever created with a section starting with
      # text_prefix (see the last branch), so the first section always has
      # it. The prefix is removed by slicing: str.lstrip() takes a set of
      # characters, not a prefix, and would also eat the start of a name made
      # of those characters.
      first_section = symbol_to_sections_map[symbol][0]
      if first_section.startswith(text_prefix):
        first_section = first_section[len(text_prefix):]
      if not _SameCtorOrDtorNames(symbol, first_section):
        symbol_warnings.Write('Symbol ' + symbol +
                              ' unexpectedly in more than one section: ' +
                              ', '.join(symbol_to_sections_map[symbol]))
    elif not section.startswith(text_prefix):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in incorrect section ' + section)
    else:
      # In most cases we expect just one item in this list, and maybe 4 or so
      # in the worst case.
      # NOTE(review): this branch is also taken when the symbol is already
      # mapped and this section is already in its list, which resets the list
      # to this single section -- confirm this is intended.
      symbol_to_sections_map[symbol] = [section]
  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_sections_map


def _WarnAboutDuplicates(offsets):
  """Warns about duplicate offsets.

  Args:
    offsets: list of offsets to check for duplicates

  Returns:
    True if there are no duplicates, False otherwise.
  """
  seen_offsets = set()
  ok = True
  for offset in offsets:
    if offset not in seen_offsets:
      seen_offsets.add(offset)
    else:
      # Keep going so that every duplicate gets reported, not only the first.
      ok = False
      logging.warning('Duplicate offset: %s', hex(offset))
  return ok


def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_sections_map,
                     output_file):
  """Outputs the orderfile to output_file.

  Each section is written on its own line, the first time a symbol located at
  one of the offsets maps to it.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_sections_map: {name: [section1, section2]}
    output_file: file-like object to write the results to

  Returns:
    True if all symbols were found in the library.
  """
  success = True
  unknown_symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_not_found_warnings = cygprofile_utils.WarningCollector(300)
  # Sections already written, so that each one is output only once.
  output_sections = set()
  for offset in offsets:
    try:
      symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
    except SymbolNotFoundException:
      symbol_not_found_warnings.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      success = False
      continue
    for symbol_info in symbol_infos:
      if symbol_info.name in symbol_to_sections_map:
        for section in symbol_to_sections_map[symbol_info.name]:
          if section not in output_sections:
            output_file.write(section + '\n')
            output_sections.add(section)
      else:
        # Only reported, this does not make the function return False.
        unknown_symbol_warnings.Write(
            'No known section for symbol ' + symbol_info.name)
  unknown_symbol_warnings.WriteEnd('no known section for symbol.')
  symbol_not_found_warnings.WriteEnd('symbol not found in the binary.')
  return success


def main():
  """Creates an orderfile from a merged cyglog and the profiled library.

  The command line is read from sys.argv, see the usage string below.

  Returns:
    0 if the orderfile was written and all symbols were found in the library,
    1 otherwise (including when the command line is invalid).
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <merged_cyglog> <library> <output_filename>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for libchrome.so')
  options, argv = parser.parse_args(sys.argv)
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  # argv still holds the program name, hence 4 items for 3 arguments.
  if len(argv) != 4:
    parser.print_help()
    return 1
  (log_filename, lib_filename, output_filename) = argv[1:]
  symbol_extractor.SetArchitecture(options.arch)

  obj_dir = cygprofile_utils.GetObjDir(lib_filename)

  # The file is closed as soon as it has been read.
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_sections_map = GetSymbolToSectionsMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  output_file = None
  try:
    # The orderfile is written to a temporary file created next to the
    # destination, and only renamed to its final name once complete.
    (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
    output_file = os.fdopen(fd, 'w')
    ok = _OutputOrderfile(
        offsets, offset_to_symbol_infos, symbol_to_sections_map, output_file)
    output_file.close()
    os.rename(temp_filename, output_filename)
    # The temporary file is gone now, there is nothing left to remove.
    temp_filename = None
    success = ok
  finally:
    if output_file:
      output_file.close()
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1


291 if __name__
== '__main__':
292 logging
.basicConfig(level
=logging
.INFO
)