# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Symbolizes a log file produced by cygprofile instrumentation.

Given a log file and the binary being profiled, creates an orderfile.
"""
12 import multiprocessing
19 import cygprofile_utils
20 import symbol_extractor
def _ParseLogLines(log_file_lines):
  """Parses a merged cyglog produced by mergetraces.py.

  Args:
    log_file_lines: array of lines in log file produced by profiled run

    Below is an example of a small log file:
    5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
    secs       usecs      pid:threadid    func
    START
    1314897086 795828     3587:1074648168 0x509e105c
    1314897086 795874     3587:1074648168 0x509e0eb4
    1314897086 796326     3587:1074648168 0x509e0e3c
    1314897086 796552     3587:1074648168 0x509e07bc
    END

  Returns:
    An ordered list of callee offsets.

  Raises:
    AssertionError: if a line after the header is neither a four-column call
        record nor the END marker.
  """
  # The first line describes the memory mapping; the hexadecimal number before
  # the first '-' is the address at which that mapping starts.
  mapping_line = log_file_lines[0]
  vm_start = int(mapping_line[:mapping_line.find('-')], 16)

  offsets = []
  # The first three lines (mapping, column titles, start marker) carry no
  # call records.
  for line in log_file_lines[3:]:
    fields = line.split()
    if not fields:
      # Tolerate blank lines instead of failing with an IndexError below.
      continue
    if len(fields) != 4:
      assert fields[0] == 'END', 'Unexpected log line: %r' % line
      continue
    # The last column is the absolute address of the callee.
    addr = int(fields[3], 16)
    # NOTE(review): addresses at or below the start of the mapping cannot be
    # turned into a meaningful offset and are dropped -- confirm this matches
    # the intended handling.
    if vm_start < addr:
      offsets.append(addr - vm_start)
  return offsets
def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Extracts the symbols of a library and groups them by offset.

  Args:
    lib_filename: path of the library to read the symbols from.

  Returns:
    A dict {offset: [SymbolInfo]}.
  """
  return symbol_extractor.GroupSymbolInfosByOffset(
      symbol_extractor.SymbolInfosFromBinary(lib_filename))
class SymbolNotFoundException(Exception):
  """Raised when no symbol can be found for a given offset.

  Attributes:
    value: the value the exception was constructed with (the offset that
        could not be resolved, at the raise site in this file).
  """

  def __init__(self, value):
    super(SymbolNotFoundException, self).__init__(value)
    # Kept as an attribute so that __str__ and callers can read it back.
    self.value = value

  def __str__(self):
    return repr(self.value)
def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
  """Finds all SymbolInfo at a given offset.

  Args:
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    offset: offset to look the symbols at

  Returns:
    The list of SymbolInfo at the given offset

  Raises:
    SymbolNotFoundException if the offset doesn't match any symbol.
  """
  if offset in offset_to_symbol_infos:
    return offset_to_symbol_infos[offset]
  if offset % 2 and (offset - 1) in offset_to_symbol_infos:
    # On ARM, odd addresses are used to signal thumb instruction. They are
    # generated by setting the LSB to 1 (see
    # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
    # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
    return offset_to_symbol_infos[offset - 1]
  raise SymbolNotFoundException(offset)
def _GetObjectFileNames(obj_dir):
  """Returns the list of object files in a directory.

  Args:
    obj_dir: directory to walk recursively.

  Returns:
    The paths (each prefixed by the directory it was found in) of all the
    files below obj_dir whose name ends with '.o'. Empty if there are none.
  """
  obj_files = []
  for (dirpath, _, filenames) in os.walk(obj_dir):
    obj_files.extend(
        os.path.join(dirpath, file_name)
        for file_name in filenames if file_name.endswith('.o'))
  return obj_files
def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  Args:
    object_filenames: iterable of paths handed one by one to
        symbol_extractor.SymbolInfosFromBinary.

  Returns:
    A single flat list with the SymbolInfo of every file, in the order of
    object_filenames.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Release the worker processes whether or not the extraction succeeded.
    pool.close()
    pool.join()
  return [symbol_info
          for symbol_infos in symbol_infos_nested
          for symbol_info in symbol_infos]
def _GetSymbolToSectionMapFromObjectFiles(obj_dir):
  """Creates a mapping from symbol to linker section name.

  The mapping is built by scanning all the object files found under obj_dir.

  Args:
    obj_dir: directory searched recursively for '.o' files.

  Returns:
    A dict {symbol name: section name}. Symbols whose name starts with
    '.LTHUNK' are ignored. Symbols whose section does not start with '.text'
    are left out, and a symbol seen in two different sections keeps the first
    section recorded for it; both cases are reported through the warning
    collector.
  """
  object_files = _GetObjectFileNames(obj_dir)
  symbol_to_section_map = {}
  symbol_warnings = cygprofile_utils.WarningCollector(300)
  for symbol_info in _AllSymbolInfos(object_files):
    symbol = symbol_info.name
    if symbol.startswith('.LTHUNK'):
      continue
    section = symbol_info.section
    if ((symbol in symbol_to_section_map) and
        (symbol_to_section_map[symbol] != section)):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in conflicting sections ' + section +
                            ' and ' + symbol_to_section_map[symbol])
    elif not section.startswith('.text'):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in incorrect section ' + section)
    else:
      symbol_to_section_map[symbol] = section
  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_section_map
def _WarnAboutDuplicates(offsets):
  """Warns about duplicate offsets.

  Args:
    offsets: list of offsets to check for duplicates

  Returns:
    True if there are no duplicates, False otherwise.
  """
  seen_offsets = set()
  ok = True
  for offset in offsets:
    if offset not in seen_offsets:
      seen_offsets.add(offset)
    else:
      ok = False
      # One warning per repeated occurrence, not per distinct offset.
      logging.warning('Duplicate offset: ' + hex(offset))
  return ok
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
                     output_file):
  """Outputs the orderfile to output_file.

  Every section name is written on its own line, at most once, in the order
  in which the offsets first reach it.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_section_map: {name: section}
    output_file: file-like object to write the results to

  Returns:
    True if a symbol was found for every offset, False otherwise. Symbols
    without a known section are only reported as warnings and do not affect
    the result.
  """
  success = True
  unknown_symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_not_found_warnings = cygprofile_utils.WarningCollector(300)
  output_sections = set()
  for offset in offsets:
    # Only the lookup can raise, so keep the try body down to it.
    try:
      symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
    except SymbolNotFoundException:
      symbol_not_found_warnings.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      success = False
      continue
    for symbol_info in symbol_infos:
      if symbol_info.name in symbol_to_section_map:
        section = symbol_to_section_map[symbol_info.name]
        if section not in output_sections:
          output_file.write(section + '\n')
          output_sections.add(section)
      else:
        unknown_symbol_warnings.Write(
            'No known section for symbol ' + symbol_info.name)
  unknown_symbol_warnings.WriteEnd('no known section for symbol.')
  symbol_not_found_warnings.WriteEnd('symbol not found in the binary.')
  return success
def main():
  """Creates an orderfile from a merged cyglog and the profiled library.

  Expects three positional command line arguments: the merged cyglog, the
  library and the name of the orderfile to write. The object files are looked
  up in '../obj' relative to the directory of the library.

  Returns:
    0 if the orderfile was written and every offset was symbolized, 1
    otherwise (including when the number of arguments is wrong).
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <merged_cyglog> <library> <output_filename>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for libchrome.so')
  options, argv = parser.parse_args(sys.argv)
  # Only auto-detect when the architecture was not given explicitly.
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  # argv[0] is the program name, followed by the three positional arguments.
  if len(argv) != 4:
    parser.print_help()
    return 1
  (log_filename, lib_filename, output_filename) = argv[1:]
  symbol_extractor.SetArchitecture(options.arch)

  obj_dir = os.path.abspath(os.path.join(
      os.path.dirname(lib_filename), '../obj'))

  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)

  success = False
  # Write to a temporary file in the destination directory and rename it once
  # complete, so that a partially written orderfile is never left behind.
  (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
  try:
    with os.fdopen(fd, 'w') as output_file:
      ok = _OutputOrderfile(
          offsets, offset_to_symbol_infos, symbol_to_section_map, output_file)
    os.rename(temp_filename, output_filename)
    temp_filename = None
    success = ok
  finally:
    # Still set only if something failed before the rename completed.
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1


if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  sys.exit(main())