2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Symbolizes a log file produced by cyprofile instrumentation.
8 Given a log file and the binary being profiled, creates an orderfile.
import logging
import multiprocessing
import optparse
import os
import string
import sys
import tempfile

import symbol_extractor
22 def _ParseLogLines(log_file_lines
):
23 """Parses a merged cyglog produced by mergetraces.py.
26 log_file_lines: array of lines in log file produced by profiled run
27 lib_name: library or executable containing symbols
29 Below is an example of a small log file:
30 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
31 secs usecs pid:threadid func
33 1314897086 795828 3587:1074648168 0x509e105c
34 1314897086 795874 3587:1074648168 0x509e0eb4
35 1314897086 796326 3587:1074648168 0x509e0e3c
36 1314897086 796552 3587:1074648168 0x509e07bc
40 An ordered list of callee offsets.
44 line
= log_file_lines
[0]
46 end_index
= line
.find('-')
47 vm_start
= int(line
[:end_index
], 16)
48 for line
in log_file_lines
[3:]:
51 call_lines
.append(fields
)
53 assert fields
[0] == 'END'
54 # Convert strings to int in fields.
56 for call_line
in call_lines
:
57 addr
= int(call_line
[3], 16)
60 call_info
.append(addr
)
def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Returns a dict {offset: [SymbolInfo]} from a library.

  Args:
    lib_filename: path of the library, passed to
        symbol_extractor.SymbolInfosFromBinary().
  """
  symbol_infos = symbol_extractor.SymbolInfosFromBinary(lib_filename)
  return symbol_extractor.GroupSymbolInfosByOffset(symbol_infos)
class SymbolNotFoundException(Exception):
  """Raised when an offset does not match any known symbol.

  The offending value is kept in the `value` attribute.
  """

  def __init__(self, value):
    super(SymbolNotFoundException, self).__init__(value)
    self.value = value

  # NOTE(review): the method header was missing from the reviewed text;
  # __str__ is assumed as the owner of this return statement.
  def __str__(self):
    return repr(self.value)
79 def _FindSymbolInfosAtOffset(offset_to_symbol_infos
, offset
):
80 """Finds all SymbolInfo at a given offset.
83 offset_to_symbol_infos: {offset: [SymbolInfo]}
84 offset: offset to look the symbols at
87 The list of SymbolInfo at the given offset
90 SymbolNotFoundException if the offset doesn't match any symbol.
92 if offset
in offset_to_symbol_infos
:
93 return offset_to_symbol_infos
[offset
]
94 elif offset
% 2 and (offset
- 1) in offset_to_symbol_infos
:
95 # On ARM, odd addresses are used to signal thumb instruction. They are
96 # generated by setting the LSB to 1 (see
97 # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
98 # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
99 return offset_to_symbol_infos
[offset
- 1]
101 raise SymbolNotFoundException(offset
)
class WarningCollector(object):
  """Collect warnings, but limit the number printed to a set value."""

  def __init__(self, max_warnings):
    """Creates a collector that logs at most max_warnings messages."""
    # Total number of warnings submitted, whether or not they were logged.
    self._warnings = 0
    self._max_warnings = max_warnings

  def Write(self, message):
    """Logs message unless the limit has been reached; always counts it."""
    if self._warnings < self._max_warnings:
      logging.warning(message)
    # Counted unconditionally so that WriteEnd() can report how many
    # warnings were suppressed.
    self._warnings += 1

  def WriteEnd(self, message):
    """Logs how many warnings were suppressed, if any.

    Args:
      message: description of the kind of warnings that were collected.
    """
    if self._warnings > self._max_warnings:
      logging.warning('%d more warnings for: %s' % (
          self._warnings - self._max_warnings, message))
121 def _GetObjectFileNames(obj_dir
):
122 """Returns the list of object files in a directory."""
124 for (dirpath
, _
, filenames
) in os
.walk(obj_dir
):
125 for file_name
in filenames
:
126 if file_name
.endswith('.o'):
127 obj_files
.append(os
.path
.join(dirpath
, file_name
))
def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  symbol_extractor.SymbolInfosFromBinary() is run on every file in a process
  pool and the per-file lists are concatenated in input order.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Shut the workers down so they do not outlive this call.
    pool.close()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result += symbol_infos
  return result
def _GetSymbolToSectionMapFromObjectFiles(obj_dir):
  """Creates a mapping from symbol to linker section name by scanning all
  the object files under obj_dir.

  A symbol is recorded only when its section starts with '.text' and it does
  not clash with a different section already recorded for the same name.
  Rejected symbols are reported through a WarningCollector capped at 300
  messages.

  Args:
    obj_dir: directory searched for object files.

  Returns:
    A dict {symbol name: section name}.
  """
  object_files = _GetObjectFileNames(obj_dir)
  symbol_to_section_map = {}
  symbol_warnings = WarningCollector(300)
  symbol_infos = _AllSymbolInfos(object_files)
  for symbol_info in symbol_infos:
    symbol = symbol_info.name
    if symbol.startswith('.LTHUNK'):
      # NOTE(review): the body of this branch was missing from the reviewed
      # text; skipping the symbol is assumed.
      continue
    section = symbol_info.section
    if ((symbol in symbol_to_section_map) and
        (symbol_to_section_map[symbol] != section)):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in conflicting sections ' + section +
                            ' and ' + symbol_to_section_map[symbol])
    elif not section.startswith('.text'):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in incorrect section ' + section)
    else:
      symbol_to_section_map[symbol] = section
  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_section_map
171 def _WarnAboutDuplicates(offsets
):
172 """Warns about duplicate offsets.
175 offsets: list of offsets to check for duplicates
178 True if there are no duplicates, False otherwise.
182 for offset
in offsets
:
183 if offset
not in seen_offsets
:
184 seen_offsets
.add(offset
)
187 logging
.warning('Duplicate offset: ' + hex(offset
))
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
                     output_file):
  """Outputs the orderfile to output_file.

  For each offset, in order, the sections of all symbols found at that offset
  are written one per line. A section is written only the first time it is
  encountered.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_section_map: {name: section}
    output_file: file-like object to write the results to

  Returns:
    True if every offset matched at least one symbol, False otherwise.
    Symbols without a known section are only warned about.
  """
  # NOTE(review): the success flag and its return were missing from the
  # reviewed text; they are reconstructed from the caller, which stores this
  # function's result and derives the exit status from it.
  success = True
  unknown_symbol_warnings = WarningCollector(300)
  symbol_not_found_warnings = WarningCollector(300)
  output_sections = set()
  for offset in offsets:
    try:
      symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
    except SymbolNotFoundException:
      symbol_not_found_warnings.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      success = False
      continue
    for symbol_info in symbol_infos:
      if symbol_info.name in symbol_to_section_map:
        section = symbol_to_section_map[symbol_info.name]
        if section not in output_sections:
          output_file.write(section + '\n')
          output_sections.add(section)
      else:
        unknown_symbol_warnings.Write(
            'No known section for symbol ' + symbol_info.name)
  unknown_symbol_warnings.WriteEnd('no known section for symbol.')
  symbol_not_found_warnings.WriteEnd('symbol not found in the binary.')
  return success
# NOTE(review): the `def` line of this function was missing from the reviewed
# text; the name `main` is assumed.
def main():
  """Converts a merged cyglog into an orderfile.

  Command line: [options] <merged_cyglog> <library> <output_filename>

  Object files are looked up in '../obj' relative to the library's directory.
  The orderfile is written to a temporary file in the destination directory
  and renamed into place once it has been written completely.

  Returns:
    0 if every offset of the log was matched to a symbol, 1 otherwise
    (including when the command line is malformed).
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <merged_cyglog> <library> <output_filename>')
  # NOTE(review): one argument line of this call was missing from the
  # reviewed text; a default of 'arm' is assumed - confirm.
  parser.add_option('--target-arch', action='store', dest='arch',
                    default='arm',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for libchrome.so')
  options, argv = parser.parse_args(sys.argv)
  # argv[0] is the program name, followed by the three positional arguments.
  if len(argv) != 4:
    parser.print_help()
    return 1
  (log_filename, lib_filename, output_filename) = argv[1:]
  symbol_extractor.SetArchitecture(options.arch)

  obj_dir = os.path.abspath(os.path.join(
      os.path.dirname(lib_filename), '../obj'))

  # Materialize a list: _ParseLogLines() indexes and slices its argument.
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  try:
    # The temporary file lives next to the destination so that the rename
    # below does not have to cross directories.
    (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
    with os.fdopen(fd, 'w') as output_file:
      ok = _OutputOrderfile(
          offsets, offset_to_symbol_infos, symbol_to_section_map, output_file)
    os.rename(temp_filename, output_filename)
    # The temporary file no longer exists under its old name.
    temp_filename = None
    success = ok
  finally:
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1


if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  sys.exit(main())