Add ICU message format support
[chromium-blink-merge.git] / tools / cygprofile / patch_orderfile.py
blob0092ddd6d1cd562a32b07a874a0435e801adfd1d
1 #!/usr/bin/python
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Patch an orderfile.
8 Starting with a list of symbols in a binary and an orderfile (ordered list of
9 sections), matches the symbols in the orderfile and augments each symbol with
10 the symbols residing at the same address (due to having identical code). The
11 output is a list of section matching rules appropriate for the linker option
12 -section-ordering-file. These section matching rules include both actual
13 section names and names with wildcard (*) suffixes.
15 Note: It is possible to have:
16 - Several symbols mapping to the same offset in the binary.
17 - Several offsets for a given symbol (because we strip the ".clone." and other
18 suffixes)
20 The general pipeline is:
21 1. Get the symbol infos (name, offset, size, section) from the binary
22 2. Get the symbol names from the orderfile
23 3. Find the orderfile symbol names in the symbols coming from the binary
24 4. For each symbol found, get all the symbols at the same address
25 5. Output them to an updated orderfile, with several different prefixes
26 and suffixes
27 6. Output catch-all section matching rules for unprofiled methods.
28 """
import collections
import functools
import logging
import optparse
import sys

import cyglog_to_orderfile
import cygprofile_utils
import symbol_extractor
# Linker section-name prefixes. They are removed from incoming symbols and
# put back when the output file is written. '.text.' is listed last because it
# is a leading substring of every other entry and _StripPrefix returns on the
# first match.
_PREFIXES = (
    '.text.startup.',
    '.text.hot.',
    '.text.unlikely.',
    '.text.',
)

# Symbol-name suffixes. The compiler adds them when it splits a method for
# inlining or clones it (for constant propagation, inter-procedural
# optimization and other reasons).
_SUFFIXES = (
    '.clone.',
    '.part.',
    '.isra.',
    '.constprop.',
)
def RemoveSuffixes(name):
  """Returns |name| cut off at the first cloning/splitting suffix marker.

  The markers that are recognized (see _SUFFIXES):
    .clone.      cloning in -O3.
    .part.       partial method splitting for inlining.
    .isra.       inter-procedural optimizations.
    .constprop.  cloning for constant propagation.

  Args:
    name: a symbol or section name, possibly carrying suffixes.

  Returns:
    The part of |name| in front of the earliest marker, or |name| unchanged
    when it contains none.
  """
  stripped = name
  for marker in _SUFFIXES:
    # Keep only the text that precedes the first occurrence of the marker.
    stripped = stripped.partition(marker)[0]
  return stripped
def _UniqueGenerator(generator):
  """Decorates a generator function so that it never yields an item twice.

  Items are tracked in a set, so they must be hashable. The first occurrence
  of each item is yielded, in the original order; later repeats are dropped.
  Each call of the decorated function starts with a fresh "seen" set.

  Example:
    @_UniqueGenerator
    def Foo():
      yield 1
      yield 2
      yield 1
      yield 3

    Foo() yields 1,2,3.

  Args:
    generator: a callable returning an iterable (typically a generator
               function).

  Returns:
    A generator function accepting the same arguments as |generator|. The
    name and docstring of |generator| are preserved on the wrapper.
  """
  # functools.wraps keeps __name__/__doc__ of the decorated function, which
  # would otherwise all read "_FilteringFunction" in logs and help().
  @functools.wraps(generator)
  def _FilteringFunction(*args, **kwargs):
    returned = set()
    for item in generator(*args, **kwargs):
      if item in returned:
        continue
      returned.add(item)
      yield item

  return _FilteringFunction
def _GroupSymbolInfos(symbol_infos):
  """Indexes symbol infos by offset and by suffix-stripped name.

  Args:
    symbol_infos: an iterable of SymbolInfo.

  Returns:
    A tuple of plain dicts (offset_to_symbol_infos, name_to_symbol_infos),
    i.e. the same output as _GroupSymbolInfosFromBinary. The SymbolInfo
    stored in both dicts carry the name with suffixes removed.
  """
  by_offset = collections.defaultdict(list)
  by_name = collections.defaultdict(list)
  for raw_info in symbol_infos:
    # Rebuild the record so that both indexes see the suffix-free name.
    stripped_info = symbol_extractor.SymbolInfo(
        name=RemoveSuffixes(raw_info.name),
        offset=raw_info.offset,
        size=raw_info.size,
        section=raw_info.section)
    by_offset[stripped_info.offset].append(stripped_info)
    by_name[stripped_info.name].append(stripped_info)
  # Convert to regular dicts before returning.
  return (dict(by_offset), dict(by_name))
def _GroupSymbolInfosFromBinary(binary_filename):
  """Extracts the symbols of a binary and groups them by name and offset.

  Args:
    binary_filename: path to the binary.

  Returns:
    A tuple of dict:
    (offset_to_symbol_infos, name_to_symbol_infos):
    - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
    - name_to_symbol_infos: {name: [symbol_info1, ...]}
  """
  return _GroupSymbolInfos(
      symbol_extractor.SymbolInfosFromBinary(binary_filename))
def _StripPrefix(line):
  """Removes the linker section name prefix from an orderfile line.

  Args:
    line: a line from an orderfile, usually in the form:
          .text.SymbolName

  Returns:
    The symbol (SymbolName in the example above). A line that starts with
    none of _PREFIXES is returned unchanged.
  """
  matching = [prefix for prefix in _PREFIXES if line.startswith(prefix)]
  if not matching:
    return line  # Unprefixed case
  # _PREFIXES order decides which prefix wins when several match.
  return line[len(matching[0]):]
def _SectionNameToSymbols(section_name, section_to_symbols_map):
  """Yields every symbol that section_name could refer to.

  Sections present in the map yield the names recorded there. For any other
  section the prefix is stripped from the section name and the remainder, if
  non-empty, is yielded. Catch-all sections (empty name, '.text', or a name
  ending in '*') yield nothing.

  Args:
    section_name: the section name to resolve.
    section_to_symbols_map: mapping from section name to symbol names.

  Yields:
    Symbol names.
  """
  is_catch_all = (not section_name or
                  section_name == '.text' or
                  section_name.endswith('*'))
  if is_catch_all:
    return  # Don't return anything for catch-all sections

  if section_name not in section_to_symbols_map:
    # Unknown section: fall back to the name embedded in the section name.
    fallback = _StripPrefix(section_name)
    if fallback:
      yield fallback
    return

  for symbol in section_to_symbols_map[section_name]:
    yield symbol
def GetSectionsFromOrderfile(filename):
  """Yields the sections from an orderfile.

  The file is read lazily, one line at a time. Only the trailing newline is
  removed from each line; lines that are empty after that are skipped.

  Args:
    filename: The name of the orderfile.

  Yields:
    Section names (strings), one per non-empty line, in file order.
  """
  with open(filename, 'r') as f:
    # Iterate the file object directly: it is the replacement for the
    # deprecated xreadlines(), which no longer exists in Python 3.
    for line in f:
      line = line.rstrip('\n')
      if line:
        yield line
@_UniqueGenerator
def GetSymbolsFromOrderfile(filename, section_to_symbols_map):
  """Yields the symbols named by an orderfile, each one only once.

  Args:
    filename: The name of the orderfile.
    section_to_symbols_map: The mapping from section to symbol names. If a
                            section name is missing from the mapping, the
                            symbol name is assumed to be the section name with
                            prefixes and suffixes stripped.

  Yields:
    Symbol names, without repetition.
  """
  # TODO(azarchs): Move this method to symbol_extractor.py
  for section in GetSectionsFromOrderfile(filename):
    stripped_section = RemoveSuffixes(section)
    candidates = _SectionNameToSymbols(stripped_section,
                                       section_to_symbols_map)
    for symbol in candidates:
      yield symbol
def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info,
                           offset_to_symbol_info):
  """Lists the names of all symbols sharing an offset with profiled_symbol.

  Args:
    profiled_symbol: the string symbol name to be expanded.
    name_to_symbol_info: {name: [symbol_info1], ...}, as returned by
        _GroupSymbolInfosFromBinary.
    offset_to_symbol_info: {offset: [symbol_info1, ...], ...}

  Returns:
    A list of symbol names, or an empty list if profiled_symbol was not in
    name_to_symbol_info. Names may repeat when the symbol has several infos.
  """
  if profiled_symbol not in name_to_symbol_info:
    return []
  # For each location of the symbol, collect every symbol living there.
  return [alias.name
          for info in name_to_symbol_info[profiled_symbol]
          for alias in offset_to_symbol_info[info.offset]]
@_UniqueGenerator
def _SectionMatchingRules(section_name, name_to_symbol_infos,
                          offset_to_symbol_infos, section_to_symbols_map,
                          symbol_to_sections_map, suffixed_sections):
  """Yields the section matching rules for section_name, without repeats.

  The rules cover section_name itself plus any sections which may contain the
  same code due to cloning, splitting, or identical code folding.

  Args:
    section_name: The section to expand.
    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
        _GroupSymbolInfosFromBinary.
    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
    section_to_symbols_map: The mapping from section to symbol name. Missing
        section names are treated as per _SectionNameToSymbols.
    symbol_to_sections_map: The mapping from symbol name to names of linker
        sections containing the symbol.
    suffixed_sections: A set of sections which can have suffixes.

  Yields:
    Section names including at least section_name.
  """
  expanded_names = _ExpandSection(section_name, name_to_symbol_infos,
                                  offset_to_symbol_infos,
                                  section_to_symbols_map,
                                  symbol_to_sections_map)
  for expanded in expanded_names:
    yield expanded
    # Only a subset of methods (mostly those compiled with O2) ever get
    # suffixes. Wildcards are emitted just for those, since emitting them
    # everywhere makes linking take too long.
    if expanded not in suffixed_sections:
      continue
    # TODO(azarchs): instead of just appending .*, append .suffix.* for
    # _SUFFIXES. We can't do this right now because that many wildcards
    # seems to kill the linker (linking libchrome takes 3 hours). This gets
    # almost all the benefit at a much lower link-time cost, but could cause
    # problems with unexpected suffixes.
    yield expanded + '.*'
def _ExpandSection(section_name, name_to_symbol_infos, offset_to_symbol_infos,
                   section_to_symbols_map, symbol_to_sections_map):
  """Yields section_name and the sections that may hold the same code.

  Code can end up shared between sections through identical code folding, so
  every symbol located at the same offset as a symbol of section_name
  contributes its sections too. Names may be yielded more than once.

  Args:
    section_name: The section to expand.
    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
        _GroupSymbolInfosFromBinary.
    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
    section_to_symbols_map: The mapping from section to symbol name. Missing
        section names are treated as per _SectionNameToSymbols.
    symbol_to_sections_map: The mapping from symbol name to names of linker
        sections containing the symbol.

  Yields:
    Section names including at least section_name.
  """
  yield section_name
  section_symbols = _SectionNameToSymbols(section_name,
                                          section_to_symbols_map)
  for section_symbol in section_symbols:
    aliases = _SymbolsWithSameOffset(section_symbol, name_to_symbol_infos,
                                     offset_to_symbol_infos)
    for alias in aliases:
      # Sections recorded for this alias, if there are any.
      for known_section in symbol_to_sections_map.get(alias, ()):
        yield known_section
      # Section names synthesized from each known prefix.
      for prefix in _PREFIXES:
        yield prefix + alias
@_UniqueGenerator
def _ExpandSections(section_names, name_to_symbol_infos,
                    offset_to_symbol_infos, section_to_symbols_map,
                    symbol_to_sections_map, suffixed_sections):
  """Yields the ordered section matching rules for a list of sections.

  A rule is yielded only the first time it is produced.

  Args:
    section_names: The sections to expand.
    name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
        _GroupSymbolInfosFromBinary.
    offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
    section_to_symbols_map: The mapping from section to symbol names.
    symbol_to_sections_map: The mapping from symbol name to names of linker
        sections containing the symbol.
    suffixed_sections: A set of sections which can have suffixes.

  Yields:
    Section matching rules including at least section_names.
  """
  for profiled_section in section_names:
    rules = _SectionMatchingRules(
        profiled_section, name_to_symbol_infos, offset_to_symbol_infos,
        section_to_symbols_map, symbol_to_sections_map, suffixed_sections)
    for rule in rules:
      yield rule
def _CombineSectionListsByPrimaryName(symbol_to_sections_map):
  """Combines values of the symbol_to_sections_map by stripping suffixes.

  Symbols that differ only by a suffix are merged into a single entry whose
  section list is the concatenation of the (suffix-stripped) section lists.
  Duplicate section names are not removed.

  Example:
    {foo: [.text.foo, .text.bar.part.1],
     foo.constprop.4: [.text.baz.constprop.3]} ->
    {foo: [.text.foo, .text.bar, .text.baz]}

  Args:
    symbol_to_sections_map: Mapping from symbol name to list of section names

  Returns:
    A new dict with the same mapping, but with symbol and section names
    suffix-stripped. The input mapping is not modified.
  """
  simplified = {}
  # items() instead of iteritems() so the code runs on Python 2 and 3 alike.
  for suffixed_symbol, suffixed_sections in symbol_to_sections_map.items():
    symbol = RemoveSuffixes(suffixed_symbol)
    sections = [RemoveSuffixes(section) for section in suffixed_sections]
    simplified.setdefault(symbol, []).extend(sections)
  return simplified
def _SectionsWithSuffixes(symbol_to_sections_map):
  """Finds sections which have suffixes applied.

  Args:
    symbol_to_sections_map: a map where the values are lists of section names.

  Returns:
    A set containing, in suffix-stripped form, all section names which were
    seen with suffixes applied.
  """
  sections_with_suffixes = set()
  # values() instead of itervalues() so the code runs on Python 2 and 3 alike.
  for suffixed_sections in symbol_to_sections_map.values():
    for suffixed_section in suffixed_sections:
      section = RemoveSuffixes(suffixed_section)
      # A name changed by stripping is one that carried a suffix.
      if section != suffixed_section:
        sections_with_suffixes.add(section)
  return sections_with_suffixes
def _StripSuffixes(section_list):
  """Returns a list with RemoveSuffixes applied to each section or symbol."""
  return list(map(RemoveSuffixes, section_list))
def main(argv):
  """Prints a patched orderfile for a library to stdout.

  Args:
    argv: the full command line, program name included:
          [prog, options..., <unpatched_orderfile>, <library>]

  Returns:
    The process exit code: 0 on success, 1 when the positional arguments are
    wrong (the help text is printed in that case).
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <unpatched_orderfile> <library>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for the library.')
  options, argv = parser.parse_args(argv)
  # Validate the command line before doing any other work, so that a usage
  # error is reported without first attempting architecture detection.
  if len(argv) != 3:
    parser.print_help()
    return 1
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  orderfile_filename = argv[1]
  binary_filename = argv[2]
  symbol_extractor.SetArchitecture(options.arch)
  (offset_to_symbol_infos, name_to_symbol_infos) = _GroupSymbolInfosFromBinary(
      binary_filename)
  obj_dir = cygprofile_utils.GetObjDir(binary_filename)
  raw_symbol_map = cyglog_to_orderfile.GetSymbolToSectionsMapFromObjectFiles(
      obj_dir)
  # The suffixed sections must be collected from the raw map, before the
  # suffixes are stripped from it on the next line.
  suffixed = _SectionsWithSuffixes(raw_symbol_map)
  symbol_to_sections_map = _CombineSectionListsByPrimaryName(raw_symbol_map)
  section_to_symbols_map = cygprofile_utils.InvertMapping(
      symbol_to_sections_map)
  profiled_sections = _StripSuffixes(
      GetSectionsFromOrderfile(orderfile_filename))
  expanded_sections = _ExpandSections(
      profiled_sections, name_to_symbol_infos, offset_to_symbol_infos,
      section_to_symbols_map, symbol_to_sections_map, suffixed)
  # print is called with a single parenthesized argument throughout, which
  # behaves identically as a Python 2 statement and a Python 3 function call.
  for section in expanded_sections:
    print(section)
  # The following is needed otherwise Gold only applies a partial sort.
  print('.text')  # gets methods not in a section, such as assembly
  for prefix in _PREFIXES:
    print(prefix + '*')  # gets everything else
  return 0
# Script entry point: enable INFO logging, then exit with main()'s status.
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  exit_code = main(sys.argv)
  sys.exit(exit_code)