Add ICU message format support
[chromium-blink-merge.git] / tools / cygprofile / symbol_extractor.py
bloba30c05ee3428840ef5f0d14a5cd33eb959e52c76
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Utilities to get and manipulate symbols from a binary."""
8 import collections
9 import logging
10 import os
11 import re
12 import subprocess
13 import sys
15 import cygprofile_utils
17 sys.path.insert(
18 0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
19 'third_party', 'android_platform', 'development',
20 'scripts'))
21 import symbol
# Cap passed to cygprofile_utils.WarningCollector when reporting symbols that
# appear at more than one offset.
_MAX_WARNINGS_TO_PRINT = 200

# Record describing one symbol parsed from objdump output: its name, its
# offset and size (both parsed from hexadecimal), and its section name.
SymbolInfo = collections.namedtuple(
    'SymbolInfo', ['name', 'offset', 'size', 'section'])
def SetArchitecture(arch):
  """Select the architecture of the binaries that will be symbolized.

  Args:
    arch: architecture identifier; stored unchanged in symbol.ARCH.
  """
  symbol.ARCH = arch
def _FromObjdumpLine(line):
  """Parse one line of objdump output into a SymbolInfo.

  The lines of interest describe function symbols and look like
    0000000000 g F .text.foo 000000000 [.hidden] foo
  where the scope flag g (global) may also be l (local) or w (weak).

  Args:
    line: a single line of objdump output.

  Returns:
    A SymbolInfo if the line describes a function symbol, None otherwise.

  Raises:
    AssertionError: the line is flagged as a function symbol but has an
      unexpected number of fields, an unknown scope flag, or a name with
      characters outside [a-zA-Z0-9_.].
  """
  fields = line.split()
  is_function_symbol = len(fields) >= 6 and fields[2] == 'F'
  if not is_function_symbol:
    return None

  # Either exactly six fields, or seven with '.hidden' just before the name.
  field_count = len(fields)
  assert field_count == 6 or (field_count == 7 and fields[5] == '.hidden')
  assert fields[1] in ('g', 'l', 'w')

  # Offset and size are printed in hexadecimal.
  symbol_offset = int(fields[0], 16)
  symbol_size = int(fields[4], 16)
  # The symbol name is always the last field.
  symbol_name = fields[-1].rstrip('\n')
  assert re.match('^[a-zA-Z0-9_.]+$', symbol_name)
  return SymbolInfo(name=symbol_name, offset=symbol_offset, size=symbol_size,
                    section=fields[3])
def _SymbolInfosFromStream(objdump_lines):
  """Collect every symbol described in a stream of objdump output.

  Args:
    objdump_lines: An iterable of objdump output lines.

  Returns:
    A list of SymbolInfo, one per line that describes a symbol, in input
    order. Lines that do not describe a symbol are skipped.
  """
  # Parse lazily, one line at a time, and keep only the lines that yielded a
  # SymbolInfo.
  parsed_lines = (_FromObjdumpLine(line) for line in objdump_lines)
  return [info for info in parsed_lines if info is not None]
def SymbolInfosFromBinary(binary_filename):
  """Runs objdump to get all the symbols from a binary.

  Args:
    binary_filename: path to the binary.

  Returns:
    A list of SymbolInfo from the binary.

  Raises:
    AssertionError: propagated from the parser if objdump prints a malformed
      function symbol line.
  """
  command = (symbol.ToolPath('objdump'), '-t', '-w', binary_filename)
  p = subprocess.Popen(command, shell=False, stdout=subprocess.PIPE)
  try:
    # The output is parsed as it is produced, line by line.
    return _SymbolInfosFromStream(p.stdout)
  finally:
    # Close our end of the pipe before waiting. If parsing raised part-way
    # through, objdump may still be blocked writing to a full pipe that nobody
    # reads any more, and wait() alone would then never return.
    p.stdout.close()
    p.wait()
def GroupSymbolInfosByOffset(symbol_infos):
  """Group symbols by offset into a dict {offset: [symbol_info1, ...], ...}.

  Several symbols can share one offset, so each offset maps to a list.
  Within a list, symbols keep the order in which they were supplied.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {offset: [symbol_info1, ...], ...}
  """
  grouped = {}
  for info in symbol_infos:
    grouped.setdefault(info.offset, []).append(info)
  return grouped
def GroupSymbolInfosByName(symbol_infos):
  """Group symbols by name into a dict {name: [symbol_info1, ...], ...}.

  One name can occur at several offsets, so each name maps to a list.
  Within a list, symbols keep the order in which they were supplied.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: [symbol_info1, ...], ...}
  """
  grouped = {}
  for info in symbol_infos:
    grouped.setdefault(info.name, []).append(info)
  return grouped
def CreateNameToSymbolInfo(symbol_infos):
  """Create a dict {name: symbol_info, ...}.

  A warning is recorded for every name that appears at more than one offset.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: symbol_info, ...}
    If a symbol name corresponds to more than one symbol_info, the symbol_info
    with the lowest offset is chosen.
  """
  #TODO(azarchs): move the functionality in this method into check_orderfile.
  symbol_infos_by_name = {}
  warnings = cygprofile_utils.WarningCollector(_MAX_WARNINGS_TO_PRINT)
  # values() instead of itervalues(): dicts have no itervalues() on Python 3,
  # while values() yields the same groups on both Python 2 and Python 3.
  for infos in GroupSymbolInfosByName(symbol_infos).values():
    first_symbol_info = min(infos, key=lambda x: x.offset)
    symbol_infos_by_name[first_symbol_info.name] = first_symbol_info
    if len(infos) > 1:
      warnings.Write('Symbol %s appears at %d offsets: %s' %
                     (first_symbol_info.name,
                      len(infos),
                      ','.join([hex(x.offset) for x in infos])))
  warnings.WriteEnd('symbols at multiple offsets.')
  return symbol_infos_by_name
def DemangleSymbol(mangled_symbol):
  """Demangle a symbol name.

  Args:
    mangled_symbol: the mangled symbol name.

  Returns:
    Whatever symbol.CallCppFilt returns for mangled_symbol.
  """
  return symbol.CallCppFilt(mangled_symbol)