Add ICU message format support
[chromium-blink-merge.git] / tools / find_runtime_symbols / static_symbols.py
blobcd57bacd99aeeea9ddd0812385fd32cdf2a11ea8
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 import bisect
6 import re
# NOTE: every pattern below is a raw string so that regex escapes such as
# "\(" and "\s" are not interpreted as (invalid) Python string escapes.

# One innermost parenthesised argument list, plus an optional trailing
# "const" qualifier, e.g. "(int, char) const".
_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')

# One innermost template argument list, e.g. "<int>".
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')

# Everything from the start of the string up to and including a
# whitespace-preceded "name::" qualifier; group 1 is that qualifier.
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')

# One row of a section header table laid out as
#   [Nr] Name Type Address Off Size ES Flg Lk Inf Al
# Groups 1-11 capture those columns in order.  The (misspelled) name is kept
# as-is because other code refers to it.
_READELF_SECTION_HEADER_PATTER = re.compile(
    r'^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
    r'([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
    r'([0-9]+)\s+([0-9]+)\s+([0-9]+)')
class ParsingException(Exception):
    """Error raised when input being parsed is malformed.

    The string form is the ``repr`` of the first constructor argument.
    """

    def __str__(self):
        # An instance created without arguments has an empty ``args`` tuple;
        # indexing it would raise IndexError while formatting the error.
        if not self.args:
            return ''
        return repr(self.args[0])
class AddressMapping(object):
    """Maps exact start addresses to entries."""

    def __init__(self):
        self._symbol_map = {}

    def append(self, start, entry):
        """Register ``entry`` at ``start``, replacing any previous entry."""
        self._symbol_map[start] = entry

    def find(self, address):
        """Return the entry registered exactly at ``address``, or None."""
        return self._symbol_map.get(address)


class RangeAddressMapping(AddressMapping):
    """Maps an address to the entry whose start is the closest one at or
    below that address.
    """

    def __init__(self):
        super(RangeAddressMapping, self).__init__()
        self._sorted_start_list = []
        # Starts are sorted lazily: appending out of order only clears this
        # flag, and the next find() call sorts the list.
        self._is_sorted = True

    def append(self, start, entry):
        """Register ``entry`` as beginning at ``start``.

        A ``start`` equal to the most recently appended start is ignored,
        so the first entry appended for that address is kept.
        """
        if self._sorted_start_list:
            latest = self._sorted_start_list[-1]
            if latest > start:
                self._is_sorted = False
            elif latest == start:
                return
        self._sorted_start_list.append(start)
        self._symbol_map[start] = entry

    def find(self, address):
        """Return the entry covering ``address``.

        Returns None when nothing has been appended or when ``address`` is
        below every registered start.
        """
        if not self._sorted_start_list:
            return None
        if not self._is_sorted:
            self._sorted_start_list.sort()
            self._is_sorted = True
        # bisect_right counts the starts that are <= address, so an address
        # that is exactly a start resolves to its own entry (bisect_left
        # would select the preceding entry instead).
        index = bisect.bisect_right(self._sorted_start_list, address)
        if not index:
            # No start at or below the address; indexing with -1 here would
            # silently wrap around to the highest entry.
            return None
        return self._symbol_map[self._sorted_start_list[index - 1]]
class Procedure(object):
    """A class for a procedure symbol and an address range for the symbol."""

    def __init__(self, start, end, name):
        self.start = start
        self.end = end
        self.name = name

    def __eq__(self, other):
        """Compare start, end and name.

        Returns NotImplemented for objects lacking those attributes (such as
        None) so that the comparison evaluates to False instead of raising
        AttributeError.
        """
        try:
            return (self.start == other.start and
                    self.end == other.end and
                    self.name == other.name)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        equal = self.__eq__(other)
        # NotImplemented must be passed through, not negated.
        if equal is NotImplemented:
            return equal
        return not equal

    def __str__(self):
        return '%x-%x: %s' % (self.start, self.end, self.name)
class ElfSection(object):
    """A class for an elf section header."""

    # Attribute names, in the order in which __eq__ compares them.
    _FIELDS = ('number', 'name', 'stype', 'address', 'offset', 'size',
               'es', 'flg', 'lk', 'inf', 'al')

    def __init__(
            self, number, name, stype, address, offset, size, es, flg, lk,
            inf, al):
        self.number = number
        self.name = name
        self.stype = stype
        self.address = address
        self.offset = offset
        self.size = size
        self.es = es
        self.flg = flg
        self.lk = lk
        self.inf = inf
        self.al = al

    def __eq__(self, other):
        """Compare every header field.

        Returns NotImplemented for objects lacking those attributes (such as
        None) so that the comparison evaluates to False instead of raising
        AttributeError.
        """
        try:
            return all(getattr(self, field) == getattr(other, field)
                       for field in self._FIELDS)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        equal = self.__eq__(other)
        # NotImplemented must be passed through, not negated.
        if equal is NotImplemented:
            return equal
        return not equal

    def __str__(self):
        # address, size and offset are formatted as hexadecimal.
        return '%x+%x(%x) %s' % (
            self.address, self.size, self.offset, self.name)
class StaticSymbolsInFile(object):
    """Represents static symbol information in a binary file."""

    def __init__(self, my_name):
        """Create an empty symbol table for the binary named ``my_name``.

        ``my_name`` is compared with ``vma.name`` during lookups.
        """
        self.my_name = my_name
        self._elf_sections = []
        self._procedures = RangeAddressMapping()
        self._sourcefiles = RangeAddressMapping()
        self._typeinfos = AddressMapping()

    def _append_elf_section(self, elf_section):
        """Record one section header."""
        self._elf_sections.append(elf_section)

    def _append_procedure(self, start, procedure):
        """Record a procedure beginning at ``start``."""
        self._procedures.append(start, procedure)

    def _append_sourcefile(self, start, sourcefile):
        """Record a source file name beginning at ``start``."""
        self._sourcefiles.append(start, sourcefile)

    def _append_typeinfo(self, start, typeinfo):
        """Record a typeinfo symbol located at ``start``."""
        self._typeinfos.append(start, typeinfo)

    def _find_symbol_by_runtime_address(self, address, vma, target):
        """Look up ``address`` in ``target`` after mapping it into the file.

        Args:
            address: Runtime address to resolve.
            vma: Object with ``begin``, ``end``, ``offset`` and ``name``
                attributes describing the mapped region.
            target: Mapping object whose ``find`` receives the ELF address.

        Returns:
            The result of ``target.find``, or None when the address is
            outside ``vma``, ``vma`` names a different file, or no known
            section contains the corresponding file offset.
        """
        if not (vma.begin <= address < vma.end):
            return None

        if vma.name != self.my_name:
            return None

        file_offset = address - (vma.begin - vma.offset)
        elf_address = None
        # No early exit: when several sections contain the offset, the last
        # one listed determines the result.
        for section in self._elf_sections:
            if section.offset <= file_offset < (section.offset + section.size):
                elf_address = section.address + file_offset - section.offset
        # Compare against None explicitly: an ELF address of 0 is a valid
        # result and must not be mistaken for "no section matched".
        if elf_address is None:
            return None

        return target.find(elf_address)

    def find_procedure_by_runtime_address(self, address, vma):
        """Return the procedure entry for a runtime address, or None."""
        return self._find_symbol_by_runtime_address(
            address, vma, self._procedures)

    def find_sourcefile_by_runtime_address(self, address, vma):
        """Return the source file entry for a runtime address, or None."""
        return self._find_symbol_by_runtime_address(
            address, vma, self._sourcefiles)

    def find_typeinfo_by_runtime_address(self, address, vma):
        """Return the typeinfo entry for a runtime address, or None."""
        return self._find_symbol_by_runtime_address(
            address, vma, self._typeinfos)

    def load_readelf_ew(self, f):
        """Load section headers from an iterable of text lines.

        Lines before a 'Section Headers:' line are skipped.  Every following
        line that matches the section header pattern is recorded, until a
        'Key to Flags:' or 'Program Headers:' line is reached.  Nothing is
        recorded when the 'Section Headers:' line is absent.
        """
        # Use a single iterator for both loops so that the second loop
        # resumes after the header even when ``f`` is a list rather than a
        # file object.
        lines = iter(f)

        found_header = False
        for line in lines:
            if line.rstrip() == 'Section Headers:':
                found_header = True
                break
        if not found_header:
            return None

        for line in lines:
            line = line.rstrip()
            matched = _READELF_SECTION_HEADER_PATTER.match(line)
            if matched:
                self._append_elf_section(ElfSection(
                    int(matched.group(1), 10),  # number
                    matched.group(2),  # name
                    matched.group(3),  # stype
                    int(matched.group(4), 16),  # address
                    int(matched.group(5), 16),  # offset
                    int(matched.group(6), 16),  # size
                    matched.group(7),  # es (kept as text)
                    matched.group(8),  # flg
                    matched.group(9),  # lk (kept as text)
                    matched.group(10),  # inf (kept as text)
                    matched.group(11)))  # al (kept as text)
            else:
                if line in ('Key to Flags:', 'Program Headers:'):
                    break

    def load_readelf_debug_decodedline_file(self, input_file):
        """Load source file names from whitespace-separated text lines.

        The first field of each line is a hexadecimal start address and the
        second field is the source file name; later fields are ignored.
        Lines with fewer than two fields (for example blank lines) are
        skipped.
        """
        for line in input_file:
            fields = line.rstrip().split(None, 2)
            if len(fields) < 2:
                continue
            self._append_sourcefile(int(fields[0], 16), fields[1])

    @staticmethod
    def _parse_nm_bsd_line(line):
        """Split one symbol line into (value, type, name).

        The value column is either 8 or 16 characters wide and is followed
        by a space, a one-character type, another character and the name.

        Raises:
            ParsingException: the line fits neither layout, including lines
                that are too short to hold a type character.
        """
        for width in (8, 16):
            # The length check keeps short lines from raising IndexError.
            if len(line) > width + 1 and line[width] == ' ':
                return line[:width], line[width + 1], line[width + 3:]
        raise ParsingException('Invalid nm output.')

    @staticmethod
    def _get_short_function_name(function):
        """Return ``function`` without argument lists, template arguments
        and anything preceding a whitespace-separated ``name::`` qualifier.
        """
        # Strip repeatedly: each pass removes only the innermost groups.
        while True:
            function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
            if not number:
                break
        while True:
            function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
            if not number:
                break
        return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)

    def load_nm_bsd(self, f, mangled=False):
        """Load procedures and typeinfo symbols from symbol lines.

        Each procedure spans from its own address to the address of the next
        distinct address seen, so the lines are presumably expected in
        ascending address order; this is not checked here.

        Args:
            f: Iterable of text lines accepted by _parse_nm_bsd_line.
            mangled: When False (the default), procedure names are shortened
                with _get_short_function_name and symbols whose name starts
                with 'typeinfo' are recorded as typeinfo entries.

        Raises:
            ParsingException: a line cannot be parsed.
        """
        last_start = 0
        routine = ''

        for line in f:
            line = line.rstrip()
            sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)

            # A blank value column means the symbol has no address.
            if sym_value[0] == ' ':
                continue

            start_val = int(sym_value, 16)

            if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
                    (not mangled and sym_name.startswith('typeinfo'))):
                self._append_typeinfo(start_val, sym_name)

            # It's possible for two symbols to share the same address, if
            # one is a zero-length variable (like __start_google_malloc) or
            # one symbol is a weak alias to another (like __libc_malloc).
            # In such cases, we want to ignore all values except for the
            # actual symbol, which in nm-speak has type "T" (or "t").  The
            # logic below does this, though it's a bit tricky: what happens
            # when we have a series of lines with the same address, is the
            # first one gets queued up to be processed.  However, it won't
            # *actually* be processed until later, when we read a line with
            # a different address.  That means that as long as we're reading
            # lines with the same address, we have a chance to replace that
            # item in the queue, which we do whenever we see a 'T'/'t' entry.
            # If we never see such an entry, we'll just go ahead and process
            # the first entry (which never got touched in the queue), and
            # ignore the others.
            if start_val == last_start and (sym_type == 't' or
                                            sym_type == 'T'):
                # We are the 'T' symbol at this address, replace previous
                # symbol.
                routine = sym_name
                continue
            elif start_val == last_start:
                # We're not the 'T' symbol at this address, so ignore us.
                continue

            # Tag this routine with the starting address in case the image
            # has multiple occurrences of this routine.  We use a syntax
            # that resembles template parameters, which are automatically
            # stripped out by _get_short_function_name().
            sym_name += "<%016x>" % start_val

            # Flush the queued symbol; it ends where this one starts.
            if not mangled:
                routine = self._get_short_function_name(routine)
            self._append_procedure(
                last_start, Procedure(last_start, start_val, routine))

            last_start = start_val
            routine = sym_name

        # Flush the final queued symbol.  No end address is known for it, so
        # its range is empty (end == start).
        if not mangled:
            routine = self._get_short_function_name(routine)
        self._append_procedure(
            last_start, Procedure(last_start, last_start, routine))