1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
# All patterns below are raw strings so that regex escapes such as "\(" and
# "\s" are not parsed as (invalid) Python string escapes.

# A parenthesized argument list without nested parentheses, optionally
# followed by a "const" qualifier.
_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')

# An innermost template argument list, e.g. "<int>".
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')

# Everything up to and including the whitespace that precedes a "name::"
# qualifier; the qualifier itself is captured as group 1.
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')

# One row of a section header table.  The eleven groups are, in order:
# number (or the literal "Nr" of the heading row), name (may be empty),
# type, address, offset, size, es, flg, lk, inf and al.
_READELF_SECTION_HEADER_PATTER = re.compile(
    r'^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
    r'([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
    r'([0-9]+)\s+([0-9]+)\s+([0-9]+)')
18 class ParsingException(Exception):
20 return repr(self
.args
[0])
23 class AddressMapping(object):
def append(self, start, entry):
    """Store `entry` in `self._symbol_map` under the key `start`.

    An entry already stored under the same key is overwritten.
    """
    symbol_map = self._symbol_map
    symbol_map[start] = entry
def find(self, address):
    """Look up `address` as an exact key of `self._symbol_map`.

    Returns whatever `self._symbol_map.get(address)` yields.
    """
    entry = self._symbol_map.get(address)
    return entry
34 class RangeAddressMapping(AddressMapping
):
36 super(RangeAddressMapping
, self
).__init
__()
37 self
._sorted
_start
_list
= []
38 self
._is
_sorted
= True
40 def append(self
, start
, entry
):
41 if self
._sorted
_start
_list
:
42 if self
._sorted
_start
_list
[-1] > start
:
43 self
._is
_sorted
= False
44 elif self
._sorted
_start
_list
[-1] == start
:
46 self
._sorted
_start
_list
.append(start
)
47 self
._symbol
_map
[start
] = entry
49 def find(self
, address
):
50 if not self
._sorted
_start
_list
:
52 if not self
._is
_sorted
:
53 self
._sorted
_start
_list
.sort()
54 self
._is
_sorted
= True
55 found_index
= bisect
.bisect_left(self
._sorted
_start
_list
, address
)
56 found_start_address
= self
._sorted
_start
_list
[found_index
- 1]
57 return self
._symbol
_map
[found_start_address
]
60 class Procedure(object):
61 """A class for a procedure symbol and an address range for the symbol."""
63 def __init__(self
, start
, end
, name
):
def __eq__(self, other):
    """Return True when `other` has the same start, end and name.

    Any object exposing `start`, `end` and `name` attributes can be
    compared.  An object that lacks one of them (for example None) is
    simply unequal: False is returned instead of letting AttributeError
    escape.  A plain bool is returned so that `not self.__eq__(other)`
    stays well-defined.
    """
    try:
        theirs = (other.start, other.end, other.name)
    except AttributeError:
        return False
    return (self.start, self.end, self.name) == theirs
def __ne__(self, other):
    """Return the logical negation of `self.__eq__(other)`."""
    is_equal = self.__eq__(other)
    return not is_equal
77 return '%x-%x: %s' % (self
.start
, self
.end
, self
.name
)
80 class ElfSection(object):
81 """A class for an elf section header."""
84 self
, number
, name
, stype
, address
, offset
, size
, es
, flg
, lk
, inf
, al
):
88 self
.address
= address
97 def __eq__(self
, other
):
98 return (self
.number
== other
.number
and
99 self
.name
== other
.name
and
100 self
.stype
== other
.stype
and
101 self
.address
== other
.address
and
102 self
.offset
== other
.offset
and
103 self
.size
== other
.size
and
104 self
.es
== other
.es
and
105 self
.flg
== other
.flg
and
106 self
.lk
== other
.lk
and
107 self
.inf
== other
.inf
and
def __ne__(self, other):
    """Return False when `self.__eq__(other)` is truthy, True otherwise."""
    if self.__eq__(other):
        return False
    return True
114 return '%x+%x(%x) %s' % (self
.address
, self
.size
, self
.offset
, self
.name
)
117 class StaticSymbolsInFile(object):
118 """Represents static symbol information in a binary file."""
def __init__(self, my_name):
    """Create an empty symbol table.

    Args:
        my_name: Stored as `self.my_name`.
    """
    self.my_name = my_name
    # Section headers, kept in the order in which they are appended.
    self._elf_sections = list()
    # Procedures and source files both use RangeAddressMapping.
    self._procedures, self._sourcefiles = (
        RangeAddressMapping(), RangeAddressMapping())
    # Typeinfos use the plain AddressMapping.
    self._typeinfos = AddressMapping()
def _append_elf_section(self, elf_section):
    """Add `elf_section` to the end of `self._elf_sections`."""
    sections = self._elf_sections
    sections.append(elf_section)
def _append_procedure(self, start, procedure):
    """Forward `(start, procedure)` to `self._procedures.append`."""
    procedures = self._procedures
    procedures.append(start, procedure)
def _append_sourcefile(self, start, sourcefile):
    """Forward `(start, sourcefile)` to `self._sourcefiles.append`."""
    sourcefiles = self._sourcefiles
    sourcefiles.append(start, sourcefile)
def _append_typeinfo(self, start, typeinfo):
    """Forward `(start, typeinfo)` to `self._typeinfos.append`."""
    typeinfos = self._typeinfos
    typeinfos.append(start, typeinfo)
139 def _find_symbol_by_runtime_address(self
, address
, vma
, target
):
140 if not (vma
.begin
<= address
< vma
.end
):
143 if vma
.name
!= self
.my_name
:
146 file_offset
= address
- (vma
.begin
- vma
.offset
)
148 for section
in self
._elf
_sections
:
149 if section
.offset
<= file_offset
< (section
.offset
+ section
.size
):
150 elf_address
= section
.address
+ file_offset
- section
.offset
154 return target
.find(elf_address
)
def find_procedure_by_runtime_address(self, address, vma):
    """Look up `address` within `vma` among the procedures.

    Delegates to `self._find_symbol_by_runtime_address` with
    `self._procedures` as the lookup target and returns its result.
    """
    lookup = self._find_symbol_by_runtime_address
    return lookup(address, vma, self._procedures)
def find_sourcefile_by_runtime_address(self, address, vma):
    """Look up `address` within `vma` among the source files.

    Delegates to `self._find_symbol_by_runtime_address` with
    `self._sourcefiles` as the lookup target and returns its result.
    """
    lookup = self._find_symbol_by_runtime_address
    return lookup(address, vma, self._sourcefiles)
def find_typeinfo_by_runtime_address(self, address, vma):
    """Look up `address` within `vma` among the typeinfos.

    Delegates to `self._find_symbol_by_runtime_address` with
    `self._typeinfos` as the lookup target and returns its result.
    """
    lookup = self._find_symbol_by_runtime_address
    return lookup(address, vma, self._typeinfos)
165 def load_readelf_ew(self
, f
):
168 if line
.rstrip() == 'Section Headers:':
176 matched
= _READELF_SECTION_HEADER_PATTER
.match(line
)
178 self
._append
_elf
_section
(ElfSection(
179 int(matched
.group(1), 10), # number
180 matched
.group(2), # name
181 matched
.group(3), # stype
182 int(matched
.group(4), 16), # address
183 int(matched
.group(5), 16), # offset
184 int(matched
.group(6), 16), # size
185 matched
.group(7), # es
186 matched
.group(8), # flg
187 matched
.group(9), # lk
188 matched
.group(10), # inf
189 matched
.group(11) # al
192 if line
in ('Key to Flags:', 'Program Headers:'):
def load_readelf_debug_decodedline_file(self, input_file):
    """Register one source file entry per line of `input_file`.

    Each line is split on whitespace into at most three fields.  The
    first field is parsed as a hexadecimal address and the second is the
    source file recorded for that address; a third field, if present, is
    ignored.

    Args:
        input_file: An iterable of text lines.
    """
    for raw_line in input_file:
        fields = raw_line.rstrip().split(None, 2)
        # Parse the address before touching the second field so a bad
        # line fails in the same way as a single combined call would.
        start_address = int(fields[0], 16)
        self._append_sourcefile(start_address, fields[1])
201 def _parse_nm_bsd_line(line
):
203 return line
[0:8], line
[9], line
[11:]
204 elif line
[16] == ' ':
205 return line
[0:16], line
[17], line
[19:]
206 raise ParsingException('Invalid nm output.')
209 def _get_short_function_name(function
):
211 function
, number
= _ARGUMENT_TYPE_PATTERN
.subn('', function
)
215 function
, number
= _TEMPLATE_ARGUMENT_PATTERN
.subn('', function
)
218 return _LEADING_TYPE_PATTERN
.sub('\g<1>', function
)
220 def load_nm_bsd(self
, f
, mangled
=False):
226 sym_value
, sym_type
, sym_name
= self
._parse
_nm
_bsd
_line
(line
)
228 if sym_value
[0] == ' ':
231 start_val
= int(sym_value
, 16)
233 if (sym_type
in ('r', 'R', 'D', 'U', 'd', 'V') and
234 (not mangled
and sym_name
.startswith('typeinfo'))):
235 self
._append
_typeinfo
(start_val
, sym_name
)
237 # It's possible for two symbols to share the same address, if
238 # one is a zero-length variable (like __start_google_malloc) or
239 # one symbol is a weak alias to another (like __libc_malloc).
240 # In such cases, we want to ignore all values except for the
241 # actual symbol, which in nm-speak has type "T". The logic
242 # below does this, though it's a bit tricky: what happens when
243 # we have a series of lines with the same address, is the first
244 # one gets queued up to be processed. However, it won't
245 # *actually* be processed until later, when we read a line with
246 # a different address. That means that as long as we're reading
247 # lines with the same address, we have a chance to replace that
248 # item in the queue, which we do whenever we see a 'T' entry --
249 # that is, a line with type 'T'. If we never see a 'T' entry,
250 # we'll just go ahead and process the first entry (which never
251 # got touched in the queue), and ignore the others.
252 if start_val
== last_start
and (sym_type
== 't' or sym_type
== 'T'):
253 # We are the 'T' symbol at this address, replace previous symbol.
256 elif start_val
== last_start
:
257 # We're not the 'T' symbol at this address, so ignore us.
260 # Tag this routine with the starting address in case the image
261 # has multiple occurrences of this routine. We use a syntax
262 # that resembles template parameters that are automatically
263 # stripped out by _get_short_function_name()
264 sym_name
+= "<%016x>" % start_val
267 routine
= self
._get
_short
_function
_name
(routine
)
268 self
._append
_procedure
(
269 last_start
, Procedure(last_start
, start_val
, routine
))
271 last_start
= start_val
275 routine
= self
._get
_short
_function
_name
(routine
)
276 self
._append
_procedure
(
277 last_start
, Procedure(last_start
, last_start
, routine
))