2 #===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 #===------------------------------------------------------------------------===#
10 # HWAddressSanitizer offline symbolization script.
12 #===------------------------------------------------------------------------===#
14 from __future__
import print_function
15 from __future__
import unicode_literals
if sys.version_info[0] < 3:
    # Python 3 prints UTF-8 by default; emulate that on Python 2 so that
    # non-ASCII symbol names can be printed.
    import codecs
    sys.stdout = codecs.getwriter("utf-8")(sys.stdout)

# Address and pointer tag of the most recent tag-mismatch report seen on
# stdin. Written by save_access_address(), read by process_stack_history().
last_access_address = last_access_tag = None
class Symbolizer:
    """Front end for a long-lived llvm-symbolizer subprocess.

    Queries are written to the subprocess's stdin as "CODE <binary> <addr>" or
    "FRAME <binary> <addr>" lines and the answers are read back line by line
    from its stdout. An empty line terminates each answer.

    Args:
      path: llvm-symbolizer executable to launch.
      binary_prefixes: directories searched for unstripped binaries.
      paths_to_cut: literal source path prefixes removed from reported
        file names.
    """

    def __init__(self, path, binary_prefixes, paths_to_cut):
        self.__pipe = None  # Started lazily by __open_pipe().
        self.__path = path
        self.__binary_prefixes = binary_prefixes
        self.__paths_to_cut = paths_to_cut
        self.__log = False
        # Binary names already reported as missing, so each is warned once.
        self.__warnings = set()

    def enable_logging(self, enable):
        """Turn echoing of the symbolizer pipe traffic to stderr on or off."""
        self.__log = enable

    def __open_pipe(self):
        """Start the llvm-symbolizer subprocess unless it is already running."""
        if self.__pipe:
            return
        opt = {}
        if sys.version_info.major > 2:
            # Python 3 only: exchange text (not bytes) with the subprocess.
            opt['encoding'] = 'utf-8'
        self.__pipe = subprocess.Popen([self.__path, "--inlining", "--functions"],
                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                       **opt)

    class __EOF(Exception):
        """Raised by __read() when the current answer has no more lines."""
        pass

    def __write(self, s):
        """Send one request line to the symbolizer and flush it."""
        print(s, file=self.__pipe.stdin)
        self.__pipe.stdin.flush()
        if self.__log:
            print("#>> |%s|" % (s,), file=sys.stderr)

    def __read(self):
        """Return the next answer line; raise __EOF on an empty line."""
        s = self.__pipe.stdout.readline().rstrip()
        if self.__log:
            print("# << |%s|" % (s,), file=sys.stderr)
        if s == '':
            raise Symbolizer.__EOF
        return s

    def __process_source_path(self, file_name):
        """Shorten a "file:line" string for display.

        Everything up to and including each configured prefix is removed, and
        sanitizer runtime / crtstuff locations are replaced by placeholders.
        """
        for path_to_cut in self.__paths_to_cut:
            # The prefixes are plain paths, not patterns: escape them so that
            # characters such as '+' or '.' cannot break or widen the regex.
            file_name = re.sub(".*" + re.escape(path_to_cut), "", file_name)
        file_name = re.sub(".*hwasan_[a-z_]*.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
        file_name = re.sub(".*asan_[a-z_]*.(cc|h):[0-9]*", "[asan_rtl]", file_name)
        file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
        return file_name

    def __process_binary_name(self, name):
        """Map a binary path from the report to an existing local file.

        Returns the first existing candidate path, or None (after printing a
        one-time warning to stderr) when no candidate exists.
        """
        if name.startswith('/'):
            # os.path.join() would discard the prefix for an absolute name.
            name = name[1:]
        apex_prefix = "apex/com.android."
        for p in self.__binary_prefixes:
            full_path = os.path.join(p, name)
            if os.path.exists(full_path):
                return full_path
            if name.startswith(apex_prefix):
                # Also try the com.google.android. spelling of the APEX path.
                full_path = os.path.join(p, "apex/com.google.android." + name[len(apex_prefix):])
                if os.path.exists(full_path):
                    return full_path
        # Try stripping extra path components as the last resort.
        for p in self.__binary_prefixes:
            full_path = os.path.join(p, os.path.basename(name))
            if os.path.exists(full_path):
                return full_path
        if name not in self.__warnings:
            print("Could not find symbols for", name, file=sys.stderr)
            self.__warnings.add(name)
        return None

    def iter_locals(self, binary, addr):
        """Yield the locals reported by a FRAME query for binary+addr.

        Each item is (function_name, file_line, local_name, offset, size,
        tag_offset); the last three are ints, or None where the symbolizer
        printed '??'. Nothing is yielded when the binary cannot be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("FRAME %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                local_name = self.__read()
                file_line = self.__read()
                extra = self.__read().split()

                file_line = self.__process_source_path(file_line)
                offset = None if extra[0] == '??' else int(extra[0])
                size = None if extra[1] == '??' else int(extra[1])
                tag_offset = None if extra[2] == '??' else int(extra[2])
                yield (function_name, file_line, local_name, offset, size, tag_offset)
        except Symbolizer.__EOF:
            # End of this answer.
            pass

    def iter_call_stack(self, binary, addr):
        """Yield (function_name, file_line) pairs from a CODE query.

        Several pairs may be produced for one address because the symbolizer
        is started with --inlining. Nothing is yielded when the binary cannot
        be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("CODE %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                file_line = self.__read()
                file_line = self.__process_source_path(file_line)
                yield (function_name, file_line)
        except Symbolizer.__EOF:
            # End of this answer.
            pass
def symbolize_line(line, symbolizer_path):
    """Print `line`, symbolized if it is a stack frame line.

    A frame line looks like:
      #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    It is replaced by one line for the outermost frame plus one "->" line per
    additional frame returned by the symbolizer. Any other line, and any frame
    for which no symbol information is returned, is printed unchanged (minus
    trailing whitespace).

    Args:
      line: one line of the report.
      symbolizer_path: unused; lookups go through the module-level
        `symbolizer` object. Kept so existing callers keep working.
    """
    match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    frames = []
    if match:
        binary = match.group(5)
        addr = int(match.group(6), 16)
        frames = list(symbolizer.iter_call_stack(binary, addr))

    if not frames:
        # Not a frame line, or nothing known about it: pass it through.
        print(line.rstrip())
        return

    print("%s#%s%s%s in %s" % (match.group(1), match.group(2),
                               match.group(3), frames[0][0], frames[0][1]))
    # Padding that lines the "->" continuation rows up under the first frame;
    # it depends only on the match, so compute it once.
    space1 = ' ' * match.end(1)
    space2 = ' ' * (match.start(4) - match.end(1) - 2)
    for function_name, file_line in frames[1:]:
        print("%s->%s%s in %s" % (space1, space2, function_name, file_line))
def save_access_address(line):
    """Record the faulting address and pointer tag if `line` reports them.

    Updates the module globals last_access_address (from a
    "HWAddressSanitizer: tag-mismatch on address 0x..." line) and
    last_access_tag (the pointer tag of a "... of size N at 0x... tags:
    PP/MM (ptr/mem)" line). Lines matching neither pattern leave both globals
    untouched.
    """
    global last_access_address, last_access_tag
    match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
    if match:
        last_access_address = int(match.group(2), 16)
    match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
    if match:
        last_access_tag = int(match.group(2), 16)
def process_stack_history(line, symbolizer, ignore_tags=False):
    """Handle a stack-history line of the report.

    For a line of the form
      record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
    the locals of the recorded frame are looked up and every local that
    contains the last reported access address (and, unless `ignore_tags` is
    set, whose tag matches the last reported pointer tag) is printed as a
    potentially referenced stack object.

    Args:
      line: one line of the report.
      symbolizer: object providing iter_locals(binary, addr).
      ignore_tags: when true, report address matches even if the tag differs.

    Returns:
      True when the line was consumed here (the "Previously allocated
      frames:" header or a record line); False otherwise, including when no
      access address/tag has been seen yet.
    """
    if last_access_address is None or last_access_tag is None:
        return False
    if re.match(r'Previously allocated frames:', line, re.UNICODE):
        return True
    fp_mask = (1 << 20) - 1
    # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
    match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    if not match:
        return False

    record_addr = int(match.group(2), 16)
    record = int(match.group(3), 16)
    binary = match.group(4)
    addr = int(match.group(5), 16)
    base_tag = (record_addr >> 3) & 0xFF
    # The bits of the record above bit 47 are used as the frame pointer in
    # 16-byte units; the low 48 bits (presumably the PC -- confirm against
    # the runtime) are not needed here.
    fp = (record >> 48) << 4

    for local in symbolizer.iter_locals(binary, addr):
        # local is (function, file:line, name, offset, size, tag_offset).
        frame_offset = local[3]
        size = local[4]
        if frame_offset is None or size is None:
            continue
        # Only the low 20 bits of the distance are compared (fp_mask).
        obj_offset = (last_access_address - fp - frame_offset) & fp_mask
        if obj_offset >= size:
            continue
        tag_offset = local[5]
        if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
            continue
        print('Potentially referenced stack object:')
        print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
        print(' at %s' % (local[1],))
    return True
# Command line: boolean switches, repeatable path options, an explicit
# symbolizer location, and any remaining positional arguments.
parser = argparse.ArgumentParser()
for switch in ('-d', '-v', '--ignore-tags'):
    parser.add_argument(switch, action='store_true')
for repeatable in ('--symbols', '--source'):
    parser.add_argument(repeatable, action='append')
parser.add_argument('--symbolizer')
parser.add_argument('args', nargs=argparse.REMAINDER)
args = parser.parse_args()
# Unstripped binaries location.
# Without --symbols, fall back to $ANDROID_PRODUCT_OUT/symbols (when set)
# followed by the filesystem root.
binary_prefixes = args.symbols or []
if not binary_prefixes:
    if 'ANDROID_PRODUCT_OUT' in os.environ:
        product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
        binary_prefixes.append(product_out)
    binary_prefixes.append('/')

for p in binary_prefixes:
    if not os.path.isdir(p):
        print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
        # A bad symbols directory can never resolve a binary; stop here
        # instead of continuing with a prefix that cannot work.
        sys.exit(1)

# Source location.
# Without --source, strip the current directory and (when set)
# $ANDROID_BUILD_TOP from reported source paths.
paths_to_cut = args.source or []
if not paths_to_cut:
    paths_to_cut.append(os.getcwd() + '/')
    if 'ANDROID_BUILD_TOP' in os.environ:
        paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
# llvm-symbolizer binary.
# 1. --symbolizer flag
# 2. environment variable
# 3. unsuffixed binary in the current directory
# 4. if inside Android platform, prebuilt binary at a known path
# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
#    highest available version in $PATH
symbolizer_path = args.symbolizer
if not symbolizer_path:
    if 'LLVM_SYMBOLIZER_PATH' in os.environ:
        symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
    elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
        symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']

if not symbolizer_path:
    # Looked up next to this script (dirname of argv[0]).
    s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
    if os.path.exists(s):
        symbolizer_path = s

if not symbolizer_path:
    if 'ANDROID_BUILD_TOP' in os.environ:
        s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
        if os.path.exists(s):
            symbolizer_path = s

if not symbolizer_path:
    # An unset PATH simply means there is nothing to search; it must not
    # abort the lookup with a KeyError.
    path_dirs = os.environ["PATH"].split(os.pathsep) if "PATH" in os.environ else []
    for path in path_dirs:
        p = os.path.join(path, 'llvm-symbolizer')
        if os.path.exists(p):
            symbolizer_path = p
            break
def extract_version(s):
    """Return the numeric suffix after the last '-' in `s`, for sorting.

    For example "llvm-symbolizer-14" gives 14.0. Names without a '-' or with
    a suffix that is not a number give 0, so they sort below every versioned
    candidate instead of aborting the sort.
    """
    idx = s.rfind('-')
    if idx == -1:
        return 0
    try:
        return float(s[idx + 1:])
    except ValueError:
        return 0
if not symbolizer_path:
    # Last resort: the highest-versioned "llvm-symbolizer-*" in the first
    # PATH directory that has any. An unset PATH means nothing to search.
    path_dirs = os.environ["PATH"].split(os.pathsep) if "PATH" in os.environ else []
    for path in path_dirs:
        candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
        if len(candidates) > 0:
            candidates.sort(key=extract_version, reverse=True)
            symbolizer_path = candidates[0]
            break

# symbolizer_path is still empty when every lookup step failed; test for that
# first because os.path.exists(None) raises TypeError on Python 3.
if not symbolizer_path or not os.path.exists(symbolizer_path):
    print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
    sys.exit(1)
# With -v, show the effective configuration before processing any input.
if args.v:
    print("Looking for symbols in:")
    for prefix in binary_prefixes:
        print(" %s" % (prefix,))
    print("Stripping source path prefixes:")
    for prefix in paths_to_cut:
        print(" %s" % (prefix,))
    print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))

symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
symbolizer.enable_logging(args.d)

# Filter stdin to stdout one line at a time.
for line in sys.stdin:
    if sys.version_info.major < 3:
        # Python 2 reads bytes from stdin; work on text.
        line = line.decode('utf-8')
    save_access_address(line)
    # Lines consumed by the stack-history handler are not symbolized again.
    if not process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
        symbolize_line(line, symbolizer_path)