[clang][extract-api] Emit "navigator" property of "name" in SymbolGraph
[llvm-project.git] / compiler-rt / lib / hwasan / scripts / hwasan_symbolize
blobf73ea1783d758f4a92ed04f8cdefd40cd80c2d8a
1 #!/usr/bin/env python
2 #===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 #===------------------------------------------------------------------------===#
10 # HWAddressSanitizer offline symbolization script.
12 #===------------------------------------------------------------------------===#
14 from __future__ import print_function
15 from __future__ import unicode_literals
17 import glob
18 import os
19 import re
20 import sys
21 import string
22 import subprocess
23 import argparse
if sys.version_info[0] < 3:
    # Python 2 only: make print() emit UTF-8 the way Python 3 does by default,
    # so that non-ASCII symbol names can be written to stdout.
    import codecs
    sys.stdout = codecs.getwriter("utf-8")(sys.stdout)

# State captured from the report while it is being read: the faulting address
# and the pointer tag of the most recent tag-mismatch. Both stay None until
# save_access_address() has seen the corresponding report lines.
last_access_address = None
last_access_tag = None
class Symbolizer:
    """Talks to a single llvm-symbolizer subprocess over its stdin/stdout.

    The subprocess is started lazily on the first query and reused afterwards.
    Queries are written as one text line ("CODE ..." or "FRAME ..."); the reply
    is read line by line until an empty line is seen.
    """

    def __init__(self, path, binary_prefixes, paths_to_cut):
        """Remember the configuration; no process is started here.

        Args:
          path: executable launched as the symbolizer.
          binary_prefixes: directories searched for the binaries named in the
            report.
          paths_to_cut: literal source-path prefixes removed from reported
            file names.
        """
        self.__pipe = None
        self.__path = path
        self.__binary_prefixes = binary_prefixes
        self.__paths_to_cut = paths_to_cut
        self.__log = False
        # Binaries already reported as missing, so each is warned about once.
        self.__warnings = set()

    def enable_logging(self, enable):
        """Turn echoing of the symbolizer conversation to stderr on or off."""
        self.__log = enable

    def __open_pipe(self):
        """Start the symbolizer subprocess unless it is already running."""
        if not self.__pipe:
            opt = {}
            if sys.version_info.major > 2:
                # Text-mode pipes on Python 3; Python 2 Popen has no such option.
                opt['encoding'] = 'utf-8'
            self.__pipe = subprocess.Popen(
                [self.__path, "--inlining", "--functions"],
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                **opt)

    class __EOF(Exception):
        """Raised by __read() when the current reply has ended."""
        pass

    def __write(self, s):
        """Send one request line to the symbolizer and flush it."""
        print(s, file=self.__pipe.stdin)
        self.__pipe.stdin.flush()
        if self.__log:
            print("#>> |%s|" % (s,), file=sys.stderr)

    def __read(self):
        """Return the next non-empty reply line.

        Raises:
          Symbolizer.__EOF: the line read was empty (after stripping), which
            also covers the pipe having been closed.
        """
        s = self.__pipe.stdout.readline().rstrip()
        if self.__log:
            print("# << |%s|" % (s,), file=sys.stderr)
        if s == '':
            raise Symbolizer.__EOF
        return s

    def __process_source_path(self, file_name):
        """Shorten a "file:line" string for display.

        Strips every configured source prefix (and whatever precedes it), then
        collapses hwasan/asan runtime sources and crtstuff.c:0 into short
        placeholders.
        """
        for path_to_cut in self.__paths_to_cut:
            # The prefixes are plain paths (e.g. the current directory), not
            # patterns: escape them so characters such as '+', '(' or '.' are
            # matched literally instead of breaking or widening the regex.
            file_name = re.sub(".*" + re.escape(path_to_cut), "", file_name)
        file_name = re.sub(r".*hwasan_[a-z_]*\.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
        file_name = re.sub(r".*asan_[a-z_]*\.(cc|h):[0-9]*", "[asan_rtl]", file_name)
        file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
        return file_name

    def __process_binary_name(self, name):
        """Map a binary path from the report to an existing local file.

        Returns:
          The first existing candidate path, or None when nothing was found
          (a warning is printed to stderr the first time for each name).
        """
        if name.startswith('/'):
            # os.path.join() would discard the prefix for an absolute name.
            name = name[1:]
        apex_prefix = "apex/com.android."
        for p in self.__binary_prefixes:
            full_path = os.path.join(p, name)
            if os.path.exists(full_path):
                return full_path
            if name.startswith(apex_prefix):
                # Retry with the "com.google.android." spelling of the apex dir.
                full_path = os.path.join(
                    p, "apex/com.google.android." + name[len(apex_prefix):])
                if os.path.exists(full_path):
                    return full_path
        # Try stripping extra path components as the last resort.
        for p in self.__binary_prefixes:
            full_path = os.path.join(p, os.path.basename(name))
            if os.path.exists(full_path):
                return full_path
        if name not in self.__warnings:
            print("Could not find symbols for", name, file=sys.stderr)
            self.__warnings.add(name)
        return None

    @staticmethod
    def __optional_int(field):
        """Convert a reply field to int; the placeholder '??' becomes None."""
        return None if field == '??' else int(field)

    def iter_locals(self, binary, addr):
        """Yield the local variables of the frame(s) at `addr` in `binary`.

        Sends a "FRAME" request and reads the reply in groups of four lines
        (function name, variable name, file:line, and a line of three
        whitespace-separated numeric fields).

        Yields:
          (function_name, file_line, local_name, offset, size, tag_offset);
          each of the last three is an int, or None when reported as '??'.
          Nothing is yielded when the binary cannot be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("FRAME %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                local_name = self.__read()
                file_line = self.__read()
                extra = self.__read().split()

                file_line = self.__process_source_path(file_line)
                offset = Symbolizer.__optional_int(extra[0])
                size = Symbolizer.__optional_int(extra[1])
                tag_offset = Symbolizer.__optional_int(extra[2])
                yield (function_name, file_line, local_name, offset, size, tag_offset)
        except Symbolizer.__EOF:
            # End of this reply: the generator simply finishes.
            pass

    def iter_call_stack(self, binary, addr):
        """Yield (function_name, file_line) for each frame at `addr`.

        Sends a "CODE" request; with --inlining the reply may contain several
        function/location pairs for one address. Nothing is yielded when the
        binary cannot be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("CODE %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                file_line = self.__read()
                file_line = self.__process_source_path(file_line)
                yield (function_name, file_line)
        except Symbolizer.__EOF:
            # End of this reply: the generator simply finishes.
            pass
def symbolize_line(line, symbolizer_path):
    """Print `line`, with a raw stack frame replaced by its symbolized form.

    A frame line looks like:
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    The first resolved frame is printed in place of the address; any further
    (inlined) frames follow on "->" lines aligned under it. Lines that are not
    frames, or frames that cannot be resolved, are printed unchanged apart
    from trailing whitespace being stripped.

    NOTE: `symbolizer_path` is not consulted here; frames are resolved through
    the module-level `symbolizer` object.
    """
    frame = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    if not frame:
        print(line.rstrip())
        return

    module = frame.group(5)
    module_offset = int(frame.group(6), 16)
    stack = list(symbolizer.iter_call_stack(module, module_offset))
    if not stack:
        print(line.rstrip())
        return

    outermost = stack[0]
    print("%s#%s%s%s in %s" % (frame.group(1), frame.group(2),
                               frame.group(3), outermost[0], outermost[1]))

    # Continuation lines: same leading text width, then "->" standing in for
    # the "#N" marker, padded so the function name lines up with the address.
    indent = ' ' * frame.end(1)
    padding = ' ' * (frame.start(4) - frame.end(1) - 2)
    for inlined in stack[1:]:
        print("%s->%s%s in %s" % (indent, padding, inlined[0], inlined[1]))
def save_access_address(line):
    """Record the faulting address and pointer tag if `line` reports them.

    Updates the module globals `last_access_address` (from the
    "tag-mismatch on address 0x..." line) and `last_access_tag` (the pointer
    tag from the "... tags: PP/MM (ptr/mem)" line). Lines matching neither
    pattern leave both globals untouched.
    """
    global last_access_address, last_access_tag

    address_line = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
    if address_line is not None:
        last_access_address = int(address_line.group(2), 16)

    tags_line = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
    if tags_line is not None:
        # Only the pointer tag (the value before the slash) is kept.
        last_access_tag = int(tags_line.group(2), 16)
def process_stack_history(line, symbolizer, ignore_tags=False):
    """Handle one line of the "Previously allocated frames" section.

    For a stack-history record line, asks `symbolizer` for the locals of the
    recorded frame and prints every local whose storage covers the last
    faulting address (and, unless `ignore_tags` is set, whose tag matches the
    faulting pointer tag).

    Returns:
      None when no access address/tag has been recorded yet, True when the
      line was consumed here (section header or record line), and False for
      any other line.
    """
    if last_access_address is None or last_access_tag is None:
        return
    if re.match(r'Previously allocated frames:', line, re.UNICODE):
        return True

    # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
    entry = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    if not entry:
        return False

    record_addr = int(entry.group(2), 16)
    record = int(entry.group(3), 16)
    module = entry.group(4)
    module_offset = int(entry.group(5), 16)

    # The frame's base tag is taken from the record's own address; the bits of
    # `record` above bit 47 hold the frame pointer shifted right by 4. The low
    # 48 bits of `record` are not needed here.
    base_tag = (record_addr >> 3) & 0xFF
    fp = (record >> 48) << 4
    fp_mask = (1 << 20) - 1  # offsets are compared modulo 2**20

    for entry_local in symbolizer.iter_locals(module, module_offset):
        frame_offset, size = entry_local[3], entry_local[4]
        if frame_offset is None or size is None:
            continue
        obj_offset = (last_access_address - fp - frame_offset) & fp_mask
        if obj_offset >= size:
            continue
        if not ignore_tags:
            tag_offset = entry_local[5]
            if tag_offset is None or (base_tag ^ tag_offset) != last_access_tag:
                continue
        print('')
        print('Potentially referenced stack object:')
        print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, entry_local[2], entry_local[0]))
        print(' at %s' % (entry_local[1],))
    return True
# Command-line interface.
parser = argparse.ArgumentParser()
# -d: echo the conversation with the symbolizer process to stderr.
parser.add_argument('-d', action='store_true')
# -v: print the resolved configuration before processing input.
parser.add_argument('-v', action='store_true')
# --ignore-tags: report candidate stack objects even when tags do not match.
parser.add_argument('--ignore-tags', action='store_true')
# --symbols DIR (repeatable): where to look for unstripped binaries.
parser.add_argument('--symbols', action='append')
# --source PREFIX (repeatable): source path prefix to strip from output.
parser.add_argument('--source', action='append')
# --symbolizer PATH: symbolizer binary to run.
parser.add_argument('--symbolizer')
# Everything else on the command line is collected verbatim.
parser.add_argument('args', nargs=argparse.REMAINDER)
args = parser.parse_args()
# Unstripped binaries location: the --symbols directories, or by default
# $ANDROID_PRODUCT_OUT/symbols (when that variable is set) followed by '/'.
binary_prefixes = args.symbols or []
if not binary_prefixes:
    android_product_out = os.environ.get('ANDROID_PRODUCT_OUT')
    if android_product_out is not None:
        binary_prefixes.append(os.path.join(android_product_out, 'symbols'))
    binary_prefixes.append('/')

# Every symbols location must be an existing directory; bail out otherwise.
for prefix in binary_prefixes:
    if os.path.isdir(prefix):
        continue
    print("Symbols path does not exist or is not a directory:", prefix, file=sys.stderr)
    sys.exit(1)

# Source location: the --source prefixes, or by default the current directory
# plus $ANDROID_BUILD_TOP (when that variable is set), each with a trailing '/'.
paths_to_cut = args.source or []
if not paths_to_cut:
    paths_to_cut.append(os.getcwd() + '/')
    android_build_top = os.environ.get('ANDROID_BUILD_TOP')
    if android_build_top is not None:
        paths_to_cut.append(android_build_top + '/')
# llvm-symbolizer binary, in order of preference:
# 1. the --symbolizer flag
# 2. the LLVM_SYMBOLIZER_PATH, then HWASAN_SYMBOLIZER_PATH environment variable
# 3. an unsuffixed "llvm-symbolizer" in the directory of this script
#    (dirname of sys.argv[0])
# 4. if inside the Android platform tree, the prebuilt binary at a known path
# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
#    highest available version in $PATH
symbolizer_path = args.symbolizer

if not symbolizer_path:
    # The first variable that is set wins, even if its value is empty.
    for env_name in ('LLVM_SYMBOLIZER_PATH', 'HWASAN_SYMBOLIZER_PATH'):
        if env_name in os.environ:
            symbolizer_path = os.environ[env_name]
            break

if not symbolizer_path:
    beside_script = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
    if os.path.exists(beside_script):
        symbolizer_path = beside_script

if not symbolizer_path and 'ANDROID_BUILD_TOP' in os.environ:
    android_prebuilt = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
    if os.path.exists(android_prebuilt):
        symbolizer_path = android_prebuilt

if not symbolizer_path:
    # Unsuffixed binary from the first $PATH entry that has one.
    for directory in os.environ["PATH"].split(os.pathsep):
        unsuffixed = os.path.join(directory, 'llvm-symbolizer')
        if os.path.exists(unsuffixed):
            symbolizer_path = unsuffixed
            break
def extract_version(s):
    """Return the numeric version suffix of a symbolizer path, for sorting.

    The version is whatever follows the last '-' in `s`, e.g.
    "llvm-symbolizer-14" -> 14.0 and "llvm-symbolizer-3.9" -> 3.9.

    Returns:
      The suffix as a float, or 0 when `s` has no '-' or the suffix is not a
      number (e.g. "llvm-symbolizer-14.exe"), so that such names sort last
      instead of aborting the search with a ValueError.
    """
    idx = s.rfind('-')
    if idx == -1:
        return 0
    try:
        return float(s[idx + 1:])
    except ValueError:
        return 0
if not symbolizer_path:
    # Last resort: a version-suffixed binary. Take the highest version found
    # in the first $PATH entry that contains any "llvm-symbolizer-*".
    for path in os.environ["PATH"].split(os.pathsep):
        candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
        if candidates:
            symbolizer_path = max(candidates, key=extract_version)
            break

# Every lookup step may have come up empty; os.path.exists(None) would raise
# a TypeError, so report that case explicitly before checking the path.
if not symbolizer_path:
    print("Could not find an llvm-symbolizer binary; use --symbolizer or set LLVM_SYMBOLIZER_PATH.", file=sys.stderr)
    sys.exit(1)

if not os.path.exists(symbolizer_path):
    print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
    sys.exit(1)
# With -v, show the configuration that was resolved above.
if args.v:
    print("Looking for symbols in:")
    for symbols_dir in binary_prefixes:
        print(" %s" % (symbols_dir,))
    print("Stripping source path prefixes:")
    for source_prefix in paths_to_cut:
        print(" %s" % (source_prefix,))
    print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
    print()

symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
symbolizer.enable_logging(args.d)

# Filter the report from stdin to stdout, one line at a time.
for line in sys.stdin:
    if sys.version_info.major < 3:
        # Python 2 yields byte strings; the rest of the script works on text.
        line = line.decode('utf-8')
    save_access_address(line)
    # Stack-history lines are consumed (and summarized) by
    # process_stack_history(); everything else goes through symbolize_line().
    if not process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
        symbolize_line(line, symbolizer_path)