#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#===------------------------------------------------------------------------===#
# HWAddressSanitizer offline symbolization script.
#===------------------------------------------------------------------------===#
14 from __future__ import print_function
15 from __future__ import unicode_literals
28 if sys.version_info.major < 3:
29 # Simulate Python 3.x behaviour of defaulting to UTF-8 for print. This is
30 # important in case any symbols are non-ASCII.
32 sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
# Below is a parser for a subset of ELF. It supports only 64-bit,
# little-endian files and parses only what is necessary to find the build IDs.
# It uses a memoryview into an mmap to avoid copying.
def align_up(size, alignment):
    """Round ``size`` up to the next multiple of ``alignment``.

    The rounding is done with a bit mask, so the result is a true multiple
    of ``alignment`` only when ``alignment`` is a power of two.
    """
    mask = alignment - 1
    return (size + mask) & ~mask
53 def handle_Nhdr(mv, sh_size):
55 while offset < sh_size:
56 n_namesz, n_descsz, n_type = struct.unpack_from('<III', buffer=mv,
58 if (n_type == NT_GNU_BUILD_ID and n_namesz == 4 and
59 mv[offset + Nhdr_size: offset + Nhdr_size + 4] == b"GNU\x00"):
60 value = mv[offset + Nhdr_size + 4: offset + Nhdr_size + 4 + n_descsz]
62 offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
66 sh_type, = struct.unpack_from('<I', buffer=mv, offset=sh_type_offset)
67 if sh_type != SHT_NOTE:
69 sh_offset, = struct.unpack_from('<Q', buffer=mv, offset=sh_offset_offset)
70 sh_size, = struct.unpack_from('<Q', buffer=mv, offset=sh_size_offset)
71 return sh_offset, sh_size
74 # \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
75 # 64-bit little endian platforms (x86_64 and ARM64). If this changes, we will
76 # have to extend the parsing code.
77 if mv[:6] != b'\x7fELF\x02\x01':
79 e_shnum, = struct.unpack_from('<H', buffer=mv, offset=e_shnum_offset)
80 e_shoff, = struct.unpack_from('<Q', buffer=mv, offset=e_shoff_offset)
81 for i in range(0, e_shnum):
82 start = e_shoff + i * Shdr_size
83 sh_offset, sh_size = handle_Shdr(mv[start: start + Shdr_size])
86 note_hdr = mv[sh_offset: sh_offset + sh_size]
87 result = handle_Nhdr(note_hdr, sh_size)
88 if result is not None:
91 def get_buildid(filename):
92 with open(filename, "r") as fd:
93 if os.fstat(fd.fileno()).st_size < Ehdr_size:
95 with mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ) as m:
96 with memoryview(m) as mv:
100 def __init__(self, path, binary_prefixes, paths_to_cut):
103 self.__binary_prefixes = binary_prefixes
104 self.__paths_to_cut = paths_to_cut
106 self.__warnings = set()
108 self.__link_prefixes = []
110 self.__last_access_address = None
111 self.__last_access_tag = None
113 self.__tag_dump_match_idx = None
114 self.__matched_stack_uas = False
117 def enable_html(self, enable):
120 def enable_logging(self, enable):
123 def maybe_escape(self, text):
# We need to manually use &nbsp; for leading spaces, html.escape does
# not do that, and HTML ignores them.
128 for i, c in enumerate(text):
133 return spaces * ' ' + html.escape(text)
136 def print(self, line, escape=True):
138 line = self.maybe_escape(line)
143 def read_linkify(self, filename):
144 with open(filename, 'r') as fd:
146 self.__link_prefixes = [(e["prefix"], e["link"]) for e in data]
148 def __open_pipe(self):
151 if sys.version_info.major > 2:
152 opt['encoding'] = 'utf-8'
153 self.__pipe = subprocess.Popen([self.__path, "--inlining", "--functions"],
154 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
157 class __EOF(Exception):
160 def __write(self, s):
161 print(s, file=self.__pipe.stdin)
162 self.__pipe.stdin.flush()
164 print("#>> |%s|" % (s,), file=sys.stderr)
167 s = self.__pipe.stdout.readline().rstrip()
169 print("# << |%s|" % (s,), file=sys.stderr)
171 raise Symbolizer.__EOF
174 def __process_source_path(self, file_name):
175 for path_to_cut in self.__paths_to_cut:
176 file_name = re.sub(".*" + path_to_cut, "", file_name)
177 file_name = re.sub(".*hwasan_[a-z_]*.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
178 file_name = re.sub(".*asan_[a-z_]*.(cc|h):[0-9]*", "[asan_rtl]", file_name)
179 file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
182 def __process_binary_name(self, name, buildid):
183 if name.startswith('/'):
185 if buildid is not None and buildid in self.__index:
186 return self.__index[buildid]
188 for p in self.__binary_prefixes:
189 full_path = os.path.join(p, name)
190 if os.path.exists(full_path):
192 apex_prefix = "apex/com.android."
193 if name.startswith(apex_prefix):
194 full_path = os.path.join(p, "apex/com.google.android." + name[len(apex_prefix):])
195 if os.path.exists(full_path):
197 # Try stripping extra path components as the last resort.
198 for p in self.__binary_prefixes:
199 full_path = os.path.join(p, os.path.basename(name))
200 if os.path.exists(full_path):
202 if name not in self.__warnings:
203 print("Could not find symbols for", name, file=sys.stderr)
204 self.__warnings.add(name)
207 def iter_locals(self, binary, addr, buildid):
210 binary = self.__process_binary_name(binary, buildid)
213 self.__write("FRAME %s %s" % (binary, addr))
216 function_name = self.__read()
217 local_name = self.__read()
218 file_line = self.__read()
219 extra = self.__read().split()
221 file_line = self.__process_source_path(file_line)
222 offset = None if extra[0] == '??' else int(extra[0])
223 size = None if extra[1] == '??' else int(extra[1])
224 tag_offset = None if extra[2] == '??' else int(extra[2])
225 yield (function_name, file_line, local_name, offset, size, tag_offset)
226 except Symbolizer.__EOF:
229 def iter_call_stack(self, binary, buildid, addr):
232 binary = self.__process_binary_name(binary, buildid)
235 self.__write("CODE %s %s" % (binary, addr))
238 function_name = self.__read()
239 file_line = self.__read()
240 file_line = self.__process_source_path(file_line)
241 yield (function_name, file_line)
242 except Symbolizer.__EOF:
245 def maybe_linkify(self, file_line):
246 if not self.__html or not self.__link_prefixes:
248 filename, line_col = file_line.split(':', 1)
250 line = '0' # simplify the link generation
252 line = line_col.split(':')[0]
253 longest_prefix = max((
254 (prefix, link) for prefix, link in self.__link_prefixes
255 if filename.startswith(prefix)),
256 key=lambda x: len(x[0]), default=None)
257 if longest_prefix is None:
260 prefix, link = longest_prefix
261 return '<a href="{}">{}</a>'.format(
262 html.escape(link.format(file=filename[len(prefix):], line=line,
263 file_line=file_line, prefix=prefix)), file_line)
265 def build_index(self):
266 for p in self.__binary_prefixes:
267 for dname, _, fnames in os.walk(p):
269 filename = os.path.join(dname, fn)
271 bid = get_buildid(filename)
272 except FileNotFoundError:
274 except Exception as e:
275 print("Failed to parse {}: {}".format(filename, e), file=sys.stderr)
278 self.__index[bid] = filename
280 def symbolize_line(self, line):
281 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
282 match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
283 r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
285 frameno = match.group(2)
286 binary = match.group(5)
287 addr = int(match.group(6), 16)
288 buildid = match.group(7)
290 frames = list(self.iter_call_stack(binary, buildid, addr))
295 "%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
297 ) + self.maybe_linkify(frames[0][1]),
299 for i in range(1, len(frames)):
300 space1 = ' ' * match.end(1)
301 space2 = ' ' * (match.start(4) - match.end(1) - 2)
303 self.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
304 + self.maybe_linkify(frames[i][1]), escape=False)
306 self.print(line.rstrip())
308 self.print(line.rstrip())
310 def save_access_address(self, line):
311 match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
313 self.__last_access_address = int(match.group(2), 16)
314 match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+(\([0-9a-f]+\))? \(ptr/mem\)', line, re.UNICODE)
316 self.__last_access_tag = int(match.group(2), 16)
318 def process_tag_dump_line(self, line, ignore_tags=False):
319 m = re.match(r'.*?(0x[0-9a-f]+):' + r'([ ]*[\[ ][0-9a-f][0-9a-f]\]?)' * 16, line)
323 tags = m.group(*range(2, 18))
324 fault = [i for i, x in enumerate(tags) if '[' in x]
326 self.__tag_dump_match_idx = len(self.__tag_dump) + fault[0]
327 self.__tag_dump.extend(int(x.strip(' [').rstrip('] '), 16) for x in tags)
330 def finish_tag_dump(self):
331 if self.__matched_stack_uas or self.__tag_dump_match_idx is None:
333 for offset, size, local in sorted(self.__offsets, key=lambda x: abs(x[0])):
334 idx = self.__tag_dump_match_idx - offset // 16
335 if idx < 0 or idx > len(self.__tag_dump):
337 if self.__tag_dump[idx] == self.__last_access_tag:
339 self.print('Potentially referenced stack object:')
341 self.print(' %d bytes after a variable "%s" in stack frame of function "%s"' % (offset - size, local[2], local[0]))
343 self.print(' %d bytes before a variable "%s" in stack frame of function "%s"' % (-offset, local[2], local[0]))
344 self.print(' at %s' % (local[1],))
346 def process_stack_history(self, line, ignore_tags=False):
347 if self.__last_access_address is None or self.__last_access_tag is None:
349 if re.match(r'Previously allocated frames:', line, re.UNICODE):
351 pc_mask = (1 << 48) - 1
352 fp_mask = (1 << 20) - 1
353 # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
354 match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
355 r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
357 record_addr = int(match.group(2), 16)
358 record = int(match.group(3), 16)
359 binary = match.group(4)
360 addr = int(match.group(5), 16)
361 buildid = match.group(6)
362 base_tag = (record_addr >> 3) & 0xFF
363 fp = (record >> 48) << 4
364 pc = record & pc_mask
366 for local in self.iter_locals(binary, addr, buildid):
367 frame_offset = local[3]
369 if frame_offset is None or size is None:
371 obj_offset = (self.__last_access_address & fp_mask) - ((fp & fp_mask) + frame_offset)
372 tag_offset = local[5]
373 if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
375 if obj_offset < 0 or obj_offset >= size:
376 self.__offsets.append((obj_offset, size, local))
379 self.print('Potentially referenced stack object:')
380 self.print(' %d bytes inside a variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
381 self.print(' at %s' % (local[1],))
382 self.__matched_stack_uas = True
386 def extract_version(s):
390 x = float(s[idx + 1:])
394 parser = argparse.ArgumentParser()
395 parser.add_argument('-d', action='store_true')
396 parser.add_argument('-v', action='store_true')
397 parser.add_argument('--ignore-tags', action='store_true')
398 parser.add_argument('--symbols', action='append')
399 parser.add_argument('--source', action='append')
400 parser.add_argument('--index', action='store_true')
401 parser.add_argument('--symbolizer')
402 parser.add_argument('--linkify', type=str)
403 parser.add_argument('--html', action='store_true')
404 parser.add_argument('args', nargs=argparse.REMAINDER)
405 args = parser.parse_args()
407 # Unstripped binaries location.
408 binary_prefixes = args.symbols or []
409 if not binary_prefixes:
410 if 'ANDROID_PRODUCT_OUT' in os.environ:
411 product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
412 binary_prefixes.append(product_out)
413 binary_prefixes.append('/')
415 for p in binary_prefixes:
416 if not os.path.isdir(p):
417 print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
421 paths_to_cut = args.source or []
423 paths_to_cut.append(os.getcwd() + '/')
424 if 'ANDROID_BUILD_TOP' in os.environ:
425 paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
# llvm-symbolizer binary, searched for in this order:
# 1. --symbolizer flag
# 2. LLVM_SYMBOLIZER_PATH, then HWASAN_SYMBOLIZER_PATH environment variable
# 3. unsuffixed "llvm-symbolizer" in the directory containing this script
# 4. if ANDROID_BUILD_TOP is set, prebuilt binary at a known path under it
# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
#    highest available version in $PATH
434 symbolizer_path = args.symbolizer
435 if not symbolizer_path:
436 if 'LLVM_SYMBOLIZER_PATH' in os.environ:
437 symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
438 elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
439 symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
441 if not symbolizer_path:
442 s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
443 if os.path.exists(s):
446 if not symbolizer_path:
447 if 'ANDROID_BUILD_TOP' in os.environ:
448 s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
449 if os.path.exists(s):
452 if not symbolizer_path:
453 for path in os.environ["PATH"].split(os.pathsep):
454 p = os.path.join(path, 'llvm-symbolizer')
455 if os.path.exists(p):
459 if not symbolizer_path:
460 for path in os.environ["PATH"].split(os.pathsep):
461 candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
462 if len(candidates) > 0:
463 candidates.sort(key = extract_version, reverse = True)
464 symbolizer_path = candidates[0]
467 if not os.path.exists(symbolizer_path):
468 print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
472 print("Looking for symbols in:")
473 for s in binary_prefixes:
475 print("Stripping source path prefixes:")
476 for s in paths_to_cut:
478 print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
481 symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
482 symbolizer.enable_html(args.html)
483 symbolizer.enable_logging(args.d)
485 symbolizer.build_index()
489 print('Need --html to --linkify', file=sys.stderr)
491 symbolizer.read_linkify(args.linkify)
494 for line in sys.stdin:
495 if sys.version_info.major < 3:
496 line = line.decode('utf-8')
498 tag_dump = symbolizer.process_tag_dump_line(line)
501 symbolizer.finish_tag_dump()
502 if 'Memory tags around the buggy address' in line:
505 symbolizer.save_access_address(line)
506 if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
508 symbolizer.symbolize_line(line)
511 if __name__ == '__main__':