Update docs and command line flags for MB.
[chromium-blink-merge.git] / third_party / android_platform / development / scripts / symbol.py
blob4cde7d965afa27a0a5fe33fe928e8804fd4b92c2
1 #!/usr/bin/python
3 # Copyright (C) 2013 The Android Open Source Project
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Module for looking up symbolic debugging information.
19 The information can include symbol names, offsets, and source locations.
20 """
22 import glob
23 import itertools
24 import logging
25 import os
26 import re
27 import struct
28 import subprocess
29 import zipfile
# Root of the Chromium checkout: the directory holding this script followed by
# four parent-directory components.
CHROME_SRC = os.path.join(
    os.path.realpath(os.path.dirname(__file__)),
    *([os.pardir] * 4))

# The Android build top and both symbol roots all default to the checkout root.
ANDROID_BUILD_TOP = SYMBOLS_DIR = CHROME_SYMBOLS_DIR = CHROME_SRC

# Target architecture; selects the toolchain in ToolPath() and FindToolchain().
ARCH = "arm"

# Toolchain tuple cached by FindToolchain(); None until one has been found.
TOOLCHAIN_INFO = None
# See:
# http://bugs.python.org/issue14315
# https://hg.python.org/cpython/rev/6dd5e9556a60#l2.8
def PatchZipFile():
    """Make zipfile ignore extra fields that fail to unpack.

    Replaces the private zipfile.ZipInfo._decodeExtra with a wrapper that
    calls the previous implementation and swallows struct.error, so such an
    error no longer propagates out of the archive-reading code.
    """
    oldDecodeExtra = zipfile.ZipInfo._decodeExtra

    def decodeExtra(self, *args, **kwargs):
        # _decodeExtra is private and its parameter list is not guaranteed to
        # stay the same, so every argument is forwarded untouched instead of
        # assuming the method only ever receives `self`.
        try:
            return oldDecodeExtra(self, *args, **kwargs)
        except struct.error:
            # Deliberate best effort: an undecodable extra field is skipped.
            pass

    zipfile.ZipInfo._decodeExtra = decodeExtra


PatchZipFile()
def Uname():
    """Return the host tag used in prebuilt/<...> and out/host/<...> paths.

    Returns:
      "linux-x86" on Linux; "darwin-x86" on Darwin when the machine field is
      "i386" or "x86_64", otherwise "darwin-ppc"; on any other system the raw
      system name reported by os.uname().
    """
    uname_fields = os.uname()
    system = uname_fields[0]
    if system == "Linux":
        return "linux-x86"
    if system != "Darwin":
        # Unrecognised systems are reported under their own name.
        return system
    machine = uname_fields[-1]
    if machine in ("i386", "x86_64"):
        return "darwin-x86"
    return "darwin-ppc"
def ToolPath(tool, toolchain_info=None):
    """Return the fully qualified path to a prebuilt toolchain binary.

    Args:
      tool: tool name without the target prefix, e.g. "addr2line".
      toolchain_info: accepted so callers can pass it; not used here.

    Returns:
      The path under CHROME_SRC of "<target prefix>-<tool>" for the current
      ARCH.

    Raises:
      Exception: if ARCH is not one of the architectures listed below.
    """
    # The tools are taken from the checked-in android_tools NDK.
    # ARCH value -> (toolchain directory name, prefix of the tool binaries).
    toolchains = {
        "arm": ("arm-linux-androideabi-4.9", "arm-linux-androideabi"),
        "arm64": ("aarch64-linux-android-4.9", "aarch64-linux-android"),
        "x86": ("x86-4.9", "i686-linux-android"),
        "x86_64": ("x86_64-4.9", "x86_64-linux-android"),
        "x64": ("x86_64-4.9", "x86_64-linux-android"),
        "mips": ("mipsel-linux-android-4.9", "mipsel-linux-android"),
    }
    if ARCH not in toolchains:
        raise Exception("Could not find tool chain")
    toolchain_source, toolchain_prefix = toolchains[ARCH]
    ndk = "ndk"

    bin_dir = (
        "third_party/android_tools/%s/toolchains/%s/prebuilt/linux-x86_64/bin" %
        (ndk, toolchain_source))
    tool_name = toolchain_prefix + "-" + tool
    return os.path.join(CHROME_SRC, bin_dir, tool_name)
def FindToolchain():
    """Look for the latest available toolchain.

    The first successful lookup is cached in the module-level TOOLCHAIN_INFO
    and returned directly by later calls.

    Args:
      None

    Returns:
      A (label, platform, target) tuple of strings: the toolchain directory
      label, the platform name and the target prefix.

    Raises:
      Exception: if no known toolchain for ARCH provides an addr2line binary.
    """
    global TOOLCHAIN_INFO
    if TOOLCHAIN_INFO is not None:
        return TOOLCHAIN_INFO

    ## Known toolchains, newer ones in the front.
    gcc_version = "4.9"
    if ARCH == "arm64":
        known_toolchains = [
            ("aarch64-linux-android-" + gcc_version, "aarch64",
             "aarch64-linux-android")
        ]
    elif ARCH == "arm":
        known_toolchains = [
            ("arm-linux-androideabi-" + gcc_version, "arm",
             "arm-linux-androideabi")
        ]
    elif ARCH == "x86":
        known_toolchains = [
            ("x86-" + gcc_version, "x86", "i686-linux-android")
        ]
    elif ARCH == "x86_64" or ARCH == "x64":
        known_toolchains = [
            ("x86_64-" + gcc_version, "x86_64", "x86_64-linux-android")
        ]
    elif ARCH == "mips":
        known_toolchains = [
            ("mipsel-linux-android-" + gcc_version, "mips",
             "mipsel-linux-android")
        ]
    else:
        known_toolchains = []

    logging.debug('FindToolchain: known_toolchains=%s', known_toolchains)
    # Look for addr2line to check for valid toolchain path.
    for toolchain_info in known_toolchains:
        if os.path.exists(ToolPath("addr2line", toolchain_info)):
            TOOLCHAIN_INFO = toolchain_info
            # Single-argument call form: valid as both a Python 2 print
            # statement and a Python 3 function call.
            print("Using toolchain from :" + ToolPath("", TOOLCHAIN_INFO))
            return toolchain_info

    raise Exception("Could not find tool chain")
def GetAapt():
    """Locate the 'aapt' executable inside the Android SDK build-tools.

    The SDK root comes from the SDK_HOME environment variable and falls back
    to the relative path third_party/android_tools/sdk.

    Args:
      None

    Returns:
      The pathname of the most recently modified 'aapt' matching
      <sdk>/build-tools/*/aapt, or None if nothing matches.
    """
    default_sdk = os.path.join('third_party', 'android_tools', 'sdk')
    sdk_root = os.environ.get('SDK_HOME', default_sdk)
    found = glob.glob(os.path.join(sdk_root, 'build-tools', '*', 'aapt'))
    if found:
        # max() returns the first of several equally new files, which is what
        # a stable newest-first sort would put in front.
        return max(found, key=os.path.getmtime)
    return None
def ApkMatchPackageName(aapt, apk_path, package_name):
    """Returns true if the APK's package name matches package_name.

    Args:
      aapt: pathname for the 'aapt' executable.
      apk_path: pathname of the APK file.
      package_name: package name to match.

    Returns:
      True if the package name matches or aapt is None, False otherwise.

    Raises:
      subprocess.CalledProcessError: if aapt exits with a non-zero status.
    """
    if not aapt:
        # Allow false positives
        return True
    # universal_newlines=True makes check_output return text, so the output
    # can be split and matched with str patterns on interpreters where a
    # pipe otherwise yields bytes.
    aapt_output = subprocess.check_output(
        [aapt, 'dump', 'badging', apk_path],
        universal_newlines=True).split('\n')
    package_name_re = re.compile(r'package: .*name=\'(\S*)\'')
    for line in aapt_output:
        match = package_name_re.match(line)
        if match:
            # Only the first "package:" line is consulted.
            return package_name == match.group(1)
    return False
def PathListJoin(prefix_list, suffix_list):
    """Join every prefix with every suffix.

    Args:
      prefix_list: list of path prefixes.
      suffix_list: list of path suffixes.

    Returns:
      List of os.path.join(prefix, suffix) results, grouped by prefix in
      prefix_list order with the suffixes in suffix_list order.
    """
    joined = []
    for prefix in prefix_list:
        for suffix in suffix_list:
            joined.append(os.path.join(prefix, suffix))
    return joined
def GetCandidates(dirs, filepart, candidate_fun):
    """Build and expand the list of candidate filenames.

    The search roots are <CHROME_SYMBOLS_DIR>/<out dir>/<build type> for each
    build type, followed by CHROME_SYMBOLS_DIR itself. The out dir comes from
    CHROMIUM_OUT_DIR (default 'out'); the build type from BUILDTYPE (default
    both 'Debug' and 'Release').

    Args:
      dirs: a list of the directory part of the pathname.
      filepart: the file part of the pathname.
      candidate_fun: a function to apply to each candidate, returns a list.

    Returns:
      A list of candidate files ordered by modification time, newest first.
    """
    out_root = os.path.join(CHROME_SYMBOLS_DIR,
                            os.environ.get('CHROMIUM_OUT_DIR', 'out'))
    buildtype = os.environ.get('BUILDTYPE')
    buildtype_list = [buildtype] if buildtype else ['Debug', 'Release']

    search_roots = PathListJoin([out_root], buildtype_list)
    search_roots.append(CHROME_SYMBOLS_DIR)
    patterns = PathListJoin(PathListJoin(search_roots, dirs), [filepart])
    logging.debug('GetCandidates: prefiltered candidates = %s' % patterns)

    # Each pattern expands to zero or more real files.
    found = []
    for pattern in patterns:
        found.extend(candidate_fun(pattern))
    found.sort(key=os.path.getmtime, reverse=True)
    return found
def GetCandidateApks():
    """Return the APKs which could contain the library.

    Args:
      None

    Returns:
      List of filenames matching '*.apk' in the 'apks' directory of each
      search root used by GetCandidates.
    """
    apk_dirs = ['apks']
    apk_pattern = '*.apk'
    return GetCandidates(apk_dirs, apk_pattern, glob.glob)
def GetCrazyLib(apk_filename):
    """Returns the name of the first crazy library from this APK.

    Args:
      apk_filename: name of an APK file.

    Returns:
      Name of the first library which would be crazy loaded from this APK,
      i.e. the "lib....so" part of the first archive entry named
      lib/<dir>/crazy.lib....so, or None if the APK has no such entry.
    """
    # The with-statement closes the archive on every exit path; previously
    # the ZipFile was left open.
    with zipfile.ZipFile(apk_filename, 'r') as zip_file:
        for filename in zip_file.namelist():
            match = re.match('lib/[^/]*/crazy.(lib.*[.]so)', filename)
            if match:
                return match.group(1)
    return None
def GetApkFromLibrary(device_library_path):
    """Extract the APK name from a device path that points at an APK.

    Args:
      device_library_path: path of the form .../<name>-<number>.apk or
        .../<name>-<number>/<file>.apk.

    Returns:
      The <name> part, or None if the path does not have that form.
    """
    apk_match = re.match(r'.*/([^/]*)-[0-9]+(\/[^/]*)?\.apk$',
                         device_library_path)
    return apk_match.group(1) if apk_match else None
def GetMatchingApks(package_name):
    """Find any candidate APKs whose package name is package_name.

    Args:
      package_name: package name the APK has to declare.

    Returns:
      A list of APK filenames which could contain the desired library.
    """
    # Locate aapt once instead of once per candidate APK.
    aapt = GetAapt()
    # A real list, so callers can log it and iterate it more than once.
    return [candidate_apk
            for candidate_apk in GetCandidateApks()
            if ApkMatchPackageName(aapt, candidate_apk, package_name)]
def MapDeviceApkToLibrary(device_apk_name):
    """Provide a library name which corresponds with device_apk_name.

    Args:
      device_apk_name: name of the APK on the device.

    Returns:
      The crazy library name from the first matching APK that has one, or
      None if no matching APK has one.
    """
    matching_apks = GetMatchingApks(device_apk_name)
    logging.debug('MapDeviceApkToLibrary: matching_apks=%s' % matching_apks)
    # Lazy on purpose: APKs after the first hit are never opened.
    crazy_libs = (GetCrazyLib(apk) for apk in matching_apks)
    return next((crazy_lib for crazy_lib in crazy_libs if crazy_lib), None)
def GetCandidateLibraries(library_name):
    """Return the existing files that could be the given library.

    Args:
      library_name: basename of the library to match.

    Returns:
      A list of existing library filenames for library_name found in the
      'lib', 'lib.target' and '.' directories of each search root.
    """
    def existing(filename):
        # A candidate expands to itself if the file exists, else to nothing.
        return [filename] if os.path.exists(filename) else []

    return GetCandidates(['lib', 'lib.target', '.'], library_name, existing)
def TranslateLibPath(lib):
    """Translate a library path from a stack trace into a symbols path.

    Args:
      lib: library (or APK) pathname as it appears in the stack trace.

    Returns:
      '/' plus the path, relative to SYMBOLS_DIR, of the newest candidate
      library with the same basename; if there is no candidate, lib itself
      (or the library name mapped from its APK).
    """
    # The stack trace may name an APK instead of a library when the library
    # was loaded directly from inside the APK. On the device the APK is named
    # <package_name>-<number>.apk, while the host copy may have any name, so
    # the library is found through a host APK whose badging declares the same
    # package name.
    apk = GetApkFromLibrary(lib)
    if apk is not None:
        logging.debug('TranslateLibPath: apk=%s' % apk)
        lib = MapDeviceApkToLibrary(apk) or lib

    # SymbolInformation(lib, addr) receives lib as the path from the symbols
    # root to the symbols file, so it has to be translated to the actual .so
    # path. The most recently updated match in the known directories wins.
    library_name = os.path.basename(lib)

    logging.debug(
        'TranslateLibPath: lib=%s library_name=%s' % (lib, library_name))

    candidate_libraries = GetCandidateLibraries(library_name)
    logging.debug(
        'TranslateLibPath: candidate_libraries=%s' % candidate_libraries)
    if candidate_libraries:
        library_path = os.path.relpath(candidate_libraries[0], SYMBOLS_DIR)
        logging.debug('TranslateLibPath: library_path=%s' % library_path)
        return '/' + library_path

    # Nothing found under CHROME_SYMBOLS_DIR: leave the path untranslated in
    # case it is an Android symbol in SYMBOLS_DIR.
    return lib
def SymbolInformation(lib, addr, get_detailed_info):
    """Look up symbol information about a single address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address
      get_detailed_info: passed through to SymbolInformationForSet; when true
        the object symbol and offset are looked up as well.

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first. The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    translated = TranslateLibPath(lib)
    info = SymbolInformationForSet(translated, set([addr]), get_detailed_info)
    if info:
        found = info.get(addr)
        if found:
            return found
    # Placeholder entry that keeps the result non-empty.
    return [(None, None, None)]
def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
    """Look up symbol information for a set of addresses from one library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses
      get_detailed_info: when true, objdump is used to find the object symbol
        and offset; otherwise an empty symbol with offset 0 is used.

    Returns:
      A dictionary of the form {addr: [(source_symbol, source_location,
      object_symbol_with_offset)]} where each address has a non-empty list
      of associated symbols and locations, or None if lib is empty or a
      lookup produced nothing.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    if not get_detailed_info:
        addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)
    else:
        addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
        if not addr_to_objdump:
            return None

    result = {}
    for addr in unique_addrs:
        # Addresses without line information still get one placeholder row.
        source_info = addr_to_line.get(addr) or [(None, None)]
        if addr in addr_to_objdump:
            object_symbol, object_offset = addr_to_objdump[addr]
            symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                        object_offset)
        else:
            symbol_with_offset = None
        result[addr] = [(symbol, location, symbol_with_offset)
                        for (symbol, location) in source_info]

    return result
class MemoizedForSet(object):
    """Decorator caching the results of a per-library, per-address lookup.

    The wrapped function is called as fn(lib, addrs) and returns a dictionary
    keyed by address (or a false value). Results are remembered per lib, and
    an address for which nothing was returned is remembered as None so it is
    not looked up again.
    """

    def __init__(self, fn):
        self.fn = fn
        # {lib: {addr: result or None}}
        self.cache = {}
        # Keep the wrapped function's name and docstring visible.
        self.__name__ = getattr(fn, '__name__', type(self).__name__)
        self.__doc__ = getattr(fn, '__doc__', None)

    def __call__(self, lib, unique_addrs):
        """Return {addr: result} for the addresses that have a result.

        Args:
          lib: key selecting the per-library cache; passed on to fn.
          unique_addrs: collection of addresses to look up.

        Returns:
          A dictionary holding only those addresses of unique_addrs whose
          cached result is truthy.
        """
        lib_cache = self.cache.setdefault(lib, {})

        # Must be a real list: it is tested for emptiness, iterated to seed
        # the cache and then handed to fn. A lazy filter object is always
        # truthy and would already be exhausted when fn receives it.
        no_cache = [addr for addr in unique_addrs if addr not in lib_cache]
        if no_cache:
            lib_cache.update((k, None) for k in no_cache)
            result = self.fn(lib, no_cache)
            if result:
                lib_cache.update(result)

        return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])
@MemoizedForSet
def CallAddr2LineForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to look up.

    Returns:
      A dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found. None is
      returned if lib is empty, has an unexpected extension or is not
      a file under SYMBOLS_DIR.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.splitext(symbols)[1] in ['', '.so', '.apk']:
        return None

    if not os.path.isfile(symbols):
        return None

    # Called for its check only: it raises if no toolchain can be found.
    FindToolchain()
    cmd = [ToolPath("addr2line"), "--functions", "--inlines",
           "--demangle", "--exe=" + symbols]
    # Text-mode pipes so that str can be written and read on every
    # interpreter version.
    child = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)

    result = {}
    try:
        for addr in sorted(unique_addrs):
            child.stdin.write("0x%s\n" % addr)
            child.stdin.flush()
            records = []
            first = True
            child_gone = False
            while True:
                symbol_line = child.stdout.readline()
                location_line = child.stdout.readline()
                if not symbol_line or not location_line:
                    # An empty read means the child closed its output. Stop
                    # here rather than appending empty records forever.
                    child_gone = True
                    break
                symbol = symbol_line.strip()
                if symbol == "??":
                    symbol = None
                location = location_line.strip()
                if location == "??:0":
                    location = None
                if symbol is None and location is None:
                    break
                records.append((symbol, location))
                if first:
                    # Write a blank line as a sentinel so we know when to stop
                    # reading inlines from the output.
                    # The blank line will cause addr2line to emit "??\n??:0\n".
                    child.stdin.write("\n")
                    # Flush so the sentinel reaches the child even when the
                    # pipe is buffered; otherwise the next read would block.
                    child.stdin.flush()
                    first = False
            result[addr] = records
            if child_gone:
                break
    finally:
        child.stdin.close()
        child.stdout.close()
        # Reap the child so it does not linger as a zombie process.
        child.wait()
    return result
def StripPC(addr):
    """Strip the Thumb bit from a program counter address when appropriate.

    Args:
      addr: the program counter address (an integer).

    Returns:
      addr with bit 0 cleared when ARCH is "arm", otherwise addr unchanged.
    """
    return addr & ~1 if ARCH == "arm" else addr
@MemoizedForSet
def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the
        functions for.

    Returns:
      A dictionary of the form {addr: (string symbol, offset)}, or None if
      lib is empty or does not exist under SYMBOLS_DIR.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    result = {}

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an
    # optional offset. This is tricky due to functions that look like
    # "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510        push  {r4, lr}
    # (The second character class used to read "0-0"; the trailing ".*" made
    # that harmless, so the set of matching lines is unchanged.)
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    for target_addr in unique_addrs:
        # Address actually looked up, with the Thumb bit stripped on ARM.
        i_target = StripPC(int(target_addr, 16))
        cmd = [ToolPath("objdump"),
               "--section=.text",
               "--demangle",
               "--disassemble",
               "--start-address=" + str(i_target),
               "--stop-address=" + str(i_target + 8),
               symbols]

        current_symbol = None    # The current function symbol in the disassembly.
        current_symbol_addr = 0  # The address of the current function.

        # Text-mode pipe so the str patterns above can match the lines.
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 universal_newlines=True)
        try:
            for line in child.stdout:
                # Is it a function line like:
                #   000177b0 <android::IBinder::~IBinder()>:
                components = func_regexp.match(line)
                if components:
                    # This is a new function, so record the current function
                    # and its address.
                    current_symbol_addr = int(components.group(1), 16)
                    current_symbol = components.group(2)

                    # Does it have an optional offset like: "foo(..)+0x2c"?
                    components = offset_regexp.match(current_symbol)
                    if components:
                        current_symbol = components.group(1)
                        offset = components.group(2)
                        if offset:
                            current_symbol_addr -= int(offset, 16)

                # Is it a disassembly line like:
                #   177b2:  b510        push  {r4, lr}
                components = asm_regexp.match(line)
                if components:
                    i_addr = int(components.group(1), 16)
                    if i_addr == i_target:
                        result[target_addr] = (
                            current_symbol, i_target - current_symbol_addr)
        finally:
            child.stdout.close()
            # Reap the child so it does not linger as a zombie process.
            child.wait()

    return result
def CallCppFilt(mangled_symbol):
    """Demangle a C++ symbol with the toolchain's c++filt.

    Args:
      mangled_symbol: the mangled symbol name.

    Returns:
      The first line printed by c++filt, with surrounding whitespace removed.
    """
    cmd = [ToolPath("c++filt")]
    process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    # communicate() sends the input, closes stdin, drains stdout and waits
    # for the child, so no process or pipe is left behind.
    output, _ = process.communicate(mangled_symbol + "\n")
    return output.split("\n", 1)[0].strip()
def FormatSymbolWithOffset(symbol, offset):
    """Format a symbol together with its offset.

    Args:
      symbol: the symbol name.
      offset: integer offset from the start of the symbol.

    Returns:
      symbol itself when offset is 0, otherwise "<symbol>+<offset>" with the
      offset in decimal.
    """
    if offset != 0:
        return "%s+%d" % (symbol, offset)
    return symbol