Port Android relocation packer to chromium build
[chromium-blink-merge.git] / third_party / android_platform / development / scripts / symbol.py
blobb412e13fc8fc5fffe106f78a3ff45561dc27c9f5
1 #!/usr/bin/python
3 # Copyright (C) 2013 The Android Open Source Project
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Module for looking up symbolic debugging information.
19 The information can include symbol names, offsets, and source locations.
20 """
22 import glob
23 import itertools
24 import os
25 import re
26 import subprocess
27 import zipfile
# Root of the checkout: four directory levels above the directory that holds
# this script (the path is joined, not normalized).
CHROME_SRC = os.path.join(
    os.path.realpath(os.path.dirname(__file__)),
    *([os.pardir] * 4))

# Every search root used by this module points at the checkout root.
ANDROID_BUILD_TOP = CHROME_SRC
SYMBOLS_DIR = CHROME_SRC
CHROME_SYMBOLS_DIR = CHROME_SRC

# Target architecture; selects the toolchain in ToolPath()/FindToolchain().
ARCH = "arm"

# Cache for FindToolchain(); stays None until a toolchain has been found.
TOOLCHAIN_INFO = None
def Uname():
  """Return the host tag used in prebuilt/<...> and out/host/<...> paths.

  Returns:
    "linux-x86" on Linux; "darwin-x86" on Darwin when the machine field is
    "i386" or "x86_64", otherwise "darwin-ppc"; on any other system the raw
    system name reported by os.uname().
  """
  host = os.uname()
  system = host[0]
  if system == "Linux":
    return "linux-x86"
  if system != "Darwin":
    return system
  # Darwin: the last uname field distinguishes Intel from PowerPC hosts.
  machine = host[-1]
  return "darwin-x86" if machine in ("i386", "x86_64") else "darwin-ppc"
def ToolPath(tool, toolchain_info=None):
  """Return the fully qualified path of a toolchain binary for ARCH.

  The tools are looked up in the checked-in third_party/android_tools NDK
  rather than in an Android platform build tree.

  Args:
    tool: tool name without the target prefix, e.g. "addr2line".
    toolchain_info: not used by this implementation; accepted so existing
      callers that pass it keep working.

  Returns:
    Path of the form <CHROME_SRC>/<ndk toolchain bin dir>/<prefix>-<tool>.

  Raises:
    Exception: if ARCH is not one of the supported architectures.
  """
  # ARCH -> (toolchain directory name, binary name prefix).
  x86_64_toolchain = ("x86_64-4.9", "x86_64-linux-android")
  toolchains = {
      "arm": ("arm-linux-androideabi-4.9", "arm-linux-androideabi"),
      "arm64": ("aarch64-linux-android-4.9", "aarch64-linux-android"),
      "x86": ("x86-4.9", "i686-linux-android"),
      "x86_64": x86_64_toolchain,
      "x64": x86_64_toolchain,
      "mips": ("mipsel-linux-android-4.9", "mipsel-linux-android"),
  }
  if ARCH not in toolchains:
    raise Exception("Could not find tool chain")
  toolchain_source, toolchain_prefix = toolchains[ARCH]

  ndk = "ndk"
  toolchain_subdir = (
      "third_party/android_tools/%s/toolchains/%s/prebuilt/linux-x86_64/bin" %
      (ndk, toolchain_source))

  binary_name = toolchain_prefix + "-" + tool
  return os.path.join(CHROME_SRC, toolchain_subdir, binary_name)
def FindToolchain():
  """Look for the latest available toolchain for ARCH.

  The first successful lookup is stored in the module-level TOOLCHAIN_INFO
  and returned directly by later calls.

  Args:
    None

  Returns:
    A (label, platform, target prefix) tuple of strings describing the
    toolchain.

  Raises:
    Exception: if ARCH is unknown or its addr2line binary does not exist.
  """
  global TOOLCHAIN_INFO
  if TOOLCHAIN_INFO is not None:
    return TOOLCHAIN_INFO

  gcc_version = "4.9"
  x86_64_entry = ("x86_64-" + gcc_version, "x86_64", "x86_64-linux-android")
  # ARCH -> known toolchains, newer ones in the front.
  toolchains_by_arch = {
      "arm64": [("aarch64-linux-android-" + gcc_version, "aarch64",
                 "aarch64-linux-android")],
      "arm": [("arm-linux-androideabi-" + gcc_version, "arm",
               "arm-linux-androideabi")],
      "x86": [("x86-" + gcc_version, "x86", "i686-linux-android")],
      "x86_64": [x86_64_entry],
      "x64": [x86_64_entry],
      "mips": [("mipsel-linux-android-" + gcc_version, "mips",
                "mipsel-linux-android")],
  }
  known_toolchains = toolchains_by_arch.get(ARCH, [])

  # The presence of addr2line is what marks a toolchain path as valid.
  for candidate in known_toolchains:
    if not os.path.exists(ToolPath("addr2line", candidate)):
      continue
    TOOLCHAIN_INFO = candidate
    print("Using toolchain from :" + ToolPath("", TOOLCHAIN_INFO))
    return candidate

  raise Exception("Could not find tool chain")
def GetAapt():
  """Locate the 'aapt' executable inside the Android SDK build-tools.

  The SDK root comes from the SDK_HOME environment variable and falls back
  to the relative path third_party/android_tools/sdk.

  Args:
    None

  Returns:
    The pathname of the most recently modified 'aapt' found under
    <sdk>/build-tools/*/, or None when there is no such file.
  """
  default_sdk = os.path.join('third_party', 'android_tools', 'sdk')
  sdk_root = os.environ.get('SDK_HOME', default_sdk)
  found = glob.glob(os.path.join(sdk_root, 'build-tools', '*', 'aapt'))
  if found:
    # Newest modification time wins.
    return max(found, key=os.path.getmtime)
  return None
def ApkMatchPackageName(aapt, apk_path, package_name):
  """Returns true if the APK's package name matches package_name.

  Args:
    aapt: pathname for the 'aapt' executable, or None if unavailable.
    apk_path: pathname of the APK file.
    package_name: package name to match.

  Returns:
    True if the package name matches or aapt is None, False otherwise
    (including when the badging output has no "package:" line).

  Raises:
    subprocess.CalledProcessError: if aapt exits with a non-zero status.
  """
  if not aapt:
    # Without aapt the package cannot be verified; allow false positives.
    return True
  # universal_newlines=True yields text output on Python 3 as well.
  badging = subprocess.check_output(
      [aapt, 'dump', 'badging', apk_path], universal_newlines=True)
  # Take the first standalone name='...' attribute. A greedy ".*name=" would
  # instead capture a later attribute whose key merely ends in "name", such
  # as compileSdkVersionCodename='9'.
  package_name_re = re.compile(r"package: .*?\bname='(\S*)'")
  for line in badging.split('\n'):
    match = package_name_re.match(line)
    if match:
      return package_name == match.group(1)
  return False
def PathListJoin(prefix_list, suffix_list):
  """Join every prefix in prefix_list with every suffix in suffix_list.

  Args:
    prefix_list: list of path prefixes.
    suffix_list: list of path suffixes.

  Returns:
    A new list of os.path.join(prefix, suffix) results, ordered by prefix
    first and by suffix within each prefix.
  """
  joined = []
  for prefix in prefix_list:
    for suffix in suffix_list:
      joined.append(os.path.join(prefix, suffix))
  return joined
def GetCandidates(dirs, filepart, candidate_fun):
  """Build and expand the list of candidate filenames.

  The search roots are <CHROME_SYMBOLS_DIR>/<out dir>/<build type> for each
  build type, followed by CHROME_SYMBOLS_DIR itself. The out dir comes from
  CHROMIUM_OUT_DIR (default 'out') and the build type from BUILDTYPE
  (default: both 'Debug' and 'Release').

  Args:
    dirs: a list of the directory part of the pathname.
    filepart: the file part of the pathname.
    candidate_fun: a function to apply to each candidate, returns a list.

  Returns:
    A list of candidate files ordered by modification time, newest first.
  """
  out_root = os.path.join(CHROME_SYMBOLS_DIR,
                          os.environ.get('CHROMIUM_OUT_DIR', 'out'))
  requested_buildtype = os.environ.get('BUILDTYPE')
  if requested_buildtype:
    buildtypes = [requested_buildtype]
  else:
    buildtypes = ['Debug', 'Release']

  search_roots = PathListJoin([out_root], buildtypes)
  search_roots.append(CHROME_SYMBOLS_DIR)
  patterns = PathListJoin(PathListJoin(search_roots, dirs), [filepart])

  found = []
  for pattern in patterns:
    found.extend(candidate_fun(pattern))
  found.sort(key=os.path.getmtime, reverse=True)
  return found
def GetCandidateApks():
  """Collect the APKs which could contain the library.

  Args:
    None

  Returns:
    List of filenames matching '*.apk' in the 'apks' directory of each
    search root used by GetCandidates.
  """
  apk_dirs = ['apks']
  apk_pattern = '*.apk'
  return GetCandidates(apk_dirs, apk_pattern, glob.glob)
def GetCrazyLib(apk_filename):
  """Returns the name of the first crazy library from this APK.

  A crazy library is an archive member named lib/<abi>/crazy.lib<...>.so.

  Args:
    apk_filename: name of an APK file.

  Returns:
    Name of the first library which would be crazy loaded from this APK
    (without the "crazy." prefix), or None if the APK contains none.
  """
  # The context manager closes the archive; the original leaked the handle.
  with zipfile.ZipFile(apk_filename, 'r') as zip_file:
    member_names = zip_file.namelist()
  # "[.]" makes the dot after "crazy" literal instead of matching any char.
  crazy_lib_re = re.compile(r'lib/[^/]*/crazy[.](lib.*[.]so)')
  for filename in member_names:
    match = crazy_lib_re.match(filename)
    if match:
      return match.group(1)
  return None
def GetMatchingApks(device_apk_name):
  """Find any APKs which match the package indicated by the device_apk_name.

  Args:
    device_apk_name: name of the APK on the device, expected to have the
      form <package_name>-<number>.apk.

  Returns:
    A list of APK filenames which could contain the desired library, or None
    if device_apk_name does not have the expected form.
  """
  match = re.match('(.*)-[0-9]+[.]apk$', device_apk_name)
  if not match:
    return None
  package_name = match.group(1)
  # Locate aapt once instead of once per candidate APK.
  aapt = GetAapt()
  # A real list (not a lazy filter object) on every Python version.
  return [candidate_apk for candidate_apk in GetCandidateApks()
          if ApkMatchPackageName(aapt, candidate_apk, package_name)]
def MapDeviceApkToLibrary(device_apk_name):
  """Provide a library name which corresponds with device_apk_name.

  Args:
    device_apk_name: name of the APK on the device.

  Returns:
    Name of the library which corresponds to that APK, or None if no
    matching APK containing a crazy library was found.
  """
  # GetMatchingApks returns None for names that are not of the form
  # <package>-<number>.apk; treat that as "no candidates" rather than
  # raising TypeError while iterating.
  matching_apks = GetMatchingApks(device_apk_name) or []
  for matching_apk in matching_apks:
    crazy_lib = GetCrazyLib(matching_apk)
    if crazy_lib:
      return crazy_lib
  return None
def GetCandidateLibraries(library_name):
  """Collect existing library files named library_name.

  Args:
    library_name: basename of the library to match.

  Returns:
    A list of matching library filenames for library_name, taken from the
    'lib' and 'lib.target' directories of each GetCandidates search root.
  """
  def ExistingOnly(filename):
    # Keep the path only when it is present on disk.
    return [filename] if os.path.exists(filename) else []

  return GetCandidates(['lib', 'lib.target'], library_name, ExistingOnly)
def TranslateLibPath(lib):
  """Translate a stack-trace library path into a path under SYMBOLS_DIR.

  Args:
    lib: path of the library (or APK) as reported for the stack frame.

  Returns:
    '/' followed by the path, relative to SYMBOLS_DIR, of the most recently
    modified candidate library; lib unchanged if no candidate exists, in
    case it is an Android symbol in SYMBOLS_DIR.
  """
  library_name = os.path.basename(lib)

  # The name in the stack trace may be an APK rather than a library; this
  # happens when the library was loaded directly from inside the APK. Device
  # APKs are named <package_name>-<number>.apk, while the host APK may have
  # any name, so the host APK is identified through its badging and the
  # library name is read from the APK contents.
  if re.search('-[0-9]+[.]apk$', library_name):
    library_name = MapDeviceApkToLibrary(library_name) or library_name

  candidates = GetCandidateLibraries(library_name)
  if candidates:
    # Candidates are ordered newest first; use the most recent build.
    return '/' + os.path.relpath(candidates[0], SYMBOLS_DIR)
  return lib
def SymbolInformation(lib, addr, get_detailed_info):
  """Look up symbol information about a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexidecimal address
    get_detailed_info: when true, objdump is also consulted to obtain the
      object symbol and offset.

  Returns:
    A list of the form [(source_symbol, source_location,
    object_symbol_with_offset)].

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty, even if no information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  translated_lib = TranslateLibPath(lib)
  info = SymbolInformationForSet(translated_lib, set([addr]),
                                 get_detailed_info)
  if info:
    entries = info.get(addr)
    if entries:
      return entries
  # Nothing known about this address: return a single placeholder entry.
  return [(None, None, None)]
def SymbolInformationForSet(lib, unique_addrs, get_detailed_info):
  """Look up symbol information for a set of addresses from the given library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexidecimal addresses
    get_detailed_info: when true, objdump is run to obtain the object symbol
      and offset; otherwise an empty symbol with offset 0 is used.

  Returns:
    A dictionary of the form {addr: [(source_symbol, source_location,
    object_symbol_with_offset)]} where each address has a non-empty list of
    associated symbols and locations, or None if lib is empty or one of the
    required lookups produced nothing.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
  if not addr_to_line:
    return None

  if not get_detailed_info:
    addr_to_objdump = dict((addr, ("", 0)) for addr in unique_addrs)
  else:
    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
      return None

  result = {}
  for addr in unique_addrs:
    # Addresses without line information still get one placeholder entry.
    source_info = addr_to_line.get(addr) or [(None, None)]
    symbol_with_offset = None
    if addr in addr_to_objdump:
      object_symbol, object_offset = addr_to_objdump[addr]
      symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                  object_offset)
    result[addr] = [(src_symbol, src_location, symbol_with_offset)
                    for (src_symbol, src_location) in source_info]

  return result
class MemoizedForSet(object):
  """Decorator that memoizes a fn(lib, addrs) -> {addr: value} lookup.

  Results are cached per library and per address. Only addresses not yet
  cached for a library are passed on to the wrapped function. Addresses for
  which the function returns nothing are cached as None, so they are not
  looked up again.
  """

  def __init__(self, fn):
    """Wrap fn, a callable taking (lib, addrs) and returning a dict or None."""
    self.fn = fn
    # {lib: {addr: cached value, or None when nothing was found}}
    self.cache = {}
    # Keep the wrapped function's name and docstring on the wrapper.
    self.__name__ = getattr(fn, '__name__', type(self).__name__)
    self.__doc__ = getattr(fn, '__doc__', None)

  def __call__(self, lib, unique_addrs):
    """Return {addr: value} for the addresses that have a truthy value.

    Args:
      lib: library pathname, used as the first-level cache key.
      unique_addrs: iterable of addresses to look up.

    Returns:
      A dictionary holding the cached or freshly computed values of those
      addresses in unique_addrs whose value is truthy.
    """
    lib_cache = self.cache.setdefault(lib, {})

    # Build a real list: on Python 3 a filter object is always truthy and
    # would already be exhausted when handed to self.fn below.
    no_cache = [addr for addr in unique_addrs if addr not in lib_cache]
    if no_cache:
      # Pre-seed with None so addresses absent from the result stay cached.
      lib_cache.update((k, None) for k in no_cache)
      result = self.fn(lib, no_cache)
      if result:
        lib_cache.update(result)

    return dict((k, lib_cache[k]) for k in unique_addrs if lib_cache[k])
@MemoizedForSet
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexidecimal addresses look up.

  Returns:
    A dictionary of the form {addr: [(symbol, file:line)]} where
    each address has a list of associated symbols and locations
    or an empty list if no symbol information was found. None is returned
    if lib is empty or the symbol file does not exist.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.

  Raises:
    Exception: if no toolchain can be found (raised by FindToolchain).
  """
  if not lib:
    return None

  # Plain concatenation is intentional: TranslateLibPath yields paths that
  # start with '/', which os.path.join would treat as absolute.
  symbols = SYMBOLS_DIR + lib
  if not os.path.isfile(symbols):
    return None

  # Called only to validate (and cache) the toolchain; it raises if none is
  # available. The returned tuple itself is not needed here.
  FindToolchain()
  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # universal_newlines=True keeps both pipes in text mode on Python 3.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      sentinel_sent = False
      reached_eof = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
          # EOF: addr2line went away. Stop here instead of appending empty
          # records forever.
          reached_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        # Some binutils versions print "??:?" rather than "??:0".
        if location in ("??:0", "??:?"):
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if not sentinel_sent:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          child.stdin.flush()
          sentinel_sent = True
      result[addr] = records
      if reached_eof:
        break
  finally:
    child.stdin.close()
    child.stdout.close()
    # Reap the child so it does not linger as a zombie process.
    child.wait()
  return result
def StripPC(addr):
  """Clear the Thumb bit of a program counter address when ARCH is "arm".

  Args:
    addr: the program counter address (an integer).

  Returns:
    addr with its lowest bit cleared for "arm"; addr unchanged for any other
    architecture.
  """
  return (addr & ~1) if ARCH == "arm" else addr
@MemoizedForSet
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexidecimal addresses to find the functions for.

  Returns:
    A dictionary of the form {addr: (string symbol, offset)}, or None if lib
    is empty or the symbol file does not exist.
  """
  if not lib:
    return None

  # Plain concatenation is intentional: TranslateLibPath yields paths that
  # start with '/', which os.path.join would treat as absolute.
  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  result = {}

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  for target_addr in unique_addrs:
    i_target = StripPC(int(target_addr, 16))
    # Disassemble only a small window starting at the target address.
    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + str(i_target),
           "--stop-address=" + str(i_target + 8),
           symbols]

    current_symbol = None  # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.

    # universal_newlines=True yields text lines on Python 3 as well.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
      for line in process.stdout:
        # Is it a function line like:
        #   000177b0 <android::IBinder::~IBinder()>:
        components = func_regexp.match(line)
        if components:
          # This is a new function, so record the current function and its
          # address.
          current_symbol_addr = int(components.group(1), 16)
          current_symbol = components.group(2)

          # Does it have an optional offset like: "foo(..)+0x2c"?
          components = offset_regexp.match(current_symbol)
          if components:
            current_symbol = components.group(1)
            offset = components.group(2)
            if offset:
              current_symbol_addr -= int(offset, 16)

        # Is it an disassembly line like:
        #   177b2:  b510        push  {r4, lr}
        components = asm_regexp.match(line)
        if components:
          i_addr = int(components.group(1), 16)
          if i_addr == i_target:
            result[target_addr] = (current_symbol,
                                   i_target - current_symbol_addr)
    finally:
      process.stdout.close()
      # Reap objdump so it does not linger as a zombie process.
      process.wait()

  return result
def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol by piping it through the toolchain's c++filt.

  Args:
    mangled_symbol: the mangled symbol name.

  Returns:
    The first line printed by c++filt with surrounding whitespace stripped,
    or an empty string if it printed nothing.
  """
  cmd = [ToolPath("c++filt")]
  # universal_newlines=True keeps both pipes in text mode on Python 3.
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() writes the input, closes stdin, drains stdout and waits
  # for the child, so no pipe or zombie process is left behind.
  output, _ = process.communicate(mangled_symbol + "\n")
  return output.partition("\n")[0].strip()
def FormatSymbolWithOffset(symbol, offset):
  """Render a symbol together with its decimal offset.

  Args:
    symbol: the symbol name.
    offset: integer offset from the start of the symbol.

  Returns:
    symbol itself when offset is 0, otherwise "<symbol>+<offset>".
  """
  return symbol if offset == 0 else "%s+%d" % (symbol, offset)