Revert safe-browsing PrefixSet size_t handling.
[chromium-blink-merge.git] / tools / cygprofile / symbolize.py
blob694ae54f9b0728a02b9865d61eaf157240bfaaf1
1 #!/usr/bin/python
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Symbolize log file produced by cygprofile instrumentation.
8 Given a log file and the binary being profiled (e.g. executable, shared
9 library), the script can produce three different outputs: 1) symbols for the
10 addresses, 2) function and line numbers for the addresses, or 3) an order file.
11 """
13 import optparse
14 import os
15 import string
16 import subprocess
17 import sys
def ParseLogLines(log_file_lines):
  """Parse a log file produced by the profiled run of clank.

  Args:
    log_file_lines: array of lines in log file produced by profiled run

  Below is an example of a small log file:
  5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
  secs       msecs      pid:threadid    func
  START
  1314897086 795828     3587:1074648168 0x509e105c
  1314897086 795874     3587:1074648168 0x509e0eb4
  1314897086 796326     3587:1074648168 0x509e0e3c
  1314897086 796552     3587:1074648168 0x509e07bc
  END

  Returns:
    call_info list with list of tuples of the format (sec, msec, call id,
    function address called). Addresses greater than the start of the mapping
    given on the first line are converted to offsets from that start; other
    addresses are returned unchanged.

  Raises:
    ValueError: if the log is empty, if its first line is not the "r-xp"
        mapping line, or if a numeric field can not be parsed.
  """
  if not log_file_lines or 'r-xp' not in log_file_lines[0]:
    raise ValueError('first line of the log must be the r-xp mapping line')

  # The mapping line starts with "<start>-<end>" in hex; only the start of the
  # mapping is needed to rebase the logged addresses.
  vm_start = int(log_file_lines[0].split('-', 1)[0], 16)

  call_info = []
  # Lines 0 and 1 are the mapping line and the column header. Marker lines such
  # as START and END do not have four fields and are skipped.
  for line in log_file_lines[2:]:
    fields = line.split()
    if len(fields) != 4:
      continue
    sec_timestamp = int(fields[0])
    msec_timestamp = int(fields[1])
    callee_id = fields[2]
    addr = int(fields[3], 16)
    if vm_start < addr:
      addr -= vm_start
    call_info.append((sec_timestamp, msec_timestamp, callee_id, addr))

  return call_info
def ParseLibSymbols(lib_file):
  """Get output from running nm and keep the text symbols.

  Args:
    lib_file: the library or executable that contains the profiled code

  Returns:
    A tuple (unique_addrs, address_map). unique_addrs is a list of
    (address, size) tuples, one per distinct symbol address, in the order nm
    printed them (nm is run with -n, so presumably sorted by address). The
    size is the one of the first symbol listed at that address. address_map
    maps each of those addresses to the list of all symbol names at it.
  """
  cmd = ['nm', '-S', '-n', lib_file]
  # universal_newlines makes communicate() return text instead of bytes, so
  # the string handling below works on both Python 2 and Python 3.
  nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          universal_newlines=True)
  output = nm_p.communicate()[0]

  unique_addrs = []
  address_map = {}
  for nm_line in output.split('\n'):
    # Only lines whose symbol type is t, T or W are of interest.
    if not any(tag in nm_line for tag in (' t ', ' W ', ' T ')):
      continue

    # If the length of the split line is not 4, then it does not contain all
    # the information needed to symbolize (i.e. address, size and symbol
    # name).
    fields = nm_line.split()
    if len(fields) != 4:
      continue

    addr = int(fields[0], 16)
    # Multiple symbols may be at the same address. This is due to aliasing
    # done by the compiler. Since there is no way to be sure which one was
    # called in profiled run, we will symbolize to include all symbol names at
    # a particular address.
    if addr not in address_map:
      address_map[addr] = []
      unique_addrs.append((addr, int(fields[1], 16)))
    address_map[addr].append(fields[3])

  return (unique_addrs, address_map)
class SymbolNotFoundException(Exception):
  """Error carrying the value (e.g. an address) that could not be symbolized.

  Attributes:
    value: the object passed to the constructor; str() of the exception is
        the repr() of this object.
  """

  def __init__(self, value):
    self.value = value

  def __str__(self):
    return '%r' % (self.value,)
def BinarySearchAddresses(addr, start, end, arr):
  """Find starting address of a symbol at a particular address.

  The reason we can not directly use the address provided by the log file is
  that the log file may give an address after the start of the symbol. The
  logged address is often one byte after the start. Using this search
  function, rather than just subtracting one from the logged address, allows
  the logging instrumentation to log any address in a function.

  Args:
    addr: the address being searched for
    start: the starting index for the binary search
    end: the ending index (inclusive) for the binary search
    arr: the list being searched containing tuple of address and size; the
        search assumes it is sorted by address

  Returns:
    the starting address of the symbol at address addr

  Raises:
    SymbolNotFoundException: if no entry in the searched range contains addr
        (including when the range is empty). Functions expects all logged
        addresses to be found.
  """
  # Narrow [start, end] until at most two candidates remain.
  while end - start > 1:
    # Floor division keeps the index an integer on Python 3 as well.
    halfway = (start + end) // 2
    (nm_addr, size) = arr[halfway]
    if nm_addr <= addr < nm_addr + size:
      return nm_addr
    if addr < nm_addr:
      end = halfway - 1
    else:
      # Condition (addr >= nm_addr + size) must be true.
      start = halfway + 1

  # arr[i] is a tuple of address and size. Check if addr is inside the range
  # of one of the remaining candidates. The bounds check covers an empty arr,
  # for which the caller passes end == -1.
  for index in (start, end):
    if 0 <= index < len(arr):
      (nm_addr, size) = arr[index]
      if nm_addr <= addr < nm_addr + size:
        return nm_addr
  raise SymbolNotFoundException(addr)
def FindFunctions(addr, unique_addrs, address_map):
  """Find function symbol names at address addr.

  Args:
    addr: the address to symbolize
    unique_addrs: list of (address, size) tuples to search
    address_map: map from symbol start address to the list of symbol names

  Returns:
    the entry of address_map for the symbol whose range contains addr
  """
  last_index = len(unique_addrs) - 1
  symbol_start = BinarySearchAddresses(addr, 0, last_index, unique_addrs)
  return address_map[symbol_start]
def AddrToLine(addr, lib_file):
  """Use addr2line to determine line info of a particular address.

  Args:
    addr: the address (an integer) to look up
    lib_file: the library or executable that contains the profiled code

  Returns:
    The standard output of "addr2line -f -e lib_file addr" with every newline
    replaced by ':'. If the output ends with a newline, the returned string
    therefore ends with ':'.
  """
  cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
  # universal_newlines makes communicate() return text instead of bytes, so
  # the string handling below works on both Python 2 and Python 3.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = p.communicate()[0]
  # Same result as splitting on newlines and re-joining the pieces with ':'.
  return output.replace('\n', ':')
def main():
  """Write output for profiled run to standard out.

  The format of the output depends on the output type given with the
  -t/--outputType option:
    lineize: timestamps, call id and the addr2line output for each call
    orderfile: a ".text.<symbol>" line for every symbol at each called
        address, followed by an empty line
    anything else (default 'symbolize'): timestamps, call id and the symbol
        names for each call
  """
  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
  parser.add_option('-t', '--outputType', dest='output_type',
                    default='symbolize', type='string',
                    help='lineize or symbolize or orderfile')

  # Option for output type. The log file and lib file arguments are required
  # by the script and therefore are not options.
  (options, args) = parser.parse_args()
  if len(args) != 2:
    parser.error('expected 2 args: log_file lib_file')

  (log_file, lib_file) = args
  output_type = options.output_type

  with open(log_file) as log:
    log_file_lines = [line.rstrip() for line in log]
  call_info = ParseLogLines(log_file_lines)
  (unique_addrs, address_map) = ParseLibSymbols(lib_file)

  # Check for duplicate addresses in the log file, and print a warning if
  # duplicates are found. The instrumentation that produces the log file
  # should only print the first time a function is entered.
  # NOTE(review): this warning goes to standard out, while the "did not find
  # function" warning below goes to stderr.
  seen_addrs = set()
  for call in call_info:
    addr = call[3]
    if addr in seen_addrs:
      print('WARNING: Address ' + hex(addr) + ' (line= ' +
            AddrToLine(addr, lib_file) + ') already profiled.')
    else:
      seen_addrs.add(addr)

  for call in call_info:
    (sec, msec, callee_id, addr) = call
    prefix = str(sec) + ' ' + str(msec) + '\t' + str(callee_id) + '\t'

    if output_type == 'lineize':
      print(prefix + AddrToLine(addr, lib_file))
      continue

    try:
      symbols = FindFunctions(addr, unique_addrs, address_map)
    except SymbolNotFoundException:
      # Report the address of the call being processed.
      sys.stderr.write('WARNING: Did not find function in binary. addr: '
                       + hex(addr) + '\n')
      continue

    if output_type == 'orderfile':
      for symbol in symbols:
        print('.text.' + symbol)
      print('')
    else:
      # The first symbol shares the line with the timestamps; any aliases at
      # the same address follow on their own tab-indented lines.
      print(prefix + symbols[0])
      for symbol in symbols[1:]:
        print('\t\t\t\t\t' + symbol)


if __name__ == '__main__':
  main()