2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Symbolize log file produced by cygprofile instrumentation.
8 Given a log file and the binary being profiled (e.g. executable, shared
9 library), the script can produce three different outputs: 1) symbols for the
10 addresses, 2) function and line numbers for the addresses, or 3) an order file.
20 def ParseLogLines(log_file_lines
):
21 """Parse a log file produced by the profiled run of clank.
24 log_file_lines: array of lines in log file produced by profiled run
25 lib_name: library or executable containing symbols
27 Below is an example of a small log file:
28 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
29 secs msecs pid:threadid func
31 1314897086 795828 3587:1074648168 0x509e105c
32 1314897086 795874 3587:1074648168 0x509e0eb4
33 1314897086 796326 3587:1074648168 0x509e0e3c
34 1314897086 796552 3587:1074648168 0x509e07bc
38 call_info list with list of tuples of the format (sec, msec, call id,
39 function address called)
44 line
= log_file_lines
[0]
45 assert("r-xp" in line
)
46 end_index
= line
.find('-')
47 vm_start
= int(line
[:end_index
], 16)
48 for line
in log_file_lines
[2:]:
52 call_lines
.append(fields
)
54 # Convert strings to int in fields.
56 for call_line
in call_lines
:
57 (sec_timestamp
, msec_timestamp
) = map(int, call_line
[0:2])
58 callee_id
= call_line
[2]
59 addr
= int(call_line
[3], 16)
62 call_info
.append((sec_timestamp
, msec_timestamp
, callee_id
, addr
))
67 def ParseLibSymbols(lib_file
):
68 """Get output from running nm and grepping for text symbols.
71 lib_file: the library or executable that contains the profiled code
74 list of sorted unique addresses and corresponding size of function symbols
75 in lib_file and map of addresses to all symbols at a particular address
77 cmd
= ['nm', '-S', '-n', lib_file
]
78 nm_p
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
79 output
= nm_p
.communicate()[0]
80 nm_lines
= output
.split('\n')
83 for nm_line
in nm_lines
:
84 if any(str in nm_line
for str in (' t ', ' W ', ' T ')):
85 nm_symbols
.append(nm_line
)
90 while nm_index
< len(nm_symbols
):
92 # If the length of the split line is not 4, then it does not contain all the
93 # information needed to symbolize (i.e. address, size and symbol name).
94 if len(nm_symbols
[nm_index
].split()) == 4:
95 (addr
, size
) = [int(x
, 16) for x
in nm_symbols
[nm_index
].split()[0:2]]
97 # Multiple symbols may be at the same address. This is due to aliasing
98 # done by the compiler. Since there is no way to be sure which one was
99 # called in profiled run, we will symbolize to include all symbol names at
100 # a particular address.
102 while (nm_index
< len(nm_symbols
) and
103 addr
== int(nm_symbols
[nm_index
].split()[0], 16)):
104 if len(nm_symbols
[nm_index
].split()) == 4:
105 fnames
.append(nm_symbols
[nm_index
].split()[3])
107 address_map
[addr
] = fnames
108 unique_addrs
.append((addr
, size
))
112 return (unique_addrs
, address_map
)
114 class SymbolNotFoundException(Exception):
115 def __init__(self
,value
):
118 return repr(self
.value
)
120 def BinarySearchAddresses(addr
, start
, end
, arr
):
121 """Find starting address of a symbol at a particular address.
123 The reason we can not directly use the address provided by the log file is
124 that the log file may give an address after the start of the symbol. The
125 logged address is often one byte after the start. Using this search
126 function, rather than just subtracting one from the logged address, allows
127 the logging instrumentation to log any address in a function.
130 addr: the address being searched for
131 start: the starting index for the binary search
132 end: the ending index for the binary search
133 arr: the list being searched containing tuple of address and size
136 the starting address of the symbol at address addr
139 Exception: if address not found. Function expects all logged addresses
142 # print "addr: " + str(addr) + " start: " + str(start) + " end: " + str(end)
143 if start
>= end
or start
== end
- 1:
144 # arr[i] is a tuple of address and size. Check if addr inside range
145 if addr
>= arr
[start
][0] and addr
< arr
[start
][0] + arr
[start
][1]:
147 elif addr
>= arr
[end
][0] and addr
< arr
[end
][0] + arr
[end
][1]:
150 raise SymbolNotFoundException(addr
)
152 halfway
= (start
+ end
) / 2
153 (nm_addr
, size
) = arr
[halfway
]
154 # print "nm_addr: " + str(nm_addr) + " halfway: " + str(halfway)
155 if addr
>= nm_addr
and addr
< nm_addr
+ size
:
158 return BinarySearchAddresses(addr
, start
, halfway
-1, arr
)
160 # Condition (addr >= nm_addr + size) must be true.
161 return BinarySearchAddresses(addr
, halfway
+1, end
, arr
)
164 def FindFunctions(addr
, unique_addrs
, address_map
):
165 """Find function symbol names at address addr."""
166 return address_map
[BinarySearchAddresses(addr
, 0, len(unique_addrs
) - 1,
170 def AddrToLine(addr
, lib_file
):
171 """Use addr2line to determine line info of a particular address."""
172 cmd
= ['addr2line', '-f', '-e', lib_file
, hex(addr
)]
173 p
= subprocess
.Popen(cmd
, stdout
=subprocess
.PIPE
)
174 output
= (p
.communicate()[0]).split('\n')
177 while index
< len(output
):
178 line
= line
+ ':' + output
[index
]
184 """Write output for profiled run to standard out.
186 The format of the output depends on the output type specified with the
187 -t/--outputType option. The default output type is to symbolize the addresses
188 of the functions called.
190 parser
= optparse
.OptionParser('usage: %prog [options] log_file lib_file')
191 parser
.add_option('-t', '--outputType', dest
='output_type',
192 default
='symbolize', type='string',
193 help='lineize or symbolize or orderfile')
195 # Option for output type. The log file and lib file arguments are required
196 # by the script and therefore are not options.
197 (options
, args
) = parser
.parse_args()
199 parser
.error('expected 2 args: log_file lib_file')
201 (log_file
, lib_file
) = args
202 output_type
= options
.output_type
204 lib_name
= lib_file
.split('/')[-1].strip()
205 log_file_lines
= map(string
.rstrip
, open(log_file
).readlines())
206 call_info
= ParseLogLines(log_file_lines
)
207 (unique_addrs
, address_map
) = ParseLibSymbols(lib_file
)
209 # Check for duplicate addresses in the log file, and print a warning if
210 # duplicates are found. The instrumentation that produces the log file
211 # should only print the first time a function is entered.
213 for call
in call_info
:
215 if addr
not in addr_list
:
216 addr_list
.append(addr
)
218 print('WARNING: Address ' + hex(addr
) + ' (line= ' +
219 AddrToLine(addr
, lib_file
) + ') already profiled.')
221 for call
in call_info
:
222 if output_type
== 'lineize':
223 symbol
= AddrToLine(call
[3], lib_file
)
224 print(str(call
[0]) + ' ' + str(call
[1]) + '\t' + str(call
[2]) + '\t'
226 elif output_type
== 'orderfile':
228 symbols
= FindFunctions(call
[3], unique_addrs
, address_map
)
229 for symbol
in symbols
:
230 print '.text.' + symbol
232 except SymbolNotFoundException
as e
:
233 sys
.stderr
.write('WARNING: Did not find function in binary. addr: '
237 symbols
= FindFunctions(call
[3], unique_addrs
, address_map
)
238 print(str(call
[0]) + ' ' + str(call
[1]) + '\t' + str(call
[2]) + '\t'
241 for symbol
in symbols
:
243 print '\t\t\t\t\t' + symbol
246 except SymbolNotFoundException
as e
:
247 sys
.stderr
.write('WARNING: Did not find function in binary. addr: '
250 if __name__
== '__main__':