Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / tools / cygprofile / mergetraces.py
blob2ac83931cd7ba7b9ab316c1aad6689e7981374e9
1 #!/usr/bin/python
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 # Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog
8 """"Merge multiple logs files from different processes into a single log.
10 Given two log files of execution traces, merge the traces into a single trace.
11 Merging will use timestamps (i.e. the first two columns of logged calls) to
12 create a single log that is an ordered trace of calls by both processes.
13 """
15 import optparse
16 import string
17 import sys
20 def ParseLogLines(lines):
21 """Parse log file lines.
23 Args:
24 lines: lines from log file produced by profiled run
26 Below is an example of a small log file:
27 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
28 secs usecs pid:threadid func
29 START
30 1314897086 795828 3587:1074648168 0x509e105c
31 1314897086 795874 3587:1074648168 0x509e0eb4
32 1314897086 796326 3587:1074648168 0x509e0e3c
33 1314897086 796552 3587:1074648168 0x509e07bc
34 END
36 Returns:
37 tuple conisiting of 1) an ordered list of the logged calls, as an array of
38 fields, 2) the virtual start address of the library, used to compute the
39 offset of the symbol in the library and 3) the virtual end address
40 """
41 call_lines = []
42 vm_start = 0
43 vm_end = 0
44 dash_index = lines[0].find ('-')
45 space_index = lines[0].find (' ')
46 vm_start = int (lines[0][:dash_index], 16)
47 vm_end = int (lines[0][dash_index+1:space_index], 16)
48 for line in lines[2:]:
49 line = line.strip()
50 fields = line.split()
51 call_lines.append (fields)
53 return (call_lines, vm_start, vm_end)
56 def HasDuplicates(calls):
57 """Makes sure that calls are only logged once.
59 Args:
60 calls: list of calls logged
62 Returns:
63 boolean indicating if calls has duplicate calls
64 """
65 seen = set([])
66 for call in calls:
67 if call[3] in seen:
68 return True
69 seen.add(call[3])
70 return False
72 def CheckTimestamps(calls):
73 """Prints warning to stderr if the call timestamps are not in order.
75 Args:
76 calls: list of calls logged
77 """
78 index = 0
79 last_timestamp_secs = -1
80 last_timestamp_us = -1
81 while (index < len (calls)):
82 timestamp_secs = int (calls[index][0])
83 timestamp_us = int (calls[index][1])
84 timestamp = (timestamp_secs * 1000000) + timestamp_us
85 last_timestamp = (last_timestamp_secs * 1000000) + last_timestamp_us
86 if (timestamp < last_timestamp):
87 raise Exception("last_timestamp: " + str(last_timestamp_secs)
88 + " " + str(last_timestamp_us) + " timestamp: "
89 + str(timestamp_secs) + " " + str(timestamp_us) + "\n")
90 last_timestamp_secs = timestamp_secs
91 last_timestamp_us = timestamp_us
92 index = index + 1
95 def Convert(call_lines, start_address, end_address):
96 """Converts the call addresses to static offsets and removes invalid calls.
98 Removes profiled calls not in shared library using start and end virtual
99 addresses, converts strings to integer values, coverts virtual addresses to
100 address in shared library.
102 Returns:
103 list of calls as tuples (sec, usec, pid:tid, callee)
105 converted_calls = []
106 call_addresses = set()
107 for fields in call_lines:
108 secs = int (fields[0])
109 usecs = int (fields[1])
110 callee = int (fields[3], 16)
111 # Eliminate repetitions of the same function.
112 if callee in call_addresses:
113 continue
114 # Eliminate small addresses. It should be safe to do so because these point
115 # before the .text section (it is in .plt or earlier).
116 # TODO(pasko): understand why __cyg_profile_func_enter may output a small
117 # offset sometimes.
118 if callee < start_address + 4096:
119 sys.stderr.write('WARNING: ignoring small address: %s' %
120 hex(callee - start_address))
121 call_addresses.add(callee)
122 continue
123 if start_address <= callee < end_address:
124 converted_calls.append((secs, usecs, fields[2], (callee - start_address)))
125 call_addresses.add(callee)
126 return converted_calls
129 def Timestamp(trace_entry):
130 return int (trace_entry[0]) * 1000000 + int(trace_entry[1])
133 def AddTrace (tracemap, trace):
134 """Adds a trace to the tracemap.
136 Adds entries in the trace to the tracemap. All new calls will be added to
137 the tracemap. If the calls already exist in the tracemap then they will be
138 replaced if they happened sooner in the new trace.
140 Args:
141 tracemap: the tracemap
142 trace: the trace
145 for trace_entry in trace:
146 call = trace_entry[3]
147 if (not call in tracemap) or (
148 Timestamp(tracemap[call]) > Timestamp(trace_entry)):
149 tracemap[call] = trace_entry
152 def GroupByProcessAndThreadId(input_trace):
153 """Returns an array of traces grouped by pid and tid.
155 This is used to make the order of functions not depend on thread scheduling
156 which can be greatly impacted when profiling is done with cygprofile. As a
157 result each thread has its own contiguous segment of code (ordered by
158 timestamp) and processes also have their code isolated (i.e. not interleaved).
160 def MakeTimestamp(sec, usec):
161 return sec * 1000000 + usec
163 def PidAndTidFromString(pid_and_tid):
164 strings = pid_and_tid.split(':')
165 return (int(strings[0]), int(strings[1]))
167 tid_to_pid_map = {}
168 pid_first_seen = {}
169 tid_first_seen = {}
171 for (sec, usec, pid_and_tid, _) in input_trace:
172 (pid, tid) = PidAndTidFromString(pid_and_tid)
174 # Make sure that thread IDs are unique since this is a property we rely on.
175 if tid_to_pid_map.setdefault(tid, pid) != pid:
176 raise Exception(
177 'Seen PIDs %d and %d for TID=%d. Thread-IDs must be unique' % (
178 tid_to_pid_map[tid], pid, tid))
180 if not pid in pid_first_seen:
181 pid_first_seen[pid] = MakeTimestamp(sec, usec)
182 if not tid in tid_first_seen:
183 tid_first_seen[tid] = MakeTimestamp(sec, usec)
185 def CompareEvents(event1, event2):
186 (sec1, usec1, pid_and_tid, _) = event1
187 (pid1, tid1) = PidAndTidFromString(pid_and_tid)
188 (sec2, usec2, pid_and_tid, _) = event2
189 (pid2, tid2) = PidAndTidFromString(pid_and_tid)
191 pid_cmp = cmp(pid_first_seen[pid1], pid_first_seen[pid2])
192 if pid_cmp != 0:
193 return pid_cmp
194 tid_cmp = cmp(tid_first_seen[tid1], tid_first_seen[tid2])
195 if tid_cmp != 0:
196 return tid_cmp
197 return cmp(MakeTimestamp(sec1, usec1), MakeTimestamp(sec2, usec2))
199 return sorted(input_trace, cmp=CompareEvents)
202 def Main():
203 """Merge two traces for code in specified library and write to stdout.
205 Merges the two traces and coverts the virtual addresses to the offsets in the
206 library. First line of merged trace has dummy virtual address of 0-ffffffff
207 so that symbolizing the addresses uses the addresses in the log, since the
208 addresses have already been converted to static offsets.
210 parser = optparse.OptionParser('usage: %prog trace1 ... traceN')
211 (_, args) = parser.parse_args()
212 if len(args) <= 1:
213 parser.error('expected at least the following args: trace1 trace2')
215 step = 0
217 # Maps function addresses to their corresponding trace entry.
218 tracemap = dict()
220 for trace_file in args:
221 step += 1
222 sys.stderr.write(" " + str(step) + "/" + str(len(args)) +
223 ": " + trace_file + ":\n")
225 trace_lines = map(string.rstrip, open(trace_file).readlines())
226 (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines)
227 CheckTimestamps(trace_calls)
228 sys.stderr.write("Len: " + str(len(trace_calls)) +
229 ". Start: " + hex(trace_start) +
230 ", end: " + hex(trace_end) + '\n')
232 trace_calls = Convert(trace_calls, trace_start, trace_end)
233 sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n")
235 AddTrace(tracemap, trace_calls)
236 sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n")
238 # Extract the resulting trace from the tracemap
239 merged_trace = []
240 for call in tracemap:
241 merged_trace.append(tracemap[call])
242 merged_trace.sort(key=Timestamp)
244 grouped_trace = GroupByProcessAndThreadId(merged_trace)
246 print "0-ffffffff r-xp 00000000 xx:00 00000 ./"
247 print "secs\tusecs\tpid:threadid\tfunc"
248 for call in grouped_trace:
249 print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" +
250 hex(call[3]))
253 if __name__ == '__main__':
254 Main()