Change UMA proto product field to be an int32.
[chromium-blink-merge.git] / tools / perf / measurements / loading_measurement_analyzer.py
blob37d97b3500655f3f54d8f5786c509a881346b55b
1 #!/usr/bin/env python
2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Parses CSV output from the loading_measurement and outputs interesting stats.
8 Example usage:
9 $ tools/perf/run_measurement --browser=release \
10 --output-format=csv --output=/path/to/loading_measurement_output.csv \
11 loading_measurement tools/perf/page_sets/top_1m.py
12 $ tools/perf/measurements/loading_measurement_analyzer.py \
13 --num-slowest-urls=100 --rank-csv-file=/path/to/top-1m.csv \
14 /path/to/loading_measurement_output.csv
15 """
17 import collections
18 import csv
19 import heapq
20 import optparse
21 import os
22 import re
23 import sys
26 class LoadingMeasurementAnalyzer(object):
28 def __init__(self, input_file, options):
29 self.ranks = {}
30 self.totals = collections.defaultdict(list)
31 self.maxes = collections.defaultdict(list)
32 self.avgs = collections.defaultdict(list)
33 self.load_times = []
34 self.cpu_times = []
35 self.network_percents = []
36 self.num_rows_parsed = 0
37 self.num_slowest_urls = options.num_slowest_urls
38 if options.rank_csv_file:
39 self._ParseRankCsvFile(os.path.expanduser(options.rank_csv_file))
40 self._ParseInputFile(input_file, options)
41 self._display_zeros = options.display_zeros
43 def _ParseInputFile(self, input_file, options):
44 with open(input_file, 'r') as csvfile:
45 row_dict = csv.DictReader(csvfile)
46 for row in row_dict:
47 if (options.rank_limit and
48 self._GetRank(row['url']) > options.rank_limit):
49 continue
50 cpu_time = 0
51 load_time = float(row['load_time (ms)'])
52 if load_time < 0:
53 print 'Skipping %s due to negative load time' % row['url']
54 continue
55 for key, value in row.iteritems():
56 if key in ('url', 'load_time (ms)', 'dom_content_loaded_time (ms)'):
57 continue
58 if not value or value == '-':
59 continue
60 value = float(value)
61 if not value:
62 continue
63 if '_avg' in key:
64 self.avgs[key].append((value, row['url']))
65 elif '_max' in key:
66 self.maxes[key].append((value, row['url']))
67 else:
68 self.totals[key].append((value, row['url']))
69 cpu_time += value
70 self.load_times.append((load_time, row['url']))
71 self.cpu_times.append((cpu_time, row['url']))
72 if options.show_network:
73 network_time = load_time - cpu_time
74 self.totals['Network (ms)'].append((network_time, row['url']))
75 self.network_percents.append((network_time / load_time, row['url']))
76 self.num_rows_parsed += 1
77 if options.max_rows and self.num_rows_parsed == int(options.max_rows):
78 break
80 def _ParseRankCsvFile(self, input_file):
81 with open(input_file, 'r') as csvfile:
82 for row in csv.reader(csvfile):
83 assert len(row) == 2
84 self.ranks[row[1]] = int(row[0])
86 def _GetRank(self, url):
87 url = url.replace('http://', '')
88 if url in self.ranks:
89 return self.ranks[url]
90 return len(self.ranks)
92 def PrintSummary(self, stdout):
93 sum_totals = {}
94 units = None
95 for key, values in self.totals.iteritems():
96 m = re.match('.* [(](.*)[)]', key)
97 assert m, 'All keys should have units.'
98 assert not units or units == m.group(1), 'All units should be the same.'
99 units = m.group(1)
100 sum_totals[key] = sum([v[0] for v in values])
101 total_cpu_time = sum([v[0] for v in self.cpu_times])
102 total_page_load_time = sum([v[0] for v in self.load_times])
104 print >> stdout
105 print >> stdout, 'Total URLs:', self.num_rows_parsed
106 print >> stdout, 'Total page load time: %ds' % int(round(
107 total_page_load_time / 1000))
108 print >> stdout, 'Average page load time: %dms' % int(round(
109 total_page_load_time / self.num_rows_parsed))
110 if units == 'ms':
111 print >> stdout, 'Total CPU time: %ds' % int(round(total_cpu_time / 1000))
112 print >> stdout, 'Average CPU time: %dms' % int(round(
113 total_cpu_time / self.num_rows_parsed))
114 print >> stdout
115 for key, value in sorted(sum_totals.iteritems(), reverse=True,
116 key=lambda i: i[1]):
117 if not self._display_zeros and not int(value / 100.):
118 break
119 output_key = '%60s: ' % re.sub(' [(].*[)]', '', key)
120 if units == 'ms':
121 output_value = '%10ds ' % (value / 1000)
122 output_percent = '%.1f%%' % (100 * value / total_page_load_time)
123 else:
124 output_value = '%10d%s ' % (value, units)
125 output_percent = '%.1f%%' % (100 * value / total_cpu_time)
126 print >> stdout, output_key, output_value, output_percent
128 if not self.num_slowest_urls:
129 return
131 for key, values in sorted(self.totals.iteritems(), reverse=True,
132 key=lambda i: sum_totals[i[0]]):
133 if not self._display_zeros and not int(sum_totals[key] / 100.):
134 break
135 print >> stdout
136 print >> stdout, 'Top %d slowest %s:' % (self.num_slowest_urls,
137 re.sub(' [(].*[)]', '', key))
138 slowest = heapq.nlargest(self.num_slowest_urls, values)
139 for value, url in slowest:
140 print >> stdout, '%10d%s\t%s (#%s)' % (value, units, url,
141 self._GetRank(url))
143 if self.network_percents:
144 print >> stdout
145 print >> stdout, 'Top %d highest network to CPU time ratios:' % (
146 self.num_slowest_urls)
147 for percent, url in sorted(
148 self.network_percents, reverse=True)[:self.num_slowest_urls]:
149 percent *= 100
150 print >> stdout, '\t', '%.1f%%' % percent, url, '(#%s)' % (
151 self._GetRank(url))
154 def main(arguments, stdout=sys.stdout):
155 prog_desc = 'Parses CSV output from the loading_measurement'
156 parser = optparse.OptionParser(usage=('%prog [options]' + '\n\n' + prog_desc))
158 parser.add_option('--max-rows', type='int',
159 help='Only process this many rows')
160 parser.add_option('--num-slowest-urls', type='int',
161 help='Output this many slowest URLs for each category')
162 parser.add_option('--rank-csv-file', help='A CSV file of <rank,url>')
163 parser.add_option('--rank-limit', type='int',
164 help='Only process pages higher than this rank')
165 parser.add_option('--show-network', action='store_true',
166 help='Whether to display Network as a category')
167 parser.add_option('--display-zeros', action='store_true',
168 help='Whether to display categories with zero time')
170 options, args = parser.parse_args(arguments)
172 assert len(args) == 1, 'Must pass exactly one CSV file to analyze'
173 if options.rank_limit and not options.rank_csv_file:
174 print 'Must pass --rank-csv-file with --rank-limit'
175 return 1
177 LoadingMeasurementAnalyzer(args[0], options).PrintSummary(stdout)
179 return 0
# Script entry point: exit with main()'s return code so shell callers can
# detect usage errors (main returns 1 on bad arguments, 0 on success).
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))