# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Parses CSV output from the loading_measurement and outputs interesting stats.

Example usage:
$ tools/perf/run_measurement --browser=release \
    --output-format=csv --output=/path/to/loading_measurement_output.csv \
    loading_measurement tools/perf/page_sets/top_1m.json
$ tools/perf/measurements/loading_measurement_analyzer.py \
    --num-slowest-urls=100 --rank-csv-file=/path/to/top-1m.csv \
    /path/to/loading_measurement_output.csv
"""

import collections
import csv
import heapq
import optparse
import os
import re
import sys


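# Measurement values are kept as (value, url) pairs so the summary can report
# both per-category aggregates and the slowest URLs within each category.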
class LoadingMeasurementAnalyzer(object):

  def __init__(self, input_file, options):
    self.ranks = {}
    self.totals = collections.defaultdict(list)
    self.maxes = collections.defaultdict(list)
    self.avgs = collections.defaultdict(list)
    self.load_times = []
    self.cpu_times = []
    self.network_percents = []
    self.num_rows_parsed = 0
    self.num_slowest_urls = options.num_slowest_urls
    if options.rank_csv_file:
      self._ParseRankCsvFile(os.path.expanduser(options.rank_csv_file))
    self._ParseInputFile(input_file, options)
    self._display_zeros = options.display_zeros

  def _ParseInputFile(self, input_file, options):
    with open(input_file, 'r') as csvfile:
      row_dict = csv.DictReader(csvfile)
      for row in row_dict:
        if (options.rank_limit and
            self._GetRank(row['url']) > options.rank_limit):
          continue
        cpu_time = 0
        load_time = float(row['load_time (ms)'])
        if load_time < 0:
          print 'Skipping %s due to negative load time' % row['url']
          continue
        for key, value in row.iteritems():
          if key in ('url', 'load_time (ms)', 'dom_content_loaded_time (ms)'):
            continue
          if not value or value == '-':
            continue
          value = float(value)
          if not value:
            continue
          # Bucket each measurement column by its suffix: averages and maxes
          # are tracked separately, while everything else is a per-category
          # total that also counts toward the page's CPU time.
          if '_avg' in key:
            self.avgs[key].append((value, row['url']))
          elif '_max' in key:
            self.maxes[key].append((value, row['url']))
          else:
            self.totals[key].append((value, row['url']))
            cpu_time += value
        self.load_times.append((load_time, row['url']))
        self.cpu_times.append((cpu_time, row['url']))
        if options.show_network:
          network_time = load_time - cpu_time
          self.totals['Network (ms)'].append((network_time, row['url']))
          self.network_percents.append((network_time / load_time, row['url']))
        self.num_rows_parsed += 1
        if options.max_rows and self.num_rows_parsed == int(options.max_rows):
          break
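
  # The rank file is expected to contain one '<rank>,<url>' pair per line,
  # with lower numbers meaning more popular pages (the top-1m.csv format
  # referenced in the module docstring).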
  def _ParseRankCsvFile(self, input_file):
    with open(input_file, 'r') as csvfile:
      for row in csv.reader(csvfile):
        assert len(row) == 2
        self.ranks[row[1]] = int(row[0])
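
  # URLs missing from the rank file are treated as the lowest possible rank,
  # i.e. one past the last ranked entry.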
  def _GetRank(self, url):
    url = url.replace('http://', '')
    if url in self.ranks:
      return self.ranks[url]
    return len(self.ranks)

  def PrintSummary(self, stdout):
    sum_totals = {}
    units = None
    for key, values in self.totals.iteritems():
      m = re.match('.* [(](.*)[)]', key)
      assert m, 'All keys should have units.'
      assert not units or units == m.group(1), 'All units should be the same.'
      units = m.group(1)
      sum_totals[key] = sum([v[0] for v in values])
    total_cpu_time = sum([v[0] for v in self.cpu_times])
    total_page_load_time = sum([v[0] for v in self.load_times])

    print >> stdout
    print >> stdout, 'Total URLs:', self.num_rows_parsed
    print >> stdout, 'Total page load time: %ds' % int(round(
        total_page_load_time / 1000))
    print >> stdout, 'Average page load time: %dms' % int(round(
        total_page_load_time / self.num_rows_parsed))
    if units == 'ms':
      print >> stdout, 'Total CPU time: %ds' % int(round(total_cpu_time / 1000))
      print >> stdout, 'Average CPU time: %dms' % int(round(
          total_cpu_time / self.num_rows_parsed))
    print >> stdout
    for key, value in sorted(sum_totals.iteritems(), reverse=True,
                             key=lambda i: i[1]):
      if not self._display_zeros and not int(value / 100.):
        break
      output_key = '%60s: ' % re.sub(' [(].*[)]', '', key)
      if units == 'ms':
        output_value = '%10ds ' % (value / 1000)
        output_percent = '%.1f%%' % (100 * value / total_page_load_time)
      else:
        output_value = '%10d%s ' % (value, units)
        output_percent = '%.1f%%' % (100 * value / total_cpu_time)
      print >> stdout, output_key, output_value, output_percent

    if not self.num_slowest_urls:
      return

    for key, values in sorted(self.totals.iteritems(), reverse=True,
                              key=lambda i: sum_totals[i[0]]):
      if not self._display_zeros and not int(sum_totals[key] / 100.):
        break
      print >> stdout
      print >> stdout, 'Top %d slowest %s:' % (self.num_slowest_urls,
                                               re.sub(' [(].*[)]', '', key))
      slowest = heapq.nlargest(self.num_slowest_urls, values)
      for value, url in slowest:
        print >> stdout, '%10d%s\t%s (#%s)' % (value, units, url,
                                               self._GetRank(url))

    if self.network_percents:
      print >> stdout
      print >> stdout, 'Top %d highest network to CPU time ratios:' % (
          self.num_slowest_urls)
      for percent, url in sorted(
          self.network_percents, reverse=True)[:self.num_slowest_urls]:
        percent *= 100
        print >> stdout, '\t', '%.1f%%' % percent, url, '(#%s)' % (
            self._GetRank(url))


def main(arguments, stdout=sys.stdout):
  prog_desc = 'Parses CSV output from the loading_measurement'
  parser = optparse.OptionParser(usage=('%prog [options]' + '\n\n' + prog_desc))

  parser.add_option('--max-rows', type='int',
                    help='Only process this many rows')
  parser.add_option('--num-slowest-urls', type='int',
                    help='Output this many slowest URLs for each category')
  parser.add_option('--rank-csv-file', help='A CSV file of <rank,url>')
  parser.add_option('--rank-limit', type='int',
                    help='Only process pages higher than this rank')
  parser.add_option('--show-network', action='store_true',
                    help='Whether to display Network as a category')
  parser.add_option('--display-zeros', action='store_true',
                    help='Whether to display categories with zero time')

  options, args = parser.parse_args(arguments)

  assert len(args) == 1, 'Must pass exactly one CSV file to analyze'
  if options.rank_limit and not options.rank_csv_file:
    print 'Must pass --rank-csv-file with --rank-limit'
    return 1

  LoadingMeasurementAnalyzer(args[0], options).PrintSummary(stdout)

  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))