# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# A script to accumulate values from the 'dmprof cat' command into CSV or
# other formats.
#
# Usage:
#   ./accumulate.py -f <format> -t <template-name> < input.json > output
#
# <format> is one of "csv", "json", and "tree". If "csv" or "json" is given,
# accumulate.py dumps a file similar to the output of "dmprof csv|json". If
# "tree" is given, accumulate.py dumps a human-readable breakdown tree.
#
# <template-name> is a label in templates.json.
import datetime
import json
import logging
import optparse
import sys

from lib.ordered_dict import OrderedDict
# Module-wide logger; handlers and levels are attached by the entry point.
LOGGER = logging.getLogger('dmprof-accumulate')
def visit_in_template(template, snapshot, depth):
  """Visits all categories via a given template.

  This function is not used. It's a sample function to traverse a template.

  Args:
      template: A template tree, indexed as [world, breakdown, rules].
      snapshot: A mapping of world name to world data; each world is read
          via world['breakdown'][breakdown-name], a mapping keyed by rule.
      depth: Current nesting depth; one indent string is written per level.
  """
  world = template[0]
  breakdown = template[1]
  # NOTE(review): the index of the sub-template mapping was reconstructed
  # from how |rules| is used below -- confirm against the template format.
  rules = template[2]

  for rule in snapshot[world]['breakdown'][breakdown]:
    # Written through sys.stdout so it behaves the same on Python 2 and 3.
    sys.stdout.write((' ' * depth) + rule + '\n')
    if rule in rules:
      # Only rules that carry a sub-template are broken down further.
      visit_in_template(rules[rule], snapshot, depth + 1)
def accumulate(template, snapshot, units_dict, target_units):
  """Accumulates units in a JSON |snapshot| with applying a given |template|.

  Args:
      template: A template tree included in a dmprof cat JSON file, indexed
          as [world, breakdown, rules].
      snapshot: A snapshot in a dmprof cat JSON file.
      units_dict: A dict of units in worlds (world name -> unit id -> size).
      target_units: A set of unit ids which are a target of this
          accumulation.  It is not modified.

  Returns:
      A tuple (category_tree, total, remainder_units):
      category_tree is an OrderedDict mapping each visible rule to either
      its subtotal or a nested category tree (whose None key, if present,
      holds the part not covered by the nested breakdown); total is the sum
      of all visible subtotals; remainder_units is the set of target units
      that no visible rule matched.
  """
  world = template[0]
  breakdown = template[1]
  # NOTE(review): the index of the sub-template mapping was reconstructed
  # from how |rules| is used below -- confirm against the template format.
  rules = template[2]

  remainder_units = target_units.copy()
  category_tree = OrderedDict()
  total = 0

  for rule, match in snapshot[world]['breakdown'][breakdown].items():
    if match.get('hidden'):
      # Hidden rules are skipped, so their units stay in the remainder.
      continue

    matched_units = set(match['units']).intersection(target_units)
    subtotal = sum(units_dict[world][unit_id] for unit_id in matched_units)
    total += subtotal
    remainder_units = remainder_units.difference(matched_units)

    if rule not in rules:
      # A category matched with |rule| is a leaf of the breakdown tree.
      # It is NOT broken down more.
      category_tree[rule] = subtotal
      continue

    # A category matched with |rule| is broken down more.
    subtemplate = rules[rule]
    subworld = subtemplate[0]
    subbreakdown = subtemplate[1]

    if subworld == world:
      # Break down in the same world: consider units.
      category_tree[rule], accounted_total, subremainder_units = accumulate(
          subtemplate, snapshot, units_dict, matched_units)
      subremainder_total = 0
      if subremainder_units:
        subremainder_total = sum(
            units_dict[world][unit_id] for unit_id in subremainder_units)
        category_tree[rule][None] = subremainder_total
      if subtotal != accounted_total + subremainder_total:
        sys.stderr.write(
            'WARNING: Sum of %s:%s is different from %s by %d bytes.\n' % (
                subworld, subbreakdown, rule,
                subtotal - (accounted_total + subremainder_total)))
    else:
      # Break down in a different world: consider only the total size.
      category_tree[rule], accounted_total, _ = accumulate(
          subtemplate, snapshot, units_dict, set(units_dict[subworld]))
      if subtotal >= accounted_total:
        category_tree[rule][None] = subtotal - accounted_total
      else:
        sys.stderr.write(
            'WARNING: Sum of %s:%s is larger than %s by %d bytes.\n' % (
                subworld, subbreakdown, rule, accounted_total - subtotal))
        sys.stderr.write(
            'WARNING: Assuming remainder of %s is 0.\n' % rule)
        category_tree[rule][None] = 0

  return category_tree, total, remainder_units
def flatten(category_tree, header=''):
  """Flattens a category tree into a flat list.

  Args:
      category_tree: A mapping of rule -> subtotal or nested category tree.
      header: Flattened label of the enclosing category; empty at the root.

  Returns:
      A list of (flattened_label, subtotal) pairs in traversal order.  Nested
      labels are joined with '>'.
  """
  result = []
  for rule, sub in category_tree.items():
    if not rule:
      # Remainder entries are keyed by None, which cannot be concatenated.
      # NOTE(review): the replacement label is reconstructed -- confirm.
      rule = 'remaining'
    if header:
      flattened_rule = header + '>' + rule
    else:
      flattened_rule = rule
    if isinstance(sub, (dict, OrderedDict)):
      result.extend(flatten(sub, flattened_rule))
    else:
      result.append((flattened_rule, sub))
  return result
def print_category_tree(category_tree, output, depth=0):
  """Prints a category tree in a human-readable format.

  Args:
      category_tree: A mapping of label -> subtotal or nested category tree.
      output: A writable file-like object.
      depth: Current nesting depth; one indent string is written per level.
  """
  indent = ' ' * depth
  for label in category_tree:
    value = category_tree[label]
    # The space after |indent| reproduces the separator that the Python 2
    # trailing-comma print statement inserted between the two prints.
    if isinstance(value, (dict, OrderedDict)):
      output.write('%s %s:\n' % (indent, label))
      print_category_tree(value, output, depth + 1)
    else:
      output.write('%s %s: %d\n' % (indent, label, value))
def flatten_all_category_trees(category_trees):
  """Flattens every category tree and collects the labels that occur.

  Args:
      category_trees: An iterable of category trees, one per snapshot.

  Returns:
      A tuple (flattened_labels, flattened_table): the set of all flattened
      labels seen in any tree, and a list with one OrderedDict per tree
      mapping flattened label -> subtotal.
  """
  flattened_labels = set()
  flattened_table = []
  for category_tree in category_trees:
    flattened = OrderedDict()
    for label, subtotal in flatten(category_tree):
      flattened_labels.add(label)
      flattened[label] = subtotal
    flattened_table.append(flattened)
  return flattened_labels, flattened_table
def output_csv(output, category_trees, data, first_time, output_exponent):
  """Writes the accumulated categories as CSV, one row per snapshot.

  Args:
      output: A writable file-like object.
      category_trees: A list of category trees, one per snapshot, in the
          same order as data['snapshots'].
      data: The loaded JSON root; data['snapshots'][i]['time'] is read.
      first_time: The time subtracted from each snapshot time to produce
          the 'second' column.
      output_exponent: 'K' or 'M' (case-insensitive) to scale sizes by 1024
          or 1024 * 1024; any other value leaves sizes unscaled.
  """
  flattened_labels, flattened_table = flatten_all_category_trees(category_trees)

  # The scale is the same for every cell, so resolve it once up front.
  exponent = output_exponent.upper()
  if exponent == 'K':
    divisor = 1024.0
  elif exponent == 'M':
    divisor = 1024.0 * 1024.0
  else:
    divisor = None

  sorted_flattened_labels = sorted(flattened_labels)
  output.write(','.join(['second'] + sorted_flattened_labels) + '\n')
  for index, row in enumerate(flattened_table):
    values = [str(data['snapshots'][index]['time'] - first_time)]
    for label in sorted_flattened_labels:
      if label not in row:
        # NOTE(review): the filler for a label absent from this snapshot is
        # reconstructed -- confirm.
        values.append('0')
      elif divisor is None:
        # No division for bytes, so integer sizes are printed as integers.
        values.append(str(row[label]))
      else:
        values.append(str(row[label] / divisor))
    output.write(','.join(values) + '\n')
def output_json(output, category_trees, data, first_time, template_label):
  """Writes the accumulated categories as a JSON document.

  Args:
      output: A writable file-like object.
      category_trees: A list of category trees, one per snapshot, in the
          same order as data['snapshots'].
      data: The loaded JSON root; data['snapshots'][i]['time'] is read.
      first_time: The time subtracted from each snapshot time to produce
          the 'second' field.
      template_label: The label of the template that produced the trees.
  """
  flattened_labels, flattened_table = flatten_all_category_trees(category_trees)

  json_snapshots = []
  for index, row in enumerate(flattened_table):
    snapshot_time = data['snapshots'][index]['time']
    row_with_meta = row.copy()
    row_with_meta['second'] = snapshot_time - first_time
    # fromtimestamp() without a tz argument formats in local time.
    row_with_meta['dump_time'] = datetime.datetime.fromtimestamp(
        snapshot_time).strftime('%Y-%m-%d %H:%M:%S')
    json_snapshots.append(row_with_meta)

  # NOTE(review): the nesting around 'legends'/'snapshots' (a 'policies'
  # mapping keyed by the template label) is reconstructed -- confirm.
  json_root = {
      'version': 'JSON_DEEP_2',
      'policies': {
          template_label: {
              'legends': sorted(flattened_labels),
              'snapshots': json_snapshots
          }
      }
  }
  json.dump(json_root, output, indent=2, sort_keys=True)
def output_tree(output, category_trees):
  """Writes every category tree as a human-readable breakdown.

  Args:
      output: A writable file-like object.
      category_trees: A list of category trees, one per snapshot.
  """
  for index, category_tree in enumerate(category_trees):
    output.write('< Snapshot #%d >\n' % index)
    # The tree body starts one level deep, below the snapshot heading.
    print_category_tree(category_tree, output, 1)
def do_main(cat_input, output, template_label, output_format, output_exponent):
  """Does the main work: accumulate for every snapshot and print a result.

  Args:
      cat_input: A readable file-like object holding 'dmprof cat' JSON.
      output: A writable file-like object for the result.
      template_label: The label of the template to apply.  If empty, the
          input's 'default_template' is used.
      output_format: One of 'csv', 'json' and 'tree'.
      output_exponent: One of 'B', 'K' and 'M' (case-insensitive).

  Raises:
      NotImplementedError: If the format or the exponent is not supported.
  """
  if output_format not in ['csv', 'json', 'tree']:
    raise NotImplementedError('The output format \"%s\" is not implemented.' %
                              output_format)

  if output_exponent.upper() not in ['B', 'K', 'M']:
    raise NotImplementedError('The exponent \"%s\" is not implemented.' %
                              output_exponent)

  # OrderedDict keeps rules in the order they appear in the input.
  data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

  templates = data['templates']
  if not template_label:
    template_label = data['default_template']
  if template_label not in templates:
    LOGGER.error('A template \'%s\' is not found.' % template_label)
    return
  template = templates[template_label]

  category_trees = []
  first_time = None

  for snapshot in data['snapshots']:
    # Compared against None so that a first snapshot at time 0 still counts.
    if first_time is None:
      first_time = snapshot['time']

    # Build world name -> {unit id (int) -> first entry of the unit's sizes}.
    units = {}
    for world_name in snapshot['worlds']:
      world_units = {}
      for unit_id, sizes in snapshot['worlds'][world_name]['units'].items():
        world_units[int(unit_id)] = sizes[0]
      units[world_name] = world_units

    category_tree, _, _ = accumulate(
        template, snapshot['worlds'], units, set(units[template[0]]))
    category_trees.append(category_tree)

  if output_format == 'csv':
    output_csv(output, category_trees, data, first_time, output_exponent)
  elif output_format == 'json':
    output_json(output, category_trees, data, first_time, template_label)
  elif output_format == 'tree':
    output_tree(output, category_trees)
# NOTE(review): the function header was not visible in the reviewed text; the
# name |main| is assumed from the __main__ guard below -- confirm.
def main():
  """Sets up logging, parses command-line options and runs do_main().

  Input is read from stdin and the result is written to stdout.
  """
  # The logger passes everything through; the handler filters at INFO.
  LOGGER.setLevel(logging.DEBUG)
  handler = logging.StreamHandler()
  handler.setLevel(logging.INFO)
  formatter = logging.Formatter('%(message)s')
  handler.setFormatter(formatter)
  LOGGER.addHandler(handler)

  parser = optparse.OptionParser()
  parser.add_option('-t', '--template', dest='template',
                    help='Apply TEMPLATE to list up.')
  parser.add_option('-f', '--format', dest='format', default='csv',
                    help='Specify the output format: csv, json or tree.')
  parser.add_option('-e', '--exponent', dest='exponent', default='M',
                    help='Specify B (bytes), K (kilobytes) or M (megabytes).')

  # Positional arguments are ignored; only the options are used.
  options, _ = parser.parse_args(sys.argv)
  do_main(sys.stdin, sys.stdout,
          options.template, options.format, options.exponent)
270 if __name__
== '__main__':