Re-enable index-basics-workers test to see if still times
[chromium-blink-merge.git] / tools / metrics / histograms / extract_histograms.py
blob1ba030c1bcfc16eeb669a02f672a595ef26a476a
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Extract histogram names from the description XML file.
7 For more information on the format of the XML file, which is self-documenting,
8 see histograms.xml; however, here is a simple example to get you started. The
9 XML below will generate the following five histograms:
11 HistogramTime
12 HistogramEnum
13 HistogramEnum_Chrome
14 HistogramEnum_IE
15 HistogramEnum_Firefox
17 <histogram-configuration>
19 <histograms>
21 <histogram name="HistogramTime" units="milliseconds">
22 <summary>A brief description.</summary>
23 <details>This is a more thorough description of this histogram.</details>
24 </histogram>
26 <histogram name="HistogramEnum" enum="MyEnumType">
27 <summary>This histogram sports an enum value type.</summary>
28 </histogram>
30 </histograms>
32 <enums>
34 <enum name="MyEnumType" type="int">
35 <summary>This is an example enum type, where the values mean little.</summary>
36 <int value="1" label="FIRST_VALUE">This is the first value.</int>
37 <int value="2" label="SECOND_VALUE">This is the second value.</int>
38 </enum>
40 </enums>
42 <fieldtrials>
44 <fieldtrial name="BrowserType">
45 <group name="Chrome"/>
46 <group name="IE"/>
47 <group name="Firefox"/>
48 <affected-histogram name="HistogramEnum"/>
49 </fieldtrial>
51 </fieldtrials>
53 </histogram-configuration>
55 """
57 import copy
58 import logging
59 import xml.dom.minidom
# Maximum number of times a field trial whose affected histograms are not yet
# defined is pushed back onto the reprocess queue before it is reported as
# missing a dependency.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5
class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
def JoinChildNodes(tag):
  """Serializes the children of a node into a single trimmed string.

  Args:
    tag: The minidom node whose child nodes are serialized.

  Returns:
    The concatenated XML of every child node, with leading and trailing
    whitespace removed.
  """
  serialized_children = (child.toxml() for child in tag.childNodes)
  joined = ''.join(serialized_children)
  return joined.strip()
def NormalizeAttributeValue(s):
  """Collapses each run of whitespace in an attribute value to one space.

  Attribute values may be wrapped over several lines in the XML file; this
  also drops any leading and trailing whitespace.

  Args:
    s: The string to normalize, e.g. '  \n    a  b c\n    d  '

  Returns:
    The normalized string, e.g. 'a b c d'
  """
  words = s.split()
  return ' '.join(words)
def NormalizeAllAttributeValues(node):
  """Normalizes every attribute value of every element in the given tree.

  The tree is modified in place.

  Args:
    node: The minidom node at the root of the tree to be normalized.

  Returns:
    The same minidom node that was passed in, after normalization.
  """
  # Walk the tree with an explicit stack; the visiting order is irrelevant
  # because each attribute is normalized independently of the others.
  pending = [node]
  while pending:
    current = pending.pop()
    if current.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
      attributes = current.attributes
      for attribute_name in attributes.keys():
        attribute = attributes[attribute_name]
        attribute.value = NormalizeAttributeValue(attribute.value)
    pending.extend(current.childNodes)
  return node
def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
  """Creates a new histogram name based on the field trial group.

  Args:
    group_name: The name of the field trial group. May be empty.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName.
    fieldtrial: The FieldTrial XML element. Its optional 'separator'
      attribute (default '_') and 'ordering' attribute ('prefix' or 'suffix',
      default 'suffix') control how the new name is assembled.

  Returns:
    A string with the expanded histogram name. When group_name is empty the
    histogram name is returned unchanged.

  Raises:
    Error if the expansion can't be done: the ordering is neither 'prefix'
    nor 'suffix', or a prefix expansion is requested for a histogram name
    that contains no dot.
  """
  # hasAttribute() is checked explicitly because getAttribute() returns ''
  # for a missing attribute, and an explicitly empty separator must be kept.
  if fieldtrial.hasAttribute('separator'):
    separator = fieldtrial.getAttribute('separator')
  else:
    separator = '_'

  if fieldtrial.hasAttribute('ordering'):
    ordering = fieldtrial.getAttribute('ordering')
  else:
    ordering = 'suffix'
  # The ordering is validated even when group_name is empty.
  if ordering not in ('prefix', 'suffix'):
    message = 'ordering needs to be prefix or suffix, value is %s' % ordering
    logging.error(message)
    raise Error(message)

  if not group_name:
    return histogram_name

  if ordering == 'suffix':
    return histogram_name + separator + group_name

  # For prefixes, the group_name is inserted between the "cluster" and the
  # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
  sections = histogram_name.split('.')
  if len(sections) <= 1:
    message = (
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s' %
        (histogram_name, fieldtrial.getAttribute('name')))
    logging.error(message)
    raise Error(message)

  cluster = sections[0] + '.'
  remainder = '.'.join(sections[1:])
  return cluster + group_name + separator + remainder
def _ExtractEnumsFromXmlTree(tree):
  """Collects every <enum> definition found in the parsed XML tree.

  Args:
    tree: The minidom document to search.

  Returns:
    A tuple (enums, have_errors). enums maps each enum name to a dict with
    the keys 'name', 'values' (int value -> {'label', 'summary'}) and,
    when a <summary> tag is present, 'summary'. have_errors is True if any
    problem was logged.
  """
  enums = {}
  have_errors = False
  last_name = None
  for enum in tree.getElementsByTagName('enum'):
    if enum.getAttribute('type') != 'int':
      logging.error('Unknown enum type %s', enum.getAttribute('type'))
      have_errors = True
      continue

    name = enum.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Enums %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name

    if name in enums:
      logging.error('Duplicate enum %s', name)
      have_errors = True
      continue

    last_int_value = None
    enum_dict = {'name': name, 'values': {}}

    for int_tag in enum.getElementsByTagName('int'):
      raw_value = int_tag.getAttribute('value')
      try:
        int_value = int(raw_value)
      except ValueError:
        # Report a malformed value like every other format problem instead
        # of letting a bare ValueError escape to the caller.
        logging.error('Enum %s has a non-integer value %s', name, raw_value)
        have_errors = True
        continue
      if last_int_value is not None and int_value < last_int_value:
        logging.error('Enum %s int values %d and %d are not in numerical order',
                      name, last_int_value, int_value)
        have_errors = True
      last_int_value = int_value
      if int_value in enum_dict['values']:
        logging.error('Duplicate enum value %d for enum %s', int_value, name)
        have_errors = True
        continue
      enum_dict['values'][int_value] = {
          'label': int_tag.getAttribute('label'),
          'summary': JoinChildNodes(int_tag),
      }

    summary_nodes = enum.getElementsByTagName('summary')
    if summary_nodes:
      enum_dict['summary'] = JoinChildNodes(summary_nodes[0])

    enums[name] = enum_dict

  return enums, have_errors


def _ExtractHistogramsFromXmlTree(tree, enums):
  """Collects every <histogram> definition found in the parsed XML tree.

  Args:
    tree: The minidom document to search.
    enums: The enum definitions, keyed by name, that histograms may refer to
      through their 'enum' attribute.

  Returns:
    A tuple (histograms, have_errors). histograms maps each histogram name
    to its description dict; have_errors is True if any problem was logged.
  """
  histograms = {}
  have_errors = False
  last_name = None
  # The descriptions can include HTML tags.
  for histogram in tree.getElementsByTagName('histogram'):
    name = histogram.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Histograms %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name
    if name in histograms:
      logging.error('Duplicate histogram definition %s', name)
      have_errors = True
      continue
    description = {}
    histograms[name] = description

    # Find <summary> tag.
    summary_nodes = histogram.getElementsByTagName('summary')
    if summary_nodes:
      description['summary'] = JoinChildNodes(summary_nodes[0])
    else:
      description['summary'] = 'TBD'

    # Find <obsolete> tag.
    obsolete_nodes = histogram.getElementsByTagName('obsolete')
    if obsolete_nodes:
      description['obsolete'] = JoinChildNodes(obsolete_nodes[0])

    # Handle units.
    if histogram.hasAttribute('units'):
      description['units'] = histogram.getAttribute('units')

    # Find <details> tag.
    details_nodes = histogram.getElementsByTagName('details')
    if details_nodes:
      description['details'] = JoinChildNodes(details_nodes[0])

    # Handle enum types. A histogram that names an unknown enum is still
    # recorded, just without an 'enum' entry.
    if histogram.hasAttribute('enum'):
      enum_name = histogram.getAttribute('enum')
      if enum_name not in enums:
        logging.error('Unknown enum %s in histogram %s', enum_name, name)
        have_errors = True
      else:
        description['enum'] = enums[enum_name]

  return histograms, have_errors


def _UpdateHistogramsWithFieldTrials(tree, histograms):
  """Adds the histograms generated by <fieldtrial> expansions.

  Args:
    tree: The minidom document to search for field trials.
    histograms: The histogram descriptions keyed by name. Modified in place:
      expanded histograms are added, and every histogram touched by a field
      trial gains 'fieldtrial_groups', 'fieldtrial_names' and
      'fieldtrial_labels' lists.

  Returns:
    True if any problem was logged, False otherwise.
  """
  have_errors = False

  last_name = None
  for fieldtrial in tree.getElementsByTagName('fieldtrial'):
    name = fieldtrial.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Field trials %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name

  # Field trials can depend on other field trials, so we need to be careful.
  # Field trials whose dependencies have not yet been processed get relegated
  # to the back of the queue to be processed later. The generator reads
  # reprocess_queue lazily, so entries appended during iteration are visited.
  reprocess_queue = []
  def GenerateFieldTrials():
    for f in tree.getElementsByTagName('fieldtrial'):
      yield 0, f
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, fieldtrial in GenerateFieldTrials():
    # Check dependencies first: every affected histogram must already exist.
    affected_histograms = fieldtrial.getElementsByTagName('affected-histogram')
    missing_dependency = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if histogram_name not in histograms:
        # Base histogram is missing
        missing_dependency = histogram_name
        break
    if missing_dependency is not None:
      if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, fieldtrial))
      else:
        logging.error('Field trial %s is missing its dependency %s',
                      fieldtrial.getAttribute('name'), missing_dependency)
        have_errors = True
      continue

    name = fieldtrial.getAttribute('name')
    groups = fieldtrial.getElementsByTagName('group')
    group_labels = {}
    for group in groups:
      group_labels[group.getAttribute('name')] = group.getAttribute('label')

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None
          and histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of field trial %s are not '
                      'in alphabetical order',
                      last_histogram_name, histogram_name, name)
        have_errors = True
      last_histogram_name = histogram_name

      # <with-group> children restrict the expansion to the listed groups;
      # without them every group of the field trial applies.
      with_groups = affected_histogram.getElementsByTagName('with-group')
      histogram_groups = with_groups if with_groups else groups

      for group in histogram_groups:
        group_name = group.getAttribute('name')
        try:
          new_histogram_name = _ExpandHistogramNameWithFieldTrial(
              group_name, histogram_name, fieldtrial)
        except Error:
          have_errors = True
          continue

        # An empty group name leaves the name unchanged; in that case the
        # base histogram itself is annotated rather than copied.
        if new_histogram_name != histogram_name:
          histograms[new_histogram_name] = copy.deepcopy(
              histograms[histogram_name])
        expanded = histograms[new_histogram_name]
        expanded.setdefault('fieldtrial_groups', []).append(group_name)
        expanded.setdefault('fieldtrial_names', []).append(name)
        expanded.setdefault('fieldtrial_labels', []).append(
            group_labels.get(group_name, ''))

  return have_errors


def ExtractHistograms(filename):
  """Compute the histogram names and descriptions from the XML representation.

  Args:
    filename: The path to the histograms XML file.

  Returns:
    { 'histogram_name': histogram_description, ... } where each description
    is a dict holding 'summary' and, when present in the XML, 'obsolete',
    'units', 'details', 'enum' and the 'fieldtrial_*' lists.

  Raises:
    Error if the file is not well-formatted.
  """
  # Read the raw bytes so that the XML parser, not the platform's default
  # text encoding, decides how to decode the document.
  with open(filename, 'rb') as f:
    raw_xml = f.read()

  # Parse the XML into a tree
  tree = xml.dom.minidom.parseString(raw_xml)
  NormalizeAllAttributeValues(tree)

  # All three passes always run so that every problem in the file is logged
  # before the single Error is raised.
  enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
  fieldtrial_errors = _UpdateHistogramsWithFieldTrials(tree, histograms)

  if enum_errors or histogram_errors or fieldtrial_errors:
    message = 'Error parsing %s' % filename
    logging.error(message)
    raise Error(message)

  return histograms
def ExtractNames(histograms):
  """Returns the histogram names in sorted order.

  Args:
    histograms: A dict keyed by histogram name.

  Returns:
    A new sorted list containing every key of histograms.
  """
  names = list(histograms)
  names.sort()
  return names