Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / metrics / histograms / extract_histograms.py
blob b8e49ad310e0a73b51870bf3f8d2693928cf87ed
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Extract histogram names from the description XML file.
7 For more information on the format of the XML file, which is self-documenting,
8 see histograms.xml; however, here is a simple example to get you started. The
9 XML below will generate the following five histograms:
11 HistogramTime
12 HistogramEnum
13 HistogramEnum_Chrome
14 HistogramEnum_IE
15 HistogramEnum_Firefox
17 <histogram-configuration>
19 <histograms>
21 <histogram name="HistogramTime" units="milliseconds">
22 <summary>A brief description.</summary>
23 <details>This is a more thorough description of this histogram.</details>
24 </histogram>
26 <histogram name="HistogramEnum" enum="MyEnumType">
27 <summary>This histogram sports an enum value type.</summary>
28 </histogram>
30 </histograms>
32 <enums>
34 <enum name="MyEnumType" type="int">
35 <summary>This is an example enum type, where the values mean little.</summary>
36 <int value="1" label="FIRST_VALUE">This is the first value.</int>
37 <int value="2" label="SECOND_VALUE">This is the second value.</int>
38 </enum>
40 </enums>
42 <histogram_suffixes_list>
44 <histogram_suffixes name="BrowserType">
45 <suffix name="Chrome"/>
46 <suffix name="IE"/>
47 <suffix name="Firefox"/>
48 <affected-histogram name="HistogramEnum"/>
49 </histogram_suffixes>
51 </histogram_suffixes_list>
53 </histogram-configuration>
55 """
57 import copy
58 import logging
59 import xml.dom.minidom
# Placeholder text for an <owner> tag that has not been filled in yet. Owner
# entries containing this text are ignored when owners are extracted.
OWNER_FIELD_PLACEHOLDER = (
    "Please list the metric's owners. "
    "Add more owner tags as needed.")

# Maximum number of times a histogram_suffixes node is put back on the
# reprocess queue while one of its affected histograms is still undefined.
MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5
class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
def _JoinChildNodes(tag):
  """Serialize the children of |tag| into a single string.

  Applicable to leaf elements such as 'summary' and 'details', whose content
  may mix text with inline markup.

  Args:
    tag: The parent minidom node.

  Returns:
    The XML text of every child node, concatenated in document order, with
    leading and trailing whitespace stripped.
  """
  pieces = [child.toxml() for child in tag.childNodes]
  return ''.join(pieces).strip()
def _NormalizeString(s):
  """Collapse every run of whitespace in |s| into a single space.

  Newlines count as whitespace, so multi-line strings are flattened onto one
  line. Leading and trailing whitespace is dropped.

  Args:
    s: The string to normalize, e.g. ' \\n a b c\\n d '.

  Returns:
    The normalized string, e.g. 'a b c d'.
  """
  words = s.split()
  return ' '.join(words)
def _NormalizeAllAttributeValues(node):
  """Normalizes every attribute value in the tree rooted at |node|.

  Each attribute value of each element node is passed through
  _NormalizeString. The tree is modified in place.

  Args:
    node: The minidom node to be normalized.

  Returns:
    The same minidom node that was passed in.
  """
  # Walk the tree with an explicit stack; the visiting order does not matter
  # because every element is normalized independently.
  pending = [node]
  while pending:
    current = pending.pop()
    if current.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
      attributes = current.attributes
      for attr_name in attributes.keys():
        attribute = attributes[attr_name]
        attribute.value = _NormalizeString(attribute.value)
    pending.extend(current.childNodes)
  return node
def _ExpandHistogramNameWithSuffixes(suffix_name, histogram_name,
                                     histogram_suffixes_node):
  """Creates a new histogram name based on a histogram suffix.

  The node's optional 'separator' attribute (default '_') is placed between
  the histogram name and the suffix. Its optional 'ordering' attribute
  (default 'suffix') selects whether the suffix is appended to the name or
  inserted after the first dot-separated section of the name.

  Args:
    suffix_name: The suffix string to apply to the histogram name. May be
      empty, in which case the histogram name is returned unchanged.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName.
    histogram_suffixes_node: The histogram_suffixes XML node.

  Returns:
    A string with the expanded histogram name.

  Raises:
    Error: if 'ordering' is neither 'prefix' nor 'suffix', or if a prefix
      expansion is requested for a histogram name that contains no dot.
  """
  separator = '_'
  if histogram_suffixes_node.hasAttribute('separator'):
    separator = histogram_suffixes_node.getAttribute('separator')

  ordering = 'suffix'
  if histogram_suffixes_node.hasAttribute('ordering'):
    ordering = histogram_suffixes_node.getAttribute('ordering')
  if ordering not in ('prefix', 'suffix'):
    logging.error('ordering needs to be prefix or suffix, value is %s',
                  ordering)
    raise Error()

  # The ordering attribute is validated even when there is nothing to add.
  if not suffix_name:
    return histogram_name

  if ordering == 'suffix':
    return separator.join((histogram_name, suffix_name))

  # For prefixes, the suffix_name is inserted between the "cluster" and the
  # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
  cluster, dot, remainder = histogram_name.partition('.')
  if not dot:
    logging.error(
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s',
        histogram_name, histogram_suffixes_node.getAttribute('name'))
    raise Error()

  return cluster + '.' + suffix_name + separator + remainder
def _ExtractEnumsFromXmlTree(tree):
  """Extract all <enum> nodes in the tree into a dictionary.

  Problems are logged and reported through the returned flag rather than
  raised: an enum whose type is not 'int', enums out of case-insensitive
  alphabetical order, duplicate enum names, and <int> values that are not
  integers, are out of numerical order, or are duplicated within an enum.

  Args:
    tree: The minidom tree to search for <enum> elements.

  Returns:
    A tuple (enums, have_errors). enums maps each enum name to a dictionary
    with the keys 'name', 'values' (mapping int value to a dictionary with
    'label' and 'summary') and, when the enum has a <summary> element,
    'summary'. have_errors is True if any problem was logged.
  """
  enums = {}
  have_errors = False

  last_name = None
  for enum in tree.getElementsByTagName('enum'):
    enum_type = enum.getAttribute('type')
    if enum_type != 'int':
      logging.error('Unknown enum type %s', enum_type)
      have_errors = True
      continue

    name = enum.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Enums %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name

    if name in enums:
      logging.error('Duplicate enum %s', name)
      have_errors = True
      continue

    last_int_value = None
    values = {}
    enum_dict = {'name': name, 'values': values}

    for int_tag in enum.getElementsByTagName('int'):
      raw_value = int_tag.getAttribute('value')
      try:
        int_value = int(raw_value)
      except ValueError:
        # Report a malformed value like any other validation failure instead
        # of aborting the whole extraction with an uncaught exception.
        logging.error('Enum %s has non-integer value "%s"', name, raw_value)
        have_errors = True
        continue
      if last_int_value is not None and int_value < last_int_value:
        logging.error('Enum %s int values %d and %d are not in numerical order',
                      name, last_int_value, int_value)
        have_errors = True
      last_int_value = int_value
      if int_value in values:
        logging.error('Duplicate enum value %d for enum %s', int_value, name)
        have_errors = True
        continue
      values[int_value] = {
          'label': int_tag.getAttribute('label'),
          'summary': _JoinChildNodes(int_tag),
      }

    summary_nodes = enum.getElementsByTagName('summary')
    if summary_nodes:
      enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))

    enums[name] = enum_dict

  return enums, have_errors
def _ExtractOwners(xml_node):
  """Extract all owners into a list from owner tags under |xml_node|.

  Args:
    xml_node: The minidom node whose <owner> descendants are read.

  Returns:
    A list with the whitespace-normalized content of each <owner> tag, in
    document order, skipping entries that contain OWNER_FIELD_PLACEHOLDER.
  """
  entries = (_NormalizeString(_JoinChildNodes(owner_node))
             for owner_node in xml_node.getElementsByTagName('owner'))
  return [entry for entry in entries if OWNER_FIELD_PLACEHOLDER not in entry]
def _ExtractHistogramsFromXmlTree(tree, enums):
  """Extract all <histogram> nodes in the tree into a dictionary.

  Args:
    tree: The minidom tree to search for <histogram> elements.
    enums: A dictionary mapping enum names to enum descriptions, used to
      resolve each histogram's 'enum' attribute.

  Returns:
    A tuple (histograms, have_errors). histograms maps each histogram name to
    a dictionary that always has 'summary' ('TBD' when there is no <summary>
    tag) and may have 'owners', 'obsolete', 'units', 'details' and 'enum'.
    have_errors is True if histograms were out of alphabetical order, a name
    was defined twice, or an unknown enum was referenced.
  """
  # Process the histograms. The descriptions can include HTML tags.
  histograms = {}
  have_errors = False
  previous_name = None
  for histogram in tree.getElementsByTagName('histogram'):
    name = histogram.getAttribute('name')
    out_of_order = (previous_name is not None and
                    name.lower() < previous_name.lower())
    if out_of_order:
      logging.error('Histograms %s and %s are not in alphabetical order',
                    previous_name, name)
      have_errors = True
    previous_name = name
    if name in histograms:
      logging.error('Duplicate histogram definition %s', name)
      have_errors = True
      continue
    entry = {}
    histograms[name] = entry

    # <owner> tags.
    owners = _ExtractOwners(histogram)
    if owners:
      entry['owners'] = owners

    # <summary> tag, with a fixed fallback when it is absent.
    summary_nodes = histogram.getElementsByTagName('summary')
    entry['summary'] = (
        _NormalizeString(_JoinChildNodes(summary_nodes[0]))
        if summary_nodes else 'TBD')

    # <obsolete> tag; its content is stored without whitespace normalization.
    obsolete_nodes = histogram.getElementsByTagName('obsolete')
    if obsolete_nodes:
      entry['obsolete'] = _JoinChildNodes(obsolete_nodes[0])

    # 'units' attribute.
    if histogram.hasAttribute('units'):
      entry['units'] = histogram.getAttribute('units')

    # <details> tag.
    details_nodes = histogram.getElementsByTagName('details')
    if details_nodes:
      entry['details'] = _NormalizeString(_JoinChildNodes(details_nodes[0]))

    # 'enum' attribute, resolved against the previously extracted enums.
    if histogram.hasAttribute('enum'):
      enum_name = histogram.getAttribute('enum')
      if enum_name in enums:
        entry['enum'] = enums[enum_name]
      else:
        logging.error('Unknown enum %s in histogram %s', enum_name, name)
        have_errors = True

  return histograms, have_errors
def _GetObsoleteReason(node):
  """Finds the <obsolete> node amongst |node|'s immediate children.

  Args:
    node: The minidom node whose direct children are inspected.

  Returns:
    The content of the first <obsolete> child as a string, or None if no such
    child exists.
  """
  for child in node.childNodes:
    if child.localName != 'obsolete':
      continue
    # There can be at most 1 obsolete element per node.
    return _JoinChildNodes(child)
  return None
def _UpdateHistogramsWithSuffixes(tree, histograms):
  """Process <histogram_suffixes> tags and combine with affected histograms.

  The histograms dictionary will be updated in-place by adding new histograms
  created by combining histograms themselves with histogram_suffixes targeting
  these histograms.

  Args:
    tree: XML dom tree.
    histograms: a dictionary of histograms previously extracted from the tree.

  Returns:
    True if any errors were found.
  """
  have_errors = False

  histogram_suffix_tag = 'histogram_suffixes'
  suffix_tag = 'suffix'
  with_tag = 'with-suffix'

  # Verify order of histogram_suffixes fields first.
  previous_name = None
  for suffixes_node in tree.getElementsByTagName(histogram_suffix_tag):
    name = suffixes_node.getAttribute('name')
    if previous_name is not None and name.lower() < previous_name.lower():
      logging.error('histogram_suffixes %s and %s are not in alphabetical '
                    'order', previous_name, name)
      have_errors = True
    previous_name = name

  # histogram_suffixes can depend on other histogram_suffixes, so we need to be
  # careful. Nodes whose dependencies have not yet been processed are appended
  # to this queue, which the generator below drains after the nodes from the
  # tree, so they are retried later.
  reprocess_queue = []

  def GenerateHistogramSuffixes():
    for node in tree.getElementsByTagName(histogram_suffix_tag):
      yield 0, node
    # The queue may grow while it is being iterated; new items are picked up.
    for queued in reprocess_queue:
      yield queued

  for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
    # Check dependencies first: every affected histogram must already exist.
    affected_histograms = histogram_suffixes.getElementsByTagName(
        'affected-histogram')
    missing_dependency = None
    for affected_histogram in affected_histograms:
      affected_name = affected_histogram.getAttribute('name')
      if affected_name not in histograms:
        # Base histogram is missing.
        missing_dependency = affected_name
        break
    if missing_dependency is not None:
      if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
      else:
        logging.error('histogram_suffixes %s is missing its dependency %s',
                      histogram_suffixes.getAttribute('name'),
                      missing_dependency)
        have_errors = True
      continue

    # If the suffix group has an obsolete tag, all suffixes it generates inherit
    # its reason.
    group_obsolete_reason = _GetObsoleteReason(histogram_suffixes)

    name = histogram_suffixes.getAttribute('name')
    suffix_nodes = histogram_suffixes.getElementsByTagName(suffix_tag)
    suffix_labels = dict(
        (node.getAttribute('name'), node.getAttribute('label'))
        for node in suffix_nodes)
    # Find owners list under current histogram_suffixes tag.
    owners = _ExtractOwners(histogram_suffixes)

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None
          and histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of histogram_suffixes %s '
                      'are not in alphabetical order',
                      last_histogram_name, histogram_name, name)
        have_errors = True
      last_histogram_name = histogram_name

      # <with-suffix> children, when present, replace the group's suffix list
      # for this affected histogram.
      with_suffixes = affected_histogram.getElementsByTagName(with_tag)
      suffixes_to_add = with_suffixes or suffix_nodes

      for suffix in suffixes_to_add:
        suffix_name = suffix.getAttribute('name')
        try:
          new_histogram_name = _ExpandHistogramNameWithSuffixes(
              suffix_name, histogram_name, histogram_suffixes)
        except Error:
          have_errors = True
          continue

        if new_histogram_name != histogram_name:
          histograms[new_histogram_name] = copy.deepcopy(
              histograms[histogram_name])
        entry = histograms[new_histogram_name]

        # TODO(yiyaoliu): Rename these to be consistent with the new naming.
        # It is kept unchanged for now because it's used by dashboards.
        entry.setdefault('fieldtrial_groups', []).append(suffix_name)
        entry.setdefault('fieldtrial_names', []).append(name)
        entry.setdefault('fieldtrial_labels', []).append(
            suffix_labels.get(suffix_name, ''))

        # If no owners are added for this histogram-suffixes, it inherits the
        # owners of its parents.
        if owners:
          entry['owners'] = owners

        # If a suffix has an obsolete node, it's marked as obsolete for the
        # specified reason, overwriting its group's obsoletion reason if the
        # group itself was obsolete as well.
        obsolete_reason = _GetObsoleteReason(suffix) or group_obsolete_reason

        # If the suffix has an obsolete tag, all histograms it generates
        # inherit it.
        if obsolete_reason:
          entry['obsolete'] = obsolete_reason

  return have_errors
def ExtractHistogramsFromFile(file_handle):
  """Compute the histogram names and descriptions from the XML representation.

  Args:
    file_handle: A file or file-like with XML content.

  Returns:
    a tuple of (histograms, status) where histograms is a dictionary mapping
    histogram names to dictionaries containing histogram descriptions and status
    is a boolean indicating if errors were encountered in processing.
  """
  tree = _NormalizeAllAttributeValues(xml.dom.minidom.parse(file_handle))

  enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
  update_errors = _UpdateHistogramsWithSuffixes(tree, histograms)

  had_errors = enum_errors or histogram_errors or update_errors
  return histograms, had_errors
def ExtractHistograms(filename):
  """Load histogram definitions from a disk file.

  Args:
    filename: a file path to load data from.

  Returns:
    a dictionary of histogram descriptions.

  Raises:
    Error: if the file is not well-formatted.
  """
  # Binary mode hands the raw bytes to the XML parser, which then applies the
  # encoding declared by the document instead of the platform's default text
  # encoding.
  with open(filename, 'rb') as f:
    histograms, had_errors = ExtractHistogramsFromFile(f)
  if had_errors:
    logging.error('Error parsing %s', filename)
    raise Error('Error parsing %s' % filename)
  return histograms
def ExtractNames(histograms):
  """Return the names in the |histograms| dictionary as a sorted list."""
  return sorted(histograms)