1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Extract histogram names from the description XML file.
7 For more information on the format of the XML file, which is self-documenting,
8 see histograms.xml; however, here is a simple example to get you started. The
9 XML below will generate the following five histograms:
17 <histogram-configuration>
21 <histogram name="HistogramTime" units="milliseconds">
22 <summary>A brief description.</summary>
23 <details>This is a more thorough description of this histogram.</details>
26 <histogram name="HistogramEnum" enum="MyEnumType">
27 <summary>This histogram sports an enum value type.</summary>
<enum name="MyEnumType" type="int">
35 <summary>This is an example enum type, where the values mean little.</summary>
36 <int value="1" label="FIRST_VALUE">This is the first value.</int>
37 <int value="2" label="SECOND_VALUE">This is the second value.</int>
42 <histogram_suffixes_list>
44 <histogram_suffixes name="BrowserType">
45 <suffix name="Chrome"/>
47 <suffix name="Firefox"/>
48 <affected-histogram name="HistogramEnum"/>
51 </histogram_suffixes_list>
53 </histogram-configuration>
import copy
import logging
import xml.dom.minidom
# Boilerplate text found in not-yet-filled-in <owner> tags. Owner entries
# containing this text are ignored by _ExtractOwners().
OWNER_FIELD_PLACEHOLDER = (
    "Please list the metric's owners. Add more owner tags as needed.")

# Upper bound on how many times a histogram_suffixes node may be re-queued by
# _UpdateHistogramsWithSuffixes() while waiting for a missing base histogram.
MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5
class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
def _JoinChildNodes(tag):
  """Concatenates the XML of every child of |tag| into one string.

  Intended for leaf elements such as 'summary' and 'details', whose content
  may mix plain text with inline markup.

  Args:
    tag: the parent minidom node.

  Returns:
    The serialized children joined together, with leading and trailing
    whitespace removed.
  """
  serialized_children = [child.toxml() for child in tag.childNodes]
  return ''.join(serialized_children).strip()
def _NormalizeString(s):
  """Collapses every run of whitespace in |s| into a single space.

  Leading and trailing whitespace is dropped, and line breaks count as
  whitespace, so multi-line strings become a single line.

  Args:
    s: The string to normalize, e.g. '  \\n a  b c\\n d  '.

  Returns:
    The normalized string, e.g. 'a b c d'.
  """
  words = s.split()
  return ' '.join(words)
def _NormalizeAllAttributeValues(node):
  """Whitespace-normalizes every attribute value in the tree under |node|.

  The tree is modified in place: each attribute of each element node has its
  value passed through _NormalizeString(). Non-element nodes have no
  attributes to rewrite, but their children are still visited.

  Args:
    node: The minidom node to be normalized.

  Returns:
    The same minidom node that was passed in.
  """
  is_element = node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE
  if is_element:
    for attribute in node.attributes.values():
      attribute.value = _NormalizeString(attribute.value)

  for child in node.childNodes:
    _NormalizeAllAttributeValues(child)
  return node
def _ExpandHistogramNameWithSuffixes(suffix_name, histogram_name,
                                     histogram_suffixes_node):
  """Builds the histogram name produced by applying one suffix.

  Two optional attributes of the histogram_suffixes node control the result:
  'separator' is the string placed between the suffix and the name ('_' when
  the attribute is absent), and 'ordering' is either 'suffix' (used when the
  attribute is absent) or 'prefix'.

  Args:
    suffix_name: The suffix string to apply to the histogram name. May be empty.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName.
    histogram_suffixes_node: The histogram_suffixes XML node.

  Returns:
    A string with the expanded histogram name. An empty suffix_name yields
    histogram_name unchanged.

  Raises:
    Error: if 'ordering' is neither 'prefix' nor 'suffix', or if a prefix
      expansion is requested for a histogram name that contains no dot.
  """
  node = histogram_suffixes_node
  separator = (
      node.getAttribute('separator') if node.hasAttribute('separator') else '_')
  ordering = (
      node.getAttribute('ordering') if node.hasAttribute('ordering')
      else 'suffix')

  # The ordering is validated even when there is nothing to append.
  if ordering != 'prefix' and ordering != 'suffix':
    logging.error('ordering needs to be prefix or suffix, value is %s',
                  ordering)
    raise Error()

  if not suffix_name:
    return histogram_name

  if ordering == 'suffix':
    return ''.join([histogram_name, separator, suffix_name])

  # For prefixes, the suffix_name is inserted between the "cluster" (everything
  # up to and including the first dot) and the "remainder", e.g. Foo.BarHist
  # expanded with gamma becomes Foo.gamma_BarHist.
  cluster, dot, remainder = histogram_name.partition('.')
  if not dot:
    logging.error(
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s',
        histogram_name, node.getAttribute('name'))
    raise Error()

  return ''.join([cluster, dot, suffix_name, separator, remainder])
def _ExtractEnumsFromXmlTree(tree):
  """Collects every <enum> node of |tree| into a dictionary.

  Problems are reported through logging.error() and remembered rather than
  raised: an enum whose 'type' is not 'int' or whose name was already seen is
  skipped, a repeated int value is skipped, and enums or int values that are
  out of order are still recorded.

  Args:
    tree: the minidom tree to search.

  Returns:
    A tuple (enums, have_errors). enums maps each enum name to a dictionary
    with the keys 'name', 'values' (int value -> {'label', 'summary'}) and,
    when the enum has a <summary> tag, 'summary'. have_errors is True if any
    problem was logged.
  """
  enums = {}
  have_errors = False
  previous_name = None

  for enum_node in tree.getElementsByTagName('enum'):
    enum_type = enum_node.getAttribute('type')
    if enum_type != 'int':
      logging.error('Unknown enum type %s', enum_type)
      have_errors = True
      continue

    name = enum_node.getAttribute('name')
    # Ordering is compared case-insensitively against the previous enum.
    if previous_name is not None and name.lower() < previous_name.lower():
      logging.error('Enums %s and %s are not in alphabetical order',
                    previous_name, name)
      have_errors = True
    previous_name = name

    if name in enums:
      logging.error('Duplicate enum %s', name)
      have_errors = True
      continue

    values = {}
    previous_value = None
    for int_node in enum_node.getElementsByTagName('int'):
      int_value = int(int_node.getAttribute('value'))
      if previous_value is not None and int_value < previous_value:
        logging.error('Enum %s int values %d and %d are not in numerical order',
                      name, previous_value, int_value)
        have_errors = True
      previous_value = int_value

      if int_value in values:
        logging.error('Duplicate enum value %d for enum %s', int_value, name)
        have_errors = True
        continue

      values[int_value] = {
          'label': int_node.getAttribute('label'),
          'summary': _JoinChildNodes(int_node),
      }

    enum_dict = {'name': name, 'values': values}
    summary_nodes = enum_node.getElementsByTagName('summary')
    if summary_nodes:
      enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))

    enums[name] = enum_dict

  return enums, have_errors
def _ExtractOwners(xml_node):
  """Returns the owners listed in <owner> tags under |xml_node|.

  Each owner entry is whitespace-normalized. Entries that still contain the
  OWNER_FIELD_PLACEHOLDER text are left out.

  Args:
    xml_node: the minidom node whose <owner> descendants are read.

  Returns:
    A list of owner strings, in document order.
  """
  entries = (_NormalizeString(_JoinChildNodes(owner_node))
             for owner_node in xml_node.getElementsByTagName('owner'))
  return [entry for entry in entries if OWNER_FIELD_PLACEHOLDER not in entry]
def _ExtractHistogramsFromXmlTree(tree, enums):
  """Collects every <histogram> node of |tree| into a dictionary.

  Problems are reported through logging.error() and remembered rather than
  raised. A histogram whose name was already seen is skipped; histograms that
  are out of alphabetical order or that name an unknown enum are still
  recorded.

  Args:
    tree: the minidom tree to search.
    enums: dictionary of enum descriptions keyed by enum name, as returned by
      _ExtractEnumsFromXmlTree().

  Returns:
    A tuple (histograms, have_errors). histograms maps each histogram name to
    a dictionary that always has 'summary' ('TBD' when there is no <summary>
    tag) and, when present in the XML, 'owners', 'obsolete', 'units',
    'details' and 'enum'. have_errors is True if any problem was logged.
  """
  histograms = {}
  have_errors = False
  previous_name = None

  # The descriptions can include HTML tags.
  for histogram in tree.getElementsByTagName('histogram'):
    name = histogram.getAttribute('name')
    # Ordering is compared case-insensitively against the previous histogram.
    if previous_name is not None and name.lower() < previous_name.lower():
      logging.error('Histograms %s and %s are not in alphabetical order',
                    previous_name, name)
      have_errors = True
    previous_name = name

    if name in histograms:
      logging.error('Duplicate histogram definition %s', name)
      have_errors = True
      continue

    entry = {}
    histograms[name] = entry

    # <owner> tags; the key is omitted when no real owner is listed.
    owners = _ExtractOwners(histogram)
    if owners:
      entry['owners'] = owners

    # <summary> tag, with a fixed fallback text.
    summary_nodes = histogram.getElementsByTagName('summary')
    entry['summary'] = (
        _NormalizeString(_JoinChildNodes(summary_nodes[0]))
        if summary_nodes else 'TBD')

    # <obsolete> tag; the reason is stored without whitespace normalization.
    obsolete_nodes = histogram.getElementsByTagName('obsolete')
    if obsolete_nodes:
      entry['obsolete'] = _JoinChildNodes(obsolete_nodes[0])

    # 'units' attribute.
    if histogram.hasAttribute('units'):
      entry['units'] = histogram.getAttribute('units')

    # <details> tag.
    details_nodes = histogram.getElementsByTagName('details')
    if details_nodes:
      entry['details'] = _NormalizeString(_JoinChildNodes(details_nodes[0]))

    # 'enum' attribute, resolved against the known enums.
    if histogram.hasAttribute('enum'):
      enum_name = histogram.getAttribute('enum')
      if enum_name in enums:
        entry['enum'] = enums[enum_name]
      else:
        logging.error('Unknown enum %s in histogram %s', enum_name, name)
        have_errors = True

  return histograms, have_errors
def _GetObsoleteReason(node):
  """Returns the content of the <obsolete> node directly under |node|.

  Only immediate children are examined, and the first match wins (there can
  be at most one obsolete element per node).

  Args:
    node: the minidom node whose children are searched.

  Returns:
    The obsolete node's content as a string, or None if no such node exists.
  """
  obsolete_child = next(
      (child for child in node.childNodes if child.localName == 'obsolete'),
      None)
  if obsolete_child is None:
    return None
  return _JoinChildNodes(obsolete_child)
def _UpdateHistogramsWithSuffixes(tree, histograms):
  """Process <histogram_suffixes> tags and combine with affected histograms.

  The histograms dictionary will be updated in-place by adding new histograms
  created by combining histograms themselves with histogram_suffixes targeting
  these histograms. Each generated entry is a deep copy of its base histogram
  with the suffix's name, group name and label appended to the
  'fieldtrial_groups', 'fieldtrial_names' and 'fieldtrial_labels' lists. An
  empty suffix name annotates the base histogram itself instead of copying it.

  Args:
    tree: XML dom tree.
    histograms: a dictionary of histograms previously extracted from the tree;
      modified in place.

  Returns:
    True if any errors were found.
  """
  have_errors = False

  histogram_suffix_tag = 'histogram_suffixes'
  suffix_tag = 'suffix'
  with_tag = 'with-suffix'

  # Verify order of histogram_suffixes fields first.
  last_name = None
  for histogram_suffixes in tree.getElementsByTagName(histogram_suffix_tag):
    name = histogram_suffixes.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('histogram_suffixes %s and %s are not in alphabetical '
                    'order', last_name, name)
      have_errors = True
    last_name = name

  # histogram_suffixes can depend on other histogram_suffixes, so we need to be
  # careful. histogram_suffixes whose dependencies have not yet been processed
  # get relegated to the back of this queue to be processed later.
  reprocess_queue = []

  def GenerateHistogramSuffixes():
    for f in tree.getElementsByTagName(histogram_suffix_tag):
      yield 0, f
    # The queue may grow while it is being iterated; items appended by the
    # main loop below are picked up by this same loop.
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
    # Check dependencies first: every affected histogram must already exist.
    affected_histograms = histogram_suffixes.getElementsByTagName(
        'affected-histogram')
    missing_dependency = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if histogram_name not in histograms:
        # Base histogram is missing.
        missing_dependency = histogram_name
        break

    if missing_dependency is not None:
      if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
        # Retry later; another suffix group may generate the base histogram.
        reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
      else:
        logging.error('histogram_suffixes %s is missing its dependency %s',
                      histogram_suffixes.getAttribute('name'),
                      missing_dependency)
        have_errors = True
      continue

    # If the suffix group has an obsolete tag, all suffixes it generates inherit
    # its reason.
    group_obsolete_reason = _GetObsoleteReason(histogram_suffixes)

    name = histogram_suffixes.getAttribute('name')
    suffix_nodes = histogram_suffixes.getElementsByTagName(suffix_tag)
    suffix_labels = {}
    for suffix in suffix_nodes:
      suffix_labels[suffix.getAttribute('name')] = suffix.getAttribute('label')
    # Find owners list under current histogram_suffixes tag.
    owners = _ExtractOwners(histogram_suffixes)

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None
          and histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of histogram_suffixes %s '
                      'are not in alphabetical order',
                      last_histogram_name, histogram_name, name)
        have_errors = True
      last_histogram_name = histogram_name

      # <with-suffix> children, when present, restrict which suffixes apply to
      # this particular histogram; otherwise every suffix of the group applies.
      suffixes_to_add = (
          affected_histogram.getElementsByTagName(with_tag) or suffix_nodes)

      for suffix in suffixes_to_add:
        suffix_name = suffix.getAttribute('name')
        # Only the name expansion raises Error, so only it is guarded.
        try:
          new_histogram_name = _ExpandHistogramNameWithSuffixes(
              suffix_name, histogram_name, histogram_suffixes)
        except Error:
          have_errors = True
          continue

        if new_histogram_name != histogram_name:
          histograms[new_histogram_name] = copy.deepcopy(
              histograms[histogram_name])
        new_histogram = histograms[new_histogram_name]

        # TODO(yiyaoliu): Rename these to be consistent with the new naming.
        # It is kept unchanged for now because it's used by dashboards.
        new_histogram.setdefault('fieldtrial_groups', []).append(suffix_name)
        new_histogram.setdefault('fieldtrial_names', []).append(name)
        new_histogram.setdefault('fieldtrial_labels', []).append(
            suffix_labels.get(suffix_name, ''))

        # If no owners are added for this histogram-suffixes, it inherits the
        # owners of its parents. Each histogram gets its own copy of the list
        # so that editing one entry's owners cannot affect the others.
        if owners:
          new_histogram['owners'] = list(owners)

        # If a suffix has an obsolete node, it's marked as obsolete for the
        # specified reason, overwriting its group's obsoletion reason if the
        # group itself was obsolete as well.
        obsolete_reason = _GetObsoleteReason(suffix) or group_obsolete_reason

        # If the suffix has an obsolete tag, all histograms it generates
        # inherit it.
        if obsolete_reason:
          new_histogram['obsolete'] = obsolete_reason

  return have_errors
def ExtractHistogramsFromFile(file_handle):
  """Computes the histogram names and descriptions from an XML stream.

  The XML is parsed, all attribute values are whitespace-normalized, and then
  enums, histograms and histogram suffixes are processed in that order.

  Args:
    file_handle: A file or file-like with XML content.

  Returns:
    A tuple of (histograms, status) where histograms is a dictionary mapping
    histogram names to dictionaries containing histogram descriptions and
    status is a boolean indicating if errors were encountered in processing.
  """
  tree = xml.dom.minidom.parse(file_handle)
  _NormalizeAllAttributeValues(tree)

  enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
  update_errors = _UpdateHistogramsWithSuffixes(tree, histograms)

  had_errors = enum_errors or histogram_errors or update_errors
  return histograms, had_errors
def ExtractHistograms(filename):
  """Loads histogram definitions from a file on disk.

  Args:
    filename: a file path to load data from.

  Returns:
    A dictionary of histogram descriptions keyed by histogram name.

  Raises:
    Error: if the file is not well-formatted, i.e. any error was reported
      while processing it.
  """
  with open(filename, 'r') as f:
    histograms, had_errors = ExtractHistogramsFromFile(f)

  if not had_errors:
    return histograms

  logging.error('Error parsing %s', filename)
  raise Error()
def ExtractNames(histograms):
  """Returns the histogram names in |histograms| as a sorted list.

  Args:
    histograms: a dictionary keyed by histogram name.

  Returns:
    A new list containing the keys of |histograms| in ascending order.
  """
  names = list(histograms.keys())
  names.sort()
  return names