Revert 250828 "Add a UMA stat to track if the Browser blacklist ..."
[chromium-blink-merge.git] / tools / metrics / histograms / extract_histograms.py
blob6203e3b71a875ee3378d22018dd525cd972b02e4
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Extract histogram names from the description XML file.
7 For more information on the format of the XML file, which is self-documenting,
8 see histograms.xml; however, here is a simple example to get you started. The
9 XML below will generate the following five histograms:
11 HistogramTime
12 HistogramEnum
13 HistogramEnum_Chrome
14 HistogramEnum_IE
15 HistogramEnum_Firefox
17 <histogram-configuration>
19 <histograms>
21 <histogram name="HistogramTime" units="milliseconds">
22 <summary>A brief description.</summary>
23 <details>This is a more thorough description of this histogram.</details>
24 </histogram>
26 <histogram name="HistogramEnum" enum="MyEnumType">
27 <summary>This histogram sports an enum value type.</summary>
28 </histogram>
30 </histograms>
32 <enums>
34 <enum name="MyEnumType" type="int">
35 <summary>This is an example enum type, where the values mean little.</summary>
36 <int value="1" label="FIRST_VALUE">This is the first value.</int>
37 <int value="2" label="SECOND_VALUE">This is the second value.</int>
38 </enum>
40 </enums>
42 <fieldtrials>
44 <fieldtrial name="BrowserType">
45 <group name="Chrome"/>
46 <group name="IE"/>
47 <group name="Firefox"/>
48 <affected-histogram name="HistogramEnum"/>
49 </fieldtrial>
51 </fieldtrials>
53 </histogram-configuration>
55 """
57 import copy
58 import logging
59 import xml.dom.minidom
# Upper bound on how many times a field trial whose affected histograms are
# not all known yet gets re-queued before it is reported as an error.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5
class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
def _JoinChildNodes(tag):
  """Serialize the children of a node and glue them into a single string.

  Meant for leaf-like elements such as 'summary' and 'details', whose content
  may mix plain text with inline markup.

  Args:
    tag: The parent minidom node whose children are serialized.

  Returns:
    The concatenated XML text of every child node, with leading and trailing
    whitespace removed.
  """
  pieces = []
  for child in tag.childNodes:
    pieces.append(child.toxml())
  joined = ''.join(pieces)
  return joined.strip()
def _NormalizeString(s):
  """Collapse every run of whitespace in a string into one space.

  Leading and trailing whitespace is dropped entirely, so multi-line input
  comes back as a single line.

  Args:
    s: The string to normalize, e.g. '  \n a  b c\n d  '

  Returns:
    The normalized string, e.g. 'a b c d'
  """
  words = s.split()
  return ' '.join(words)
def _NormalizeAllAttributeValues(node):
  """Recursively normalizes all tag attribute values in the given tree.

  Attribute values are rewritten in place with _NormalizeString. Only element
  nodes are inspected for attributes; every other node type is just traversed
  so that its descendants are reached.

  Args:
    node: The minidom node to be normalized.

  Returns:
    The same minidom node that was passed in, after normalization.
  """
  if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
    for attr_name in node.attributes.keys():
      attribute = node.attributes[attr_name]
      attribute.value = _NormalizeString(attribute.value)

  for child in node.childNodes:
    _NormalizeAllAttributeValues(child)
  return node
def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
  """Creates a new histogram name based on the field trial group.

  The fieldtrial element may carry two optional attributes: 'separator'
  (defaults to '_') and 'ordering' ('prefix' or 'suffix', defaults to
  'suffix').

  Args:
    group_name: The name of the field trial group. May be empty.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName
    fieldtrial: The FieldTrial XML element.

  Returns:
    A string with the expanded histogram name. When group_name is empty the
    histogram name is returned unchanged.

  Raises:
    Error if the expansion can't be done.
  """
  if fieldtrial.hasAttribute('separator'):
    separator = fieldtrial.getAttribute('separator')
  else:
    separator = '_'

  if fieldtrial.hasAttribute('ordering'):
    ordering = fieldtrial.getAttribute('ordering')
  else:
    ordering = 'suffix'
  # The ordering is validated even when group_name is empty.
  if ordering not in ('prefix', 'suffix'):
    message = 'ordering needs to be prefix or suffix, value is %s' % ordering
    logging.error(message)
    # Carry the reason in the exception as well as in the log.
    raise Error(message)

  if not group_name:
    return histogram_name

  if ordering == 'suffix':
    return histogram_name + separator + group_name

  # For prefixes, the group_name is inserted between the "cluster" and the
  # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
  sections = histogram_name.split('.')
  if len(sections) <= 1:
    message = (
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s' %
        (histogram_name, fieldtrial.getAttribute('name')))
    logging.error(message)
    raise Error(message)

  cluster = sections[0] + '.'
  remainder = '.'.join(sections[1:])
  return cluster + group_name + separator + remainder
def _ExtractEnumsFromXmlTree(tree):
  """Extract all <enum> nodes in the tree into a dictionary.

  Problems (unknown enum type, names out of alphabetical order, duplicate
  enums, malformed, unordered or duplicate int values) are logged and
  reported through the returned flag rather than raised.

  Args:
    tree: The minidom tree to search for <enum> elements.

  Returns:
    A tuple (enums, have_errors). enums maps each enum name to a dictionary
    holding 'name', 'values' (int value -> {'label': ..., 'summary': ...})
    and, when the enum has a <summary> child, 'summary'. have_errors is True
    if any problem was logged.
  """
  enums = {}
  have_errors = False

  last_name = None
  for enum in tree.getElementsByTagName('enum'):
    if enum.getAttribute('type') != 'int':
      logging.error('Unknown enum type %s' % enum.getAttribute('type'))
      have_errors = True
      continue

    name = enum.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Enums %s and %s are not in alphabetical order'
                    % (last_name, name))
      have_errors = True
    last_name = name

    if name in enums:
      logging.error('Duplicate enum %s' % name)
      have_errors = True
      continue

    last_int_value = None
    enum_dict = {}
    enum_dict['name'] = name
    enum_dict['values'] = {}

    for int_tag in enum.getElementsByTagName('int'):
      value_dict = {}
      raw_value = int_tag.getAttribute('value')
      try:
        int_value = int(raw_value)
      except ValueError:
        # A malformed value is reported like every other validation problem
        # instead of aborting the whole extraction with a traceback.
        logging.error("Enum %s has a non-integer value '%s'"
                      % (name, raw_value))
        have_errors = True
        continue
      if last_int_value is not None and int_value < last_int_value:
        logging.error('Enum %s int values %d and %d are not in numerical order'
                      % (name, last_int_value, int_value))
        have_errors = True
      last_int_value = int_value
      if int_value in enum_dict['values']:
        logging.error('Duplicate enum value %d for enum %s' % (int_value, name))
        have_errors = True
        continue
      value_dict['label'] = int_tag.getAttribute('label')
      value_dict['summary'] = _JoinChildNodes(int_tag)
      enum_dict['values'][int_value] = value_dict

    summary_nodes = enum.getElementsByTagName('summary')
    if len(summary_nodes) > 0:
      enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))

    enums[name] = enum_dict

  return enums, have_errors
def _ExtractHistogramsFromXmlTree(tree, enums):
  """Collect every <histogram> element of the tree into a dictionary.

  Summaries and details may contain HTML tags; they are kept as serialized
  markup with whitespace normalized.

  Args:
    tree: The minidom tree to search for <histogram> elements.
    enums: Dictionary of known enums, keyed by enum name.

  Returns:
    A tuple (histograms, have_errors). histograms maps each histogram name to
    a dictionary that always has 'summary' ('TBD' when no <summary> tag is
    present) and optionally 'obsolete', 'units', 'details' and 'enum'.
    have_errors is True if names were out of alphabetical order, duplicated,
    or referred to an unknown enum.
  """
  extracted = {}
  found_errors = False
  previous_name = None

  for node in tree.getElementsByTagName('histogram'):
    name = node.getAttribute('name')
    out_of_order = (previous_name is not None and
                    name.lower() < previous_name.lower())
    if out_of_order:
      logging.error('Histograms %s and %s are not in alphabetical order'
                    % (previous_name, name))
      found_errors = True
    previous_name = name

    if name in extracted:
      logging.error('Duplicate histogram definition %s' % name)
      found_errors = True
      continue

    entry = {}
    extracted[name] = entry

    # <summary>: fall back to a placeholder when the tag is absent.
    summaries = node.getElementsByTagName('summary')
    if summaries:
      entry['summary'] = _NormalizeString(_JoinChildNodes(summaries[0]))
    else:
      entry['summary'] = 'TBD'

    # <obsolete>: the reason text is stored without whitespace normalization.
    obsoletes = node.getElementsByTagName('obsolete')
    if obsoletes:
      entry['obsolete'] = _JoinChildNodes(obsoletes[0])

    # units attribute.
    if node.hasAttribute('units'):
      entry['units'] = node.getAttribute('units')

    # <details>.
    details = node.getElementsByTagName('details')
    if details:
      entry['details'] = _NormalizeString(_JoinChildNodes(details[0]))

    # enum attribute: must name an enum that was extracted beforehand.
    if node.hasAttribute('enum'):
      enum_name = node.getAttribute('enum')
      if enum_name in enums:
        entry['enum'] = enums[enum_name]
      else:
        logging.error('Unknown enum %s in histogram %s' % (enum_name, name))
        found_errors = True

  return extracted, found_errors
def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
  """Process field trials' tags and combine with affected histograms.

  The histograms dictionary will be updated in-place by adding new histograms
  created by combining histograms themselves with field trials targeting these
  histograms.

  Args:
    tree: XML dom tree.
    histograms: a dictionary of histograms previously extracted from the tree.

  Returns:
    True if any errors were found.
  """
  have_errors = False

  # Verify order of fieldtrial fields first.
  last_name = None
  for fieldtrial in tree.getElementsByTagName('fieldtrial'):
    name = fieldtrial.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Field trials %s and %s are not in alphabetical order'
                    % (last_name, name))
      have_errors = True
    last_name = name

  # Field trials can depend on other field trials, so we need to be careful.
  # Make a temporary copy of the list of field trials to use as a queue.
  # Field trials whose dependencies have not yet been processed will get
  # relegated to the back of the queue to be processed later.
  reprocess_queue = []
  def GenerateFieldTrials():
    for f in tree.getElementsByTagName('fieldtrial'):
      yield 0, f
    # Entries appended to reprocess_queue while this loop is running are
    # still visited, which is what lets postponed field trials be retried.
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, fieldtrial in GenerateFieldTrials():
    # Check dependencies first
    dependencies_valid = True
    affected_histograms = fieldtrial.getElementsByTagName('affected-histogram')
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if histogram_name not in histograms:
        # Base histogram is missing
        dependencies_valid = False
        missing_dependency = histogram_name
        break
    if not dependencies_valid:
      if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, fieldtrial))
        continue
      else:
        logging.error('Field trial %s is missing its dependency %s'
                      % (fieldtrial.getAttribute('name'),
                         missing_dependency))
        have_errors = True
        continue

    name = fieldtrial.getAttribute('name')
    groups = fieldtrial.getElementsByTagName('group')
    group_labels = {}
    for group in groups:
      group_labels[group.getAttribute('name')] = group.getAttribute('label')

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None
          and histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of field trial %s are not '
                      'in alphabetical order'
                      % (last_histogram_name, histogram_name, name))
        have_errors = True
      last_histogram_name = histogram_name
      # <with-group> children, when present, replace the field trial's own
      # group list for this particular histogram.
      with_groups = affected_histogram.getElementsByTagName('with-group')
      if len(with_groups) > 0:
        histogram_groups = with_groups
      else:
        histogram_groups = groups
      for group in histogram_groups:
        group_name = group.getAttribute('name')
        try:
          new_histogram_name = _ExpandHistogramNameWithFieldTrial(
              group_name, histogram_name, fieldtrial)
          # An unchanged name (empty group name) annotates the base
          # histogram itself instead of a copy.
          if new_histogram_name != histogram_name:
            histograms[new_histogram_name] = copy.deepcopy(
                histograms[histogram_name])

          group_label = group_labels.get(group_name, '')

          entry = histograms[new_histogram_name]
          entry.setdefault('fieldtrial_groups', []).append(group_name)
          entry.setdefault('fieldtrial_names', []).append(name)
          entry.setdefault('fieldtrial_labels', []).append(group_label)

        except Error:
          have_errors = True

  return have_errors
def ExtractHistogramsFromFile(file_handle):
  """Compute the histogram names and descriptions from the XML representation.

  Args:
    file_handle: A file or file-like with XML content.

  Returns:
    a tuple of (histograms, status) where histograms is a dictionary mapping
    histogram names to dictionaries containing histogram descriptions and status
    is a boolean indicating if errors were encountered in processing.
  """
  tree = xml.dom.minidom.parse(file_handle)
  # Attribute values are whitespace-normalized before anything reads them.
  _NormalizeAllAttributeValues(tree)

  # Enums first: histograms look their enum up by name, and field trials
  # expand histograms that have already been extracted.
  enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
  update_errors = _UpdateHistogramsWithFieldTrialInformation(tree, histograms)

  return histograms, enum_errors or histogram_errors or update_errors
def ExtractHistograms(filename):
  """Load histogram definitions from a disk file.

  Args:
    filename: a file path to load data from.

  Returns:
    A dictionary mapping histogram names to their descriptions.

  Raises:
    Error if the file is not well-formatted.
  """
  # Binary mode hands the raw bytes to the XML parser, so the encoding comes
  # from the document itself rather than from the platform's text decoding.
  with open(filename, 'rb') as f:
    histograms, had_errors = ExtractHistogramsFromFile(f)
  if had_errors:
    message = 'Error parsing %s' % filename
    logging.error(message)
    # Carry the reason in the exception as well as in the log.
    raise Error(message)
  return histograms
def ExtractNames(histograms):
  """Return the histogram names in sorted order.

  Args:
    histograms: A dictionary keyed by histogram name.

  Returns:
    A new sorted list containing every key of the dictionary.
  """
  names = list(histograms.keys())
  names.sort()
  return names