1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Extract histogram names from the description XML file.
7 For more information on the format of the XML file, which is self-documenting,
8 see histograms.xml; however, here is a simple example to get you started. The
9 XML below will generate the following five histograms:
17 <histogram-configuration>
21 <histogram name="HistogramTime" units="milliseconds">
22 <summary>A brief description.</summary>
23 <details>This is a more thorough description of this histogram.</details>
26 <histogram name="HistogramEnum" enum="MyEnumType">
27 <summary>This histogram sports an enum value type.</summary>
34 <enum name="MyEnumType">
35 <summary>This is an example enum type, where the values mean little.</summary>
36 <int value="1" label="FIRST_VALUE">This is the first value.</int>
37 <int value="2" label="SECOND_VALUE">This is the second value.</int>
44 <fieldtrial name="BrowserType">
45 <group name="Chrome"/>
47 <group name="Firefox"/>
48 <affected-histogram name="HistogramEnum"/>
53 </histogram-configuration>
59 import xml
.dom
.minidom
# Upper bound on how many times a field trial whose affected histograms are
# not yet known is pushed back onto the reprocess queue; once the count
# reaches this value the trial is reported as missing its dependency.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5
class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
def JoinChildNodes(tag):
  """Returns the XML of all children of |tag| as one stripped string.

  Args:
    tag: A minidom node whose child nodes are to be serialized.

  Returns:
    The concatenated toxml() output of every child node, with leading and
    trailing whitespace removed.
  """
  serialized_children = (child.toxml() for child in tag.childNodes)
  joined = ''.join(serialized_children)
  return joined.strip()
def NormalizeAttributeValue(s):
  """Collapses every run of whitespace in an attribute value to one space.

  Attribute values may be wrapped over multiple lines in the XML; leading and
  trailing whitespace is dropped entirely.

  Args:
    s: The string to normalize, e.g. '   a  b c   d  '

  Returns:
    The normalized string, e.g. 'a b c d'
  """
  words = s.split()
  return ' '.join(words)
def NormalizeAllAttributeValues(node):
  """Recursively normalizes all tag attribute values in the given tree.

  The tree is modified in place: every attribute of every element node has
  its value passed through NormalizeAttributeValue.

  Args:
    node: The minidom node to be normalized.

  Returns:
    The normalized minidom node (the same object that was passed in).
  """
  # Only element nodes carry attributes; document, text and comment nodes are
  # just traversed.
  if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
    for attr_name in node.attributes.keys():
      attr = node.attributes[attr_name]
      attr.value = NormalizeAttributeValue(attr.value)

  for child in node.childNodes:
    NormalizeAllAttributeValues(child)

  # Hand the node back so the documented return contract holds.
  return node
def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
  """Creates a new histogram name based on the field trial group.

  Args:
    group_name: The name of the field trial group. May be empty.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName.
    fieldtrial: The FieldTrial XML element. Its optional 'separator' and
      'ordering' attributes choose the joining string and whether the group
      name is added as a prefix or as a suffix.

  Returns:
    A string with the expanded histogram name.

  Raises:
    Error if the expansion can't be done.
  """
  # NOTE(review): the defaults used when an attribute is absent are assumed.
  # '_' agrees with the Foo.gamma_BarHist example below; 'suffix' should be
  # confirmed against the histograms.xml documentation.
  if fieldtrial.hasAttribute('separator'):
    separator = fieldtrial.getAttribute('separator')
  else:
    separator = '_'

  if fieldtrial.hasAttribute('ordering'):
    ordering = fieldtrial.getAttribute('ordering')
  else:
    ordering = 'suffix'
  if ordering not in ('prefix', 'suffix'):
    logging.error('ordering needs to be prefix or suffix, value is %s' %
                  ordering)
    raise Error()

  # An empty group name leaves the histogram name unchanged.
  # NOTE(review): guard inferred from "May be empty" above -- confirm.
  if not group_name:
    return histogram_name

  if ordering == 'suffix':
    return histogram_name + separator + group_name

  # For prefixes, the group_name is inserted between the "cluster" and the
  # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
  cluster, dot, remainder = histogram_name.partition('.')
  if not dot:
    logging.error(
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s' %
        (histogram_name, fieldtrial.getAttribute('name')))
    raise Error()

  return cluster + dot + group_name + separator + remainder
154 def ExtractHistograms(filename
):
155 """Compute the histogram names and descriptions from the XML representation.
158 filename: The path to the histograms XML file.
161 { 'histogram_name': 'histogram_description', ... }
164 Error if the file is not well-formatted.
166 # Slurp in histograms.xml
168 with
open(filename
, 'r') as f
:
171 # Parse the XML into a tree
172 tree
= xml
.dom
.minidom
.parseString(raw_xml
)
173 NormalizeAllAttributeValues(tree
)
181 for enum
in tree
.getElementsByTagName("enum"):
182 if enum
.getAttribute('type') != 'int':
183 logging
.error('Unknown enum type %s' % enum
.getAttribute('type'))
187 name
= enum
.getAttribute('name')
188 if last_name
is not None and name
.lower() < last_name
.lower():
189 logging
.error('Enums %s and %s are not in alphabetical order'
195 logging
.error('Duplicate enum %s' % name
)
199 last_int_value
= None
201 enum_dict
['name'] = name
202 enum_dict
['values'] = {}
204 for int_tag
in enum
.getElementsByTagName("int"):
206 int_value
= int(int_tag
.getAttribute('value'))
207 if last_int_value
is not None and int_value
< last_int_value
:
208 logging
.error('Enum %s int values %d and %d are not in numerical order'
209 % (name
, last_int_value
, int_value
))
211 last_int_value
= int_value
212 if int_value
in enum_dict
['values']:
213 logging
.error('Duplicate enum value %d for enum %s' % (int_value
, name
))
216 value_dict
['label'] = int_tag
.getAttribute('label')
217 value_dict
['summary'] = JoinChildNodes(int_tag
)
218 enum_dict
['values'][int_value
] = value_dict
220 summary_nodes
= enum
.getElementsByTagName("summary")
221 if len(summary_nodes
) > 0:
222 enum_dict
['summary'] = JoinChildNodes(summary_nodes
[0])
224 enums
[name
] = enum_dict
226 # Process the histograms. The descriptions can include HTML tags.
228 for histogram
in tree
.getElementsByTagName("histogram"):
229 name
= histogram
.getAttribute('name')
230 if last_name
is not None and name
.lower() < last_name
.lower():
231 logging
.error('Histograms %s and %s are not in alphabetical order'
235 if name
in histograms
:
236 logging
.error('Duplicate histogram definition %s' % name
)
239 histograms
[name
] = {}
241 # Find <summary> tag.
242 summary_nodes
= histogram
.getElementsByTagName("summary")
243 if len(summary_nodes
) > 0:
244 histograms
[name
]['summary'] = JoinChildNodes(summary_nodes
[0])
246 histograms
[name
]['summary'] = 'TBD'
248 # Find <obsolete> tag.
249 obsolete_nodes
= histogram
.getElementsByTagName("obsolete")
250 if len(obsolete_nodes
) > 0:
251 reason
= JoinChildNodes(obsolete_nodes
[0])
252 histograms
[name
]['obsolete'] = reason
255 if histogram
.hasAttribute('units'):
256 histograms
[name
]['units'] = histogram
.getAttribute('units')
258 # Find <details> tag.
259 details_nodes
= histogram
.getElementsByTagName("details")
260 if len(details_nodes
) > 0:
261 histograms
[name
]['details'] = JoinChildNodes(details_nodes
[0])
264 if histogram
.hasAttribute('enum'):
265 enum_name
= histogram
.getAttribute('enum')
266 if not enum_name
in enums
:
267 logging
.error('Unknown enum %s in histogram %s' % (enum_name
, name
))
270 histograms
[name
]['enum'] = enums
[enum_name
]
272 # Process the field trials and compute the combinations with their affected
275 for fieldtrial
in tree
.getElementsByTagName("fieldtrial"):
276 name
= fieldtrial
.getAttribute('name')
277 if last_name
is not None and name
.lower() < last_name
.lower():
278 logging
.error('Field trials %s and %s are not in alphabetical order'
282 # Field trials can depend on other field trials, so we need to be careful.
283 # Make a temporary copy of the list of field trials to use as a queue.
284 # Field trials whose dependencies have not yet been processed will get
285 # relegated to the back of the queue to be processed later.
287 def GenerateFieldTrials():
288 for f
in tree
.getElementsByTagName("fieldtrial"): yield 0, f
289 for r
, f
in reprocess_queue
: yield r
, f
290 for reprocess_count
, fieldtrial
in GenerateFieldTrials():
291 # Check dependencies first
292 dependencies_valid
= True
293 affected_histograms
= fieldtrial
.getElementsByTagName('affected-histogram')
294 for affected_histogram
in affected_histograms
:
295 histogram_name
= affected_histogram
.getAttribute('name')
296 if not histogram_name
in histograms
:
297 # Base histogram is missing
298 dependencies_valid
= False
299 missing_dependency
= histogram_name
301 if not dependencies_valid
:
302 if reprocess_count
< MAX_FIELDTRIAL_DEPENDENCY_DEPTH
:
303 reprocess_queue
.append( (reprocess_count
+ 1, fieldtrial
) )
306 logging
.error('Field trial %s is missing its dependency %s'
307 % (fieldtrial
.getAttribute('name'),
312 name
= fieldtrial
.getAttribute('name')
313 groups
= fieldtrial
.getElementsByTagName('group')
316 group_labels
[group
.getAttribute('name')] = group
.getAttribute('label')
317 last_histogram_name
= None
318 for affected_histogram
in affected_histograms
:
319 histogram_name
= affected_histogram
.getAttribute('name')
320 if (last_histogram_name
is not None
321 and histogram_name
.lower() < last_histogram_name
.lower()):
322 logging
.error('Affected histograms %s and %s of field trial %s are not '
323 'in alphabetical order'
324 % (last_histogram_name
, histogram_name
, name
))
326 last_histogram_name
= histogram_name
327 base_description
= histograms
[histogram_name
]
328 with_groups
= affected_histogram
.getElementsByTagName('with-group')
329 if len(with_groups
) > 0:
330 histogram_groups
= with_groups
332 histogram_groups
= groups
333 for group
in histogram_groups
:
334 group_name
= group
.getAttribute('name')
336 new_histogram_name
= _ExpandHistogramNameWithFieldTrial(
337 group_name
, histogram_name
, fieldtrial
)
338 if new_histogram_name
!= histogram_name
:
339 histograms
[new_histogram_name
] = copy
.deepcopy(
340 histograms
[histogram_name
])
342 group_label
= group_labels
.get(group_name
, '')
344 if not 'fieldtrial_groups' in histograms
[new_histogram_name
]:
345 histograms
[new_histogram_name
]['fieldtrial_groups'] = []
346 histograms
[new_histogram_name
]['fieldtrial_groups'].append(group_name
)
348 if not 'fieldtrial_names' in histograms
[new_histogram_name
]:
349 histograms
[new_histogram_name
]['fieldtrial_names'] = []
350 histograms
[new_histogram_name
]['fieldtrial_names'].append(name
)
352 if not 'fieldtrial_labels' in histograms
[new_histogram_name
]:
353 histograms
[new_histogram_name
]['fieldtrial_labels'] = []
354 histograms
[new_histogram_name
]['fieldtrial_labels'].append(
361 logging
.error('Error parsing %s' % filename
)
def ExtractNames(histograms):
  """Returns the histogram names in sorted order.

  Args:
    histograms: A mapping keyed by histogram name.

  Returns:
    A new sorted list containing every key of |histograms|.
  """
  names = list(histograms.keys())
  names.sort()
  return names