tools/metrics/histograms/extract_histograms.py

   1 # Copyright 2013 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 """Extract histogram names from the description XML file.
   6
   7 For more information on the format of the XML file, which is self-documenting,
   8 see histograms.xml; however, here is a simple example to get you started. The
   9 XML below will generate the following five histograms:
  10
  11     HistogramTime
  12     HistogramEnum
  13     HistogramEnum_Chrome
  14     HistogramEnum_IE
  15     HistogramEnum_Firefox
  16
  17 <histogram-configuration>
  18
  19 <histograms>
  20
  21 <histogram name="HistogramTime" units="milliseconds">
  22   <summary>A brief description.</summary>
  23   <details>This is a more thorough description of this histogram.</details>
  24 </histogram>
  25
  26 <histogram name="HistogramEnum" enum="MyEnumType">
  27   <summary>This histogram sports an enum value type.</summary>
  28 </histogram>
  29
  30 </histograms>
  31
  32 <enums>
  33
<enum name="MyEnumType" type="int">
  35   <summary>This is an example enum type, where the values mean little.</summary>
  36   <int value="1" label="FIRST_VALUE">This is the first value.</int>
  37   <int value="2" label="SECOND_VALUE">This is the second value.</int>
  38 </enum>
  39
  40 </enums>
  41
  42 <fieldtrials>
  43
  44 <fieldtrial name="BrowserType">
  45   <group name="Chrome"/>
  46   <group name="IE"/>
  47   <group name="Firefox"/>
  48   <affected-histogram name="HistogramEnum"/>
  49 </fieldtrial>
  50
  51 </fieldtrials>
  52
  53 </histogram-configuration>
  54
  55 """
  56
  57 import copy
  58 import logging
  59 import xml.dom.minidom
  60
  61
# Maximum number of times a field trial is pushed back onto the reprocess
# queue while one of its affected histograms is still missing. Once a field
# trial has been re-queued this many times, the missing dependency is reported
# as an error instead.
MAX_FIELDTRIAL_DEPENDENCY_DEPTH = 5
  63
  64
  65 class Error(Exception):
  66   pass
  67
  68
  69 def JoinChildNodes(tag):
  70   return ''.join([c.toxml() for c in tag.childNodes]).strip()
  71
  72
  73 def NormalizeAttributeValue(s):
  74   """Normalizes an attribute value (which might be wrapped over multiple lines)
  75   by replacing each whitespace sequence with a single space.
  76
  77   Args:
  78     s: The string to normalize, e.g. '  \n a  b c\n d  '
  79
  80   Returns:
  81     The normalized string, e.g. 'a b c d'
  82   """
  83   return ' '.join(s.split())
  84
  85
  86 def NormalizeAllAttributeValues(node):
  87   """Recursively normalizes all tag attribute values in the given tree.
  88
  89   Args:
  90     node: The minidom node to be normalized.
  91
  92   Returns:
  93     The normalized minidom node.
  94   """
  95   if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
  96     for a in node.attributes.keys():
  97       node.attributes[a].value = NormalizeAttributeValue(
  98         node.attributes[a].value)
  99
 100   for c in node.childNodes: NormalizeAllAttributeValues(c)
 101   return node
 102
 103
 104 def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
 105   """Creates a new histogram name based on the field trial group.
 106
 107   Args:
 108     group_name: The name of the field trial group. May be empty.
 109     histogram_name: The name of the histogram. May be of the form
 110       Group.BaseName or BaseName
 111     field_trial: The FieldTrial XML element.
 112
 113   Returns:
 114     A string with the expanded histogram name.
 115
 116   Raises:
 117     Error if the expansion can't be done.
 118   """
 119   if fieldtrial.hasAttribute('separator'):
 120     separator = fieldtrial.getAttribute('separator')
 121   else:
 122     separator = '_'
 123
 124   if fieldtrial.hasAttribute('ordering'):
 125     ordering = fieldtrial.getAttribute('ordering')
 126   else:
 127     ordering = 'suffix'
 128   if ordering not in ['prefix', 'suffix']:
 129     logging.error('ordering needs to be prefix or suffix, value is %s' %
 130                   ordering)
 131     raise Error()
 132
 133   if not group_name:
 134     return histogram_name
 135
 136   if ordering == 'suffix':
 137     return histogram_name + separator + group_name
 138
 139   # For prefixes, the group_name is inserted between the "cluster" and the
 140   # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
 141   sections = histogram_name.split('.')
 142   if len(sections) <= 1:
 143     logging.error(
 144       'Prefix Field Trial expansions require histogram names which include a '
 145       'dot separator. Histogram name is %s, and Field Trial is %s' %
 146       (histogram_name, fieldtrial.getAttribute('name')))
 147     raise Error()
 148
 149   cluster = sections[0] + '.'
 150   remainder = '.'.join(sections[1:])
 151   return cluster + group_name + separator + remainder
 152
 153
 154 def ExtractHistograms(filename):
 155   """Compute the histogram names and descriptions from the XML representation.
 156
 157   Args:
 158     filename: The path to the histograms XML file.
 159
 160   Returns:
 161     { 'histogram_name': 'histogram_description', ... }
 162
 163   Raises:
 164     Error if the file is not well-formatted.
 165   """
 166   # Slurp in histograms.xml
 167   raw_xml = ''
 168   with open(filename, 'r') as f:
 169     raw_xml = f.read()
 170
 171   # Parse the XML into a tree
 172   tree = xml.dom.minidom.parseString(raw_xml)
 173   NormalizeAllAttributeValues(tree)
 174
 175   histograms = {}
 176   have_errors = False
 177
 178   # Load the enums.
 179   enums = {}
 180   last_name = None
 181   for enum in tree.getElementsByTagName("enum"):
 182     if enum.getAttribute('type') != 'int':
 183       logging.error('Unknown enum type %s' % enum.getAttribute('type'))
 184       have_errors = True
 185       continue
 186
 187     name = enum.getAttribute('name')
 188     if last_name is not None and name.lower() < last_name.lower():
 189       logging.error('Enums %s and %s are not in alphabetical order'
 190                     % (last_name, name))
 191       have_errors = True
 192     last_name = name
 193
 194     if name in enums:
 195       logging.error('Duplicate enum %s' % name)
 196       have_errors = True
 197       continue
 198
 199     last_int_value = None
 200     enum_dict = {}
 201     enum_dict['name'] = name
 202     enum_dict['values'] = {}
 203
 204     for int_tag in enum.getElementsByTagName("int"):
 205       value_dict = {}
 206       int_value = int(int_tag.getAttribute('value'))
 207       if last_int_value is not None and int_value < last_int_value:
 208         logging.error('Enum %s int values %d and %d are not in numerical order'
 209                       % (name, last_int_value, int_value))
 210         have_errors = True
 211       last_int_value = int_value
 212       if int_value in enum_dict['values']:
 213         logging.error('Duplicate enum value %d for enum %s' % (int_value, name))
 214         have_errors = True
 215         continue
 216       value_dict['label'] = int_tag.getAttribute('label')
 217       value_dict['summary'] = JoinChildNodes(int_tag)
 218       enum_dict['values'][int_value] = value_dict
 219
 220     summary_nodes = enum.getElementsByTagName("summary")
 221     if len(summary_nodes) > 0:
 222       enum_dict['summary'] = JoinChildNodes(summary_nodes[0])
 223
 224     enums[name] = enum_dict
 225
 226   # Process the histograms. The descriptions can include HTML tags.
 227   last_name = None
 228   for histogram in tree.getElementsByTagName("histogram"):
 229     name = histogram.getAttribute('name')
 230     if last_name is not None and name.lower() < last_name.lower():
 231       logging.error('Histograms %s and %s are not in alphabetical order'
 232                     % (last_name, name))
 233       have_errors = True
 234     last_name = name
 235     if name in histograms:
 236       logging.error('Duplicate histogram definition %s' % name)
 237       have_errors = True
 238       continue
 239     histograms[name] = {}
 240
 241     # Find <summary> tag.
 242     summary_nodes = histogram.getElementsByTagName("summary")
 243     if len(summary_nodes) > 0:
 244       histograms[name]['summary'] = JoinChildNodes(summary_nodes[0])
 245     else:
 246       histograms[name]['summary'] = 'TBD'
 247
 248     # Find <obsolete> tag.
 249     obsolete_nodes = histogram.getElementsByTagName("obsolete")
 250     if len(obsolete_nodes) > 0:
 251       reason = JoinChildNodes(obsolete_nodes[0])
 252       histograms[name]['obsolete'] = reason
 253
 254     # Handle units.
 255     if histogram.hasAttribute('units'):
 256       histograms[name]['units'] = histogram.getAttribute('units')
 257
 258     # Find <details> tag.
 259     details_nodes = histogram.getElementsByTagName("details")
 260     if len(details_nodes) > 0:
 261       histograms[name]['details'] = JoinChildNodes(details_nodes[0])
 262
 263     # Handle enum types.
 264     if histogram.hasAttribute('enum'):
 265       enum_name = histogram.getAttribute('enum')
 266       if not enum_name in enums:
 267         logging.error('Unknown enum %s in histogram %s' % (enum_name, name))
 268         have_errors = True
 269       else:
 270         histograms[name]['enum'] = enums[enum_name]
 271
 272   # Process the field trials and compute the combinations with their affected
 273   # histograms.
 274   last_name = None
 275   for fieldtrial in tree.getElementsByTagName("fieldtrial"):
 276     name = fieldtrial.getAttribute('name')
 277     if last_name is not None and name.lower() < last_name.lower():
 278       logging.error('Field trials %s and %s are not in alphabetical order'
 279                     % (last_name, name))
 280       have_errors = True
 281     last_name = name
 282   # Field trials can depend on other field trials, so we need to be careful.
 283   # Make a temporary copy of the list of field trials to use as a queue.
 284   # Field trials whose dependencies have not yet been processed will get
 285   # relegated to the back of the queue to be processed later.
 286   reprocess_queue = []
 287   def GenerateFieldTrials():
 288     for f in tree.getElementsByTagName("fieldtrial"): yield 0, f
 289     for r, f in reprocess_queue: yield r, f
 290   for reprocess_count, fieldtrial in GenerateFieldTrials():
 291     # Check dependencies first
 292     dependencies_valid = True
 293     affected_histograms = fieldtrial.getElementsByTagName('affected-histogram')
 294     for affected_histogram in affected_histograms:
 295       histogram_name = affected_histogram.getAttribute('name')
 296       if not histogram_name in histograms:
 297         # Base histogram is missing
 298         dependencies_valid = False
 299         missing_dependency = histogram_name
 300         break
 301     if not dependencies_valid:
 302       if reprocess_count < MAX_FIELDTRIAL_DEPENDENCY_DEPTH:
 303         reprocess_queue.append( (reprocess_count + 1, fieldtrial) )
 304         continue
 305       else:
 306         logging.error('Field trial %s is missing its dependency %s'
 307                       % (fieldtrial.getAttribute('name'),
 308                          missing_dependency))
 309         have_errors = True
 310         continue
 311
 312     name = fieldtrial.getAttribute('name')
 313     groups = fieldtrial.getElementsByTagName('group')
 314     group_labels = {}
 315     for group in groups:
 316       group_labels[group.getAttribute('name')] = group.getAttribute('label')
 317     last_histogram_name = None
 318     for affected_histogram in affected_histograms:
 319       histogram_name = affected_histogram.getAttribute('name')
 320       if (last_histogram_name is not None
 321           and histogram_name.lower() < last_histogram_name.lower()):
 322         logging.error('Affected histograms %s and %s of field trial %s are not '
 323                       'in alphabetical order'
 324                       % (last_histogram_name, histogram_name, name))
 325         have_errors = True
 326       last_histogram_name = histogram_name
 327       base_description = histograms[histogram_name]
 328       with_groups = affected_histogram.getElementsByTagName('with-group')
 329       if len(with_groups) > 0:
 330         histogram_groups = with_groups
 331       else:
 332         histogram_groups = groups
 333       for group in histogram_groups:
 334         group_name = group.getAttribute('name')
 335         try:
 336           new_histogram_name = _ExpandHistogramNameWithFieldTrial(
 337             group_name, histogram_name, fieldtrial)
 338           if new_histogram_name != histogram_name:
 339             histograms[new_histogram_name] = copy.deepcopy(
 340               histograms[histogram_name])
 341
 342           group_label = group_labels.get(group_name, '')
 343
 344           if not 'fieldtrial_groups' in histograms[new_histogram_name]:
 345             histograms[new_histogram_name]['fieldtrial_groups'] = []
 346           histograms[new_histogram_name]['fieldtrial_groups'].append(group_name)
 347
 348           if not 'fieldtrial_names' in histograms[new_histogram_name]:
 349             histograms[new_histogram_name]['fieldtrial_names'] = []
 350           histograms[new_histogram_name]['fieldtrial_names'].append(name)
 351
 352           if not 'fieldtrial_labels' in histograms[new_histogram_name]:
 353             histograms[new_histogram_name]['fieldtrial_labels'] = []
 354           histograms[new_histogram_name]['fieldtrial_labels'].append(
 355             group_label)
 356
 357         except Error:
 358           have_errors = True
 359
 360   if have_errors:
 361     logging.error('Error parsing %s' % filename)
 362     raise Error()
 363
 364   return histograms
 365
 366
 367 def ExtractNames(histograms):
 368   return sorted(histograms.keys())