Re-enable index-basics-workers test to see if still times
[chromium-blink-merge.git] / tools / metrics / histograms / pretty_print.py
blob67c840fd6002f2d4123aceba16184aaad4c41daa
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
6 at 80 chars, enforcing standard attribute ordering, and standardizing
7 indentation.
9 This is quite a bit more complicated than just calling tree.toprettyxml();
10 we need additional customization, like special attribute ordering in tags
11 and wrapping text nodes, so we implement our own full custom XML pretty-printer.
12 """
14 from __future__ import with_statement
16 import diffutil
17 import json
18 import logging
19 import shutil
20 import sys
21 import textwrap
22 import xml.dom.minidom
# Maximum width, in characters, of any line in the pretty-printed output.
WRAP_COLUMN = 80
# Desired order for tag attributes; attributes listed here will appear first,
# and in the same order as in these lists.
# { tag_name: [attribute_name, ...] }
ATTRIBUTE_ORDER = {
    'enum': ['name', 'type'],
    'histogram': ['name', 'enum', 'units'],
    'int': ['value', 'label'],
    'fieldtrial': ['name', 'separator', 'ordering'],
    'group': ['name', 'label'],
    'affected-histogram': ['name'],
    'with-group': ['name'],
}
# Tag names for top-level nodes whose children we don't want to indent.
TAGS_THAT_DONT_INDENT = [
    'histogram-configuration',
    'histograms',
    'fieldtrials',
    'enums',
]
# Extra vertical spacing rules for special tag names.
# {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
TAGS_THAT_HAVE_EXTRA_NEWLINE = {
    'histogram-configuration': (2, 1, 1),
    'histograms': (2, 1, 1),
    'fieldtrials': (2, 1, 1),
    'enums': (2, 1, 1),
    'histogram': (1, 1, 1),
    'enum': (1, 1, 1),
    'fieldtrial': (1, 1, 1),
}
# Tags that we allow to be squished into a single line for brevity.
TAGS_THAT_ALLOW_SINGLE_LINE = [
    'summary',
    'int',
]
def _LowerCaseName(n):
    """Sort key: the node's 'name' attribute, lower-cased."""
    return n.attributes['name'].value.lower()


def _IntegerValue(n):
    """Sort key: the node's 'value' attribute, parsed as an integer."""
    return int(n.attributes['value'].value)


# Tags whose children we want to alphabetize. The key is the parent tag name,
# and the value is a pair of the tag name of the children we want to sort,
# and a key function that maps each child node to the desired sort key.
ALPHABETIZATION_RULES = {
    'histograms': ('histogram', _LowerCaseName),
    'enums': ('enum', _LowerCaseName),
    'enum': ('int', _IntegerValue),
    'fieldtrials': ('fieldtrial', _LowerCaseName),
    'fieldtrial': ('affected-histogram', _LowerCaseName),
}
class Error(Exception):
    """Raised when the XML cannot be pretty-printed.

    PrettyPrintNode raises it for unrecognized attributes and unsupported node
    types; main() catches it and exits with a non-zero status.
    """
def LastLineLength(s):
    """Returns the length of the last line in s.

    Args:
      s: A multi-line string, including newlines.

    Returns:
      The length of the last line in s, in characters. This is 0 when s is
      empty or ends with a newline.
    """
    # rfind() yields -1 when there is no newline, in which case the expression
    # collapses to len(s); no separate branch is needed.
    return len(s) - s.rfind('\n') - 1
def XmlEscape(s):
    """XML-escapes the given string.

    Replaces the magic characters (&<>") with their escaped equivalents.
    Single quotes are left untouched.

    Args:
      s: The string to escape.

    Returns:
      The escaped string.
    """
    # '&' must be handled first, otherwise the ampersands introduced by the
    # later replacements would themselves be escaped again.
    s = s.replace('&', '&amp;')
    s = s.replace('<', '&lt;')
    s = s.replace('"', '&quot;')
    s = s.replace('>', '&gt;')
    return s
def PrettyPrintNode(node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level, in spaces.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error: if an element has an attribute that is not listed for its tag in
          ATTRIBUTE_ORDER, or if the node is not a document, text, element or
          comment node.
    """
    node_type = node.nodeType

    # Handle the top-level document node.
    if node_type == xml.dom.minidom.Node.DOCUMENT_NODE:
        return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])

    if node_type == xml.dom.minidom.Node.TEXT_NODE:
        return _PrettyPrintText(node, indent)

    if node_type == xml.dom.minidom.Node.ELEMENT_NODE:
        return _PrettyPrintElement(node, indent)

    # Comments are emitted verbatim; note that `indent` is not applied to them.
    if node_type == xml.dom.minidom.Node.COMMENT_NODE:
        return '<!--%s-->\n' % node.data

    # Reject other node types. This could be a processing instruction
    # (<? ... ?>) or cdata section (<![CDATA[...]]>), neither of which are
    # legal in the histograms XML at present.
    logging.error('Ignoring unrecognized node data: %s', node.toxml())
    raise Error('Unrecognized node type: %s' % node_type)


def _PrettyPrintText(node, indent):
    """Returns the text node's escaped content, re-wrapped at `indent`."""
    # Wrap each paragraph in the text to fit in the WRAP_COLUMN limit.
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=' ' * indent,
        subsequent_indent=' ' * indent,
        break_on_hyphens=False,
        break_long_words=False)
    # Remove any common indent.
    text = textwrap.dedent(XmlEscape(node.data).strip('\n'))
    # Split the text into paragraphs at blank line boundaries.
    paragraphs = [[]]
    for line in text.split('\n'):
        if not line.strip() and paragraphs[-1]:
            paragraphs.append([])
        else:
            paragraphs[-1].append(line)
    # Remove trailing empty paragraph if present.
    if not paragraphs[-1]:
        paragraphs.pop()
    # Wrap each paragraph and separate with two newlines.
    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])


def _PrettyPrintAttributes(node, s, indent, closing_chars):
    """Returns the open-tag string `s` with the node's attributes appended.

    Attributes are emitted in the order given by ATTRIBUTE_ORDER, and both the
    attribute list and long attribute values are wrapped at WRAP_COLUMN.
    `closing_chars` is the room to reserve for the trailing '>' or '/>'.
    Raises Error if the node has an attribute ATTRIBUTE_ORDER does not list.
    """
    attributes = list(node.attributes.keys())
    if not attributes:
        return s

    # Reorder the attributes; a tag without an ATTRIBUTE_ORDER entry may not
    # carry any attribute at all.
    known_attributes = ATTRIBUTE_ORDER.get(node.tagName)
    if known_attributes is None:
        unrecognized_attributes = attributes
    else:
        unrecognized_attributes = [
            a for a in attributes if a not in known_attributes]
        attributes = [a for a in known_attributes if a in attributes]

    for a in unrecognized_attributes:
        logging.error(
            'Unrecognized attribute "%s" in tag "%s"', a, node.tagName)
    if unrecognized_attributes:
        raise Error('Unrecognized attribute(s) in tag "%s"' % node.tagName)

    for a in attributes:
        # Replace sequences of whitespace with single spaces.
        words = XmlEscape(node.attributes[a].value).split()
        a_str = ' %s="%s"' % (a, ' '.join(words))
        # Start a new line if the attribute will make this line too long.
        if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
            s += '\n' + ' ' * (indent + 3)
        # Output everything up to the first quote.
        s += ' %s="' % a
        value_indent_level = LastLineLength(s)
        # Output one word at a time, splitting to the next line where
        # necessary.
        column = value_indent_level
        for i, word in enumerate(words):
            # This is slightly too conservative since not every word will be
            # followed by the closing characters...
            if i > 0 and column + len(word) + 1 + closing_chars > WRAP_COLUMN:
                s = s.rstrip()  # remove any trailing whitespace
                s += '\n' + ' ' * value_indent_level
                column = value_indent_level
            s += word + ' '
            column += len(word) + 1
        s = s.rstrip()  # remove the space that follows the last word
        s += '"'
    return s.rstrip()


def _PrettyPrintElement(node, indent):
    """Returns the pretty-printed element node, including its children."""
    newlines_after_open, newlines_before_close, newlines_after_close = (
        TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))

    # Open the tag.
    s = ' ' * indent + '<' + node.tagName

    # Calculate how much space to allow for the '>' or '/>'.
    closing_chars = 1 if node.childNodes else 2

    s = _PrettyPrintAttributes(node, s, indent, closing_chars)

    if not node.childNodes:
        s += '/>'
    else:
        s += '>'
        # Calculate the new indent level for child nodes.
        new_indent = indent
        if node.tagName not in TAGS_THAT_DONT_INDENT:
            new_indent += 2

        # Recursively pretty-print the child nodes, dropping the ones that
        # produce nothing but whitespace.
        children = [PrettyPrintNode(n, indent=new_indent)
                    for n in node.childNodes]
        children = [c for c in children if c.strip()]

        # Determine whether we can fit the entire node on a single line.
        close_tag = '</%s>' % node.tagName
        space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
        if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and
                len(children) == 1 and
                len(children[0].strip()) <= space_left):
            s += children[0].strip()
        else:
            s += '\n' * newlines_after_open + '\n'.join(children)
            s += '\n' * newlines_before_close + ' ' * indent
        s += close_tag

    s += '\n' * newlines_after_close
    return s
def unsafeAppendChild(parent, child):
    """Appends child to parent's children without detaching it first.

    The child's parentNode is cleared before calling appendChild, so it is not
    removed from the childNodes list of the node it currently belongs to. That
    list, and the sibling links of the child's former neighbours, are left
    stale; this requires that the previous parent of child is discarded (to
    avoid non-tree DOM graphs).

    This can provide a significant speedup as O(n^2) operations are removed (in
    particular, each child insertion avoids the need to traverse the old
    parent's entire list of children).

    Args:
      parent: The minidom node that receives the child.
      child: The minidom node to append.
    """
    child.parentNode = None
    parent.appendChild(child)
    child.parentNode = parent
def TransformByAlphabetizing(node):
    """Transform the given XML by alphabetizing specific node types according to
    the rules in ALPHABETIZATION_RULES.

    For an element whose tag has a rule, the children with the rule's tag name
    are sorted by the rule's key function and moved after all of the element's
    other children (which keep their relative order). The sorted children are
    then transformed recursively. Any other node just has all of its children
    transformed.

    Args:
      node: The minidom node to transform.

    Returns:
      The same minidom node, with children appropriately alphabetized. The
      transformation is done in-place, i.e. the original minidom tree is
      modified directly, so callers may ignore the return value.
    """
    is_element = node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE
    if not is_element or node.tagName not in ALPHABETIZATION_RULES:
        # Documents, text, comments and elements without a rule: recurse only.
        for c in node.childNodes:
            TransformByAlphabetizing(c)
        return node

    subtag, key_function = ALPHABETIZATION_RULES[node.tagName]

    # Separate the subnodes to be alphabetized from everything else.
    subnodes = []
    others = []
    for c in node.childNodes:
        if (c.nodeType == xml.dom.minidom.Node.ELEMENT_NODE and
                c.tagName == subtag):
            subnodes.append(c)
        else:
            others.append(c)

    # Sort the subnodes (stable, so equal keys keep their document order) and
    # transform each one recursively.
    subnodes.sort(key=key_function)
    for c in subnodes:
        TransformByAlphabetizing(c)

    # Rewrite the child list of this very node. Sorting into a detached copy
    # would be lost, because the recursive callers above do not use the return
    # value. Relinking by hand keeps this O(n); removeChild/appendChild would
    # rescan the child list for every node moved.
    reordered = others + subnodes
    previous = None
    for c in reordered:
        c.parentNode = node
        c.previousSibling = previous
        c.nextSibling = None
        if previous is not None:
            previous.nextSibling = c
        previous = c
    node.childNodes[:] = reordered
    return node
def PrettyPrint(raw_xml):
    """Pretty-print the given XML.

    Args:
      raw_xml: The contents of the histograms XML file, as a string.

    Returns:
      The pretty-printed version.

    Raises:
      Error: propagated from PrettyPrintNode when the XML contains
          unrecognized attributes or unsupported node types.
    """
    tree = xml.dom.minidom.parseString(raw_xml)
    tree = TransformByAlphabetizing(tree)
    return PrettyPrintNode(tree)
def main():
    """Pretty-prints histograms.xml in the current directory.

    With '--presubmit' on the command line the file is only checked: the
    process exits with status 1 if it is not already pretty-printed. Otherwise
    the user is shown the diff and, if they accept it, the original is moved to
    histograms.before.pretty-print.xml and the pretty-printed version is
    written to histograms.xml.

    Exits with status 1 if the file contains CR characters or cannot be
    pretty-printed, and with status 0 if it is already correctly formatted.
    """
    logging.basicConfig(level=logging.INFO)

    presubmit = ('--presubmit' in sys.argv)

    logging.info('Loading histograms.xml...')
    # The contents are kept as bytes under a name that does not shadow the
    # `xml` module.
    with open('histograms.xml', 'rb') as f:
        raw_xml = f.read()

    # Check there are no CR ('\r') characters in the file.
    if b'\r' in raw_xml:
        logging.info('DOS-style line endings (CR characters) detected - these are '
                     'not allowed. Please run dos2unix histograms.xml')
        sys.exit(1)

    logging.info('Pretty-printing...')
    try:
        pretty = PrettyPrint(raw_xml)
    except Error:
        logging.error('Aborting parsing due to fatal errors.')
        sys.exit(1)

    # PrettyPrint returns text while the file was read as bytes, so compare
    # (and later write) the UTF-8 encoding of the result. The output carries
    # no XML declaration, so UTF-8 is what an XML parser will assume.
    pretty_bytes = pretty.encode('utf-8')

    if raw_xml == pretty_bytes:
        logging.info('histograms.xml is correctly pretty-printed.')
        sys.exit(0)
    if presubmit:
        logging.info('histograms.xml is not formatted correctly; run '
                     'pretty_print.py to fix.')
        sys.exit(1)
    # NOTE(review): both texts are now passed to diffutil as decoded text;
    # confirm PromptUserToAcceptDiff has no dependency on byte strings.
    if not diffutil.PromptUserToAcceptDiff(
            raw_xml.decode('utf-8', 'replace'), pretty,
            'Is the prettified version acceptable?'):
        logging.error('Aborting')
        return

    logging.info('Creating backup file histograms.before.pretty-print.xml')
    shutil.move('histograms.xml', 'histograms.before.pretty-print.xml')

    logging.info('Writing new histograms.xml file')
    with open('histograms.xml', 'wb') as f:
        f.write(pretty_bytes)
# Run the pretty-printer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()