2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
7 at 80 chars, enforcing standard attribute ordering, and standardizing
10 This is quite a bit more complicated than just calling tree.toprettyxml();
11 we need additional customization, like special attribute ordering in tags
12 and wrapping text nodes, so we implement our own full custom XML pretty-printer.
15 from __future__
import with_statement
24 import xml
.dom
.minidom
26 sys
.path
.insert(1, os
.path
.join(sys
.path
[0], '..', '..', 'python'))
27 from google
import path_utils
31 # Desired order for tag attributes; attributes listed here will appear first,
32 # and in the same order as in these lists.
33 # { tag_name: [attribute_name, ...] }
35 'enum': ['name', 'type'],
36 'histogram': ['name', 'enum', 'units'],
37 'int': ['value', 'label'],
38 'fieldtrial': ['name', 'separator', 'ordering'],
39 'group': ['name', 'label'],
40 'affected-histogram': ['name'],
41 'with-group': ['name'],
44 # Tag names for top-level nodes whose children we don't want to indent.
45 TAGS_THAT_DONT_INDENT
= [
46 'histogram-configuration',
52 # Extra vertical spacing rules for special tag names.
53 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
54 TAGS_THAT_HAVE_EXTRA_NEWLINE
= {
55 'histogram-configuration': (2, 1, 1),
56 'histograms': (2, 1, 1),
57 'fieldtrials': (2, 1, 1),
59 'histogram': (1, 1, 1),
61 'fieldtrial': (1, 1, 1),
64 # Tags that we allow to be squished into a single line for brevity.
65 TAGS_THAT_ALLOW_SINGLE_LINE
= [
def _LowerCaseNameKey(n):
    """Returns the lower-cased 'name' attribute of node n, for sorting."""
    return n.attributes['name'].value.lower()


# Tags whose children we want to alphabetize. The key is the parent tag name,
# and the value is a pair of the tag name of the children we want to sort,
# and a key function that maps each child node to the desired sort key.
ALPHABETIZATION_RULES = {
    'histograms': ('histogram', _LowerCaseNameKey),
    'enums': ('enum', _LowerCaseNameKey),
    # <int> entries are ordered numerically by their 'value' attribute.
    'enum': ('int', lambda n: int(n.attributes['value'].value)),
    'fieldtrials': ('fieldtrial', _LowerCaseNameKey),
    'fieldtrial': ('affected-histogram', _LowerCaseNameKey),
}
class Error(Exception):
    """Signals a fatal problem found while pretty-printing the XML."""
def LastLineLength(s):
    """Returns the length of the last line in s.

    Args:
        s: A multi-line string, including newlines.

    Returns:
        The length of the last line in s, in characters. This is the number
        of characters after the final newline, or len(s) if s contains no
        newline at all.
    """
    # rpartition scans once from the right; its last element is the text
    # after the final '\n', or the whole string when there is no '\n'.
    return len(s.rpartition('\n')[2])
def XmlEscape(s):
    """XML-escapes the given string, replacing magic characters (&<>") with
    their escaped equivalents.

    Args:
        s: The string to escape.

    Returns:
        A copy of s with '&', '<', '>' and '"' replaced by XML entities.
    """
    # '&' has to be replaced first; otherwise the ampersands introduced by the
    # other entities would themselves be escaped a second time.
    s = s.replace('&', '&amp;').replace('<', '&lt;')
    s = s.replace('"', '&quot;').replace('>', '&gt;')
    return s
def PrettyPrintNode(node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
        node: The minidom node to pretty-print.
        indent: The current indent level.

    Returns:
        The pretty-printed string (including embedded newlines).

    Raises:
        Error if the XML has unknown tags or attributes.
    """
    # NOTE(review): several statements of this function were absent from the
    # reviewed text and have been restored from the surrounding code and
    # comments. They are tagged "restored" below; confirm them against
    # version control before landing.
    node_kind = xml.dom.minidom.Node

    # Handle the top-level document node.
    if node.nodeType == node_kind.DOCUMENT_NODE:
        return '\n'.join([PrettyPrintNode(n) for n in node.childNodes])

    # Handle text nodes.
    if node.nodeType == node_kind.TEXT_NODE:
        # Wrap each paragraph in the text to fit in the 80 column limit.
        wrapper = textwrap.TextWrapper()
        wrapper.initial_indent = ' ' * indent
        wrapper.subsequent_indent = ' ' * indent
        wrapper.break_on_hyphens = False
        wrapper.break_long_words = False
        wrapper.width = WRAP_COLUMN
        text = XmlEscape(node.data)
        # Remove any common indent.
        text = textwrap.dedent(text.strip('\n'))
        lines = text.split('\n')
        # Split the text into paragraphs at blank line boundaries.
        paragraphs = [[]]  # restored
        for line in lines:  # restored
            if len(line.strip()) == 0 and len(paragraphs[-1]) > 0:
                paragraphs.append([])
            else:  # restored
                paragraphs[-1].append(line)
        # Remove trailing empty paragraph if present.
        if len(paragraphs) > 0 and len(paragraphs[-1]) == 0:
            paragraphs = paragraphs[:-1]
        # Wrap each paragraph and separate with two newlines.
        return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

    # Handle element nodes.
    if node.nodeType == node_kind.ELEMENT_NODE:
        newlines_after_open, newlines_before_close, newlines_after_close = (
            TAGS_THAT_HAVE_EXTRA_NEWLINE.get(node.tagName, (1, 1, 0)))
        # Open the tag.
        s = ' ' * indent + '<' + node.tagName

        # Calculate how much space to allow for the '>' or '/>'.
        closing_chars = 1  # restored: len('>')
        if not node.childNodes:
            closing_chars = 2  # restored: len('/>')

        # Pretty-print the attributes.
        # list() keeps this a real list even where keys() returns a view.
        attributes = list(node.attributes.keys())
        if attributes:  # restored
            # Reorder the attributes.
            if node.tagName not in ATTRIBUTE_ORDER:
                # No ordering is declared for this tag, so every attribute it
                # carries counts as unrecognized.
                unrecognized_attributes = attributes
            else:  # restored
                known_order = ATTRIBUTE_ORDER[node.tagName]
                unrecognized_attributes = (
                    [a for a in attributes if a not in known_order])
                attributes = (  # restored
                    [a for a in known_order if a in attributes])

            for a in unrecognized_attributes:
                # NOTE(review): the call wrapping this message was not
                # visible; logging.error is assumed.
                logging.error(
                    'Unrecognized attribute "%s" in tag "%s"' %
                    (a, node.tagName))
            if unrecognized_attributes:
                raise Error()  # restored

            for a in attributes:  # restored
                value = XmlEscape(node.attributes[a].value)
                # Replace sequences of whitespace with single spaces.
                words = value.split()
                a_str = ' %s="%s"' % (a, ' '.join(words))
                # Start a new line if the attribute will make this line too
                # long.
                if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
                    s += '\n' + ' ' * (indent + 3)
                # Output everything up to the first quote.
                s += ' %s="' % a  # restored
                value_indent_level = LastLineLength(s)
                # Output one word at a time, splitting to the next line where
                # necessary.
                column = value_indent_level
                for i, word in enumerate(words):
                    # This is slightly too conservative since not every word
                    # will be followed by the closing characters...
                    if i > 0 and (column + len(word) + 1 + closing_chars >
                                  WRAP_COLUMN):
                        s = s.rstrip()  # remove any trailing whitespace
                        s += '\n' + ' ' * value_indent_level
                        column = value_indent_level
                    s += word + ' '  # restored
                    column += len(word) + 1
                s = s.rstrip()  # remove any trailing whitespace
                s += '"'  # restored
            s = s.rstrip()  # remove any trailing whitespace

        # Pretty-print the child nodes.
        if node.childNodes:  # restored
            s += '>'  # restored
            # Calculate the new indent level for child nodes.
            new_indent = indent  # restored
            if node.tagName not in TAGS_THAT_DONT_INDENT:
                # NOTE(review): the indent step was not visible; two spaces
                # is assumed.
                new_indent += 2
            child_nodes = node.childNodes

            # Recursively pretty-print the child nodes.
            child_nodes = [PrettyPrintNode(n, indent=new_indent)
                           for n in child_nodes]
            # Drop children that rendered as whitespace only.
            child_nodes = [c for c in child_nodes if len(c.strip()) > 0]

            # Determine whether we can fit the entire node on a single line.
            close_tag = '</%s>' % node.tagName
            space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
            if (node.tagName in TAGS_THAT_ALLOW_SINGLE_LINE and
                    len(child_nodes) == 1 and
                    len(child_nodes[0].strip()) <= space_left):
                s += child_nodes[0].strip()
            else:  # restored
                s += '\n' * newlines_after_open + '\n'.join(child_nodes)
                s += '\n' * newlines_before_close + ' ' * indent
            s += close_tag  # restored
        else:  # restored
            s += '/>'  # restored
        s += '\n' * newlines_after_close
        return s  # restored

    # Handle comment nodes.
    if node.nodeType == node_kind.COMMENT_NODE:
        return '<!--%s-->\n' % node.data

    # Ignore other node types. This could be a processing instruction
    # (<? ... ?>) or cdata section (<![CDATA[...]]!>), neither of which are
    # legal in the histograms XML at present.
    logging.error('Ignoring unrecognized node data: %s' % node.toxml())
    # NOTE(review): the statement after this log call was not visible;
    # raising Error is assumed.
    raise Error()
def unsafeAppendChild(parent, child):
    """Append child to parent's list of children, ignoring the possibility that
    it is already in another node's childNodes list.  Requires that the
    previous parent of child is discarded (to avoid non-tree DOM graphs).
    This can provide a significant speedup as O(n^2) operations are removed (in
    particular, each child insertion avoids the need to traverse the old
    parent's entire list of children).

    Args:
        parent: The minidom node that receives the child.
        child: The minidom node to append as parent's last child.
    """
    # Clearing parentNode makes appendChild skip removing the child from its
    # old parent, which is the expensive step being avoided.
    child.parentNode = None
    # appendChild only sets previousSibling when parent already has children,
    # so clear both links to avoid keeping pointers into the discarded tree.
    child.previousSibling = None
    child.nextSibling = None
    parent.appendChild(child)
    child.parentNode = parent
def TransformByAlphabetizing(node):
    """Transform the given XML by alphabetizing specific node types according
    to the rules in ALPHABETIZATION_RULES.

    Args:
        node: The minidom node to transform.

    Returns:
        The minidom node, with children appropriately alphabetized. Note that
        the transformation is done in-place, i.e. the original minidom tree is
        modified directly.
    """
    element_type = xml.dom.minidom.Node.ELEMENT_NODE

    # Element node with a tag name that we alphabetize the children of?
    if node.nodeType == element_type and node.tagName in ALPHABETIZATION_RULES:
        # Put subnodes in a list of node,key pairs to allow for custom sorting.
        subtag, key_function = ALPHABETIZATION_RULES[node.tagName]
        # NOTE(review): the initial values of subnodes and last_key were not
        # visible in the reviewed text. Keys are stored as (rank, key) pairs:
        # rank 0 marks nodes that come before the first sortable child, so
        # they stay in front and are never compared against a real key of a
        # different type.
        subnodes = []
        last_key = (0, None)
        for c in node.childNodes:
            if c.nodeType == element_type and c.tagName == subtag:
                last_key = (1, key_function(c))
            # Subnodes that we don't want to rearrange use the last node's
            # key, so they stay in the same relative position.
            subnodes.append((c, last_key))

        # Sort the subnode list. The sort is stable, so nodes sharing a key
        # keep their original relative order.
        subnodes.sort(key=lambda pair: pair[1])

        # Re-add the subnodes, transforming each recursively.
        while node.firstChild:
            node.removeChild(node.firstChild)
        for c, _ in subnodes:
            unsafeAppendChild(node, TransformByAlphabetizing(c))
        return node

    # Recursively handle other element nodes and other node types.
    for c in node.childNodes:
        TransformByAlphabetizing(c)
    return node
def PrettyPrint(raw_xml):
    """Pretty-print the given XML.

    Args:
        raw_xml: The contents of the histograms XML file, as a string.

    Returns:
        The pretty-printed version.
    """
    tree = xml.dom.minidom.parseString(raw_xml)
    # Alphabetizing modifies the tree in place and returns the same node.
    tree = TransformByAlphabetizing(tree)
    return PrettyPrintNode(tree)
318 logging
.basicConfig(level
=logging
.INFO
)
320 presubmit
= ('--presubmit' in sys
.argv
)
322 histograms_filename
= 'histograms.xml'
323 histograms_backup_filename
= 'histograms.before.pretty-print.xml'
325 script_dir
= path_utils
.ScriptDir()
327 histograms_pathname
= os
.path
.join(script_dir
, histograms_filename
)
328 histograms_backup_pathname
= os
.path
.join(script_dir
,
329 histograms_backup_filename
)
331 logging
.info('Loading %s...' % histograms_filename
)
332 with
open(histograms_pathname
, 'rb') as f
:
335 # Check there are no CR ('\r') characters in the file.
337 logging
.info('DOS-style line endings (CR characters) detected - these are '
338 'not allowed. Please run dos2unix %s' % histograms_filename
)
341 logging
.info('Pretty-printing...')
343 pretty
= PrettyPrint(xml
)
345 logging
.error('Aborting parsing due to fatal errors.')
349 logging
.info('%s is correctly pretty-printed.' % histograms_filename
)
352 logging
.info('%s is not formatted correctly; run pretty_print.py to fix.' %
355 if not diffutil
.PromptUserToAcceptDiff(
357 'Is the prettified version acceptable?'):
358 logging
.error('Aborting')
361 logging
.info('Creating backup file %s' % histograms_backup_filename
)
362 shutil
.move(histograms_pathname
, histograms_backup_pathname
)
364 logging
.info('Writing new %s file' % histograms_filename
)
365 with
open(histograms_pathname
, 'wb') as f
:
369 if __name__
== '__main__':