1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
"""Pretty-prints the histograms.xml file, alphabetizing tags, wrapping text
at 80 chars, enforcing standard attribute ordering, and standardizing
indentation.

This is quite a bit more complicated than just calling tree.toprettyxml();
we need additional customization, like special attribute ordering in tags
and wrapping text nodes, so we implement our own full custom XML pretty-printer.
"""
14 from __future__
import with_statement
22 import xml
.dom
.minidom
# Desired order for tag attributes; attributes listed here will appear first,
# and in the same order as in these lists.
# { tag_name: [attribute_name, ...] }
ATTRIBUTE_ORDER = {
    'enum': ['name', 'type'],
    'histogram': ['name', 'enum', 'units'],
    'int': ['value', 'label'],
    'fieldtrial': ['name', 'separator', 'ordering'],
    'group': ['name', 'label'],
    'affected-histogram': ['name'],
    'with-group': ['name'],
}
40 # Tag names for top-level nodes whose children we don't want to indent.
41 TAGS_THAT_DONT_INDENT
= [
42 'histogram-configuration',
48 # Extra vertical spacing rules for special tag names.
49 # {tag_name: (newlines_after_open, newlines_before_close, newlines_after_close)}
50 TAGS_THAT_HAVE_EXTRA_NEWLINE
= {
51 'histogram-configuration': (2, 1, 1),
52 'histograms': (2, 1, 1),
53 'fieldtrials': (2, 1, 1),
55 'histogram': (1, 1, 1),
57 'fieldtrial': (1, 1, 1),
60 # Tags that we allow to be squished into a single line for brevity.
61 TAGS_THAT_ALLOW_SINGLE_LINE
= [
# Tags whose children we want to alphabetize. The key is the parent tag name,
# and the value is a pair of the tag name of the children we want to sort,
# and a key function that maps each child node to the desired sort key.
# Name-based keys are lower-cased so the ordering is case-insensitive; <int>
# children of an <enum> are ordered numerically by their 'value' attribute.
ALPHABETIZATION_RULES = {
    'histograms': ('histogram', lambda n: n.attributes['name'].value.lower()),
    'enums': ('enum', lambda n: n.attributes['name'].value.lower()),
    'enum': ('int', lambda n: int(n.attributes['value'].value)),
    'fieldtrials': ('fieldtrial', lambda n: n.attributes['name'].value.lower()),
    'fieldtrial': ('affected-histogram',
                   lambda n: n.attributes['name'].value.lower()),
}
class Error(Exception):
    """Signals that the XML could not be pretty-printed.

    PrettyPrintNode documents raising this when the XML has unknown tags or
    attributes.
    """
def LastLineLength(s):
    """Returns the length of the last line in s.

    Args:
      s: A multi-line string, including newlines.

    Returns:
      The length of the last line in s, in characters. This is 0 when s is
      empty or ends with a newline.
    """
    # rfind() returns -1 when s contains no newline, so the expression below
    # reduces to len(s) in that case and no separate branch is needed.
    return len(s) - (s.rfind('\n') + 1)
def XmlEscape(s):
    """XML-escapes the given string, replacing magic characters (&<>") with their
    escaped equivalents.

    Args:
      s: The string to escape.

    Returns:
      The escaped string.
    """
    # '&' has to be handled first; otherwise the ampersands introduced by the
    # other substitutions would themselves be escaped a second time.
    s = s.replace('&', '&amp;').replace('<', '&lt;')
    s = s.replace('"', '&quot;').replace('>', '&gt;')
    return s
104 def PrettyPrintNode(node
, indent
=0):
105 """Pretty-prints the given XML node at the given indent level.
108 node: The minidom node to pretty-print.
109 indent: The current indent level.
112 The pretty-printed string (including embedded newlines).
115 Error if the XML has unknown tags or attributes.
117 # Handle the top-level document node.
118 if node
.nodeType
== xml
.dom
.minidom
.Node
.DOCUMENT_NODE
:
119 return '\n'.join([PrettyPrintNode(n
) for n
in node
.childNodes
])
122 if node
.nodeType
== xml
.dom
.minidom
.Node
.TEXT_NODE
:
123 # Wrap each paragraph in the text to fit in the 80 column limit.
124 wrapper
= textwrap
.TextWrapper()
125 wrapper
.initial_indent
= ' ' * indent
126 wrapper
.subsequent_indent
= ' ' * indent
127 wrapper
.break_on_hyphens
= False
128 wrapper
.break_long_words
= False
129 wrapper
.width
= WRAP_COLUMN
130 text
= XmlEscape(node
.data
)
131 # Remove any common indent.
132 text
= textwrap
.dedent(text
.strip('\n'))
133 lines
= text
.split('\n')
134 # Split the text into paragraphs at blank line boundaries.
137 if len(l
.strip()) == 0 and len(paragraphs
[-1]) > 0:
138 paragraphs
.append([])
140 paragraphs
[-1].append(l
)
141 # Remove trailing empty paragraph if present.
142 if len(paragraphs
) > 0 and len(paragraphs
[-1]) == 0:
143 paragraphs
= paragraphs
[:-1]
144 # Wrap each paragraph and separate with two newlines.
145 return '\n\n'.join([wrapper
.fill('\n'.join(p
)) for p
in paragraphs
])
147 # Handle element nodes.
148 if node
.nodeType
== xml
.dom
.minidom
.Node
.ELEMENT_NODE
:
149 newlines_after_open
, newlines_before_close
, newlines_after_close
= (
150 TAGS_THAT_HAVE_EXTRA_NEWLINE
.get(node
.tagName
, (1, 1, 0)))
152 s
= ' ' * indent
+ '<' + node
.tagName
154 # Calculate how much space to allow for the '>' or '/>'.
156 if not node
.childNodes
:
159 # Pretty-print the attributes.
160 attributes
= node
.attributes
.keys()
162 # Reorder the attributes.
163 if not node
.tagName
in ATTRIBUTE_ORDER
:
164 unrecognized_attributes
= attributes
;
166 unrecognized_attributes
= (
167 [a
for a
in attributes
if not a
in ATTRIBUTE_ORDER
[node
.tagName
]])
169 [a
for a
in ATTRIBUTE_ORDER
[node
.tagName
] if a
in attributes
])
171 for a
in unrecognized_attributes
:
173 'Unrecognized attribute "%s" in tag "%s"' % (a
, node
.tagName
))
174 if unrecognized_attributes
:
178 value
= XmlEscape(node
.attributes
[a
].value
)
179 # Replace sequences of whitespace with single spaces.
180 words
= value
.split()
181 a_str
= ' %s="%s"' % (a
, ' '.join(words
))
182 # Start a new line if the attribute will make this line too long.
183 if LastLineLength(s
) + len(a_str
) + closing_chars
> WRAP_COLUMN
:
184 s
+= '\n' + ' ' * (indent
+ 3)
185 # Output everything up to the first quote.
187 value_indent_level
= LastLineLength(s
)
188 # Output one word at a time, splitting to the next line where necessary.
189 column
= value_indent_level
190 for i
, word
in enumerate(words
):
191 # This is slightly too conservative since not every word will be
192 # followed by the closing characters...
193 if i
> 0 and (column
+ len(word
) + 1 + closing_chars
> WRAP_COLUMN
):
194 s
= s
.rstrip() # remove any trailing whitespace
195 s
+= '\n' + ' ' * value_indent_level
196 column
= value_indent_level
198 column
+= len(word
) + 1
199 s
= s
.rstrip() # remove any trailing whitespace
201 s
= s
.rstrip() # remove any trailing whitespace
203 # Pretty-print the child nodes.
206 # Calculate the new indent level for child nodes.
208 if node
.tagName
not in TAGS_THAT_DONT_INDENT
:
210 child_nodes
= node
.childNodes
212 # Recursively pretty-print the child nodes.
213 child_nodes
= [PrettyPrintNode(n
, indent
=new_indent
) for n
in child_nodes
]
214 child_nodes
= [c
for c
in child_nodes
if len(c
.strip()) > 0]
216 # Determine whether we can fit the entire node on a single line.
217 close_tag
= '</%s>' % node
.tagName
218 space_left
= WRAP_COLUMN
- LastLineLength(s
) - len(close_tag
)
219 if (node
.tagName
in TAGS_THAT_ALLOW_SINGLE_LINE
and
220 len(child_nodes
) == 1 and len(child_nodes
[0].strip()) <= space_left
):
221 s
+= child_nodes
[0].strip()
223 s
+= '\n' * newlines_after_open
+ '\n'.join(child_nodes
)
224 s
+= '\n' * newlines_before_close
+ ' ' * indent
228 s
+= '\n' * newlines_after_close
231 # Handle comment nodes.
232 if node
.nodeType
== xml
.dom
.minidom
.Node
.COMMENT_NODE
:
233 return '<!--%s-->\n' % node
.data
235 # Ignore other node types. This could be a processing instruction (<? ... ?>)
236 # or cdata section (<![CDATA[...]]!>), neither of which are legal in the
237 # histograms XML at present.
238 logging
.error('Ignoring unrecognized node data: %s' % node
.toxml())
def unsafeAppendChild(parent, child):
    """Append child to parent's list of children, ignoring the possibility that it
    is already in another node's childNodes list. Requires that the previous
    parent of child is discarded (to avoid non-tree DOM graphs).
    This can provide a significant speedup as O(n^2) operations are removed (in
    particular, each child insertion avoids the need to traverse the old parent's
    entire list of children).

    Args:
      parent: The node that receives the child.
      child: The node to append; its previous parent's child list is left
          untouched.
    """
    # Clearing parentNode first makes the child look detached, so it is not
    # removed from its previous parent's child list.
    child.parentNode = None
    parent.appendChild(child)
    child.parentNode = parent
254 def TransformByAlphabetizing(node
):
255 """Transform the given XML by alphabetizing specific node types according to
256 the rules in ALPHABETIZATION_RULES.
259 node: The minidom node to transform.
262 The minidom node, with children appropriately alphabetized. Note that the
263 transformation is done in-place, i.e. the original minidom tree is modified
266 if node
.nodeType
!= xml
.dom
.minidom
.Node
.ELEMENT_NODE
:
267 for c
in node
.childNodes
: TransformByAlphabetizing(c
)
270 # Element node with a tag name that we alphabetize the children of?
271 if node
.tagName
in ALPHABETIZATION_RULES
:
272 subtag
, key_function
= ALPHABETIZATION_RULES
[node
.tagName
]
273 # Remove the subnodes to be alphabetized.
274 clone
= node
.cloneNode(False)
276 for c
in node
.childNodes
:
277 if (c
.nodeType
== xml
.dom
.minidom
.Node
.ELEMENT_NODE
and
278 c
.tagName
== subtag
):
281 unsafeAppendChild(clone
, c
)
283 subnodes
.sort(key
=key_function
)
284 # Readd the subnodes, transforming each recursively.
286 unsafeAppendChild(clone
, TransformByAlphabetizing(c
))
290 # Recursively handle other element nodes and other node types.
291 for c
in node
.childNodes
: TransformByAlphabetizing(c
)
def PrettyPrint(raw_xml):
    """Pretty-print the given XML.

    Args:
      raw_xml: The contents of the histograms XML file, as a string.

    Returns:
      The pretty-printed version.
    """
    # Parse, reorder children per ALPHABETIZATION_RULES, then serialize with
    # the custom printer.
    tree = xml.dom.minidom.parseString(raw_xml)
    tree = TransformByAlphabetizing(tree)
    return PrettyPrintNode(tree)
310 logging
.basicConfig(level
=logging
.INFO
)
312 presubmit
= ('--presubmit' in sys
.argv
)
314 logging
.info('Loading histograms.xml...')
315 with
open('histograms.xml', 'rb') as f
:
318 # Check there are no CR ('\r') characters in the file.
320 logging
.info('DOS-style line endings (CR characters) detected - these are '
321 'not allowed. Please run dos2unix histograms.xml')
324 logging
.info('Pretty-printing...')
326 pretty
= PrettyPrint(xml
)
328 logging
.error('Aborting parsing due to fatal errors.')
332 logging
.info('histograms.xml is correctly pretty-printed.')
335 logging
.info('histograms.xml is not formatted correctly; run '
336 'pretty_print.py to fix.')
338 if not diffutil
.PromptUserToAcceptDiff(
340 'Is the prettified version acceptable?'):
341 logging
.error('Aborting')
344 logging
.info('Creating backup file histograms.before.pretty-print.xml')
345 shutil
.move('histograms.xml', 'histograms.before.pretty-print.xml')
347 logging
.info('Writing new histograms.xml file')
348 with
open('histograms.xml', 'wb') as f
:
352 if __name__
== '__main__':