1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
"""Utility file for pretty-printing XML files.

The function PrettyPrintNode is used for formatting histograms.xml and other
XML files; the formatting rules for each file are supplied through XmlStyle.
"""
import logging
import textwrap
import xml.dom.minidom
class Error(Exception):
  """Raised when XML cannot be pretty-printed.

  PrettyPrintNode documents raising this when the XML has unknown tags or
  attributes.
  """
def LastLineLength(s):
  """Returns the length of the last line in s.

  Args:
    s: A multi-line string, including newlines.

  Returns:
    The length of the last line in s, in characters. If s contains no
    newline the whole string is its last line; if s ends with a newline the
    last line is empty and the result is 0.
  """
  # rfind() returns -1 when there is no newline, so "+ 1" yields 0 and the
  # full length of s is returned; otherwise it skips past the last newline.
  return len(s) - (s.rfind('\n') + 1)
def XmlEscape(s):
  """XML-escapes the given string.

  Replaces the magic characters (&<>") with their escaped equivalents.

  Args:
    s: The string to escape.

  Returns:
    A copy of s with '&', '<', '"' and '>' replaced by '&amp;', '&lt;',
    '&quot;' and '&gt;' respectively.
  """
  # '&' has to be handled first; otherwise the ampersands introduced by the
  # other replacements would themselves be escaped again.
  s = s.replace('&', '&amp;').replace('<', '&lt;')
  s = s.replace('"', '&quot;').replace('>', '&gt;')
  return s
class XmlStyle(object):
  """A class that stores all style specification for an output xml file.

  Attributes:
    attribute_order: Mapping from tag name to the ordered sequence of
      attribute names accepted on that tag. Attributes are printed in this
      order.
    tags_that_have_extra_newline: Mapping from tag name to a 3-tuple
      (newlines_after_open, newlines_before_close, newlines_after_close).
      Tags that are absent use (1, 1, 0).
    tags_that_dont_indent: Container of tag names whose children are printed
      at the same indent level as the tag itself.
    tags_that_allow_single_line: Container of tag names that may be printed
      on a single line when their only child fits in the remaining width.
  """

  def __init__(self, attribute_order, tags_that_have_extra_newline,
               tags_that_dont_indent, tags_that_allow_single_line):
    self.attribute_order = attribute_order
    self.tags_that_have_extra_newline = tags_that_have_extra_newline
    # Tag names for nodes whose children are not indented.
    self.tags_that_dont_indent = tags_that_dont_indent
    self.tags_that_allow_single_line = tags_that_allow_single_line

  def PrettyPrintNode(self, node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error if the XML has unknown tags or attributes.
    """
    node_type = node.nodeType
    kinds = xml.dom.minidom.Node

    # Handle the top-level document node; its children restart at indent 0.
    if node_type == kinds.DOCUMENT_NODE:
      return '\n'.join([self.PrettyPrintNode(n) for n in node.childNodes])

    # Handle text nodes.
    if node_type == kinds.TEXT_NODE:
      return self._PrettyPrintText(node, indent)

    # Handle element nodes.
    if node_type == kinds.ELEMENT_NODE:
      return self._PrettyPrintElement(node, indent)

    # Handle comment nodes.
    if node_type == kinds.COMMENT_NODE:
      return '<!--%s-->\n' % node.data

    # Other node types could be a processing instruction (<? ... ?>) or a
    # cdata section (<![CDATA[...]]>), neither of which are legal in the
    # histograms XML at present.
    logging.error('Ignoring unrecognized node data: %s', node.toxml())
    # NOTE(review): the statement following the log call is not visible in
    # the damaged source; raising matches the documented "Raises" contract
    # and avoids returning None to callers that expect a string - confirm.
    raise Error('Unrecognized node type: %s' % node_type)

  def _PrettyPrintText(self, node, indent):
    """Re-wraps a text node's paragraphs to WRAP_COLUMN at the given indent."""
    pad = ' ' * indent
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=pad,
        subsequent_indent=pad,
        break_on_hyphens=False,
        break_long_words=False)

    text = XmlEscape(node.data)
    # Remove any common indent.
    text = textwrap.dedent(text.strip('\n'))

    # Split the text into paragraphs at blank line boundaries. A blank line
    # only opens a new paragraph when the current one already has content.
    paragraphs = [[]]
    for line in text.split('\n'):
      if not line.strip() and paragraphs[-1]:
        paragraphs.append([])
      else:
        paragraphs[-1].append(line)

    # Remove trailing empty paragraph if present.
    if paragraphs and not paragraphs[-1]:
      paragraphs = paragraphs[:-1]

    # Wrap each paragraph and separate with two newlines.
    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

  def _OrderedAttributes(self, node):
    """Returns the element's attribute names in the configured output order.

    Raises:
      Error: if the element has an attribute that attribute_order does not
        list for its tag (which includes the tag not being listed at all).
    """
    attributes = list(node.attributes.keys())
    if not attributes:
      return attributes

    if node.tagName not in self.attribute_order:
      unrecognized_attributes = attributes
    else:
      allowed = self.attribute_order[node.tagName]
      unrecognized_attributes = [a for a in attributes if a not in allowed]
      attributes = [a for a in allowed if a in attributes]

    for a in unrecognized_attributes:
      logging.error('Unrecognized attribute "%s" in tag "%s"',
                    a, node.tagName)
    if unrecognized_attributes:
      raise Error('Unrecognized attributes in tag "%s"' % node.tagName)
    return attributes

  def _PrettyPrintElement(self, node, indent):
    """Pretty-prints an element node, its attributes and its children."""
    newlines_after_open, newlines_before_close, newlines_after_close = (
        self.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))

    # Open the tag.
    s = ' ' * indent + '<' + node.tagName

    # Calculate how much space to allow for the '>' or '/>'.
    closing_chars = 1 if node.childNodes else 2

    # Pretty-print the attributes.
    for a in self._OrderedAttributes(node):
      value = XmlEscape(node.attributes[a].value)
      # Replace sequences of whitespace with single spaces.
      words = value.split()
      a_str = ' %s="%s"' % (a, ' '.join(words))
      # Start a new line if the attribute will make this line too long.
      if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
        s += '\n' + ' ' * (indent + 3)
      # Output everything up to the first quote.
      s += ' %s="' % a
      value_indent_level = LastLineLength(s)
      # Output one word at a time, splitting to the next line where
      # necessary.
      column = value_indent_level
      for i, word in enumerate(words):
        # This is slightly too conservative since not every word will be
        # followed by the closing characters...
        if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
          s = s.rstrip()  # remove any trailing whitespace
          s += '\n' + ' ' * value_indent_level
          column = value_indent_level
        s += word + ' '
        column += len(word) + 1
      s = s.rstrip()  # remove any trailing whitespace
      s += '"'
    s = s.rstrip()  # remove any trailing whitespace

    # Childless elements are self-closing.
    if not node.childNodes:
      return s + '/>' + '\n' * newlines_after_close

    # Pretty-print the child nodes.
    s += '>'
    # Calculate the new indent level for child nodes.
    new_indent = indent
    if node.tagName not in self.tags_that_dont_indent:
      # NOTE(review): the indent step is not visible in the damaged source;
      # two spaces is assumed - confirm.
      new_indent += 2

    # Recursively pretty-print the child nodes, dropping whitespace-only
    # results.
    child_nodes = [self.PrettyPrintNode(n, indent=new_indent)
                   for n in node.childNodes]
    child_nodes = [c for c in child_nodes if len(c.strip()) > 0]

    # Determine whether we can fit the entire node on a single line.
    close_tag = '</%s>' % node.tagName
    space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
    if (node.tagName in self.tags_that_allow_single_line and
        len(child_nodes) == 1 and
        len(child_nodes[0].strip()) <= space_left):
      s += child_nodes[0].strip()
    else:
      s += '\n' * newlines_after_open + '\n'.join(child_nodes)
      s += '\n' * newlines_before_close + ' ' * indent
    s += close_tag
    s += '\n' * newlines_after_close
    return s