Roll src/third_party/WebKit 3aea697:d9c6159 (svn 201973:201974)
[chromium-blink-merge.git] / tools / metrics / common / pretty_print_xml.py
blobad37aa1bb1b1bba47b7186ff182b5a1f832445a6
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Utility file for pretty print xml file.
7 The function PrettyPrintNode will be used for formatting both histograms.xml
8 and actions.xml.
9 """
11 import logging
12 import textwrap
13 import xml.dom.minidom
# Target maximum width, in columns, of a line in the pretty-printed output.
# Used both for wrapping text nodes and for deciding where to break
# attribute lists and attribute values.
WRAP_COLUMN = 80
class Error(Exception):
  """Raised when the XML being pretty-printed cannot be handled."""
def LastLineLength(s):
  """Measures the final line of a possibly multi-line string.

  Args:
    s: A string that may contain newline characters.

  Returns:
    The number of characters that follow the last newline in s, or the length
    of s itself when it contains no newline at all.
  """
  # rpartition yields ('', '', s) when there is no newline, so the tail is
  # always exactly the text of the last line.
  _, _, last_line = s.rpartition('\n')
  return len(last_line)
def XmlEscape(s):
  """Escapes the XML magic characters (&<>") in the given string.

  Args:
    s: The string to escape.

  Returns:
    A copy of s in which &, <, " and > are replaced by the entities &amp;,
    &lt;, &quot; and &gt; respectively.
  """
  # The ampersand has to be handled first; otherwise the ampersands introduced
  # by the other entities would themselves be escaped.
  substitutions = (
      ('&', '&amp;'),
      ('<', '&lt;'),
      ('"', '&quot;'),
      ('>', '&gt;'),
  )
  for magic_char, entity in substitutions:
    s = s.replace(magic_char, entity)
  return s
class XmlStyle(object):
  """Stores the style specification for an output XML file and applies it.

  Attributes:
    attribute_order: Mapping from tag name to the sequence of attribute names
        accepted on that tag, in the order in which they are written out.
    tags_that_have_extra_newline: Mapping from tag name to a 3-tuple
        (newlines_after_open, newlines_before_close, newlines_after_close).
        Tags that are absent from the mapping use (1, 1, 0).
    tags_that_dont_indent: Collection of tag names whose children are printed
        at the same indent level as the tag itself.
    tags_that_allow_single_line: Collection of tag names that are printed on
        a single line when their only child fits within WRAP_COLUMN.
  """

  def __init__(self, attribute_order, tags_that_have_extra_newline,
               tags_that_dont_indent, tags_that_allow_single_line):
    self.attribute_order = attribute_order
    self.tags_that_have_extra_newline = tags_that_have_extra_newline
    self.tags_that_dont_indent = tags_that_dont_indent
    self.tags_that_allow_single_line = tags_that_allow_single_line

  def PrettyPrintNode(self, node, indent=0):
    """Pretty-prints the given XML node at the given indent level.

    Args:
      node: The minidom node to pretty-print.
      indent: The current indent level.

    Returns:
      The pretty-printed string (including embedded newlines).

    Raises:
      Error if the XML has unknown tags or attributes. The exception message
      describes the offending node or attributes.
    """
    node_type = node.nodeType

    # The top-level document node: its children always start at indent 0.
    if node_type == xml.dom.minidom.Node.DOCUMENT_NODE:
      return '\n'.join([self.PrettyPrintNode(n) for n in node.childNodes])

    if node_type == xml.dom.minidom.Node.TEXT_NODE:
      return self._PrettyPrintText(node, indent)

    if node_type == xml.dom.minidom.Node.ELEMENT_NODE:
      return self._PrettyPrintElement(node, indent)

    # Comments are emitted verbatim, without applying the indent.
    if node_type == xml.dom.minidom.Node.COMMENT_NODE:
      return '<!--%s-->\n' % node.data

    # Any other node type is rejected. This could be a processing instruction
    # (<? ... ?>) or a CDATA section, neither of which is legal in the
    # histograms XML at present.
    message = 'Ignoring unrecognized node data: %s' % node.toxml()
    logging.error(message)
    raise Error(message)

  def _PrettyPrintText(self, node, indent):
    """Wraps the paragraphs of a text node to WRAP_COLUMN at the indent."""
    wrapper = textwrap.TextWrapper(
        width=WRAP_COLUMN,
        initial_indent=' ' * indent,
        subsequent_indent=' ' * indent,
        break_long_words=False,
        break_on_hyphens=False)

    # Escape first, then remove any indent common to all lines.
    text = textwrap.dedent(XmlEscape(node.data).strip('\n'))

    # Split the text into paragraphs at blank line boundaries. A blank line
    # only starts a new paragraph when the current one already has content.
    paragraphs = [[]]
    for line in text.split('\n'):
      if not line.strip() and paragraphs[-1]:
        paragraphs.append([])
      else:
        paragraphs[-1].append(line)

    # Drop the trailing empty paragraph, if present.
    if not paragraphs[-1]:
      paragraphs.pop()

    # Wrap each paragraph and separate them with one blank line.
    return '\n\n'.join([wrapper.fill('\n'.join(p)) for p in paragraphs])

  def _OrderAttributes(self, node):
    """Returns the attribute names of node in the configured output order.

    Raises:
      Error if node carries an attribute that is not listed for its tag in
      attribute_order (which includes every attribute of an unlisted tag).
    """
    attributes = node.attributes.keys()
    if not attributes:
      return []

    if node.tagName in self.attribute_order:
      allowed = self.attribute_order[node.tagName]
    else:
      allowed = ()

    unrecognized = [a for a in attributes if a not in allowed]
    if unrecognized:
      for a in unrecognized:
        logging.error(
            'Unrecognized attribute "%s" in tag "%s"', a, node.tagName)
      raise Error('Unrecognized attribute(s) in tag "%s": %s' %
                  (node.tagName, ', '.join(unrecognized)))

    return [a for a in allowed if a in attributes]

  def _AppendAttributes(self, s, node, indent, closing_chars):
    """Appends the attributes of node to the open-tag text s.

    Args:
      s: The output produced so far, ending with '<' and the tag name.
      node: The minidom element whose attributes are printed.
      indent: The indent level of the element.
      closing_chars: Space to reserve for the closing '>' or '/>'.

    Returns:
      s extended with the attributes, wrapped to stay within WRAP_COLUMN
      where possible.
    """
    for a in self._OrderAttributes(node):
      value = XmlEscape(node.attributes[a].value)
      # Replace sequences of whitespace with single spaces.
      words = value.split()
      a_str = ' %s="%s"' % (a, ' '.join(words))
      # Start a new line if the attribute will make this line too long.
      if LastLineLength(s) + len(a_str) + closing_chars > WRAP_COLUMN:
        s += '\n' + ' ' * (indent + 3)
      # Output everything up to the first quote.
      s += ' %s="' % (a)
      value_indent_level = LastLineLength(s)
      # Output one word at a time, continuing on the next line (aligned with
      # the start of the value) where necessary.
      column = value_indent_level
      for i, word in enumerate(words):
        # This is slightly too conservative since not every word will be
        # followed by the closing characters.
        if i > 0 and (column + len(word) + 1 + closing_chars > WRAP_COLUMN):
          s = s.rstrip()  # Remove the space after the previous word.
          s += '\n' + ' ' * value_indent_level
          column = value_indent_level
        s += word + ' '
        column += len(word) + 1
      s = s.rstrip()  # Remove the space after the last word.
      s += '"'
    return s

  def _PrettyPrintElement(self, node, indent):
    """Pretty-prints an element node, its attributes and its children."""
    newlines_after_open, newlines_before_close, newlines_after_close = (
        self.tags_that_have_extra_newline.get(node.tagName, (1, 1, 0)))

    # Open the tag.
    s = ' ' * indent + '<' + node.tagName

    # Space to allow for the '>' (has children) or '/>' (empty element).
    closing_chars = 1 if node.childNodes else 2

    s = self._AppendAttributes(s, node, indent, closing_chars)
    s = s.rstrip()  # Remove any trailing whitespace.

    if node.childNodes:
      s += '>'
      # Calculate the indent level for the child nodes.
      new_indent = indent
      if node.tagName not in self.tags_that_dont_indent:
        new_indent += 2

      # Recursively pretty-print the children, dropping the ones that come
      # out as whitespace only.
      child_nodes = [self.PrettyPrintNode(n, indent=new_indent)
                     for n in node.childNodes]
      child_nodes = [c for c in child_nodes if c.strip()]

      # Determine whether the entire node fits on a single line.
      close_tag = '</%s>' % node.tagName
      space_left = WRAP_COLUMN - LastLineLength(s) - len(close_tag)
      if (node.tagName in self.tags_that_allow_single_line and
          len(child_nodes) == 1 and
          len(child_nodes[0].strip()) <= space_left):
        s += child_nodes[0].strip()
      else:
        s += '\n' * newlines_after_open + '\n'.join(child_nodes)
        s += '\n' * newlines_before_close + ' ' * indent
      s += close_tag
    else:
      s += '/>'

    s += '\n' * newlines_after_close
    return s
185 # Handle comment nodes.
186 if node.nodeType == xml.dom.minidom.Node.COMMENT_NODE:
187 return '<!--%s-->\n' % node.data
189 # Ignore other node types. This could be a processing instruction
190 # (<? ... ?>) or cdata section (<![CDATA[...]]!>), neither of which are
191 # legal in the histograms XML at present.
192 logging.error('Ignoring unrecognized node data: %s' % node.toxml())
193 raise Error()