Blink roll 25b6bd3a7a131ffe68d809546ad1a20707915cdc:3a503f41ae42e5b79cfcd2ff10e65afde...
[chromium-blink-merge.git] / third_party / markdown / serializers.py
blob67e9daed21aab383b4024e344fa0ab1a3fdfea47
1 # markdown is released under the BSD license
2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
4 # Copyright 2004 Manfred Stienstra (the original version)
5 #
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
11 # * Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # * Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
16 # * Neither the name of the <organization> nor the
17 # names of its contributors may be used to endorse or promote products
18 # derived from this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
33 # markdown/serializers.py
35 # Add x/html serialization to ElementTree
36 # Taken from ElementTree 1.3 preview with slight modifications
38 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
40 # fredrik@pythonware.com
41 # http://www.pythonware.com
43 # --------------------------------------------------------------------
44 # The ElementTree toolkit is
46 # Copyright (c) 1999-2007 by Fredrik Lundh
48 # By obtaining, using, and/or copying this software and/or its
49 # associated documentation, you agree that you have read, understood,
50 # and will comply with the following terms and conditions:
52 # Permission to use, copy, modify, and distribute this software and
53 # its associated documentation for any purpose and without fee is
54 # hereby granted, provided that the above copyright notice appears in
55 # all copies, and that both that copyright notice and this permission
56 # notice appear in supporting documentation, and that the name of
57 # Secret Labs AB or the author not be used in advertising or publicity
58 # pertaining to distribution of the software without specific, written
59 # prior permission.
61 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
62 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
63 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
64 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
65 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
66 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
67 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
68 # OF THIS SOFTWARE.
69 # --------------------------------------------------------------------
72 from __future__ import absolute_import
73 from __future__ import unicode_literals
74 from . import util
# Names exported by ``from ... import *``.
__all__ = ['to_html_string', 'to_xhtml_string']

# Module-level shorthands for the ElementTree objects used by the serializer.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
PI = util.etree.PI
ProcessingInstruction = util.etree.ProcessingInstruction
# Use ``test_comment`` when the etree module provides it; otherwise fall back
# to the regular ``Comment`` factory.
Comment = (util.etree.test_comment
           if hasattr(util.etree, 'test_comment')
           else util.etree.Comment)
# Tags that are serialized without a closing tag (and self-closed as
# ``<tag />`` in xhtml output).
# NOTE: "meta" and "param" must be separate entries; without the comma between
# them the adjacent string literals concatenate into the bogus name
# "metaparam" and neither tag is recognised.
HTML_EMPTY = set([
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
])
94 _namespace_map = {
95 # "well-known" namespace prefixes
96 "http://www.w3.org/XML/1998/namespace": "xml",
97 "http://www.w3.org/1999/xhtml": "html",
98 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
99 "http://schemas.xmlsoap.org/wsdl/": "wsdl",
100 # xml schema
101 "http://www.w3.org/2001/XMLSchema": "xs",
102 "http://www.w3.org/2001/XMLSchema-instance": "xsi",
103 # dublic core
104 "http://purl.org/dc/elements/1.1/": "dc",
108 def _raise_serialization_error(text):
109 raise TypeError(
110 "cannot serialize %r (type %s)" % (text, type(text).__name__)
113 def _encode(text, encoding):
114 try:
115 return text.encode(encoding, "xmlcharrefreplace")
116 except (TypeError, AttributeError):
117 _raise_serialization_error(text)
119 def _escape_cdata(text):
120 # escape character data
121 try:
122 # it's worth avoiding do-nothing calls for strings that are
123 # shorter than 500 character, or so. assume that's, by far,
124 # the most common case in most applications.
125 if "&" in text:
126 text = text.replace("&", "&amp;")
127 if "<" in text:
128 text = text.replace("<", "&lt;")
129 if ">" in text:
130 text = text.replace(">", "&gt;")
131 return text
132 except (TypeError, AttributeError):
133 _raise_serialization_error(text)
136 def _escape_attrib(text):
137 # escape attribute value
138 try:
139 if "&" in text:
140 text = text.replace("&", "&amp;")
141 if "<" in text:
142 text = text.replace("<", "&lt;")
143 if ">" in text:
144 text = text.replace(">", "&gt;")
145 if "\"" in text:
146 text = text.replace("\"", "&quot;")
147 if "\n" in text:
148 text = text.replace("\n", "&#10;")
149 return text
150 except (TypeError, AttributeError):
151 _raise_serialization_error(text)
153 def _escape_attrib_html(text):
154 # escape attribute value
155 try:
156 if "&" in text:
157 text = text.replace("&", "&amp;")
158 if "<" in text:
159 text = text.replace("<", "&lt;")
160 if ">" in text:
161 text = text.replace(">", "&gt;")
162 if "\"" in text:
163 text = text.replace("\"", "&quot;")
164 return text
165 except (TypeError, AttributeError):
166 _raise_serialization_error(text)
def _serialize_html(write, elem, qnames, namespaces, format):
    """Recursively serialize *elem* and its children as (X)HTML markup.

    Args:
        write: Callable taking one string; receives each output fragment.
        elem: Element to serialize. Its ``tag``, ``text``, ``tail``,
            ``items()`` and child iteration are used.
        qnames: Mapping from tag/attribute names to their serialized names
            (as returned by ``_namespaces``). A tag mapped to None is
            written without start/end tags, content only.
        namespaces: Mapping of namespace URI to prefix to declare on this
            element, or None. Recursive calls always pass None, so the
            declarations are only written on the element first passed in.
        format: ``"html"`` or ``"xhtml"``. In html, an attribute whose
            serialized name equals its value is written as a bare name; in
            xhtml, tags in ``HTML_EMPTY`` are written as ``<tag />``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # No serialized tag name: emit only the text and the children.
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                # sorted() instead of items.sort(): items() may return a dict
                # view, which has no sort() method.
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    # sort on prefix
                    for v, k in sorted(namespaces.items(),
                                       key=lambda x: x[1]):
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            if format == "xhtml" and tag in HTML_EMPTY:
                write(" />")
            else:
                write(">")
                tag = tag.lower()
                if text:
                    # script/style content is written verbatim, unescaped
                    if tag == "script" or tag == "style":
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if tag not in HTML_EMPTY:
                    write("</" + tag + ">")
    # The tail text follows the element regardless of how it was written.
    if elem.tail:
        write(_escape_cdata(elem.tail))
def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at *root* and return the markup.

    Args:
        root: Root element to serialize; must not be None.
        encoding: When None (the default) the joined text is returned as is.
            Otherwise the text is passed through ``_encode`` with this
            encoding and the encoded result is returned.
        default_namespace: Passed to ``_namespaces``.
        format: ``"html"`` or ``"xhtml"``; passed to ``_serialize_html``.
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    output = "".join(data)
    if encoding is None:
        return output
    # _encode() takes the target encoding as its second argument; omitting it
    # raised TypeError for every call that requested an encoding.
    return _encode(output, encoding)
239 # --------------------------------------------------------------------
240 # serialization support
def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks *elem* and all of its descendants, looking at tags, attribute
    names, attribute values that are ``QName`` objects, and ``QName`` text.

    Args:
        elem: Root element of the tree to scan.
        default_namespace: Optional namespace URI that is mapped to the empty
            prefix. When set, non-qualified names are rejected.

    Returns:
        A ``(qnames, namespaces)`` tuple. ``qnames`` maps each name found to
        its serialized ``prefix:local`` form (or the bare local name);
        ``namespaces`` maps namespace URIs to prefixes.

    Raises:
        ValueError: A non-qualified name is used while *default_namespace*
            is set.
        TypeError: A tag or name cannot be serialized (raised through
            ``_raise_serialization_error``).
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    # the "xml" prefix is never recorded for declaration
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = qname
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            # Test for an already-registered name inside the QName branch: a
            # combined ``isinstance(...) and ... not in qnames`` condition let
            # a repeated QName tag fall through to the error branch below.
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
def to_html_string(element):
    """Return *element* serialized by ``_write_html`` in "html" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
def to_xhtml_string(element):
    """Return *element* serialized by ``_write_html`` in "xhtml" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")