Re-submission of https://codereview.chromium.org/1041213003/
[chromium-blink-merge.git] / third_party / markdown / postprocessors.py
blob536c96dd0a91b18758d68abb1d21fc3255121d8f
1 # markdown is released under the BSD license
2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
4 # Copyright 2004 Manfred Stienstra (the original version)
5 #
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
11 # * Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # * Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
16 # * Neither the name of the <organization> nor the
17 # names of its contributors may be used to endorse or promote products
18 # derived from this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
33 """
34 POST-PROCESSORS
35 =============================================================================
37 Markdown also allows post-processors, which are similar to preprocessors in
38 that they need to implement a "run" method. However, they are run after core
39 processing.
41 """
43 from __future__ import absolute_import
44 from __future__ import unicode_literals
45 from . import util
46 from . import odict
47 import re
def build_postprocessors(md_instance, **kwargs):
    """Return the default postprocessors as an ordered name -> instance map.

    The entries are inserted in this order: ``raw_html``, ``amp_substitute``,
    ``unescape``. Only the raw-HTML postprocessor receives ``md_instance``;
    ``kwargs`` is accepted but not used here.
    """
    defaults = (
        ("raw_html", RawHtmlPostprocessor(md_instance)),
        ("amp_substitute", AndSubstitutePostprocessor()),
        ("unescape", UnescapePostprocessor()),
    )
    registry = odict.OrderedDict()
    for name, processor in defaults:
        registry[name] = processor
    return registry
class Postprocessor(util.Processor):
    """
    Base class for postprocessors.

    Postprocessors are run after the ElementTree is converted back into
    text. Each one implements a "run" method that receives the document as
    a text string, modifies it as necessary and returns a text string.

    Postprocessors must extend markdown.Postprocessor.

    """

    def run(self, text):
        """
        Hook for subclasses: take the whole html document as one text
        string and return the (possibly modified) string.

        This base implementation does nothing and returns None.

        """
class RawHtmlPostprocessor(Postprocessor):
    """ Put the stashed raw html back into the document. """

    def run(self, text):
        """
        Replace each html stash placeholder found in `text`.

        When safe mode is on and a stashed block is not flagged safe, the
        block is escaped ('escape'), dropped ('remove') or swapped for the
        configured replacement text (any other mode) before being restored.
        Returns the resulting text.

        """
        md = self.markdown
        stash = md.htmlStash
        for index in range(stash.html_counter):
            html, safe = stash.rawHtmlBlocks[index]
            if md.safeMode and not safe:
                mode = str(md.safeMode).lower()
                if mode == 'escape':
                    html = self.escape(html)
                elif mode == 'remove':
                    html = ''
                else:
                    html = md.html_replacement_text
            placeholder = stash.get_placeholder(index)
            if self.isblocklevel(html) and (safe or not md.safeMode):
                # Block-level html must not stay wrapped in a paragraph, so
                # the enclosing <p>...</p> is consumed along with the
                # placeholder.
                text = text.replace("<p>%s</p>" % placeholder, html + "\n")
            # Any remaining occurrence of the placeholder is replaced as is.
            text = text.replace(placeholder, html)
        return text

    def escape(self, html):
        """ Escape &, <, > and " as html entities. """
        # '&' has to go first so the entities produced by the later
        # substitutions are not escaped a second time.
        substitutions = (
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
            ('"', '&quot;'),
        )
        for char, entity in substitutions:
            html = html.replace(char, entity)
        return html

    def isblocklevel(self, html):
        """
        Tell whether `html` starts with a block-level tag.

        Text that does not start with an opening or closing tag yields
        False. Tag names starting with '!', '?', '@' or '%' (comments, php
        etc.) always count as block level; everything else is decided by
        util.isBlockLevel.

        """
        match = re.match(r'^\<\/?([^ >]+)', html)
        if not match:
            return False
        tag = match.group(1)
        if tag[0] in ('!', '?', '@', '%'):
            # Comment, php etc...
            return True
        return util.isBlockLevel(tag)
class AndSubstitutePostprocessor(Postprocessor):
    """ Turn the ampersand stand-in back into a real "&". """

    def run(self, text):
        """ Return `text` with every util.AMP_SUBSTITUTE replaced by "&". """
        return text.replace(util.AMP_SUBSTITUTE, "&")
class UnescapePostprocessor(Postprocessor):
    """ Restore escaped chars """

    # Matches a run of decimal digits wrapped in util.STX ... util.ETX.
    # The pattern is a raw string so that "\d" reaches the regex engine
    # untouched instead of being an invalid string escape sequence.
    RE = re.compile(r'%s(\d+)%s' % (util.STX, util.ETX))

    def unescape(self, m):
        """
        Substitution callback for `RE`.

        Converts the captured digits to an int and returns
        util.int2str() of it (presumably the character with that code
        point; confirm against util).

        """
        return util.int2str(int(m.group(1)))

    def run(self, text):
        """ Return `text` with every `RE` match replaced via `unescape`. """
        return self.RE.sub(self.unescape, text)