Add ICU message format support
[chromium-blink-merge.git] / third_party / markdown / util.py
blob97f7679700a0a680816f0f92790c78e48270c7ca
1 # -*- coding: utf-8 -*-
2 # markdown is released under the BSD license
3 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
4 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
5 # Copyright 2004 Manfred Stienstra (the original version)
6 #
7 # All rights reserved.
8 #
9 # Redistribution and use in source and binary forms, with or without
10 # modification, are permitted provided that the following conditions are met:
12 # * Redistributions of source code must retain the above copyright
13 # notice, this list of conditions and the following disclaimer.
14 # * Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 # * Neither the name of the <organization> nor the
18 # names of its contributors may be used to endorse or promote products
19 # derived from this software without specific prior written permission.
21 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
22 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
25 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 # POSSIBILITY OF SUCH DAMAGE.
34 from __future__ import unicode_literals
35 import re
36 import sys
39 """
40 Python 3 Stuff
41 =============================================================================
42 """
# True only when the running interpreter reports major version 3.
PY3 = sys.version_info[0] == 3

# Version-neutral aliases: the base string type, the text (unicode) type and
# the "code point -> one-character text" function.
if not PY3:
    string_type, text_type, int2str = basestring, unicode, unichr  # noqa: F821
else:
    string_type, text_type, int2str = str, str, chr
55 """
56 Constants you might want to modify
57 -----------------------------------------------------------------------------
58 """
# Tag names treated as block level.  The pattern is anchored at both ends and
# compiled case-insensitively, so it is meant to be used on a bare tag name.
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
    "|script|noscript|form|fieldset|iframe|math"
    "|hr|hr/|style|li|dt|dd|thead|tbody"
    "|tr|th|td|section|footer|header|group|figure"
    "|figcaption|aside|article|canvas|output"
    "|progress|video)$",
    re.IGNORECASE,
)

# Placeholders
STX = '\u0002'  # Use STX ("Start of text") for start-of-placeholder
ETX = '\u0003'  # Use ETX ("End of text") for end-of-placeholder
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
# Template with one "%s" slot for the placeholder id.
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
# Group 1 captures the id.  The id may be any run of digits: a fixed "{4}"
# would silently fail to match a placeholder whose id has more than four
# digits.  The trailing ETX keeps the match unambiguous.
# NOTE(review): ids are generated outside this file -- confirm their format.
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
AMP_SUBSTITUTE = STX + "amp" + ETX
74 """
75 Constants you probably do not need to change
76 -----------------------------------------------------------------------------
77 """
# Pairs of (first, last) characters delimiting the script ranges named in the
# comments below.  How the bounds are compared is up to the code that
# consumes this tuple.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    ('\u2D30', '\u2D7F'),  # Tifinagh
)
# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`). Do not import it by yourself.

def _etree_version(version):
    """Return the numeric fields of a version string as a tuple of ints.

    Comparing tuples avoids the lexicographic trap of comparing the raw
    strings, where "1.10" sorts before "1.5".
    """
    return tuple(int(field) for field in re.findall(r"[0-9]+", version))


try:  # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    if _etree_version(etree.VERSION) < (1, 0, 5):
        # Caught just below, which triggers the pure-Python fallback.
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):
    # Use the Python implementation of ElementTree?
    import xml.etree.ElementTree as etree
    if _etree_version(etree.VERSION) < (1, 1):
        # No further fallback: this error propagates to the importer.
        raise RuntimeError("ElementTree version 1.1 or higher is required")
"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""
def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    For a string ``tag`` the result is whatever ``BLOCK_LEVEL_ELEMENTS.match``
    returns: a match object (truthy) or ``None``.  For any other kind of
    ``tag`` the result is ``False``.
    """
    if not isinstance(tag, string_type):
        # Some ElementTree tags are not strings, so return False.
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
"""
MISC AUXILIARY CLASSES
=============================================================================
"""
class AtomicString(text_type):
    """A string which should not be further processed.

    The subclass adds no behaviour of its own; it only gives such strings a
    distinct type.
    """
class Processor(object):
    """Holds an optional reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Keep ``markdown_instance`` as ``self.markdown`` when one is given.

        When the argument is omitted (or falsy) the ``markdown`` attribute is
        not created at all.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
class HtmlStash(object):
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []  # list of (html, safe) tuples, in store order

    def store(self, html, safe=False):
        """
        Saves an HTML segment for later reinsertion.  Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append((html, safe))
        # The placeholder is keyed by the counter value *before* it is bumped,
        # i.e. by the index the segment was just appended at (as long as the
        # counter and the list are only changed through store/reset).
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """Drop every stored segment and restart the counter at zero."""
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key):
        """Return the placeholder text for integer ``key``, wrapped in STX/ETX."""
        return "%swzxhzdk:%d%s" % (STX, key, ETX)