1 # markdown is released under the BSD license
2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
4 # Copyright 2004 Manfred Stienstra (the original version)
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
11 # * Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # * Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
16 # * Neither the name of the <organization> nor the
17 # names of its contributors may be used to endorse or promote products
18 # derived from this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
from __future__ import unicode_literals
from __future__ import absolute_import
from . import util
from . import odict
from . import inlinepatterns
def build_treeprocessors(md_instance, **kwargs):
    """ Build the default treeprocessors for Markdown.

    Keyword arguments:

    * md_instance: the Markdown instance handed to each treeprocessor.
    * kwargs: accepted for interface compatibility; not used here.

    Returns: an ordered dict holding the "inline" processor followed by the
    "prettify" processor.

    """
    treeprocessors = odict.OrderedDict()
    # Insertion order is kept by the ordered dict: "inline" is registered
    # before "prettify".
    treeprocessors["inline"] = InlineProcessor(md_instance)
    treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
    return treeprocessors
def isString(s):
    """ Check if it's a plain string.

    Returns True only when `s` is an instance of `util.string_type` and is
    not a `util.AtomicString`; returns False for everything else, so the
    result is always a bool.

    """
    if isinstance(s, util.AtomicString):
        # Atomic strings are deliberately reported as "not a string".
        return False
    return isinstance(s, util.string_type)
class Treeprocessor(util.Processor):
    """
    Treeprocessors are run on the ElementTree object before serialization.

    Each Treeprocessor implements a "run" method that takes a pointer to an
    ElementTree, modifies it as necessary and returns an ElementTree
    object.

    Treeprocessors must extend markdown.Treeprocessor.

    """

    def run(self, root):
        """
        Subclasses of Treeprocessor should implement a `run` method, which
        takes a root ElementTree. This method can return another ElementTree
        object, and the existing root ElementTree will be replaced, or it can
        modify the current tree and return None.

        The base implementation does nothing and returns None.
        """
        pass
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    While text is scanned, every element produced by an inline pattern is
    stored in `self.stashed_nodes` and replaced in the text by a placeholder
    string. Afterwards the placeholders are expanded back into the stashed
    elements and spliced into the tree.
    """

    def __init__(self, md):
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        # The 4 matches the zero-padded "%04d" id built in __makePlaceholder.
        self.__placeholder_length = (4 + len(self.__placeholder_prefix)
                                     + len(self.__placeholder_suffix))
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        # The Markdown instance supplies `inlinePatterns` and
        # `enable_attributes`, both read by the methods below.
        self.markdown = md
        # Reset at the start of every run(); created here as well so the
        # helper methods never hit a missing attribute.
        self.stashed_nodes = {}

    def __makePlaceholder(self, type):
        """ Generate a placeholder

        Returns: tuple of (placeholder string, id string). The id is the
        current size of the stash, zero-padded to four digits.

        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When nothing matches, returns (None, index + 1).

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """ Add node to stash and return the placeholder standing for it """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            patterns = self.markdown.inlinePatterns
            while patternIndex < len(patterns):
                data, matched, startIndex = self.__applyPattern(
                    patterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # Keep retrying the same pattern until it stops matching,
                # then move on to the next one.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode, isText)

        if not isText and node is not subnode:
            # Elements expanded from a child's tail belong directly AFTER
            # that child; the child itself must stay in the tree.
            pos = list(node).index(subnode) + 1
        else:
            pos = 0

        # Inserting in reverse at a fixed position keeps document order.
        for newChild in reversed(childResult):
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent, isText=True):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data
        * isText: True when `data` came from parent.text, False when it came
          from parent.tail; decides where leading plain text is written back.

        Returns: list with ElementTree elements with applied inline patterns.

        """
        result = []

        def linkText(text):
            """ Attach plain text to the last result, or to the parent. """
            if not text:
                return
            if result:
                target, attr = result[-1], 'tail'
            elif isText:
                target, attr = parent, 'text'
            else:
                # Leading text of a tail stays in the parent's tail rather
                # than leaking into its text.
                target, attr = parent, 'tail'
            current = getattr(target, attr)
            setattr(target, attr, current + text if current else text)

        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index == -1:
                # No more placeholders: flush the remaining text and stop.
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""
                continue

            node_id, phEndIndex = self.__findPlaceholder(data, index)

            if node_id not in self.stashed_nodes:
                # Wrong placeholder: keep the prefix as literal text and
                # resume scanning right after it.
                end = index + len(self.__placeholder_prefix)
                linkText(data[startIndex:end])
                startIndex = end
                continue

            node = self.stashed_nodes.get(node_id)

            if index > 0:
                linkText(data[startIndex:index])

            startIndex = phEndIndex

            if isString(node):
                # It's just a string: merge it into the surrounding text.
                linkText(node)
                continue

            # It's an Element: expand placeholders nested in its own
            # text/tail and in those of its direct children.
            for child in [node] + list(node):
                if child.tail and child.tail.strip():
                    self.__processElementText(node, child, False)
                if child.text and child.text.strip():
                    self.__processElementText(child, child)

            result.append(node)

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of (string with placeholders instead of ElementTree
        elements, whether the pattern matched, index to resume searching at).

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern declined this match; resume from where the last
            # group of the match starts.
            resume = len(leftData) + match.span(len(match.groups()))[0]
            return data, True, resume

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(child.text,
                                                         patternIndex + 1)
                    if child.tail:
                        child.tail = self.__handleInline(child.tail,
                                                         patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # Iterate over a snapshot: the loop body inserts new siblings
            # into currElement.
            for child in list(currElement):
                if child.text and not isinstance(child.text,
                                                 util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    stack += lst
                    insertQueue.append((child, lst))
                if child.tail:
                    tail = self.__handleInline(child.tail)
                    # Throwaway element that collects the leading plain
                    # text of the tail.
                    dumby = util.etree.Element('d')
                    tailResult = self.__processPlaceholders(tail, dumby)
                    child.tail = dumby.text if dumby.text else None
                    pos = list(currElement).index(child) + 1
                    for newChild in reversed(tailResult):
                        currElement.insert(pos, newChild)
                if len(child):
                    stack.append(child)

            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text and isString(element.text):
                        element.text = inlinepatterns.handleAttributes(
                            element.text, element)
                for i, newChild in enumerate(lst):
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail and isString(newChild.tail):
                            newChild.tail = inlinepatterns.handleAttributes(
                                newChild.tail, element)
                        if newChild.text and isString(newChild.text):
                            newChild.text = inlinepatterns.handleAttributes(
                                newChild.text, newChild)
                    element.insert(i, newChild)
        return tree
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """ Recursively add linebreaks to ElementTree children.

        Only block-level elements other than `code` and `pre` are descended
        into. Any element whose tail is empty or whitespace-only gets a
        newline tail.

        """
        i = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            text_is_blank = not elem.text or not elem.text.strip()
            if text_is_blank and len(elem) and util.isBlockLevel(elem[0].tag):
                # Break the line between the opening tag and a block-level
                # first child.
                elem.text = i
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """ Add linebreaks to ElementTree root object.

        Modifies `root` in place and returns None.

        """
        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # A <code> element without text has nothing to trim.
                if code.text is not None:
                    code.text = code.text.rstrip() + '\n'