1 # markdown is released under the BSD license
2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
4 # Copyright 2004 Manfred Stienstra (the original version)
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
11 # * Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # * Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
16 # * Neither the name of the <organization> nor the
17 # names of its contributors may be used to endorse or promote products
18 # derived from this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
34 Table of Contents Extension for Python-Markdown
37 (c) 2008 [Jack Miller](http://codezen.org)
40 * [Markdown 2.1+](http://packages.python.org/Markdown/)
44 from __future__
import absolute_import
45 from __future__
import unicode_literals
46 from . import Extension
47 from ..treeprocessors
import Treeprocessor
48 from ..util
import etree
49 from .headerid
import slugify
, unique
, itertext
53 def order_toc_list(toc_list
):
54 """Given an unsorted list with errors and skips, return a nested one.
55 [{'level': 1}, {'level': 2}]
57 [{'level': 1, 'children': [{'level': 2, 'children': []}]}]
59 A wrong list is also converted:
60 [{'level': 2}, {'level': 1}]
62 [{'level': 2, 'children': []}, {'level': 1, 'children': []}]
65 def build_correct(remaining_list
, prev_elements
=[{'level': 1000}]):
67 if not remaining_list
:
70 current
= remaining_list
.pop(0)
71 if not 'children' in current
.keys():
72 current
['children'] = []
75 # This happens for instance with [8, 1, 1], ie. when some
76 # header level is outside a scope. We treat it as a
78 next_elements
, children
= build_correct(remaining_list
, [current
])
79 current
['children'].append(children
)
80 return [current
] + next_elements
, []
82 prev_element
= prev_elements
.pop()
85 # Is current part of the child list or next list?
86 if current
['level'] > prev_element
['level']:
87 #print "%d is a child of %d" % (current['level'], prev_element['level'])
88 prev_elements
.append(prev_element
)
89 prev_elements
.append(current
)
90 prev_element
['children'].append(current
)
91 next_elements2
, children2
= build_correct(remaining_list
, prev_elements
)
93 next_elements
+= next_elements2
95 #print "%d is ancestor of %d" % (current['level'], prev_element['level'])
97 #print "No previous elements, so appending to the next set"
98 next_elements
.append(current
)
99 prev_elements
= [current
]
100 next_elements2
, children2
= build_correct(remaining_list
, prev_elements
)
101 current
['children'].extend(children2
)
103 #print "Previous elements, comparing to those first"
104 remaining_list
.insert(0, current
)
105 next_elements2
, children2
= build_correct(remaining_list
, prev_elements
)
106 children
.extend(children2
)
107 next_elements
+= next_elements2
109 return next_elements
, children
111 ordered_list
, __
= build_correct(toc_list
)
115 class TocTreeprocessor(Treeprocessor
):
117 # Iterator wrapper to get parent and child all at once
118 def iterparent(self
, root
):
119 for parent
in root
.getiterator():
123 def add_anchor(self
, c
, elem_id
): #@ReservedAssignment
125 anchor
= etree
.Element("a")
127 anchor
.attrib
["href"] = "#" + elem_id
128 anchor
.attrib
["class"] = "toclink"
130 for elem
in c
.getchildren():
135 def build_toc_etree(self
, div
, toc_list
):
136 # Add title to the div
137 if self
.config
["title"]:
138 header
= etree
.SubElement(div
, "span")
139 header
.attrib
["class"] = "toctitle"
140 header
.text
= self
.config
["title"]
142 def build_etree_ul(toc_list
, parent
):
143 ul
= etree
.SubElement(parent
, "ul")
144 for item
in toc_list
:
145 # List item link, to be inserted into the toc div
146 li
= etree
.SubElement(ul
, "li")
147 link
= etree
.SubElement(li
, "a")
148 link
.text
= item
.get('name', '')
149 link
.attrib
["href"] = '#' + item
.get('id', '')
151 build_etree_ul(item
['children'], li
)
154 return build_etree_ul(toc_list
, div
)
158 div
= etree
.Element("div")
159 div
.attrib
["class"] = "toc"
160 header_rgx
= re
.compile("[Hh][123456]")
162 self
.use_anchors
= self
.config
["anchorlink"] in [1, '1', True, 'True', 'true']
164 # Get a list of id attributes
166 for c
in doc
.getiterator():
168 used_ids
.add(c
.attrib
["id"])
172 for (p
, c
) in self
.iterparent(doc
):
173 text
= ''.join(itertext(c
)).strip()
177 # To keep the output from screwing up the
178 # validation by putting a <div> inside of a <p>
179 # we actually replace the <p> in its entirety.
180 # We do not allow the marker inside a header as that
181 # would cause an endless loop of placing a new TOC
182 # inside previously generated TOC.
183 if c
.text
and c
.text
.strip() == self
.config
["marker"] and \
184 not header_rgx
.match(c
.tag
) and c
.tag
not in ['pre', 'code']:
185 for i
in range(len(p
)):
191 if header_rgx
.match(c
.tag
):
193 # Do not override pre-existing ids
194 if not "id" in c
.attrib
:
195 elem_id
= unique(self
.config
["slugify"](text
, '-'), used_ids
)
196 c
.attrib
["id"] = elem_id
198 elem_id
= c
.attrib
["id"]
200 tag_level
= int(c
.tag
[-1])
202 toc_list
.append({'level': tag_level
,
206 self
.add_anchor(c
, elem_id
)
208 toc_list_nested
= order_toc_list(toc_list
)
209 self
.build_toc_etree(div
, toc_list_nested
)
210 prettify
= self
.markdown
.treeprocessors
.get('prettify')
211 if prettify
: prettify
.run(div
)
213 # serialize and attach to markdown instance.
214 toc
= self
.markdown
.serializer(div
)
215 for pp
in self
.markdown
.postprocessors
.values():
217 self
.markdown
.toc
= toc
220 class TocExtension(Extension
):
222 TreeProcessorClass
= TocTreeprocessor
224 def __init__(self
, configs
=[]):
225 self
.config
= { "marker" : ["[TOC]",
226 "Text to find and replace with Table of Contents -"
227 "Defaults to \"[TOC]\""],
228 "slugify" : [slugify
,
229 "Function to generate anchors based on header text-"
230 "Defaults to the headerid ext's slugify function."],
232 "Title to insert into TOC <div> - "
235 "1 if header should be a self link"
238 for key
, value
in configs
:
239 self
.setConfig(key
, value
)
def extendMarkdown(self, md, md_globals):
    """Attach the TOC tree processor to a Markdown instance.

    A new ``self.TreeProcessorClass`` is created for ``md``, given this
    extension's configuration (``self.getConfigs()``), and registered
    with ``md.treeprocessors`` under the name ``"toc"``.

    ``md_globals`` is accepted as part of the signature but is not used
    here.
    """
    processor = self.TreeProcessorClass(md)
    processor.config = self.getConfigs()
    # The headerid extension registers itself at '>prettify'. Using
    # '_end' here means this processor should always run after headerid
    # (and so honor the ids assigned by it) when both are enabled; the
    # same applies to the attr_list extension. It has to be last so ids
    # are not redefined once the TOC exists, while still letting the
    # generated TOC be prettified.
    md.treeprocessors.add("toc", processor, "_end")
def makeExtension(configs={}):
    """Create and return a ``TocExtension`` for the given ``configs``.

    ``configs`` is forwarded unchanged to ``TocExtension`` as its
    ``configs`` keyword argument.
    """
    extension = TocExtension(configs=configs)
    return extension