Fix bug FS#8417
[archweb_dev-nj.git] / lib / markdown.py
blob6b3e57a2b3c50b1da5c96b44cc56a8ae12efa504
1 #!/usr/bin/env python
3 SPEED_TEST = 0
5 """
6 ====================================================================
7 IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
8 ====================================================================
10 Python-Markdown
11 ===============
13 Converts Markdown to HTML. Basic usage as a module:
15 import markdown
16 html = markdown.markdown(your_text_string)
18 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
19 maintained by [Yuri Takhteyev](http://www.freewisdom.org).
21 Project website: http://www.freewisdom.org/projects/python-markdown
22 Contact: yuri [at] freewisdom.org
24 License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
26 Version: 1.5 (May 15, 2006)
28 For changelog, see end of file
29 """
31 import re, sys, os, random
# Debug levels, from chattiest to silent:
# 0 all (VERBOSE), 1 informative (INFO), 2 critical (CRITICAL), 3 none (NONE)
VERBOSE, INFO, CRITICAL, NONE = range(4)

# Messages below this level are suppressed by message().
MESSAGE_THRESHOLD = CRITICAL


def message(level, text):
    """Print ``text`` if ``level`` is at or above MESSAGE_THRESHOLD.

    @param level: one of VERBOSE, INFO, CRITICAL or NONE
    @param text: the message to write to standard output
    @returns: None
    """
    if level < MESSAGE_THRESHOLD:
        return
    # Parenthesised single argument: prints the same text whether
    # ``print`` is a statement or a function.
    print(text)
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------

# All tabs will be expanded to up to this many spaces.
TAB_LENGTH = 4

# When true, "{@name=value}" markers found in text are turned into
# attributes of the enclosing element while the DOM is serialised.
ENABLE_ATTRIBUTES = 1

# When true, _emphasis_ is only recognised if the opening underscore
# is not preceded by a non-space character.
SMART_EMPHASIS = 1

# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------

FN_BACKLINK_TEXT = "zz1337820767766393qq"

# A template for html placeholders: prefix, running number, suffix.
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"
BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
                        'dl', 'ol', 'ul', 'script', 'noscript',
                        'form', 'fieldset', 'iframe', 'math', 'ins',
                        'del', 'hr', 'hr/']


def is_block_level(tag):
    """Tell whether ``tag`` names a block-level HTML element.

    A tag is block-level when it is listed in BLOCK_LEVEL_ELEMENTS or
    when it starts with "h" followed by a digit (h1, h2, ...).

    @param tag: a lower-cased tag name without angle brackets
    @returns: True or False; tags too short to be a heading ("" or
              "h") are simply not block-level instead of raising
              IndexError
    """
    if tag in BLOCK_LEVEL_ELEMENTS:
        return True
    # The length guard keeps tag[0] / tag[1] from being evaluated on
    # empty or one-character names.
    return len(tag) > 1 and tag[0] == 'h' and tag[1] in "0123456789"
66 """
67 ======================================================================
68 ========================== NANODOM ===================================
69 ======================================================================
71 The three classes below implement some of the most basic DOM
72 methods. I use this instead of minidom because I need a simpler
73 functionality and do not want to require additional libraries.
75 Importantly, NanoDom does not do normalization, which is what we
76 want. It also adds extra white space when converting DOM to string
77 """
class Document:
    """Root object of a NanoDom tree.

    Acts as the factory for elements, text nodes and entity
    references, and holds the single top-level element.
    """

    def __init__(self):
        # Both attributes used to come into existence only inside
        # appendChild(); creating them here makes find(), toxml() and
        # createEntityReference() safe to reach on a fresh document.
        self.documentElement = None
        self.entities = {}

    def appendChild(self, child):
        """Install ``child`` as the document element.

        Also resets the cache of entity references.
        """
        self.documentElement = child
        child.parent = self
        self.entities = {}

    def createElement(self, tag, textNode=None):
        """Create an Element named ``tag`` that belongs to this document.

        @param tag: the element name
        @param textNode: optional text; when truthy it is wrapped in a
                         text node and appended to the new element
        @returns: the new Element
        """
        el = Element(tag)
        el.doc = self
        if textNode:
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text):
        """Create a TextNode holding ``text`` that belongs to this document."""
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the EntityReference for ``entity``.

        One object is created per distinct entity and then reused.
        """
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def toxml(self):
        """Serialise the document element to a string."""
        return self.documentElement.toxml()

    def normalizeEntities(self, text):
        """Escape <, > and double quotes in ``text``.

        Ampersands are left untouched.
        """
        pairs = [("<", "&lt;"),
                 (">", "&gt;"),
                 ("\"", "&quot;")]
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def find(self, test):
        """Return the descendants of the document element passing ``test``."""
        return self.documentElement.find(test)

    def unlink(self):
        """Unlink the document element's tree and drop the reference to it."""
        self.documentElement.unlink()
        self.documentElement = None
class Element:
    """A NanoDom element: a tag name, ordered attributes and child nodes."""

    type = "element"

    def __init__(self, tag):
        self.nodeName = tag
        self.attributes = []         # attribute names, in insertion order
        self.attribute_values = {}   # attribute name -> value
        self.childNodes = []

    def unlink(self):
        """Recursively unlink child elements, then drop the child list."""
        for node in self.childNodes:
            if node.type == "element":
                node.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value):
        """Set ``attr`` to ``value``; a new name is added at the end."""
        if attr not in self.attributes:
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child):
        """Insert ``child`` at index ``position`` and adopt it."""
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child):
        """Remove ``child`` from the child list."""
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild):
        """Put ``newChild`` in the slot currently held by ``oldChild``."""
        slot = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(slot, newChild)

    def appendChild(self, child):
        """Add ``child`` as the last child and adopt it."""
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self):
        """Elements carry no attribute markers of their own; do nothing."""
        pass

    def find(self, test, depth=0):
        """ Returns a list of descendants that pass the test function """
        hits = []
        for node in self.childNodes:
            if test(node):
                hits.append(node)
            if node.type == "element":
                hits.extend(node.find(test, depth + 1))
        return hits

    def toxml(self):
        """Serialise this element and its subtree to a string.

        Some tags get extra white space around them so that the
        output is easier to read.
        """
        # Children may set attributes on this element, so they must be
        # processed before the attributes are written out.
        if ENABLE_ATTRIBUTES:
            for node in self.childNodes:
                node.handleAttributes()

        name = self.nodeName
        pieces = []

        if name in ('h1', 'h2', 'h3', 'h4'):
            pieces.append("\n")
        elif name == 'li':
            pieces.append("\n ")

        pieces.append("<" + name)
        for attr in self.attributes:
            value = self.doc.normalizeEntities(self.attribute_values[attr])
            pieces.append(' %s="%s"' % (attr, value))

        # A blockquote is always written with a closing tag, even when
        # it is empty; any other childless element is self-closed.
        if self.childNodes or name == 'blockquote':
            pieces.append(">")
            for node in self.childNodes:
                pieces.append(node.toxml())
            if name == 'p':
                pieces.append("\n")
            elif name == 'li':
                pieces.append("\n ")
            pieces.append("</%s>" % name)
        else:
            pieces.append("/>")

        if name in ('p', 'li', 'ul', 'ol', 'h1', 'h2', 'h3', 'h4'):
            pieces.append("\n")

        return "".join(pieces)
class TextNode:
    """A NanoDom node holding a run of text."""

    type = "text"
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}')  # {@id=123}

    def __init__(self, text):
        self.value = text

    def attributeCallback(self, match):
        """Copy one {@name=value} marker onto the parent element."""
        name, value = match.group(1), match.group(2)
        self.parent.setAttribute(name, value)

    def handleAttributes(self):
        """Apply every {@name=value} marker in the text to the parent
        element and remove the markers from the text."""
        self.value = self.attrRegExp.sub(self.attributeCallback,
                                         self.value)

    def toxml(self):
        """Return the text, indented after line breaks when inside a
        paragraph or at the start of a list item, with <, > and double
        quotes escaped. Text starting with the HTML placeholder prefix
        is not re-indented."""
        text = self.value
        if not text.startswith(HTML_PLACEHOLDER_PREFIX):
            owner = self.parent
            if owner.nodeName == "p":
                text = text.replace("\n", "\n ")
            elif owner.nodeName == "li" and owner.childNodes[0] == self:
                text = "\n " + text.replace("\n", "\n ")
        return self.doc.normalizeEntities(text)
class EntityReference:
    """A NanoDom node that serialises as an entity reference (&name;)."""

    type = "entity_ref"

    def __init__(self, entity):
        # The entity name without the surrounding "&" and ";".
        self.entity = entity

    def handleAttributes(self):
        """Entity references carry no attribute markers; do nothing."""
        pass

    def toxml(self):
        """Return the entity wrapped in "&" and ";"."""
        return "".join(("&", self.entity, ";"))
251 ======================================================================
252 ========================== PRE-PROCESSORS ============================
253 ======================================================================
255 Preprocessors munge source text before we start doing anything too
256 complicated.
258 Each preprocessor implements a "run" method that takes a pointer to
259 a list of lines of the document, modifies it as necessary and
260 returns either the same pointer or a pointer to a new list.
class HeaderPreprocessor:
    """Replaces underlined headers with hashed headers to avoid
    the need for lookahead later."""

    def run(self, lines):
        """Normalise headers in ``lines`` (the list is modified in place).

        * After every line starting with "#" a separator line "\\n"
          is inserted.
        * A non-blank line followed by a line made only of "=" becomes
          "# <text>"; followed by a line made only of "-" it becomes
          "## <text>". The underline is replaced by an empty line.

        @param lines: a list of lines of the document
        @returns: the same list object
        """
        i = 0
        # The list grows while it is walked (separator insertion), so
        # its length is re-read on every pass; a range computed up
        # front would stop short of the last lines.
        while i < len(lines):
            line = lines[i]
            if line:
                if line.startswith("#"):
                    lines.insert(i + 1, "\n")

                # Strict "<": the last line has no successor to inspect.
                if (i + 1 < len(lines)
                        and lines[i + 1]
                        and lines[i + 1][0] in ('-', '=')):

                    underline = lines[i + 1].strip()

                    if underline == "=" * len(underline):
                        lines[i] = "# " + line.strip()
                        lines[i + 1] = ""
                    elif underline == "-" * len(underline):
                        lines[i] = "## " + line.strip()
                        lines[i + 1] = ""
            i += 1

        return lines


HEADER_PREPROCESSOR = HeaderPreprocessor()
class LinePreprocessor:
    """Deals with HR lines (needs to be done before processing lists)"""

    def run(self, lines):
        """Replace every horizontal-rule line with "<hr />" in place.

        @param lines: a list of lines of the document
        @returns: the same list object
        """
        for i, line in enumerate(lines):
            if self._isLine(line):
                lines[i] = "<hr />"
        return lines

    def _isLine(self, block):
        """Determines if a block should be replaced with an <HR>

        @param block: a single line of text
        @returns: 1 if, ignoring white space, the line consists of
                  three or more "*", "-" or "_" characters of one
                  kind; 0 otherwise, and always 0 for an indented
                  (code block) line
        """
        # Use the shared 'tabbed' pattern, as LineBreaksPreprocessor
        # does, so every preprocessor agrees on what counts as an
        # indented code line.
        if RE.regExp['tabbed'].match(block):
            return 0
        text = "".join([x for x in block if not x.isspace()])
        if len(text) <= 2:
            return 0
        for pattern in ('isline1', 'isline2', 'isline3'):
            m = RE.regExp[pattern].match(text)
            if m and m.group(1):
                return 1
        return 0


LINE_PREPROCESSOR = LinePreprocessor()
class LineBreaksPreprocessor:
    """Replaces double spaces at the end of the lines with <br />."""

    def run(self, lines):
        """Append "<br />" to every line ending in two spaces, in place.

        Indented (code block) lines are left alone.

        @param lines: a list of lines of the document
        @returns: the same list object
        """
        for i, line in enumerate(lines):
            # Two trailing spaces, as the class docstring specifies; a
            # single trailing space must not force a line break.
            if (line.endswith("  ")
                    and not RE.regExp['tabbed'].match(line)):
                lines[i] = line + "<br />"
        return lines


LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
class HtmlBlockPreprocessor:
    """Removes html blocks from self.lines"""

    def run(self, lines):
        """Swap raw HTML blocks for placeholders.

        The lines are joined and split on blank lines; each block that
        looks like raw HTML is stored in ``self.stash`` (assigned from
        outside this class) and replaced by the returned placeholder.

        @param lines: a list of lines of the document
        @returns: a new list of lines
        """
        new_blocks = []
        text = "\n".join(lines)
        for block in text.split("\n\n"):
            if block.startswith("\n"):
                block = block[1:]
            if self._isHtmlBlock(block):
                new_blocks.append(self.stash.store(block.strip()))
            else:
                new_blocks.append(block)
        return "\n\n".join(new_blocks).split("\n")

    def _isHtmlBlock(self, block):
        """Tell whether ``block`` should be stashed as raw HTML.

        That is the case when it starts with "<", ends (ignoring
        trailing white space) with ">", and either its second
        character is one of ! ? @ % or its first tag is block-level.
        """
        if not (block.startswith("<") and block.rstrip().endswith(">")):
            return False
        # Safe: a block passing the test above has at least 2 characters.
        if block[1] in ("!", "?", "@", "%"):
            return True
        words = block[1:].replace(">", " ").split()
        # "<>" or "< >" carries no tag name at all; treat it as
        # ordinary text instead of indexing into an empty list.
        if not words:
            return False
        return is_block_level(words[0].lower())


HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
class ReferencePreprocessor:
    """Collects reference definitions such as
    ``[Google]: http://www.google.com/`` and removes them from the text."""

    def run(self, lines):
        """Record reference definitions and drop their lines.

        Each definition is stored in ``self.references`` (assigned
        from outside this class) under its lower-cased, stripped id as
        an ``(href, title)`` pair.

        @param lines: a list of lines of the document
        @returns: a new list holding the lines that are not definitions
        """
        kept = []
        for line in lines:
            found = RE.regExp['reference-def'].match(line)
            if not found:
                kept.append(line)
                continue
            key = found.group(2).strip().lower()
            title = dequote(found.group(4).strip())
            self.references[key] = (found.group(3), title)
        return kept


REFERENCE_PREPROCESSOR = ReferencePreprocessor()
373 ======================================================================
374 ========================== INLINE PATTERNS ===========================
375 ======================================================================
377 Inline patterns such as *emphasis* are handled by means of auxiliary
378 objects, one per pattern. Each pattern object uses a single regular
379 expression and needs to support the following methods:
381 pattern.getCompiledRegExp() - returns a regular expression
383 pattern.handleMatch(m, doc) - takes a match object and returns
384 a NanoDom node (as a part of the provided
385 doc) or None
387 All of python markdown's built-in patterns subclass from BasePattern,
388 but you can add additional patterns that don't.
390 Also note that all the regular expressions used by inline must
391 capture the whole block. For this reason, they all start with
392 '^(.*)' and end with '(.*)$'. In the case of the built-in expressions,
393 BasePattern takes care of adding the "^(.*)" and "(.*)$".
395 Finally, the order in which regular expressions are applied is very
396 important - e.g. if we first replace http://.../ links with <a> tags
397 and _then_ try to replace inline html, we would end up with a mess.
398 So, we apply the expressions in the following order:
400 * escape and backticks have to go before everything else, so
401 that we can preempt any markdown patterns by escaping them.
403 * then we handle auto-links (must be done before inline html)
405 * then we handle inline HTML. At this point we will simply
406 replace all inline HTML strings with a placeholder and add
407 the actual HTML to a hash.
409 * then inline images (must be done before links)
411 * then bracketed links, first regular then reference-style
413 * finally we apply strong and emphasis
# Text without square brackets, and a bracketed span "[...]" whose
# content may itself contain up to six levels of nested brackets.
NOBRACKET = r'[^\]\[]*'
BRK = ( r'\[('
        + (NOBRACKET + r'(\['+NOBRACKET)*6
        + (NOBRACKET+ r'\])*'+NOBRACKET)*6
        + NOBRACKET + r')\]' )

BACKTICK_RE = r'\`([^\`]*)\`'                    # `e= m*c^2`
DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`'             # ``e=f("`")``
ESCAPE_RE = r'\\(.)'                             # \<
EMPHASIS_RE = r'\*([^\*]*)\*'                    # *emphasis*
STRONG_RE = r'\*\*(.*)\*\*'                      # **strong**
# NOTE(review): the body excludes "_" rather than "*"; this looks
# copied from the underscore variant below -- confirm before changing.
STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*'            # ***strong***

if SMART_EMPHASIS:
    EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_'        # _emphasis_
else:
    EMPHASIS_2_RE = r'_([^_]*)_'                 # _emphasis_

STRONG_2_RE = r'__([^_]*)__'                     # __strong__
STRONG_EM_2_RE = r'___([^_]*)___'                # ___strong___

LINK_RE = BRK + r'\s*\(([^\)]*)\)'               # [text](url)
LINK_ANGLED_RE = BRK + r'\s*\(<([^\)]*)>\)'      # [text](<url>)
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
REFERENCE_RE = BRK + r'\s*\[([^\]]*)\]'          # [Google][3]
# Raw literal like its neighbours: the pattern text is unchanged, but
# "\s" and "\[" no longer count as invalid string escapes.
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s*\[([^\]]*)\]' # ![alt text][2]
NOT_STRONG_RE = r'( \* )'                        # stand-alone *
AUTOLINK_RE = r'<(http://[^>]*)>'                # <http://www.123.com>
AUTOMAIL_RE = r'<([^> ]*@[^> ]*)>'               # <me@example.com>
HTML_RE = r'(\<[^\>]*\>)'                        # <...>
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'                # &amp;
class BasePattern:
    """Base class of the built-in inline patterns.

    Wraps the given expression so that it captures the whole block:
    group 1 is everything before the match, the last group everything
    after it, and the pattern's own groups start at 2.
    """

    def __init__(self, pattern):
        """@param pattern: the regular expression source of the pattern"""
        self.pattern = pattern
        wrapped = "^(.*)" + pattern + "(.*)$"
        self.compiled_re = re.compile(wrapped, re.DOTALL)

    def getCompiledRegExp(self):
        """Return the compiled, whole-block regular expression."""
        return self.compiled_re
class SimpleTextPattern(BasePattern):
    """Pattern whose captured text is emitted as a plain text node."""

    def handleMatch(self, m, doc):
        """Return a text node holding group 2 of the match."""
        captured = m.group(2)
        return doc.createTextNode(captured)
class SimpleTagPattern(BasePattern):
    """Pattern whose captured text is wrapped in a single element."""

    def __init__(self, pattern, tag):
        """
        @param pattern: the regular expression source of the pattern
        @param tag: name of the element to create for each match
        """
        BasePattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, doc):
        """Return a ``self.tag`` element containing group 2 as text."""
        wrapper = doc.createElement(self.tag)
        content = doc.createTextNode(m.group(2))
        wrapper.appendChild(content)
        return wrapper
class BacktickPattern(BasePattern):
    """Pattern for backtick-quoted code spans."""

    def __init__(self, pattern):
        """@param pattern: the regular expression source of the pattern"""
        BasePattern.__init__(self, pattern)
        self.tag = "code"

    def handleMatch(self, m, doc):
        """Return a <code> element holding the stripped code text with
        every "&" written as "&amp;"."""
        code_text = m.group(2).strip().replace("&", "&amp;")
        el = doc.createElement(self.tag)
        el.appendChild(doc.createTextNode(code_text))
        return el
class DoubleTagPattern(SimpleTagPattern):
    """Pattern whose captured text is wrapped in two nested elements.

    ``self.tag`` holds both names separated by a comma, outer first
    (e.g. 'strong,em').
    """

    def handleMatch(self, m, doc):
        """Return the outer element; the inner one holds group 2 as text."""
        outer_name, inner_name = self.tag.split(",")
        outer = doc.createElement(outer_name)
        inner = doc.createElement(inner_name)
        outer.appendChild(inner)
        inner.appendChild(doc.createTextNode(m.group(2)))
        return outer
class HtmlPattern(BasePattern):
    """Pattern that moves the matched text into an HTML stash.

    ``self.stash`` is assigned from outside this class.
    """

    def handleMatch(self, m, doc):
        """Store group 2 in the stash and return a text node holding
        the placeholder the stash handed back."""
        token = self.stash.store(m.group(2))
        return doc.createTextNode(token)
class LinkPattern(BasePattern):
    """Pattern for inline links: [text](url) and [text](url "title")."""

    def handleMatch(self, m, doc):
        """Return an <a> element for the match.

        Group 2 is the link text. Group 9 is split on white space: the
        first word is the href (empty when there is none) and any
        remaining words form the title, with surrounding quotes removed.
        """
        link = doc.createElement('a')
        link.appendChild(doc.createTextNode(m.group(2)))

        # words is [], [href], or [href, title word, ...]
        words = m.group(9).split()
        href = ""
        if words:
            href = words[0]
        link.setAttribute('href', href)

        if len(words) > 1:
            # we also got a title
            title = dequote(" ".join(words[1:]).strip())
            link.setAttribute('title', title)
        return link
class ImagePattern(BasePattern):
    """Pattern for inline images: ![alt](src) and ![alt](src "title")."""

    def handleMatch(self, m, doc):
        """Return an <img> element for the match.

        Group 9 is split on white space: the first word is the src and
        any remaining words form the title, with surrounding quotes
        removed. Group 2 becomes the alt text.
        """
        el = doc.createElement('img')
        src_parts = m.group(9).split()
        # "![alt]()" yields no words at all; fall back to an empty src,
        # the way LinkPattern falls back to an empty href, instead of
        # failing on src_parts[0].
        if src_parts:
            el.setAttribute('src', src_parts[0])
        else:
            el.setAttribute('src', "")
        if len(src_parts) > 1:
            el.setAttribute('title', dequote(" ".join(src_parts[1:])))
        if ENABLE_ATTRIBUTES:
            # Attach the alt text as a temporary child so that its
            # handleAttributes() can act on this element, then detach
            # it again and keep only the remaining text.
            text = doc.createTextNode(m.group(2))
            el.appendChild(text)
            text.handleAttributes()
            truealt = text.value
            el.childNodes.remove(text)
        else:
            truealt = m.group(2)
        el.setAttribute('alt', truealt)
        return el
class ReferencePattern(BasePattern):
    """Pattern for reference-style links: [Google][3].

    ``self.references`` (id -> (href, title)) is assigned from
    outside this class.
    """

    def handleMatch(self, m, doc):
        """Return the node for a reference, or None when its id is
        not defined."""
        text = m.group(2)
        # For something like "[Google][]" the text itself ("google")
        # serves as the id.
        key = (m.group(9) or text).lower()
        if key not in self.references:  # ignore undefined refs
            return None
        href, title = self.references[key]
        return self.makeTag(href, title, text, doc)

    def makeTag(self, href, title, text, doc):
        """Build an <a> element with the given href, optional title
        and link text."""
        anchor = doc.createElement('a')
        anchor.setAttribute('href', href)
        if title:
            anchor.setAttribute('title', title)
        anchor.appendChild(doc.createTextNode(text))
        return anchor
class ImageReferencePattern(ReferencePattern):
    """Pattern for reference-style images: ![alt text][2]."""

    def makeTag(self, href, title, text, doc):
        """Build an <img> element: ``href`` becomes src, ``text``
        becomes alt, and ``title`` is set only when it is non-empty."""
        image = doc.createElement('img')
        image.setAttribute('src', href)
        if title:
            image.setAttribute('title', title)
        image.setAttribute('alt', text)
        return image
class AutolinkPattern(BasePattern):
    """Pattern for automatic links: <http://www.123.com>."""

    def handleMatch(self, m, doc):
        """Return an <a> element whose href and text are both the URL."""
        url = m.group(2)
        anchor = doc.createElement('a')
        anchor.setAttribute('href', url)
        anchor.appendChild(doc.createTextNode(url))
        return anchor
class AutomailPattern(BasePattern):
    """Pattern for automatic e-mail links: <me@example.com>."""

    def handleMatch(self, m, doc):
        """Return an <a> element for the address.

        A leading "mailto:" is dropped from the visible address. Every
        character of the visible address is appended as a numeric
        entity reference, and the href ("mailto:" plus the address) is
        written entirely as numeric character references.
        """
        anchor = doc.createElement('a')

        address = m.group(2)
        prefix = "mailto:"
        if address.startswith(prefix):
            address = address[len(prefix):]

        for char in address:
            anchor.appendChild(
                doc.createEntityReference("#%d" % ord(char)))

        encoded = ['&#%d;' % ord(char) for char in "mailto:" + address]
        anchor.setAttribute('href', "".join(encoded))
        return anchor
# Shared pattern objects, one per inline construct.

# Escapes and code spans
ESCAPE_PATTERN = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN = SimpleTextPattern(NOT_STRONG_RE)
BACKTICK_PATTERN = BacktickPattern(BACKTICK_RE)
DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)

# Strong and emphasis
STRONG_PATTERN = SimpleTagPattern(STRONG_RE, 'strong')
STRONG_PATTERN_2 = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')

# Links and images
LINK_PATTERN = LinkPattern(LINK_RE)
LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN = ReferencePattern(REFERENCE_RE)

# Inline HTML and entities (both are stashed)
HTML_PATTERN = HtmlPattern(HTML_RE)
ENTITY_PATTERN = HtmlPattern(ENTITY_RE)

# Automatic links
AUTOLINK_PATTERN = AutolinkPattern(AUTOLINK_RE)
AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE)
628 ======================================================================
629 ========================== POST-PROCESSORS ===========================
630 ======================================================================
632 Markdown also allows post-processors, which are similar to
633 preprocessors in that they need to implement a "run" method. Unlike
634 pre-processors, they take a NanoDom document as a parameter and work
635 with that.
637 There are currently no standard post-processors, but the footnote
638 extension below uses one.
641 ======================================================================
642 ========================== MISC AUXILIARY CLASSES ====================
643 ======================================================================
class HtmlStash:
    """This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders."""

    def __init__(self):
        self.html_counter = 0    # for counting inline html segments
        self.rawHtmlBlocks = []

    def store(self, html):
        """Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        @param html: an html segment
        @returns : a placeholder string """
        self.rawHtmlBlocks.append(html)
        # The placeholder carries the number of the segment just stored.
        token = HTML_PLACEHOLDER % self.html_counter
        self.html_counter = self.html_counter + 1
        return token
class BlockGuru:
    """Helper for finding the boundaries of indented blocks."""

    def _findHead(self, lines, fn, allowBlank=0):
        """Functional magic to help determine boundaries of indented
        blocks.

        Lines are consumed from the front for as long as ``fn``
        returns something truthy for them. With ``allowBlank`` a blank
        line is kept (as "") when the next non-blank line still
        matches; otherwise the block ends there.

        @param lines: an array of strings
        @param fn: a function that returns a substring of a string
                   if the string matches the necessary criteria
        @param allowBlank: specifies whether it's ok to have blank
                           lines between matching functions
        @returns: a list of post processed items and the unused
                  remainder of the original list"""

        items = []
        i = 0  # index of the first line that has not been consumed

        for line in lines:

            if line.strip():
                part = fn(line)
                if not part:
                    return items, lines[i:]
                items.append(part)
                i += 1
                continue

            # From here on the line is blank.
            if not allowBlank:
                return items, lines[i:]

            # A blank line _might_ be the end: look at the next
            # non-blank line to decide.
            i += 1
            upcoming = None
            for candidate in lines[i:]:
                if candidate.strip():
                    upcoming = candidate
                    break
            if upcoming is None:
                break           # no more text => this is the end
            if not fn(upcoming):
                break           # found the end of the block
            items.append("")

        else:
            i += 1

        return items, lines[i:]

    def detabbed_fn(self, line):
        """ An auxiliary method to be passed to _findHead """
        m = RE.regExp['tabbed'].match(line)
        if not m:
            return None
        return m.group(4)

    def detectTabbed(self, lines):
        """Split ``lines`` into the leading indented block, with the
        indent removed, and the remaining lines."""
        return self._findHead(lines, self.detabbed_fn, allowBlank=1)
def print_error(string):
    """Print an error string to stderr"""
    line = string + '\n'
    sys.stderr.write(line)
def dequote(string):
    """ Removes quotes from around a string

    The string is returned without its first and last character when
    both are double quotes or both are single quotes; otherwise it is
    returned unchanged.

    @param string: the possibly quoted text
    @returns: the text without the surrounding pair of quotes
    """
    # A pair needs two characters: a lone quote mark both "starts"
    # and "ends" with a quote but is not a quoted string.
    if len(string) < 2:
        return string
    for quote in ('"', "'"):
        if string.startswith(quote) and string.endswith(quote):
            return string[1:-1]
    return string
758 ======================================================================
759 ========================== CORE MARKDOWN =============================
760 ======================================================================
762 This stuff is ugly, so if you are thinking of extending the syntax,
763 see first if you can do it via pre-processors, post-processors,
764 inline patterns or a combination of the three.
class CorePatterns:
    """This class is scheduled for removal as part of a refactoring
    effort."""

    patterns = {
        'header':          r'(#*)([^#]*)(#*)', # # A title
        'reference-def' :  r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)',
                           # [Google]: http://www.google.com/
        'containsline':    r'([-]*)$|^([=]*)', # -----, =====, etc.
        'ol':              r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text
        'ul':              r'[ ]{0,3}[*+-]\s+(.*)', # "* text"
        'isline1':         r'(\**)', # ***
        'isline2':         r'(\-*)', # ---
        'isline3':         r'(\_*)', # ___
        # An indented line: a tab or four spaces. The 'ol' and 'ul'
        # patterns above allow up to three leading spaces, so a
        # shorter indent must not count as "tabbed". The count is
        # spelled out so it cannot be lost to white-space collapsing;
        # it corresponds to the module's tab length of 4.
        'tabbed':          r'((\t)|([ ]{4}))(.*)', # an indented line
        'quoted' :         r'> ?(.*)', # a quoted block ("> ...")
    }

    def __init__(self):
        """Compile every entry of ``patterns`` into ``self.regExp``.

        Each pattern is anchored with "^" and "$" and compiled with
        re.DOTALL; 'containsline' is then replaced by a multi-line
        expression.
        """
        self.regExp = {}
        for key, source in self.patterns.items():
            self.regExp[key] = re.compile("^%s$" % source, re.DOTALL)

        self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)


RE = CorePatterns()
797 class Markdown:
798 """ Markdown formatter class for creating an html document from
799 Markdown text """
def __init__(self, source=None):
    """Creates a new Markdown instance.

    @param source: The text in Markdown format. A unicode string is
                   encoded to UTF-8 before it is stored. """

    if isinstance(source, unicode):
        source = source.encode('utf8')
    self.source = source

    self.blockGuru = BlockGuru()
    self.registeredExtensions = []
    self.stripTopLevelTags = 1

    # Run in this order on the list of lines.
    self.preprocessors = [
        HEADER_PREPROCESSOR,
        LINE_PREPROCESSOR,
        HTML_BLOCK_PREPROCESSOR,
        LINE_BREAKS_PREPROCESSOR,
        # A footnote preprocessor will get inserted here
        REFERENCE_PREPROCESSOR,
    ]

    # A footnote postprocessor will get inserted later.
    self.postprocessors = []

    self.prePatterns = []

    # The order of the handlers matters!!!
    self.inlinePatterns = [
        DOUBLE_BACKTICK_PATTERN,
        BACKTICK_PATTERN,
        ESCAPE_PATTERN,
        IMAGE_LINK_PATTERN,
        IMAGE_REFERENCE_PATTERN,
        REFERENCE_PATTERN,
        LINK_ANGLED_PATTERN,
        LINK_PATTERN,
        AUTOLINK_PATTERN,
        AUTOMAIL_PATTERN,
        HTML_PATTERN,
        ENTITY_PATTERN,
        NOT_STRONG_PATTERN,
        STRONG_EM_PATTERN,
        STRONG_EM_PATTERN_2,
        STRONG_PATTERN,
        STRONG_PATTERN_2,
        EMPHASIS_PATTERN,
        EMPHASIS_PATTERN_2,
    ]

    self.reset()
853 def registerExtension(self, extension) :
854 self.registeredExtensions.append(extension)
def reset(self):
    """Resets all state variables so that we can start
    with a new text."""
    references = {}
    stash = HtmlStash()
    self.references = references
    self.htmlStash = stash

    # The module-level processors and patterns are pointed at the
    # fresh stash and reference table of this instance.
    for user in (HTML_BLOCK_PREPROCESSOR, HTML_PATTERN, ENTITY_PATTERN):
        user.stash = stash
    for user in (REFERENCE_PREPROCESSOR,
                 REFERENCE_PATTERN,
                 IMAGE_REFERENCE_PATTERN):
        user.references = references

    for extension in self.registeredExtensions:
        extension.reset()
def _transform(self):
    """Transforms the Markdown text into a XHTML body document

    @returns: A NanoDom Document """

    # Set up the document: <span class="markdown"> is the top
    # element and starts with a newline text node.
    doc = Document()
    self.doc = doc
    root = doc.createElement("span")
    self.top_element = root
    root.appendChild(doc.createTextNode('\n'))
    root.setAttribute('class', 'markdown')
    doc.appendChild(root)

    # Fix up the source text: trim it, unify the line endings, make
    # sure it ends with a blank line and expand the tabs.
    text = self.source.strip()
    text = text.replace("\r\n", "\n")
    text = text.replace("\r", "\n")
    text = (text + "\n\n").expandtabs(TAB_LENGTH)

    # Split into lines and run the pre-processors on them.
    self.lines = text.split("\n")
    for prep in self.preprocessors:
        self.lines = prep.run(self.lines)

    # Create a NanoDom tree from the lines. Every line starting with
    # "#" opens a new section; each section is processed on its own.
    section = []
    for line in self.lines:
        if line.startswith("#"):
            self._processSection(root, section)
            section = [line]
        else:
            section.append(line)
    self._processSection(root, section)

    # The top element also ends with a newline text node.
    root.appendChild(doc.createTextNode('\n'))

    # Run the post-processors on the finished document.
    for postprocessor in self.postprocessors:
        postprocessor.run(doc)

    return doc
def _processSection(self, parent_elem, lines,
                    inList=0, looseList=0):

    """Process a section of a source document, looking for high
    level structural elements like lists, block quotes, code
    segments, html blocks, etc. Some of those then get stripped
    of their high level markup (e.g. get unindented) and the
    lower-level markup is processed recursively.

    @param parent_elem: A NanoDom element to which the content
                        will be added
    @param lines: a list of lines
    @param inList: a level
    @param looseList: when true, the first paragraph of a list item
                      is wrapped in <p> as well
    @returns: None"""

    if not lines:
        return

    # Check if this section starts with a list, a blockquote or
    # a code block; if so, the matching handler takes over.
    first_line = lines[0]
    handlers = (('ul', self._processUList),
                ('ol', self._processOList),
                ('quoted', self._processQuote),
                ('tabbed', self._processCodeBlock))
    for key, handler in handlers:
        if RE.regExp[key].match(first_line):
            handler(parent_elem, lines, inList)
            return

    # We are NOT looking at one of the high-level structures like
    # lists or blockquotes. So, it's just a regular paragraph
    # (though perhaps nested inside a list or something else). If
    # we are NOT inside a list, we just need to look for a blank
    # line to find the end of the block. If we ARE inside a
    # list, however, we need to consider that a sublist does not
    # need to be separated by a blank line. Rather, the following
    # markup is legal:
    #
    # * The top level list item
    #
    #   Another paragraph of the list. This is where we are now.
    #   * Underneath we might have a sublist.

    if inList:

        def ends_paragraph(line):
            return (RE.regExp['ul'].match(line)
                    or RE.regExp['ol'].match(line)
                    or not line.strip())

        start, theRest = self._linesUntil(lines, ends_paragraph)

        self._processSection(parent_elem, start,
                             inList - 1, looseList=looseList)
        self._processSection(parent_elem, theRest,
                             inList - 1, looseList=looseList)
        return

    # Ok, so it's just a simple block that ends at the first blank line
    paragraph, theRest = self._linesUntil(lines,
                                          lambda line: not line.strip())

    if paragraph:
        if paragraph[0].startswith('#'):
            m = RE.regExp['header'].match(paragraph[0])
            if m:
                # The number of leading hashes is the header level.
                level = len(m.group(1))
                h = self.doc.createElement("h%d" % level)
                parent_elem.appendChild(h)
                for node in self._handleInlineWrapper2(m.group(2).strip()):
                    h.appendChild(node)
            else:
                message(CRITICAL, "We've got a problem header!")
        else:
            nodes = self._handleInlineWrapper2("\n".join(paragraph))

            if (parent_elem.nodeName == 'li'
                    and not looseList
                    and not parent_elem.childNodes):
                # If this is the first paragraph inside "li", don't
                # put <p> around it - append the paragraph bits directly
                # onto parent_elem
                el = parent_elem
            else:
                # Otherwise make a "p" element
                el = self.doc.createElement("p")
                parent_elem.appendChild(el)

            for node in nodes:
                el.appendChild(node)

    if theRest:
        theRest = theRest[1:]  # skip the first (blank) line

    self._processSection(parent_elem, theRest, inList)
1028 def _processUList(self, parent_elem, lines, inList) :
1029 self._processList(parent_elem, lines, inList,
1030 listexpr='ul', tag = 'ul')
1032 def _processOList(self, parent_elem, lines, inList) :
1033 self._processList(parent_elem, lines, inList,
1034 listexpr='ol', tag = 'ol')
def _processList(self, parent_elem, lines, inList, listexpr, tag):
    """Given a list of document lines starting with a list item,
    finds the end of the list, breaks it up, and recursively
    processes each list item and the remainder of the text file.

    @param parent_elem: A dom element to which the content will be added
    @param lines: a list of lines
    @param inList: a level
    @param listexpr: not used by this method
    @param tag: name of the list element to create ('ul' or 'ol')
    @returns: None"""

    list_el = self.doc.createElement(tag)
    parent_elem.appendChild(list_el)

    ul_re = RE.regExp['ul']
    ol_re = RE.regExp['ol']
    tabbed_re = RE.regExp['tabbed']

    looseList = 0
    items = []   # one list of lines per list item
    i = 0        # index of the first line that has not been consumed

    for line in lines:

        if not line.strip():
            # If we see a blank line, this _might_ be the end of the
            # list: look at the next non-blank line to decide.
            i += 1
            upcoming = None
            for candidate in lines[i:]:
                if candidate.strip():
                    upcoming = candidate
                    break
            if upcoming is None:
                break    # there is no more text => end of the list

            if not (ul_re.match(upcoming)
                    or ol_re.match(upcoming)
                    or tabbed_re.match(upcoming)):
                break    # found the end of the list

            # Still inside the list: keep the blank line (without its
            # white space) in the current item and mark the list loose.
            items[-1].append(line.strip())
            looseList = 1
            continue

        # Detect list items at the current level, removing the indent
        # from the lines of child elements where necessary.
        new_item = ul_re.match(line) or ol_re.match(line)
        if new_item:
            # A marker without any text after it starts no item.
            if new_item.group(1):
                items.append([new_item.group(1)])
        else:
            indented = tabbed_re.match(line)
            if indented:
                items[-1].append(indented.group(4))  # after the 'tab'
            else:
                items[-1].append(line)  # just regular continuation
        i += 1

    # Add the dom elements
    for item_lines in items:
        li = self.doc.createElement("li")
        list_el.appendChild(li)
        self._processSection(li, item_lines, inList + 1,
                             looseList=looseList)

    # Process the remaining part of the section
    self._processSection(parent_elem, lines[i:], inList)
def _linesUntil(self, lines, condition):
    """Split a list of lines at the first line satisfying a predicate.

    @param lines: a list of lines
    @param condition: a predicate function, called with one line at a time
    @returns: a tuple (before, rest); rest starts with the first line
        for which condition() returned a true value, and is empty when
        no line qualifies """

    # Assume no line qualifies, in which case everything goes in `before`.
    split_at = len(lines)
    for index, line in enumerate(lines):
        if condition(line):
            split_at = index
            break
    return lines[:split_at], lines[split_at:]
def _processQuote(self, parent_elem, lines, inList):
    """Handle a run of lines that starts with a quote.

    Collects the leading lines matched by RE.regExp['quoted'], keeps
    group 1 of each match as the dequoted text, wraps that text in a
    new blockquote element and then processes whatever follows the
    quote as a sibling of the blockquote.

    @param parent_elem: DOM element to which the content will be added
    @param lines: a list of lines
    @param inList: a level
    @returns: None """

    quoted = RE.regExp['quoted']

    dequoted = []
    for line in lines:
        m = quoted.match(line)
        if not m:
            break  # first unquoted line ends the quote
        dequoted.append(m.group(1))

    # Exactly one input line was consumed per dequoted entry.
    remainder = lines[len(dequoted):]

    blockquote = self.doc.createElement('blockquote')
    parent_elem.appendChild(blockquote)

    self._processSection(blockquote, dequoted, inList)
    self._processSection(parent_elem, remainder, inList)
def _processCodeBlock(self, parent_elem, lines, inList):
    """Handle a run of lines that starts with a code block.

    Asks the block guru to separate the tabbed block from the lines
    after it, stores the block verbatim inside <pre><code> and then
    processes the remaining lines.

    @param parent_elem: DOM element to which the content will be added
    @param lines: a list of lines
    @param inList: a level
    @returns: None """

    block_lines, remainder = self.blockGuru.detectTabbed(lines)

    pre = self.doc.createElement('pre')
    code = self.doc.createElement('code')
    parent_elem.appendChild(pre)
    pre.appendChild(code)

    # Trailing whitespace is trimmed, exactly one final newline is kept
    # and ampersands are escaped before the text is stored.
    body = "\n".join(block_lines).rstrip() + "\n"
    body = body.replace("&", "&amp;")
    code.appendChild(self.doc.createTextNode(body))

    self._processSection(parent_elem, remainder, inList)
def _handleInlineWrapper2(self, line):
    """Apply every inline pattern, in order, to a line of text.

    The line is kept as a list of parts. Each pattern in
    self.inlinePatterns is tried against every string part; a part
    that matches is replaced in place by the pieces returned from
    _applyPattern(). Once all patterns have been tried, the strings
    that are left are turned into text nodes.

    @param line: the text to be processed
    @returns: a list of nodes """

    parts = [line]

    for pattern in self.inlinePatterns:
        i = 0
        while i < len(parts):
            x = parts[i]
            if isinstance(x, (str, unicode)):
                result = self._applyPattern(x, pattern)
                if result:
                    # _applyPattern() hands the pieces back in reverse
                    # order. Splice them in by position instead of
                    # parts.remove(x), which searches by equality from
                    # the start of the list and can hit another element.
                    parts[i:i + 1] = list(result)[::-1]
                    # Stay on the same index: the piece now sitting
                    # there may match this pattern again.
                    continue
            i += 1

    for i, x in enumerate(parts):
        if isinstance(x, (str, unicode)):
            parts[i] = self.doc.createTextNode(x)

    return parts
def _handleInlineWrapper(self, line):
    """Expand a line into a flat list of nodes without recursion.

    A wrapper around _handleInline(): every string part is replaced
    in place by what _handleInline() returns for it, and the same
    index is looked at again until it holds a non-string.

    @param line: the text to be processed
    @returns: a list of the non-string objects produced by
        _handleInline() """

    parts = [line]
    i = 0

    while i < len(parts):
        x = parts[i]
        if isinstance(x, (str, unicode)):
            # The original inserted each returned piece at the same
            # index, which reverses them; do the same with one splice.
            # Replacing by position avoids parts.remove(x), which
            # searches the whole list by equality.
            parts[i:i + 1] = list(self._handleInline(x))[::-1]
        else:
            i += 1

    return parts
def _handleInline(self, line):
    """Apply the first inline pattern that matches a piece of text.

    Inline patterns are tried in the order they appear in
    self.inlinePatterns; the first truthy result of _applyPattern()
    is returned as is.

    @param line: A block of Markdown text
    @return: the result of _applyPattern() for the first matching
        pattern, otherwise a one-element list holding a text node
        (a single space when the line is empty) """

    if not line:
        return [self.doc.createTextNode(' ')]

    for pattern in self.inlinePatterns:
        pieces = self._applyPattern(line, pattern)
        if pieces:
            return pieces

    # Nothing matched: the whole line is plain text.
    return [self.doc.createTextNode(line)]
def _applyPattern(self, line, pattern):
    """Try a single inline pattern against a line.

    @param line: the text to be processed
    @param pattern: the pattern to be checked

    @returns: None when the compiled expression does not match or
        when the pattern's handleMatch() yields nothing; otherwise a
        3-tuple (last regex group, new node, first regex group).
        NOTE(review): the groups look like the text after and before
        the matched construct, i.e. the tuple is in reverse document
        order - confirm against the pattern base class. """

    m = pattern.getCompiledRegExp().match(line)
    if m:
        # Let the pattern build a node from the match; it may decline.
        node = pattern.handleMatch(m, self.doc)
        if node:
            # Those are in the reverse order!
            return (m.groups()[-1], node, m.group(1))
    return None
def __str__(self):
    """Return the document in XHTML format.

    @returns: A serialized XHTML body."""

    doc = self._transform()
    xml = doc.toxml()

    # Put every stashed raw html piece back. A placeholder that ended
    # up alone inside a paragraph takes the paragraph tags with it.
    stash = self.htmlStash
    for index in range(stash.html_counter):
        marker = HTML_PLACEHOLDER % index
        raw_html = stash.rawHtmlBlocks[index]
        xml = xml.replace("<p>%s\n</p>" % marker, raw_html + "\n")
        xml = xml.replace(marker, raw_html)

    xml = xml.replace(FN_BACKLINK_TEXT, "&#8617;")

    # Return everything but the top level tag.
    # NOTE(review): 23 and 7 are presumably the lengths of the opening
    # and closing top-level tag - confirm against _transform().
    if self.stripTopLevelTags:
        xml = xml.strip()[23:-7]

    if isinstance(xml, unicode):
        xml = xml.encode("utf8")

    return xml


toString = __str__
1337 ========================= FOOTNOTES =================================
1339 This section adds footnote handling to markdown. It can be used as
1340 an example for extending python-markdown with relatively complex
1341 functionality. While in this case the extension is included inside
1342 the module itself, it could just as easily be added from outside the
1343 module. Note that all markdown classes above are ignorant about
1344 footnotes. All footnote functionality is provided separately and
1345 then added to the markdown instance at the run time.
1347 Footnote functionality is attached by calling extendMarkdown()
1348 method of FootnoteExtension. The method also registers the
1349 extension to allow its state to be reset by a call to reset()
1350 method.
class FootnoteExtension:
    """Footnote support that is attached to a Markdown instance at run time.

    Keeps the footnote definitions and the number given to each
    referenced footnote, and builds the footnote div after the
    document has been transformed. Call extendMarkdown() to hook the
    pre-processor, inline pattern and post-processor into a Markdown
    instance. """

    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M)  # [^a]

    FN_PLACE_MARKER = "///Footnotes Go Here///"

    def __init__(self):
        self.reset()

    def extendMarkdown(self, md):
        """Attach footnote handling to a Markdown instance.

        @param md: the Markdown instance to extend
        @returns: None """

        self.md = md

        # Stateless extensions do not need to be registered
        md.registerExtension(self)

        # Insert a preprocessor before ReferencePreprocessor
        index = md.preprocessors.index(REFERENCE_PREPROCESSOR)
        preprocessor = FootnotePreprocessor(self)
        preprocessor.md = md
        md.preprocessors.insert(index, preprocessor)

        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        index = md.inlinePatterns.index(IMAGE_REFERENCE_PATTERN)
        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))

        # Insert a post-processor that would actually add the footnote div
        postprocessor = FootnotePostprocessor(self)
        postprocessor.extension = self

        md.postprocessors.append(postprocessor)

    def reset(self):
        """Forget all footnotes and pick a new random id suffix.

        May be called by Markdown if a state reset is desired. """

        self.footnote_suffix = "-" + str(int(random.random() * 1000000000))
        self.used_footnotes = {}
        self.footnotes = {}

    def findFootnotesPlaceholder(self, doc):
        """Find the first text node that contains FN_PLACE_MARKER.

        @param doc: the document to search, through its find() method
        @returns: the first node reported by doc.find(), or None """

        def findFootnotePlaceholderFn(node=None, indent=0):
            if node.type == 'text':
                if node.value.find(self.FN_PLACE_MARKER) > -1:
                    return True

        fn_div_list = doc.find(findFootnotePlaceholderFn)
        if fn_div_list:
            return fn_div_list[0]

    def setFootnote(self, id, text):
        """Store the text of a footnote definition.

        The id is stripped of surrounding whitespace so that it is
        keyed the same way as the references recorded in
        used_footnotes.

        @param id: the footnote id as written in the definition
        @param text: the footnote body """

        self.footnotes[id.strip()] = text

    def makeFootnoteId(self, num):
        """Return the element id of footnote number num."""
        return 'fn%d%s' % (num, self.footnote_suffix)

    def makeFootnoteRefId(self, num):
        """Return the element id of the reference to footnote num."""
        return 'fnr%d%s' % (num, self.footnote_suffix)

    def makeFootnotesDiv(self, doc):
        """Creates the div with class='footnote' and populates it with
        the text of the footnotes.

        Only footnotes that are referenced in the text are listed, in
        the order of their reference numbers; a definition that was
        never referenced has no number and is left out.

        @param doc: the document used to create the new elements
        @returns: the footnote div as a dom element, or None when
            there is nothing to list """

        # Pair every referenced definition with its number. Unreferenced
        # definitions used to raise KeyError here.
        numbered = [(self.used_footnotes[fn_id], fn_id)
                    for fn_id in self.footnotes.keys()
                    if fn_id in self.used_footnotes]
        if not numbered:
            return None
        numbered.sort()

        div = doc.createElement("div")
        div.setAttribute('class', 'footnote')
        hr = doc.createElement("hr")
        div.appendChild(hr)
        ol = doc.createElement("ol")
        div.appendChild(ol)

        for i, fn_id in numbered:
            li = doc.createElement('li')
            li.setAttribute('id', self.makeFootnoteId(i))

            self.md._processSection(li, self.footnotes[fn_id].split("\n"))

            backlink = doc.createElement('a')
            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
            backlink.setAttribute('class', 'footnoteBackLink')
            # The title names this footnote's own number (it used to be
            # hard-coded to 1 for every footnote).
            backlink.setAttribute('title',
                                  'Jump back to footnote %d in the text' % i)
            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))

            if li.childNodes:
                # Put the backlink inside the last child element, unless
                # the last child is a text node, in which case it goes
                # directly into the list item.
                node = li.childNodes[-1]
                if node.type == "text":
                    node = li
                node.appendChild(backlink)

            ol.appendChild(li)

        return div
class FootnotePreprocessor:
    """Pre-processor that removes footnote definitions from the text
    and numbers the footnote references in order of appearance. """

    def __init__(self, footnotes):
        """@param footnotes: the FootnoteExtension that stores the
            definitions and the reference numbers """
        self.footnotes = footnotes

    def run(self, lines):
        """Extract the footnote definitions and record the references.

        @param lines: a list of lines of text
        @returns: the list of lines with footnote definitions removed """

        self.blockGuru = BlockGuru()
        lines = self._handleFootnoteDefinitions(lines)

        # Walk over all footnote marks in the text so that we know in
        # what order they are supposed to appear. Nothing is
        # substituted; the text itself is left untouched.
        text = "\n".join(lines)
        for match in self.footnotes.SHORT_USE_RE.finditer(text):
            self.recordFootnoteUse(match)

        return text.split("\n")

    def recordFootnoteUse(self, match):
        """Give a footnote its number the first time it is referenced.

        A footnote that is referenced again keeps the number it
        already has; renumbering it would leave gaps in the sequence.

        @param match: a match of SHORT_USE_RE
        @returns: None """

        fn_id = match.group(1).strip()
        used = self.footnotes.used_footnotes
        if fn_id not in used:
            used[fn_id] = len(used) + 1

    def _handleFootnoteDefinitions(self, lines):
        """Finds all footnote definitions in the lines.

        Each definition is stored on the extension through
        setFootnote() and replaced by a single empty line.

        @param lines: a list of lines of text
        @returns: a list of lines with the footnote definitions
            removed """

        kept = []
        remaining = lines

        while True:
            i, fn_id, footnote = self._findFootnoteDefinition(remaining)
            if not fn_id:
                # No further definition: the rest is kept unchanged.
                kept.extend(remaining)
                return kept

            kept.extend(remaining[:i])
            kept.append("")

            # Tabbed lines right after the definition belong to the
            # footnote body.
            detabbed, remaining = self.blockGuru.detectTabbed(
                remaining[i + 1:])
            self.footnotes.setFootnote(
                fn_id, footnote + "\n" + "\n".join(detabbed))

    def _findFootnoteDefinition(self, lines):
        """Finds the first line of a footnote definition.

        @param lines: a list of lines of text
        @returns: a tuple (index, id, text) for the first line matched
            by DEF_RE, or (len(lines), None, None) when no line
            matches """

        for counter, line in enumerate(lines):
            m = self.footnotes.DEF_RE.match(line)
            if m:
                return counter, m.group(2), m.group(3)
        return len(lines), None, None
class FootnotePattern(BasePattern):
    """Inline pattern that turns a footnote mark into a superscript
    link pointing at the footnote. """

    def __init__(self, pattern, footnotes):
        """@param pattern: the regular expression for a footnote mark
        @param footnotes: the FootnoteExtension holding the numbers """

        BasePattern.__init__(self, pattern)
        self.footnotes = footnotes

    def handleMatch(self, m, doc):
        """Build the <sup><a> reference for a matched footnote mark.

        @param m: the match object; group 2 holds the footnote id
        @param doc: the document used to create the new elements
        @returns: the new sup element """

        sup = doc.createElement('sup')
        a = doc.createElement('a')
        sup.appendChild(a)
        # The preprocessor records references under the stripped id, so
        # strip here too; "[^ a ]" used to raise KeyError.
        fn_id = m.group(2).strip()
        num = self.footnotes.used_footnotes[fn_id]
        sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num))
        a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num))
        a.appendChild(doc.createTextNode(str(num)))
        return sup
class FootnotePostprocessor:
    """Post-processor that adds the footnote div to the document."""

    def __init__(self, footnotes):
        """@param footnotes: the FootnoteExtension that builds the div """
        self.footnotes = footnotes
        # run() reads self.extension. It used to exist only when the
        # caller assigned it after construction; default it here so a
        # freshly built post-processor works on its own.
        self.extension = footnotes

    def run(self, doc):
        """Put the footnote div into the document.

        The div replaces the placeholder node when the extension finds
        one, otherwise it is appended to the document element. Nothing
        happens when the extension returns no div.

        @param doc: the document to modify
        @returns: None """

        footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
        if footnotesDiv:
            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
            if fnPlaceholder:
                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
            else:
                doc.documentElement.appendChild(footnotesDiv)
1562 # ====================================================================
def markdown(text):
    """Convert Markdown text to XHTML.

    @param text: the Markdown source
    @returns: the result of Markdown(text).toString() """

    message(VERBOSE, "in markdown.py, received text:\n%s" % text)
    converter = Markdown(text)
    return converter.toString()
def markdownWithFootnotes(text):
    """Convert Markdown text to XHTML with footnote support.

    Creates an empty Markdown instance, attaches a FootnoteExtension
    to it and only then hands it the source text.

    @param text: the Markdown source
    @returns: str() of the extended Markdown instance """

    received = "received text:\n%s" % text
    message(VERBOSE, "Running markdown with footnotes, " + received)

    md = Markdown()
    FootnoteExtension().extendMarkdown(md)
    md.source = text

    return str(md)
def test_markdown(args):
    """test markdown at the command line.
    in each test, arg 0 is the module name

    Writes the sample input to markdown-test.txt in the current
    directory; test 7 writes markdown-test.html. The args parameter
    is not used. """

    script = "markdown.py"

    print("\nTEST 1: no arguments on command line")
    cmd_line([script])
    print("\nTEST 2a: 1 argument on command line: a good option")
    cmd_line([script, "-footnotes"])
    print("\nTEST 2b: 1 argument on command line: a bad option")
    cmd_line([script, "-foodnotes"])
    print("\nTEST 3: 1 argument on command line: non-existent input file")
    cmd_line([script, "junk.txt"])
    print("\nTEST 4: 1 argument on command line: existing input file")
    lines = """
Markdown text with[^1]:

2. **bold text**,
3. *italic text*.

Then more:

    beginning of code block;
    another line of code block.

    a second paragraph of code block.

more text to end our file.

[^1]: "italic" means emphasis.
"""
    fid = "markdown-test.txt"
    f1 = open(fid, 'w+')
    f1.write(lines)
    f1.close()
    cmd_line([script, fid])
    print("\nTEST 5: 2 arguments on command line: nofootnotes and input file")
    cmd_line([script, "-nofootnotes", fid])
    print("\nTEST 6: 2 arguments on command line: footnotes and input file")
    cmd_line([script, "-footnotes", fid])
    print("\nTEST 7: 3 arguments on command line: nofootnotes,inputfile, outputfile")
    fidout = "markdown-test.html"
    cmd_line([script, "-nofootnotes", fid, fidout])
def get_vars(args):
    """process the command-line args received; return usable variables

    args[0] is the program name and is ignored. With one or two
    further arguments, the first of them counts as an option when it
    starts with '-', otherwise it is the input file. With three or
    more, they are taken as option, input file and output file in
    that order and anything beyond is ignored.

    @param args: the argument list, as in sys.argv
    @returns: a tuple (option, inFile, outFile); parts that were not
        given are None """

    message(VERBOSE, "in get_vars(), args: %s" % (args,))

    option, inFile, outFile = None, None, None

    if len(args) >= 4:
        option, inFile, outFile = args[1:4]
    elif len(args) == 3:
        temp1, temp2 = args[1:3]
        # startswith() also copes with an empty string, on which
        # temp1[0] raised IndexError.
        if temp1.startswith('-'):
            # then we have an option and inFile
            option, inFile = temp1, temp2
        else:
            # we have no option, so we must have inFile and outFile
            inFile, outFile = temp1, temp2
    elif len(args) == 2:
        # we have only one usable arg: might be an option or a file
        temp1 = args[1]

        message(VERBOSE, "our single arg is: %s" % str(temp1))

        if temp1.startswith('-'):
            option = temp1
        else:
            inFile = temp1

    message(VERBOSE,
            "prior to validation, option: %s, inFile: %s, outFile: %s" %
            (str(option), str(inFile), str(outFile),))

    return option, inFile, outFile
# Help text passed to message() by the command-line helpers below.
USAGE = """
\nUsing markdown.py:

python markdown.py [option] input_file_with_markdown.txt [output_file.html]

Options:

-footnotes or -fn : generate markdown with footnotes
-test or -t : run a self-test
-help or -h : print this message

"""

# Option names accepted by validate_option(), written without the
# leading character that validate_option() strips off. Of these,
# cmd_line() only gives special treatment to "help", "test" and
# "footnotes"; any other accepted option runs plain markdown().
VALID_OPTIONS = ['footnotes','nofootnotes', 'fn', 'test', 't', 'f',
                 'help', 'h']

# Short option names and the long names validate_option() turns them into.
EXPANDED_OPTIONS = { "fn" : "footnotes",
                     "t" : "test",
                     "h" : "help" }
def validate_option(option):
    """ Check if the option makes sense and print an appropriate message
    if it isn't.

    The first character of the option is dropped before the lookup in
    VALID_OPTIONS; short names are expanded through EXPANDED_OPTIONS.

    @param option: the option as given on the command line, or None
    @return: valid option string or None
    """

    if option is None:
        return None

    name = option[1:]
    if len(option) > 1 and name in VALID_OPTIONS:
        # Short names map to their long form, long names to themselves.
        return EXPANDED_OPTIONS.get(name, name)

    message(CRITICAL,
            "\nSorry, I don't understand option %s" % option)
    message(CRITICAL, USAGE)
    return None
def validate_input_file(inFile):
    """ Check if the input file is specified and exists.

    A message is issued at CRITICAL level when no file name was given
    or when os.access() reports the file as not readable.

    @param inFile: the input file path, possibly empty or None
    @return: valid input file path or None
    """

    if not inFile:
        message(CRITICAL,
                "\nI need an input filename.\n")
        message(CRITICAL, USAGE)
        return None

    if not os.access(inFile, os.R_OK):
        message(CRITICAL, "Sorry, I can't find input file %s" % str(inFile))
        return None

    return inFile
def cmd_line(args):
    """Run markdown as a command-line program.

    Parses and validates the arguments, handles the "help" and "test"
    options, then converts the input file. The result is printed, or
    written to the output file when one was given. Returns early,
    after a message, when an argument does not validate.

    @param args: the argument list, as in sys.argv
    @returns: None """

    message(VERBOSE, "in cmd_line with args: %s" % args)

    option, inFile, outFile = get_vars(args)

    if option:
        option = validate_option(option)
        if not option:
            return

    if option == "help":
        message(CRITICAL, USAGE)
        return
    elif option == "test":
        test_markdown(None)
        return

    inFile = validate_input_file(inFile)
    if not inFile:
        return

    # Close the input file even when reading fails; the handle used to
    # be left for the garbage collector.
    source = open(inFile)
    try:
        text = source.read()
    finally:
        source.close()

    message(VERBOSE, "Validated command line parameters:" +
            "\n\toption: %s, \n\tinFile: %s, \n\toutFile: %s" % (
                str(option), str(inFile), str(outFile),))

    if option == "footnotes":
        md_function = markdownWithFootnotes
    else:
        md_function = markdown

    if outFile is None:
        print(md_function(text))
    else:
        output = md_function(text)
        f1 = open(outFile, "w+")
        try:
            f1.write(output)
        finally:
            # never leave the output handle open after a failed write
            f1.close()

        if os.access(outFile, os.F_OK):
            message(INFO, "Successfully wrote %s" % outFile)
        else:
            message(INFO, "Failed to write %s" % outFile)
if __name__ == '__main__':
    # Run Markdown from the command line.
    # NOTE(review): the original note here said "Set debug = 3 at top of
    # file to get diagnostic output"; the verbosity setting visible at
    # the top of the file is MESSAGE_THRESHOLD.
    args = sys.argv

    # set testing=1 to test the command-line response of markdown.py
    testing = 0
    if not testing:
        import time
        # t0 and t1 bracket the run for ad-hoc timing; to profile
        # instead, use: profile.run('cmd_line(args)', 'profile')
        t0 = time.time()
        cmd_line(args)
        t1 = time.time()
        #print "Time: %f - %f = %f" % (t1, t0, t1-t0)
    else:
        test_markdown(args)
1792 CHANGELOG
1793 =========
1795 May 15, 2006: A bug with lists, recursion on block-level elements,
1796 run-in headers, spaces before headers, unicode input (thanks to Aaron
1797 Swartz). Sourceforge tracker #s: 1489313, 1489312, 1489311, 1488370,
1798 1485178, 1485176. (v. 1.5)
1800 Mar. 24, 2006: Switched to a not-so-recursive algorithm with
1801 _handleInline. (Version 1.4)
1803 Mar. 15, 2006: Replaced some instance variables with class variables
1804 (a patch from Stelios Xanthakis). Chris Clark's new regexps that do
1805 not trigger midword underlining.
1807 Feb. 28, 2006: Clean-up and command-line handling by Stewart
1808 Midwinter. (Version 1.3)
1810 Feb. 24, 2006: Fixed a bug with the last line of the list appearing
1811 again as a separate paragraph. Incorporated Chris Clark's "mailto"
1812 patch. Added support for <br /> at the end of lines ending in two or
1813 more spaces. Fixed a crashing bug when using ImageReferencePattern.
1814 Added several utility methods to Nanodom. (Version 1.2)
1816 Jan. 31, 2006: Added "hr" and "hr/" to BLOCK_LEVEL_ELEMENTS and
1817 changed <hr/> to <hr />. (Thanks to Sergej Chodarev.)
1819 Nov. 26, 2005: Fixed a bug with certain tabbed lines inside lists
1820 getting wrapped in <pre><code>. (v. 1.1)
1822 Nov. 19, 2005: Made "<!...", "<?...", etc. behave like block-level
1823 HTML tags.
1825 Nov. 14, 2005: Added entity code and email autolink fix by Tiago
1826 Cogumbreiro. Fixed some small issues with backticks to get 100%
1827 compliance with John's test suite. (v. 1.0)
1829 Nov. 7, 2005: Added an unlink method for documents to aid with memory
1830 collection (per Doug Sauder's suggestion).
1832 Oct. 29, 2005: Restricted a set of html tags that get treated as
1833 block-level elements.
1835 Sept. 18, 2005: Refactored the whole script to make it easier to
1836 customize it and made footnote functionality into an extension.
1837 (v. 0.9)
1839 Sept. 5, 2005: Fixed a bug with multi-paragraph footnotes. Added
1840 attribute support.
1842 Sept. 1, 2005: Changed the way headers are handled to allow inline
1843 syntax in headers (e.g. links) and got the lists to use p-tags
1844 correctly (v. 0.8)
1846 Aug. 29, 2005: Added flexible tabs, fixed a few small issues, added
1847 basic support for footnotes. Got rid of xml.dom.minidom and added
1848 pretty-printing. (v. 0.7)
1850 Aug. 13, 2005: Fixed a number of small bugs in order to conform to the
1851 test suite. (v. 0.6)
1853 Aug. 11, 2005: Added support for inline html and entities, inline
1854 images, autolinks, underscore emphasis. Cleaned up and refactored the
1855 code, added some more comments.
1857 Feb. 19, 2005: Rewrote the handling of high-level elements to allow
1858 multi-line list items and all sorts of nesting.
1860 Feb. 3, 2005: Reference-style links, single-line lists, backticks,
1861 escape, emphasis in the beginning of the paragraph.
1863 Nov. 2004: Added links, blockquotes, html blocks to Manfred
1864 Stienstra's code
1866 Apr. 2004: Manfred's version at http://www.dwerg.net/projects/markdown/