MDL-9123:
[moodle-linuxchix.git] / lib / markdown.php
blob0c2cfed85e5686857839f2e21169c78a2f293a60
1 <?php
3 # Markdown Extra - A text-to-HTML conversion tool for web writers
5 # PHP Markdown & Extra
6 # Copyright (c) 2004-2007 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
# Version strings for the Markdown and Markdown Extra code in this file.
15 define( 'MARKDOWN_VERSION', "1.0.1f" ); # Wed 7 Feb 2007
16 define( 'MARKDOWNEXTRA_VERSION', "1.1.2" ); # Wed 7 Feb 2007
20 # Global default settings:
23 # Change to ">" for HTML output
24 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
26 # Define the width of a tab for code blocks.
27 define( 'MARKDOWN_TAB_WIDTH', 4 );
29 # Optional title attribute for footnote links and backlinks.
30 define( 'MARKDOWN_FN_LINK_TITLE', "" );
31 define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
33 # Optional class attribute for footnote links and backlinks.
34 define( 'MARKDOWN_FN_LINK_CLASS', "" );
35 define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
39 # WordPress settings:
42 # Change to false to remove Markdown from posts and/or comments.
43 define( 'MARKDOWN_WP_POSTS', true );
44 define( 'MARKDOWN_WP_COMMENTS', true );
48 ### Standard Function Interface ###
# Name of the class that Markdown() instantiates as its parser.
50 define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
function Markdown($text) {
    # Convert Markdown-formatted $text and return the parser's output.
    #
    # The parser class named by MARKDOWN_PARSER_CLASS is instantiated on the
    # first call and kept in a static variable, so every later call reuses
    # that same parser instance.

    # Setup static parser variable.
    static $parser;
    if (!isset($parser)) {
        $parser_class = MARKDOWN_PARSER_CLASS;
        $parser = new $parser_class;
    }

    # Transform text using parser.
    return $parser->transform($text);
}
68 ### WordPress Plugin Interface ###
71 Plugin Name: Markdown Extra
72 Plugin URI: http://www.michelf.com/projects/php-markdown/
73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
74 Version: 1.1.2
75 Author: Michel Fortin
76 Author URI: http://www.michelf.com/
# WordPress integration: when $wp_version is set, swap WordPress' own
# paragraph/auto-link filters for Markdown-based ones on posts and comments,
# and build the $markdown_hidden_tags table used by mdwp_hide_tags() and
# mdwp_show_tags().
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 99 -> 102, 123 -> 127), so the closing lines of the nested `if`
# blocks and of the array literal appear to be missing here; confirm
# against the upstream file.
79 if (isset($wp_version)) {
80 # More details about how it works here:
81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
83 # Post content and excerpts
84 # - Remove WordPress paragraph generator.
85 # - Run Markdown on excerpt, then remove all tags.
86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
87 if (MARKDOWN_WP_POSTS) {
88 remove_filter('the_content', 'wpautop');
89 remove_filter('the_excerpt', 'wpautop');
90 add_filter('the_content', 'Markdown', 6);
91 add_filter('get_the_excerpt', 'Markdown', 6);
92 add_filter('get_the_excerpt', 'trim', 7);
93 add_filter('the_excerpt', 'mdwp_add_p');
94 add_filter('the_excerpt_rss', 'mdwp_strip_p');
96 remove_filter('content_save_pre', 'balanceTags', 50);
97 remove_filter('excerpt_save_pre', 'balanceTags', 50);
98 add_filter('the_content', 'balanceTags', 50);
99 add_filter('get_the_excerpt', 'balanceTags', 9);
102 # Comments
103 # - Remove WordPress paragraph generator.
104 # - Remove WordPress auto-link generator.
105 # - Scramble important tags before passing them to the kses filter.
106 # - Run Markdown on excerpt then remove paragraph tags.
107 if (MARKDOWN_WP_COMMENTS) {
108 remove_filter('comment_text', 'wpautop');
109 remove_filter('comment_text', 'make_clickable');
110 add_filter('pre_comment_content', 'Markdown', 6);
111 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
112 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
113 add_filter('get_comment_text', 'Markdown', 6);
114 add_filter('get_comment_excerpt', 'Markdown', 6);
115 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
117 global $markdown_hidden_tags;
# Maps each literal tag to the MD5 of that tag.
118 $markdown_hidden_tags = array(
119 '<p>' => md5('<p>'), '</p>' => md5('</p>'),
120 '<pre>' => md5('<pre>'), '</pre>'=> md5('</pre>'),
121 '<ol>' => md5('<ol>'), '</ol>' => md5('</ol>'),
122 '<ul>' => md5('<ul>'), '</ul>' => md5('</ul>'),
123 '<li>' => md5('<li>'), '</li>' => md5('</li>'),
function mdwp_add_p($text) {
    # Wrap $text in <p>...</p> unless it is empty or already starts with a
    # <p>, <ul>, <ol>, <dl>, <pre> or <blockquote> tag (case-insensitive).
    # Inside wrapped text, every run of two or more newlines becomes a
    # paragraph break. Returns the resulting text.
    if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
        $text = '<p>'.$text.'</p>';
        $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
    }
    return $text;
}
function mdwp_strip_p($t) {
    # Remove every <p> and </p> tag (case-insensitive) from $t.
    return preg_replace('{</?p>}i', '', $t);
}
function mdwp_hide_tags($text) {
    # Replace each tag listed as a key of the global $markdown_hidden_tags
    # table with its mapped value and return the result.
    global $markdown_hidden_tags;
    return str_replace(array_keys($markdown_hidden_tags),
                       array_values($markdown_hidden_tags), $text);
}
function mdwp_show_tags($text) {
    # Inverse of mdwp_hide_tags(): replace each value of the global
    # $markdown_hidden_tags table with its tag (the key) and return the result.
    global $markdown_hidden_tags;
    return str_replace(array_values($markdown_hidden_tags),
                       array_keys($markdown_hidden_tags), $text);
}
150 ### bBlog Plugin Info ###
function identify_modifier_markdown() {
    # Return the bBlog plugin description array for this modifier. The
    # 'version' entry is taken from MARKDOWNEXTRA_VERSION.
    return array(
        'name' => 'markdown',
        'type' => 'modifier',
        'nicename' => 'PHP Markdown Extra',
        'description' => 'A text-to-HTML conversion tool for web writers',
        'authors' => 'Michel Fortin and John Gruber',
        'licence' => 'GPL',
        'version' => MARKDOWNEXTRA_VERSION,
        'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
    );
}
166 ### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
    # Smarty modifier entry point: returns Markdown($text) unchanged.
    return Markdown($text);
}
173 ### Textile Compatibility Mode ###
175 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
# The fake Textile class below is only declared when this file itself is
# named "classTextile.php" (compared case-insensitively).
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
    # Try to include PHP SmartyPants. Should be in the same directory.
    @include_once 'smartypants.php';
    # Fake Textile class. It calls Markdown instead.
    class Textile {
        # Run Markdown on $text unless $lite or $encode is non-empty, then
        # run SmartyPants on it when that function exists.
        function TextileThis($text, $lite='', $encode='') {
            if ($lite == '' && $encode == '') $text = Markdown($text);
            if (function_exists('SmartyPants')) $text = SmartyPants($text);
            return $text;
        }
        # Fake restricted version: restrictions are not supported for now.
        # $noimage is accepted but ignored.
        function TextileRestricted($text, $lite='', $noimage='') {
            return $this->TextileThis($text, $lite);
        }
        # Workaround to ensure compatibility with TextPattern 4.0.3.
        function blockLite($text) { return $text; }
    }
}
199 # Markdown Parser Class
202 class Markdown_Parser {
204 # Regex to match balanced [brackets].
205 # Needed to insert a maximum bracket depth while converting to PHP.
206 var $nested_brackets_depth = 6;
207 var $nested_brackets;
209 # Table of hash values for escaped characters:
210 var $escape_chars = '\`*_{}[]()>#+-.!';
211 var $escape_table = array();
212 var $backslash_escape_table = array();
214 # Change to ">" for HTML output.
215 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
216 var $tab_width = MARKDOWN_TAB_WIDTH;
function Markdown_Parser() {
    # Constructor function. Initialize appropriate member variables.
    #
    # NOTE(review): this is a PHP 4-style constructor (a method named after
    # the class). PHP 8 no longer treats such a method as a constructor;
    # confirm how the class is instantiated on the target PHP version.

    # _initDetab() is defined outside this part of the file.
    $this->_initDetab();

    # Build the balanced-[brackets] pattern by nesting the same fragment
    # nested_brackets_depth times.
    $this->nested_brackets =
        str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
        str_repeat('\])*', $this->nested_brackets_depth);

    # Map each escapable character, and its backslash-escaped form, to the
    # MD5 of that character.
    foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
        $hash = md5($char);
        $this->escape_table[$char] = $hash;
        $this->backslash_escape_table["\\$char"] = $hash;
    }

    # Sort document, block, and span gamut in ascending priority order.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
243 # Internal hashes used during transformation.
244 var $urls = array();
245 var $titles = array();
246 var $html_blocks = array();
247 var $html_hashes = array(); # Contains both blocks and span hashes.
function transform($text) {
    # Main function. The order in which other subs are called here is
    # essential. Link and image substitutions need to happen before
    # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
    # and <img> tags get encoded.
    #
    # Takes the Markdown source in $text and returns the transformed text
    # followed by a newline.

    # Clear the global hashes. If we don't clear these, you get conflicts
    # from other articles when generating a page which contains more than
    # one article (e.g. an index page that shows the N most recent
    # articles):
    $this->urls = array();
    $this->titles = array();
    $this->html_blocks = array();
    $this->html_hashes = array();

    # Standardize line endings:
    #   DOS to Unix and Mac to Unix
    $text = str_replace(array("\r\n", "\r"), "\n", $text);

    # Make sure $text ends with a couple of newlines:
    $text .= "\n\n";

    # Convert all tabs to spaces.
    $text = $this->detab($text);

    # Turn block-level HTML blocks into hash entries
    $text = $this->hashHTMLBlocks($text);

    # Strip any lines consisting only of spaces and tabs.
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ \t]*\n+/ .
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Run document gamut methods.
    foreach ($this->document_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    return $text . "\n";
}
293 var $document_gamut = array(
294 # Strip link definitions, store in hashes.
295 "stripLinkDefinitions" => 20,
297 "runBasicBlockGamut" => 30,
298 "unescapeSpecialChars" => 90,
302 function stripLinkDefinitions($text) {
# Each match of the pattern below is handed to
# _stripLinkDefinitions_callback(); the modified text is returned.
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 318 -> 320), so at least one line of the pattern appears to be
# missing here; confirm against the upstream file.
304 # Strips link definitions from text, stores the URLs and titles in
305 # hash references.
307 $less_than_tab = $this->tab_width - 1;
309 # Link defs are in the form: ^[id]: url "optional title"
310 $text = preg_replace_callback('{
311 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
312 [ \t]*
313 \n? # maybe *one* newline
314 [ \t]*
315 <?(\S+?)>? # url = $2
316 [ \t]*
317 \n? # maybe one newline
318 [ \t]*
320 (?<=\s) # lookbehind for whitespace
321 ["(]
322 (.*?) # title = $3
323 [")]
324 [ \t]*
325 )? # title is optional
326 (?:\n+|\Z)
327 }xm',
328 array(&$this, '_stripLinkDefinitions_callback'),
329 $text);
330 return $text;
function _stripLinkDefinitions_callback($matches) {
    # Record one link definition: $matches[1] is the id (stored lower-cased),
    # $matches[2] the URL and, when present, $matches[3] the title with its
    # double quotes turned into &quot;. Returns '' so the definition is
    # removed from the text.
    $link_id = strtolower($matches[1]);
    $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
    if (isset($matches[3])) {
        $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
    }
    return ''; # String that will replace the block
}
341 function hashHTMLBlocks($text) {
# Runs five preg_replace_callback passes over $text (nested block tags,
# the more liberal block-tag match, <hr>, standalone HTML comments, and
# <? / <% processor instructions). Every match is handed to
# _hashHTMLBlocks_callback(). Returns the modified text.
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 359 -> 361, 411 -> 413), so several lines of the patterns appear
# to be missing here; confirm against the upstream file.
342 $less_than_tab = $this->tab_width - 1;
344 # Hashify HTML blocks:
345 # We only want to do this for block-level HTML tags, such as headers,
346 # lists, and tables. That's because we still want to wrap <p>s around
347 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
348 # phrase emphasis, and spans. The list of tags we're looking for is
349 # hard-coded:
350 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
351 'script|noscript|form|fieldset|iframe|math|ins|del';
352 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
353 'script|noscript|form|fieldset|iframe|math';
355 # Regular expression for the content of a block tag.
356 $nested_tags_level = 4;
357 $attr = '
358 (?> # optional tag attributes
359 \s # starts with whitespace
361 [^>"/]+ # text outside quotes
363 /+(?!>) # slash not followed by ">"
365 "[^"]*" # text inside double quotes (tolerate ">")
367 \'[^\']*\' # text inside single quotes (tolerate ">")
371 $content =
372 str_repeat('
374 [^<]+ # content without tag
376 <\2 # nested opening tag
377 '.$attr.' # attributes
381 >', $nested_tags_level). # end of opening tag
382 '.*?'. # last level nested tag content
383 str_repeat('
384 </\2\s*> # closing nested tag
387 <(?!/\2\s*> # other tags with a different name
389 )*',
390 $nested_tags_level);
392 # First, look for nested blocks, e.g.:
393 # <div>
394 # <div>
395 # tags for inner block must be indented.
396 # </div>
397 # </div>
399 # The outermost tags must start at the left margin for this to match, and
400 # the inner nested divs must be indented.
401 # We need to do this before the next, more liberal match, because the next
402 # match will start at the first `<div>` and stop at the first `</div>`.
403 $text = preg_replace_callback('{
404 ( # save in $1
405 ^ # start of line (with /m)
406 <('.$block_tags_a.')# start tag = $2
407 '.$attr.'>\n # attributes followed by > and \n
408 '.$content.' # content, support nesting
409 </\2> # the matching end tag
410 [ \t]* # trailing spaces/tabs
411 (?=\n+|\Z) # followed by a newline or end of document
413 }xm',
414 array(&$this, '_hashHTMLBlocks_callback'),
415 $text);
418 # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
420 $text = preg_replace_callback('{
421 ( # save in $1
422 ^ # start of line (with /m)
423 <('.$block_tags_b.')# start tag = $2
424 '.$attr.'> # attributes followed by >
425 '.$content.' # content, support nesting
426 </\2> # the matching end tag
427 [ \t]* # trailing spaces/tabs
428 (?=\n+|\Z) # followed by a newline or end of document
430 }xm',
431 array(&$this, '_hashHTMLBlocks_callback'),
432 $text);
434 # Special case just for <hr />. It was easier to make a special case than
435 # to make the other regex more complicated.
436 $text = preg_replace_callback('{
438 (?<=\n\n) # Starting after a blank line
439 | # or
440 \A\n? # the beginning of the doc
442 ( # save in $1
443 [ ]{0,'.$less_than_tab.'}
444 <(hr) # start tag = $2
445 \b # word break
446 ([^<>])*? #
447 /?> # the matching end tag
448 [ \t]*
449 (?=\n{2,}|\Z) # followed by a blank line or end of document
451 }x',
452 array(&$this, '_hashHTMLBlocks_callback'),
453 $text);
455 # Special case for standalone HTML comments:
456 $text = preg_replace_callback('{
458 (?<=\n\n) # Starting after a blank line
459 | # or
460 \A\n? # the beginning of the doc
462 ( # save in $1
463 [ ]{0,'.$less_than_tab.'}
464 (?s:
465 <!-- .*? -->
467 [ \t]*
468 (?=\n{2,}|\Z) # followed by a blank line or end of document
470 }x',
471 array(&$this, '_hashHTMLBlocks_callback'),
472 $text);
474 # PHP and ASP-style processor instructions (<? and <%)
475 $text = preg_replace_callback('{
477 (?<=\n\n) # Starting after a blank line
478 | # or
479 \A\n? # the beginning of the doc
481 ( # save in $1
482 [ ]{0,'.$less_than_tab.'}
483 (?s:
484 <([?%]) # $2
488 [ \t]*
489 (?=\n{2,}|\Z) # followed by a blank line or end of document
491 }x',
492 array(&$this, '_hashHTMLBlocks_callback'),
493 $text);
495 return $text;
function _hashHTMLBlocks_callback($matches) {
    # Hash the matched HTML block ($matches[1]) with hashBlock() and return
    # its key surrounded by blank lines.
    $text = $matches[1];
    $key = $this->hashBlock($text);
    return "\n\n$key\n\n";
}
function hashBlock($text) {
    # Called whenever a tag must be hashed. When a function inserts a
    # block-level tag in $text, it passes through this function and is
    # automatically escaped, which removes the need to call _HashHTMLBlocks
    # at every step.
    #
    # Returns the MD5 key under which the block is stored in both
    # $this->html_hashes and $this->html_blocks.

    # Swap back any tag hash found in $text so we do not have to `unhash`
    # multiple times at the end.
    $text = $this->unhash($text);

    # Then hash the block.
    $key = md5($text);
    $this->html_hashes[$key] = $text;
    $this->html_blocks[$key] = $text;
    return $key; # String that will replace the tag.
}
function hashSpan($text) {
    # Called whenever a tag must be hashed. When a function inserts a
    # span-level element in $text, it passes through this function and is
    # automatically escaped, blocking invalid nested overlap.
    #
    # Returns the MD5 key under which the span is stored in
    # $this->html_hashes (spans are not added to $this->html_blocks).

    # Swap back any tag hash found in $text so we do not have to `unhash`
    # multiple times at the end.
    $text = $this->unhash($text);

    # Then hash the span.
    $key = md5($text);
    $this->html_hashes[$key] = $text;
    return $key; # String that will replace the span tag.
}
539 var $block_gamut = array(
541 # These are all the transformations that form block-level
542 # tags like paragraphs, headers, and list items.
544 "doHeaders" => 10,
545 "doHorizontalRules" => 20,
547 "doLists" => 40,
548 "doCodeBlocks" => 50,
549 "doBlockQuotes" => 60,
function runBlockGamut($text) {
    # Run block gamut transformations.
    #
    # We need to escape raw HTML in Markdown source before doing anything
    # else. This needs to be done for each block, and not only at the
    # beginning in the Markdown function, since hashed blocks can be part of
    # list items and could have been indented. Indented blocks would have
    # been seen as a code block in a previous pass of hashHTMLBlocks.
    $text = $this->hashHTMLBlocks($text);

    return $this->runBasicBlockGamut($text);
}
function runBasicBlockGamut($text) {
    # Run block gamut transformations, without hashing HTML blocks. This is
    # useful when HTML blocks are known to be already hashed, like in the first
    # whole-document pass.
    #
    # Each key of $this->block_gamut is a method name; the methods are called
    # in the array's current order, each receiving the previous one's output.
    foreach ($this->block_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    # Finally form paragraph and restore hashed blocks.
    $text = $this->formParagraphs($text);

    return $text;
}
function doHorizontalRules($text) {
    # Do Horizontal Rules:
    # a line holding three or more `*`, `-` or `_` markers is replaced by
    # the hash key of an <hr> element, surrounded by newlines.
    return preg_replace(
        array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
              '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
              '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
        "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
        $text);
}
594 var $span_gamut = array(
596 # These are all the transformations that occur *within* block-level
597 # tags like paragraphs, headers, and list items.
599 "escapeSpecialCharsWithinTagAttributes" => -20,
600 "doCodeSpans" => -10,
601 "encodeBackslashEscapes" => -5,
603 # Process anchor and image tags. Images must come first,
604 # because ![foo][f] looks like an anchor.
605 "doImages" => 10,
606 "doAnchors" => 20,
608 # Make links out of things like `<http://example.com/>`
609 # Must come after doAnchors, because you can use < and >
610 # delimiters in inline links like [this](<url>).
611 "doAutoLinks" => 30,
612 "encodeAmpsAndAngles" => 40,
614 "doItalicsAndBold" => 50,
615 "doHardBreaks" => 60,
function runSpanGamut($text) {
    # Run span gamut transformations.
    #
    # Each key of $this->span_gamut is a method name; the methods are called
    # in the array's current order, each receiving the previous one's output.
    foreach ($this->span_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    return $text;
}
function doHardBreaks($text) {
    # Do hard breaks:
    # two or more spaces followed by a newline are replaced by the hash key
    # of a <br> tag (the hashed tag itself ends with a newline).
    $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n");
    return preg_replace('/ {2,}\n/', $br_tag, $text);
}
function escapeSpecialCharsWithinTagAttributes($text) {
    # Within tags -- meaning between < and > -- encode [\ ` * _] so they
    # don't conflict with their use in Markdown for code, italics and strong.
    # We're replacing each such character with its corresponding MD5 checksum
    # value; this is likely overkill, but it should prevent us from colliding
    # with the escape values by accident.
    $tokens = $this->tokenizeHTML($text);
    $text = ''; # rebuild $text from the tokens

    foreach ($tokens as $cur_token) {
        # Only tokens flagged 'tag' are rewritten; everything else is
        # appended unchanged.
        if ($cur_token[0] == 'tag') {
            $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
            $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]);
            $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
            $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
        }
        $text .= $cur_token[1];
    }
    return $text;
}
661 function doAnchors($text) {
# Two preg_replace_callback passes: reference-style links go to
# _doAnchors_reference_callback(), inline-style links to
# _DoAnchors_inline_callback(). The shortcut-style pass is commented out.
# Returns the modified text.
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 669 -> 671, 701 -> 704), so several lines of the patterns appear
# to be missing here; confirm against the upstream file.
663 # Turn Markdown link shortcuts into XHTML <a> tags.
666 # First, handle reference-style links: [link text] [id]
668 $text = preg_replace_callback('{
669 ( # wrap whole match in $1
671 ('.$this->nested_brackets.') # link text = $2
674 [ ]? # one optional space
675 (?:\n[ ]*)? # one optional newline followed by spaces
678 (.*?) # id = $3
681 }xs',
682 array(&$this, '_doAnchors_reference_callback'), $text);
685 # Next, inline-style links: [link text](url "optional title")
687 $text = preg_replace_callback('{
688 ( # wrap whole match in $1
690 ('.$this->nested_brackets.') # link text = $2
692 \( # literal paren
693 [ \t]*
694 <?(.*?)>? # href = $3
695 [ \t]*
696 ( # $4
697 ([\'"]) # quote char = $5
698 (.*?) # Title = $6
699 \5 # matching quote
700 [ \t]* # ignore any spaces/tabs between closing quote and )
701 )? # title is optional
704 }xs',
705 array(&$this, '_DoAnchors_inline_callback'), $text);
708 # Last, handle reference-style shortcuts: [link text]
709 # These must come last in case you've also got [link test][1]
710 # or [link test](/foo)
712 // $text = preg_replace_callback('{
713 // ( # wrap whole match in $1
714 // \[
715 // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
716 // \]
717 // )
718 // }xs',
719 // array(&$this, '_doAnchors_reference_callback'), $text);
721 return $text;
function _doAnchors_reference_callback($matches) {
    # Build the <a> element for one reference-style link.
    # $matches[1] is the whole match, $matches[2] the link text and
    # $matches[3] the link id. Returns the hash key of the generated anchor,
    # or the untouched match when the id has no stored URL.
    $whole_match = $matches[1];
    $link_text = $matches[2];
    $link_id =& $matches[3];

    if ($link_id == "") {
        # for shortcut links like [this][] or [this].
        $link_id = $link_text;
    }

    # lower-case and turn embedded newlines into spaces
    $link_id = strtolower($link_id);
    $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

    if (isset($this->urls[$link_id])) {
        $url = $this->urls[$link_id];
        $url = $this->encodeAmpsAndAngles($url);

        $result = "<a href=\"$url\"";
        if ( isset( $this->titles[$link_id] ) ) {
            $title = $this->titles[$link_id];
            $title = $this->encodeAmpsAndAngles($title);
            $result .= " title=\"$title\"";
        }

        $link_text = $this->runSpanGamut($link_text);
        $result .= ">$link_text</a>";
        $result = $this->hashSpan($result);
    }
    else {
        # No such link id: leave the source text intact.
        $result = $whole_match;
    }
    return $result;
}
function _doAnchors_inline_callback($matches) {
    # Build the <a> element for one inline-style link.
    # $matches[2] is the link text, $matches[3] the URL and $matches[6] the
    # optional title. Returns the hash key of the generated anchor.
    $link_text = $matches[2];
    $url = $matches[3];
    # Taken by reference so that a missing optional group reads as unset
    # instead of raising an undefined-index notice.
    $title =& $matches[6];

    $url = $this->encodeAmpsAndAngles($url);

    $result = "<a href=\"$url\"";
    if (isset($title)) {
        $title = str_replace('"', '&quot;', $title);
        $title = $this->encodeAmpsAndAngles($title);
        $result .= " title=\"$title\"";
    }

    # Run the span gamut over the link text exactly once, as
    # _doAnchors_reference_callback() does; a second pass over text that has
    # already been processed is redundant work.
    $link_text = $this->runSpanGamut($link_text);
    $result .= ">$link_text</a>";

    return $this->hashSpan($result);
}
779 function doImages($text) {
# Two preg_replace_callback passes: reference-style images go to
# _doImages_reference_callback(), inline images to
# _doImages_inline_callback(). Returns the modified text.
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 787 -> 789, 822 -> 825), so several lines of the patterns appear
# to be missing here; confirm against the upstream file.
781 # Turn Markdown image shortcuts into <img> tags.
784 # First, handle reference-style labeled images: ![alt text][id]
786 $text = preg_replace_callback('{
787 ( # wrap whole match in $1
789 ('.$this->nested_brackets.') # alt text = $2
792 [ ]? # one optional space
793 (?:\n[ ]*)? # one optional newline followed by spaces
796 (.*?) # id = $3
800 }xs',
801 array(&$this, '_doImages_reference_callback'), $text);
804 # Next, handle inline images: ![alt text](url "optional title")
805 # Don't forget: encode * and _
807 $text = preg_replace_callback('{
808 ( # wrap whole match in $1
810 ('.$this->nested_brackets.') # alt text = $2
812 \s? # One optional whitespace character
813 \( # literal paren
814 [ \t]*
815 <?(\S+?)>? # src url = $3
816 [ \t]*
817 ( # $4
818 ([\'"]) # quote char = $5
819 (.*?) # title = $6
820 \5 # matching quote
821 [ \t]*
822 )? # title is optional
825 }xs',
826 array(&$this, '_doImages_inline_callback'), $text);
828 return $text;
function _doImages_reference_callback($matches) {
    # Build the <img> element for one reference-style image.
    # $matches[1] is the whole match, $matches[2] the alt text and
    # $matches[3] the link id. Returns the hash key of the generated tag, or
    # the untouched match when the id has no stored URL.
    $whole_match = $matches[1];
    $alt_text = $matches[2];
    $link_id = strtolower($matches[3]);

    if ($link_id == "") {
        $link_id = strtolower($alt_text); # for shortcut links like ![this][].
    }

    $alt_text = str_replace('"', '&quot;', $alt_text);
    if (isset($this->urls[$link_id])) {
        $url = $this->urls[$link_id];
        $result = "<img src=\"$url\" alt=\"$alt_text\"";
        if (isset($this->titles[$link_id])) {
            $title = $this->titles[$link_id];
            $result .= " title=\"$title\"";
        }
        $result .= $this->empty_element_suffix;
        $result = $this->hashSpan($result);
    }
    else {
        # If there's no such link ID, leave intact:
        $result = $whole_match;
    }

    return $result;
}
function _doImages_inline_callback($matches) {
    # Build the <img> element for one inline image.
    # $matches[2] is the alt text, $matches[3] the URL and $matches[6] the
    # optional title. Returns the hash key of the generated tag.
    $alt_text = $matches[2];
    $url = $matches[3];
    # Taken by reference so that a missing optional group reads as unset
    # instead of raising an undefined-index notice.
    $title =& $matches[6];

    $alt_text = str_replace('"', '&quot;', $alt_text);
    $result = "<img src=\"$url\" alt=\"$alt_text\"";
    if (isset($title)) {
        $title = str_replace('"', '&quot;', $title);
        $result .= " title=\"$title\""; # $title already quoted
    }
    $result .= $this->empty_element_suffix;

    return $this->hashSpan($result);
}
875 function doHeaders($text) {
# Three preg_replace_callback passes: setext h1, setext h2, then atx
# headers, each with its own _doHeaders_callback_* method. Returns the
# modified text.
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 900 -> 902), so at least one line of the atx pattern appears to be
# missing here; confirm against the upstream file.
876 # Setext-style headers:
877 # Header 1
878 # ========
880 # Header 2
881 # --------
883 $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
884 array(&$this, '_doHeaders_callback_setext_h1'), $text);
885 $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
886 array(&$this, '_doHeaders_callback_setext_h2'), $text);
888 # atx-style headers:
889 # # Header 1
890 # ## Header 2
891 # ## Header 2 with closing hashes ##
892 # ...
893 # ###### Header 6
895 $text = preg_replace_callback('{
896 ^(\#{1,6}) # $1 = string of #\'s
897 [ \t]*
898 (.+?) # $2 = Header text
899 [ \t]*
900 \#* # optional closing #\'s (not counted)
902 }xm',
903 array(&$this, '_doHeaders_callback_atx'), $text);
905 return $text;
function _doHeaders_callback_setext_h1($matches) {
    # Wrap the span-processed header text ($matches[1]) in <h1> and return
    # the block's hash key padded with newlines.
    $block = "<h1>".$this->runSpanGamut($matches[1])."</h1>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_setext_h2($matches) {
    # Wrap the span-processed header text ($matches[1]) in <h2> and return
    # the block's hash key padded with newlines.
    $block = "<h2>".$this->runSpanGamut($matches[1])."</h2>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
    # The header level is the number of leading # characters ($matches[1]);
    # $matches[2] is the header text. Returns the block's hash key padded
    # with newlines.
    $level = strlen($matches[1]);
    $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
922 function doLists($text) {
# Runs once for the unordered marker and once for the ordered marker;
# each whole-list match is handed to _doLists_callback(). Which pattern
# prefix is used depends on $this->list_level. Returns the modified text.
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 942 -> 944, 952 -> 956), so several lines of the pattern appear to
# be missing here; confirm against the upstream file.
924 # Form HTML ordered (numbered) and unordered (bulleted) lists.
926 $less_than_tab = $this->tab_width - 1;
928 # Re-usable patterns to match list item bullets and number markers:
929 $marker_ul = '[*+-]';
930 $marker_ol = '\d+[.]';
931 $marker_any = "(?:$marker_ul|$marker_ol)";
933 $markers = array($marker_ul, $marker_ol);
935 foreach ($markers as $marker) {
936 # Re-usable pattern to match any entire ul or ol list:
937 $whole_list = '
938 ( # $1 = whole list
939 ( # $2
940 [ ]{0,'.$less_than_tab.'}
941 ('.$marker.') # $3 = first list item marker
942 [ \t]+
944 (?s:.+?)
945 ( # $4
948 \n{2,}
949 (?=\S)
950 (?! # Negative lookahead for another list item marker
951 [ \t]*
952 '.$marker.'[ \t]+
956 '; // mx
958 # We use a different prefix before nested lists than top-level lists.
959 # See extended comment in _ProcessListItems().
961 if ($this->list_level) {
962 $text = preg_replace_callback('{
964 '.$whole_list.'
965 }mx',
966 array(&$this, '_doLists_callback'), $text);
968 else {
969 $text = preg_replace_callback('{
970 (?:(?<=\n)\n|\A\n?) # Must eat the newline
971 '.$whole_list.'
972 }mx',
973 array(&$this, '_doLists_callback'), $text);
977 return $text;
function _doLists_callback($matches) {
    # Turn one whole-list match into a hashed <ul> or <ol> block.
    # $matches[1] is the whole list and $matches[3] its first item marker.
    # Returns the block's hash key padded with newlines.

    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul = '[*+-]';
    $marker_ol = '\d+[.]';

    $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";

    # Only markers of the list's own type separate its items.
    $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );

    $list = $matches[1] . "\n";
    $result = $this->processListItems($list, $marker_any);

    $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
    return "\n". $result ."\n\n";
}
997 var $list_level = 0;
function processListItems($list_str, $marker_any) {
    # Process the contents of a single ordered or unordered list, splitting it
    # into individual list items.
    #
    # $list_str is the list source and $marker_any the pattern for the item
    # markers; each item is handed to _processListItems_callback(). Returns
    # the list source with every item replaced by the callback's output.
    #
    # The $this->list_level global keeps track of when we're inside a list.
    # Each time we enter a list, we increment it; when we leave a list,
    # we decrement. If it's zero, we're not in a list anymore.
    #
    # We do this because when we're not inside a list, we want to treat
    # something like this:
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # As a single paragraph, despite the fact that the second line starts
    # with a digit-period-space sequence.
    #
    # Whereas when we're inside a list (or sub-list), that line will be
    # treated as the start of a sub-list. What a kludge, huh? This is
    # an aspect of Markdown's syntax that's hard to parse perfectly
    # without resorting to mind-reading. Perhaps the solution is to
    # change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".

    $this->list_level++;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $list_str = preg_replace_callback('{
        (\n)?                           # leading line = $1
        (^[ \t]*)                       # leading whitespace = $2
        ('.$marker_any.') [ \t]+        # list marker = $3
        ((?s:.+?))                      # list item text   = $4
        (?:(\n+(?=\n))|\n)              # tailing blank line = $5
        (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
        }xm',
        array(&$this, '_processListItems_callback'), $list_str);

    $this->list_level--;
    return $list_str;
}
function _processListItems_callback($matches) {
    # Convert one list item into an <li> element.
    # $matches[4] is the item text; $matches[1] and $matches[5] are the
    # optional leading line and trailing blank line.
    $item = $matches[4];
    # Taken by reference so that missing optional groups read as empty
    # instead of raising an undefined-index notice.
    $leading_line =& $matches[1];
    $tailing_blank_line =& $matches[5];

    if ($leading_line || $tailing_blank_line ||
        preg_match('/\n{2,}/', $item))
    {
        # A blank line around or inside the item: run the block gamut.
        $item = $this->runBlockGamut($this->outdent($item)."\n");
    }
    else {
        # Recursion for sub-lists:
        $item = $this->doLists($this->outdent($item));
        $item = preg_replace('/\n+$/', '', $item);
        $item = $this->runSpanGamut($item);
    }

    return "<li>" . $item . "</li>\n";
}
1065 function doCodeBlocks($text) {
# Each match of the pattern below is handed to _doCodeBlocks_callback();
# the modified text is returned.
# NOTE(review): the embedded line numbers in this listing skip values
# (e.g. 1071 -> 1073, 1074 -> 1077), so several lines of the pattern
# appear to be missing here; confirm against the upstream file.
1067 # Process Markdown `<pre><code>` blocks.
1069 $text = preg_replace_callback('{
1070 (?:\n\n|\A)
1071 ( # $1 = the code block -- one or more lines, starting with a space/tab
1073 (?:[ ]{'.$this->tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
1074 .*\n+
1077 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1078 }xm',
1079 array(&$this, '_doCodeBlocks_callback'), $text);
1081 return $text;
function _doCodeBlocks_callback($matches) {
    # Turn one matched code block ($matches[1]) into a hashed
    # <pre><code> block and return its key surrounded by blank lines.
    $codeblock = $matches[1];

    $codeblock = $this->encodeCode($this->outdent($codeblock));
    # trim leading newlines and trailing whitespace
    $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);

    $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";

    return $result;
}
function doCodeSpans($text) {
    #
    #   *   Backtick quotes are used for <code></code> spans.
    #
    #   *   You can use multiple backticks as the delimiters if you want to
    #       include literal backticks in the code span. So, this input:
    #
    #         Just type ``foo `bar` baz`` at the prompt.
    #
    #       Will translate to:
    #
    #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    #
    #       There's no arbitrary limit to the number of backticks you
    #       can use as delimiters. If you need three consecutive backticks
    #       in your code, use four for delimiters, etc.
    #
    #   *   You can use spaces to get literal backticks at the edges:
    #
    #         ... type `` `bar` `` ...
    #
    #       Turns to:
    #
    #         ... type <code>`bar`</code> ...
    #
    # Each match is handed to _doCodeSpans_callback(); the modified text is
    # returned.
    $text = preg_replace_callback('@
            (?<!\\\)    # Character before opening ` can\'t be a backslash
            (`+)        # $1 = Opening run of `
            (.+?)       # $2 = The code block
            (?<!`)
            \1          # Matching closer
            (?!`)
        @xs',
        array(&$this, '_doCodeSpans_callback'), $text);

    return $text;
}
function _doCodeSpans_callback($matches) {
    # Turn the code between the backticks ($matches[2]) into a hashed
    # <code> span and return its key.
    $c = $matches[2];
    $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
    $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
    $c = $this->encodeCode($c);
    return $this->hashSpan("<code>$c</code>");
}
function encodeCode($_) {
    # Encode/escape certain characters inside Markdown code runs.
    # The point is that in code, these characters are literals,
    # and lose their special Markdown meanings.
    #
    # Returns $_ with &, < and > replaced by their HTML entities.

    # Encode all ampersands; HTML entities are not
    # entities within a Markdown code span.
    $_ = str_replace('&', '&amp;', $_);

    # Do the angle bracket song and dance:
    $_ = str_replace(array('<', '>'),
                     array('&lt;', '&gt;'), $_);

    # Characters that are magic in Markdown are deliberately left as-is
    # here: the escape_table substitution is disabled in this version.

    return $_;
}
function doItalicsAndBold($text) {
    #
    # Convert `**strong**` / `__strong__` and `*em*` / `_em_` markers.
    # Each match is handed to the matching callback, whose return value
    # replaces the marked-up text.
    #
    # <strong> must go first:
    $text = preg_replace_callback('{
            (                       # $1: Marker
                (?<!\*\*) \* |      #     (not preceded by two chars of
                (?<!__)   _         #      the same marker)
            )
            \1
            (?=\S)                  # Not followed by whitespace
            (?!\1\1)                #   or two others marker chars.
            (                       # $2: Content
                (?:
                    [^*_]+?         # Anthing not em markers.
                |
                                    # Balence any regular emphasis inside.
                    \1 (?=\S) .+? (?<=\S) \1
                |
                    (?! \1 ) .      # Allow unbalenced * and _.
                )+?
            )
            (?<=\S) \1\1            # End mark not preceded by whitespace.
        }sx',
        array(&$this, '_doItalicAndBold_strong_callback'), $text);
    # Then <em>:
    $text = preg_replace_callback(
        '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
        array(&$this, '_doItalicAndBold_em_callback'), $text);

    return $text;
}
function _doItalicAndBold_em_callback($matches) {
    #
    # Callback for the <em> pattern of doItalicsAndBold().
    # $matches[2] is the emphasised content; it is run through the span
    # gamut and the resulting element is returned as a span hash.
    #
    $text = $this->runSpanGamut($matches[2]);
    return $this->hashSpan("<em>$text</em>");
}
function _doItalicAndBold_strong_callback($matches) {
    #
    # Callback for the <strong> pattern of doItalicsAndBold().
    # $matches[2] is the strong content; it is run through the span
    # gamut and the resulting element is returned as a span hash.
    #
    $text = $this->runSpanGamut($matches[2]);
    return $this->hashSpan("<strong>$text</strong>");
}
function doBlockQuotes($text) {
    #
    # Find runs of lines quoted with a leading ">" and replace each run
    # with the value returned by _doBlockQuotes_callback().
    #
    # NOTE(review): the opener of the repeated inner group was lost from
    # this copy of the file; it is restored here as an atomic group.
    # Only $1 is read by the callback, so the kind of group does not
    # affect the result.
    #
    $text = preg_replace_callback('/
          (                             # Wrap whole match in $1
            (?>
              ^[ \t]*>[ \t]?            # ">" at the start of a line
                .+\n                    # rest of the first line
              (.+\n)*                   # subsequent consecutive lines
              \n*                       # blanks
            )+
          )
        /xm',
        array(&$this, '_doBlockQuotes_callback'), $text);

    return $text;
}
function _doBlockQuotes_callback($matches) {
    #
    # preg_replace_callback handler used by doBlockQuotes().
    #
    # $matches[1] - the whole quoted run, ">" prefixes included.
    # Returns the hashBlock() placeholder for the <blockquote> element.
    #
    $bq = $matches[1];
    # trim one level of quoting - trim whitespace-only lines
    $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
    $bq = $this->runBlockGamut($bq);    # recurse

    # Indent every line of the generated markup.
    $bq = preg_replace('/^/m', " ", $bq);
    # These leading spaces cause problem with <pre> content,
    # so we need to fix that:
    $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array(&$this, '_doBlockQuotes_callback2'), $bq);

    return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
function _doBlockQuotes_callback2($matches) {
    #
    # Callback used by _doBlockQuotes_callback() on each <pre>...</pre>
    # section: removes one leading space from every line of $matches[1]
    # so the indentation added to the blockquote does not alter the
    # preformatted content.
    #
    return preg_replace('/^ /m', '', $matches[1]);
}
function formParagraphs($text) {
    #
    #   Params:
    #       $text - string to process with html <p> tags
    #
    #   Returns the paragraphs joined by blank lines. Chunks that are keys
    #   of $this->html_blocks are replaced by the stored block; every
    #   other chunk goes through the span gamut and is wrapped in <p>.
    #
    # Strip leading and trailing lines:
    $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Wrap <p> tags.
    #
    foreach ($grafs as $key => $value) {
        if (!isset( $this->html_blocks[$value] )) {
            $value = $this->runSpanGamut($value);
            # Leading spaces/tabs are dropped in favour of the <p> tag.
            $value = preg_replace('/^([ \t]*)/', "<p>", $value);
            $value .= "</p>";
            $grafs[$key] = $this->unhash($value);
        }
    }

    #
    # Unhashify HTML blocks
    #
    foreach ($grafs as $key => $graf) {
        if (isset($this->html_blocks[$graf])) {
            $grafs[$key] = $this->html_blocks[$graf];
        }
    }

    return implode("\n\n", $grafs);
}
function encodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to be
    # encoded. Returns the encoded text.
    #
    # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    # An `&` that already starts an entity (named, decimal or hex) is
    # left alone.
    $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
                         '&amp;', $text);

    # Encode naked <'s (a `<` that cannot be the start of a tag).
    $text = preg_replace('{<(?![a-z/?\$!%])}i', '&lt;', $text);

    return $text;
}
function encodeBackslashEscapes($text) {
    #
    #   Parameter:  String.
    #   Returns:    The string, after replacing every key of
    #               $this->backslash_escape_table with its value.
    #
    # Must process escaped backslashes first; str_replace() applies the
    # pairs in table order.
    return str_replace(array_keys($this->backslash_escape_table),
                       array_values($this->backslash_escape_table), $text);
}
function doAutoLinks($text) {
    #
    # Turn <http://...>-style URLs and <address@domain> e-mail addresses
    # into links, using the two _doAutoLinks_*_callback() methods.
    #
    $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}',
        array(&$this, '_doAutoLinks_url_callback'), $text);

    # Email addresses: <address@domain.foo>
    $text = preg_replace_callback('{
        <
        (?:mailto:)?
        (
            [-.\w\x80-\xFF]+
            \@
            [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
        )
        >
        }xi',
        array(&$this, '_doAutoLinks_email_callback'), $text);

    return $text;
}
function _doAutoLinks_url_callback($matches) {
    #
    # Callback for the URL pattern of doAutoLinks(). $matches[1] is the
    # URL without the surrounding angle brackets; it is used both as the
    # href and as the link text. Returns the link as a span hash.
    #
    $url = $this->encodeAmpsAndAngles($matches[1]);
    $link = "<a href=\"$url\">$url</a>";
    return $this->hashSpan($link);
}
function _doAutoLinks_email_callback($matches) {
    #
    # Callback for the e-mail pattern of doAutoLinks(). $matches[1] is
    # the address without brackets or `mailto:` prefix. Returns the
    # obfuscated mailto link as a span hash.
    #
    $address = $this->unescapeSpecialChars($matches[1]);
    $link = $this->encodeEmailAddress($address);
    return $this->hashSpan($link);
}
function encodeEmailAddress($addr) {
    #
    #   Input: an email address, e.g. "foo@example.com"
    #
    #   Output: the email address as a mailto link, with each character
    #       of the address encoded as either a decimal or hex entity, in
    #       the hopes of foiling most address harvesting spam bots. E.g.:
    #
    #     <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
    #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
    #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
    #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
    #
    #   Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
    #   With some optimizations by Milian Wolff.
    #
    $addr = "mailto:" . $addr;
    # Split into single characters (bytes; no /u modifier).
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

    foreach ($chars as $key => $char) {
        $ord = ord($char);
        # Ignore non-ascii chars.
        if ($ord < 128) {
            $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
            # roughly 10% raw, 45% hex, 45% dec
            # '@' *must* be encoded. I insist.
            if ($r > 90 && $char != '@') /* do nothing */;
            else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
            else              $chars[$key] = '&#'.$ord.';';
        }
    }

    $addr = implode('', $chars);
    $text = implode('', array_slice($chars, 7)); # text without `mailto:`
    $addr = "<a href=\"$addr\">$text</a>";

    return $addr;
}
function unescapeSpecialChars($text) {
    #
    # Swap back in all the special characters we've hidden: every value
    # of $this->escape_table found in $text is replaced by its key.
    #
    return str_replace(array_values($this->escape_table),
                       array_keys($this->escape_table), $text);
}
function tokenizeHTML($str) {
    #
    #   Parameter:  String containing HTML + Markdown markup.
    #   Returns:    An array of the tokens comprising the input
    #               string. Each token is either a tag or a run of text
    #               between tags. Each element of the array is a
    #               two-element array; the first is either 'tag' or 'text';
    #               the second is the actual value.
    #   Note:       Markdown code spans are taken into account: no tag token is
    #               generated within a code span.
    #
    $tokens = array();

    while ($str != "") {
        #
        # Each loop iteration searches for either the next tag or the next
        # opening code span marker. If a code span marker is found, the
        # code span is extracted in its entirety and will result in an
        # extra text token.
        #
        $parts = preg_split('{
                (
                    (?<![`\\\\])
                    `+                      # code span marker
                |
                    <!--    .*?     -->     # comment
                |
                    <\?.*?\?> | <%.*?%>     # processing instruction
                |
                    <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                    (?:
                        \s
                        (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                    )?
                    >
                )
                }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Create token from text preceding tag.
        if ($parts[0] != "") {
            $tokens[] = array('text', $parts[0]);
        }

        # Check if we reach the end.
        if (count($parts) < 3) {
            break;
        }

        # Create token from tag or code span.
        # ($parts[1][0]: the curly-brace offset syntax is a parse error
        # as of PHP 8.)
        if ($parts[1][0] == "`") {
            $tokens[] = array('text', $parts[1]);
            $str = $parts[2];

            # Skip the whole code span, pass as text token.
            if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/sm',
                $str, $matches))
            {
                $tokens[] = array('text', $matches[1]);
                $str = $matches[2];
            }
        } else {
            $tokens[] = array('tag', $parts[1]);
            $str = $parts[2];
        }
    }

    return $tokens;
}
function outdent($text) {
    #
    # Remove one level of line-leading tabs or spaces: at the start of
    # each line, either one tab or up to $this->tab_width spaces.
    #
    return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
}
# String length function for detab. `_initDetab` will create a function to
# handle UTF-8 if the default function does not exist.
var $utf8_strlen = 'mb_strlen';
function detab($text) {
    #
    # Replace tabs with the appropriate amount of space.
    #
    # For each line we separate the line in blocks delimited by
    # tab characters. Then we reconstruct every line by adding the
    # appropriate number of spaces between each block.
    #
    # Note that every line of the result, the last one included, is
    # terminated by a newline.
    #
    $strlen = $this->utf8_strlen; # best strlen function for UTF-8.
    $lines = explode("\n", $text);
    $text = "";

    foreach ($lines as $line) {
        # Split in blocks.
        $blocks = explode("\t", $line);
        # Add each block to the line.
        $line = $blocks[0];
        unset($blocks[0]); # Do not add first block twice.
        foreach ($blocks as $block) {
            # Calculate amount of space, insert spaces, insert block.
            $amount = $this->tab_width -
                $strlen($line, 'UTF-8') % $this->tab_width;
            $line .= str_repeat(" ", $amount) . $block;
        }
        $text .= "$line\n";
    }
    return $text;
}
function _initDetab() {
    #
    # Check for the availability of the function in the `utf8_strlen` property
    # (probably `mb_strlen`). If the function is not available, create a
    # function that will loosely count the number of UTF-8 characters with a
    # regular expression.
    #
    if (function_exists($this->utf8_strlen)) return;
    $this->utf8_strlen = 'Markdown_UTF8_strlen';

    # The fallback may already have been declared by an earlier call.
    if (function_exists($this->utf8_strlen)) return;
    function Markdown_UTF8_strlen($text) {
        # preg_match_all() returns the number of full pattern matches:
        # one per single byte below 0xC0, or per lead byte together with
        # its continuation bytes.
        return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    }
}
function unhash($text) {
    #
    # Swap back in all the tags hashed by _HashHTMLBlocks: every key of
    # $this->html_hashes found in $text is replaced by its stored value.
    #
    return str_replace(array_keys($this->html_hashes),
                       array_values($this->html_hashes), $text);
}
#
# Markdown Extra Parser Class
#

class MarkdownExtra_Parser extends Markdown_Parser {

    # Prefix for footnote ids.
    var $fn_id_prefix = "";

    # Optional title attribute for footnote links and backlinks.
    var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
    var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;

    # Optional class attribute for footnote links and backlinks.
    var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
    var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
function __construct() {
    #
    # Constructor entry point for PHP 5 and later. A method named after
    # its class is no longer treated as a constructor as of PHP 8, so
    # delegate to the legacy-named constructor below. Declared first so
    # that it takes precedence wherever both forms are recognised.
    #
    $this->MarkdownExtra_Parser();
}

function MarkdownExtra_Parser() {
    #
    # Constructor function. Initialize the parser object.
    #
    # Add extra escapable characters before parent constructor
    # initialize the table.
    $this->escape_chars .= ':|';

    # Insert extra document, block, and span transformations.
    # Parent constructor will do the sorting.
    $this->document_gamut += array(
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
        );
    $this->block_gamut += array(
        "doTables"           => 15,
        "doDefLists"         => 45,
        );
    $this->span_gamut += array(
        "doFootnotes"        => 4,
        "doAbbreviations"    => 5,
        );

    parent::Markdown_Parser();
}
# Extra hashes used during extra transformations.
# (All of them are reset by transform().)
var $footnotes = array();
var $footnotes_ordered = array();
var $abbr_desciptions = array();    # sic: name kept as spelled, it is used by other methods
var $abbr_matches = array();
var $html_cleans = array();
function transform($text) {
    #
    # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before
    # blank line stripping and added extra parameter to `runBlockGamut`.
    #
    # Returns whatever the parent class's transform() returns for $text.
    #
    # Clear the global hashes. If we don't clear these, you get conflicts
    # from other articles when generating a page which contains more than
    # one article (e.g. an index page that shows the N most recent
    # articles):
    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->abbr_desciptions = array();
    $this->abbr_matches = array();
    $this->html_cleans = array();

    return parent::transform($text);
}
### HTML Block Parser ###

# Tags that are always treated as block tags:
var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';

# Tags treated as block tags only if the opening tag is alone on its line:
var $context_block_tags = 'script|noscript|math|ins|del';

# Tags where markdown="1" default to span mode:
var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

# Tags which must not have their contents modified, no matter where
# they appear:
var $clean_tags = 'script|math';

# Tags that do not need to be closed.
var $auto_close_tags = 'hr|img';
function hashHTMLBlocks($text) {
    #
    # Hashify HTML Blocks and "clean tags".
    #
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables. That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans. The list of tags we're looking for is
    # hard-coded.
    #
    # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
    # _HashHTMLBlocks_InHTML when it encounters block tags. When the
    # markdown="1" attribute is found within a tag, _HashHTMLBlocks_InHTML
    # calls back _HashHTMLBlocks_InMarkdown to handle the Markdown syntax
    # within the tag. These two functions are calling each other.
    # It's recursive!
    #
    # Call the HTML-in-Markdown hasher; only the processed text (first
    # element of the returned pair) is of interest here.
    #
    list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);

    return $text;
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag = '', $span = false)
{
    #
    # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
    #
    # *   $indent is the number of space to be ignored when checking for code
    #     blocks. This is important because if we don't take the indent into
    #     account, something like this (which looks right) won't work as expected:
    #
    #     <div>
    #         <div markdown="1">
    #         Hello World.  <-- Is this a Markdown code block or text?
    #         </div>  <-- Is this a Markdown code block or a real tag?
    #     <div>
    #
    #     If you don't like this, just don't indent the tag on which
    #     you apply the markdown="1" attribute.
    #
    # *   If $enclosing_tag is not empty, stops at the first unmatched closing
    #     tag with that name. Nested tags supported.
    #
    # *   If $span is true, text inside must be treated as span. So any double
    #     newline will be replaced by a single newline so that it does not create
    #     paragraphs.
    #
    # Returns an array of that form: ( processed text , remaining text )
    #
    if ($text === '') return array('', '');

    # Regex to check for the presence of newlines around a block tag.
    $newline_match_before = '/(?:^\n?|\n\n)*$/';
    $newline_match_after =
        '{
            ^                       # Start of text following the tag.
            (?:[ ]*<!--.*?-->)?     # Optional comment.
            [ ]*\n                  # Must be followed by newline.
        }xs';

    # Regex to match any tag.
    $block_tag_match =
        '{
            (                   # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    (?:             # Tag name.
                        '.$this->block_tags.'           |
                        '.$this->context_block_tags.'   |
                        '.$this->clean_tags.'           |
                        (?!\s)'.$enclosing_tag.'
                    )
                    \s*             # Whitespace.
                    (?:
                        ".*?"       |   # Double quotes (can contain `>`)
                        \'.*?\'     |   # Single quotes (can contain `>`)
                        .+?             # Anything but quotes and `>`.
                    )*?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';


    $depth = 0;     # Current depth inside the tag tree.
    $parsed = "";   # Parsed text that will be returned.

    #
    # Loop through every tag until we find the closing tag of the parent
    # or loop until reaching the end of text if no parent tag specified.
    #
    do {
        #
        # Split the text using the first $tag_match pattern found.
        # Text before  pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($block_tag_match, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        # If in Markdown span mode, add a empty-string span-level hash
        # after each newline to prevent triggering any block element.
        if ($span) {
            $newline = $this->hashSpan("") . "\n";
            $parts[0] = str_replace("\n", $newline, $parts[0]);
        }

        $parsed .= $parts[0]; # Text before current tag.

        # If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; # Tag to handle.
        $text = $parts[2]; # Remaining text after current tag.

        # String offsets below use [] rather than {}: the curly-brace
        # form is a parse error as of PHP 8.

        #
        # Check for: Tag inside code block or span
        #
        if (# Find current paragraph
            preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
            (
            # Then match in it either a code block...
            preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
                        '(?!\n)$/', $matches[1], $x) ||
            # ...or unbalanced code span markers. (the regex matches balanced)
            !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
                         $matches[1])
            ))
        {
            # Tag is in code block or span and may not be a tag at all. So we
            # simply skip the first char (should be a `<`).
            $parsed .= $tag[0];
            $text = substr($tag, 1) . $text; # Put back $tag minus first char.
        }
        #
        # Check for: Opening Block level tag or
        #            Opening Content Block tag (like ins and del)
        #               used as a block tag (tag is alone on its line).
        #
        else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||
            (   preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&
                preg_match($newline_match_before, $parsed) &&
                preg_match($newline_match_after, $text) )
            )
        {
            # Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            # Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        #
        # Check for: Clean tag (like script, math)
        #            HTML Comments, processing instructions.
        #
        else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Need to parse tag and following text using the HTML parser.
            # (don't check for markdown attribute)
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $parsed .= $block_text;
        }
        #
        # Check for: Tag with same name as enclosing tag.
        #
        else if ($enclosing_tag !== '' &&
            # Same name as enclosing tag.
            preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
        {
            #
            # Increase/decrease nested tag count.
            # (A tag ending in "/>" is self-closing and leaves it unchanged.)
            #
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                #
                # Going out of parent element. Clean up and break so we
                # return to the calling function.
                #
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    #
    # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
    #
    # *   Calls $hash_method to convert any blocks.
    # *   Stops when the first opening tag closes.
    # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
    #     (it is not inside clean tags)
    #
    # Returns an array of that form: ( processed text , remaining text )
    #
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_match = '
        {
            \s*         # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (["\'])     # $1: quote delimiter
            (.*?)       # $2: attribute value
            \1          # matching delimiter
        }xs';

    # Regex to match any tag.
    $tag_match = '{
            (                   # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    \s*             # Whitespace.
                    (?:
                        ".*?"       |   # Double quotes (can contain `>`)
                        \'.*?\'     |   # Single quotes (can contain `>`)
                        .+?             # Anything but quotes and `>`.
                    )*?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $original_text = $text;     # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag.
    # (Defaults to an empty name so the variable is always defined.)
    #
    $base_tag_name = '';
    if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
        $base_tag_name = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text using the first $tag_match pattern found.
        # Text before  pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        # String offsets below use [] rather than {}: the curly-brace
        # form is a parse error as of PHP 8.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        #
        if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name match base tag's.
            #
            if (preg_match("{^</?$base_tag_name\b}", $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternation is grouped so that the anchors apply to all
            # three accepted values (1, block, span), not just the first
            # and last.
            #
            if ($md_attr &&
                preg_match($markdown_attr_match, $tag, $attr_matches) &&
                preg_match('/^(?:1|block|span)$/', $attr_matches[2]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_match, '', $tag);

                # Check if text inside this tag must be parsed in span mode.
                $this->mode = $attr_matches[2];
                $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                    preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);

                # Calculate indent before tag.
                preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
                $indent = strlen($matches[1]);

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                                                    $tag_name, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
function hashClean($text) {
    #
    # Called whenever a tag must be hashed: when a function inserts a
    # "clean" tag in $text, it passes through this function and is
    # automatically escaped, blocking invalid nested overlap.
    #
    # Returns the md5 key that stands in for the tag; the unhashed text
    # is recorded under that key in both $this->html_cleans and
    # $this->html_hashes.
    #
    # Swap back any tag hash found in $text so we do not have to `unhash`
    # multiple times at the end.
    $text = $this->unhash($text);

    # Then hash the tag.
    $key = md5($text);
    $this->html_cleans[$key] = $text;
    $this->html_hashes[$key] = $text;
    return $key; # String that will replace the clean tag.
}
function doHeaders($text) {
    #
    # Redefined to add id attribute support.
    #
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2}
    #     --------
    #
    $text = preg_replace_callback(
        '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n=+[ \t]*\n+ }mx',
        array(&$this, '_doHeaders_callback_setext_h1'), $text);
    $text = preg_replace_callback(
        '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n-+[ \t]*\n+ }mx',
        array(&$this, '_doHeaders_callback_setext_h2'), $text);

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3}
    #   ...
    #   ###### Header 6   {#header2}
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of #\'s
            [ \t]*
            (.+?)       # $2 = Header text
            [ \t]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
            [ \t]*
            \n+
        }xm',
        array(&$this, '_doHeaders_callback_atx'), $text);

    return $text;
}
function _doHeaders_attr($attr) {
    #
    # Build the id attribute for a header tag.
    #
    # $attr - the id captured from a `{#id}` suffix; may be empty/unset.
    # Returns ` id="..."` (with a leading space), or "" when $attr is
    # empty in the sense of PHP's empty().
    #
    if (empty($attr))  return "";
    return " id=\"$attr\"";
}
function _doHeaders_callback_setext_h1($matches) {
    #
    # Callback for "====" underlined headers in doHeaders().
    # $matches[1] is the header text, $matches[2] the optional id.
    # Returns the hashed <h1> block surrounded by newlines.
    #
    # The id group is absent from $matches when no {#id} was given.
    $attr  = $this->_doHeaders_attr(isset($matches[2]) ? $matches[2] : '');
    $block = "<h1$attr>".$this->runSpanGamut($matches[1])."</h1>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_setext_h2($matches) {
    #
    # Callback for "----" underlined headers in doHeaders().
    # $matches[1] is the header text, $matches[2] the optional id.
    # Returns the hashed <h2> block surrounded by newlines.
    #
    # The id group is absent from $matches when no {#id} was given.
    $attr  = $this->_doHeaders_attr(isset($matches[2]) ? $matches[2] : '');
    $block = "<h2$attr>".$this->runSpanGamut($matches[1])."</h2>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
    #
    # Callback for "#"-prefixed headers in doHeaders().
    # $matches[1] is the run of #'s (its length is the header level),
    # $matches[2] the header text, $matches[3] the optional id.
    # Returns the hashed <hN> block surrounded by newlines.
    #
    $level = strlen($matches[1]);
    # The id group is absent from $matches when no {#id} was given.
    $attr  = $this->_doHeaders_attr(isset($matches[3]) ? $matches[3] : '');
    $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function doTables($text) {
    #
    # Form HTML tables.
    #
    $less_than_tab = $this->tab_width - 1;
    #
    # Find tables with leading pipe.
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|]                         # Optional leading pipe (present)
            (.+) \n                     # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

            (                           # $3: Cells
                (?:
                    [ ]*                # Allowed whitespace.
                    [|] .* \n           # Row content.
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            (\S.*[|].*) \n              # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

            (                           # $3: Cells
                (?:
                    .* [|] .* \n        # Row content
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_callback'), $text);

    return $text;
}
function _doTable_leadingPipe_callback($matches) {
    #
    # Callback for the leading-pipe table pattern of doTables().
    # Strips the leading pipe from every content row, then delegates to
    # _doTable_callback() with the same match layout
    # (whole match, header, underline, content).
    #
    $head      = $matches[1];
    $underline = $matches[2];
    $content   = $matches[3];

    # Remove leading pipe for each row.
    $content = preg_replace('/^ *[|]/m', '', $content);

    return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
}
function _doTable_callback($matches) {
    #
    # Build the HTML for one table.
    #
    # $matches[1] - header row, $matches[2] - header underline,
    # $matches[3] - content rows (one per line).
    # Returns the hashBlock() placeholder for the <table>, followed by a
    # newline.
    #
    $head      = $matches[1];
    $underline = $matches[2];
    $content   = $matches[3];

    # Remove any trailing pipes for each line.
    $head      = preg_replace('/[|] *$/m', '', $head);
    $underline = preg_replace('/[|] *$/m', '', $underline);
    $content   = preg_replace('/[|] *$/m', '', $content);

    # Reading alignment from header underline.
    $attr = array();
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s))       $attr[$n] = ' align="right"';
        else if (preg_match('/^ *:-+: *$/', $s)) $attr[$n] = ' align="center"';
        else if (preg_match('/^ *:-+ *$/', $s))  $attr[$n] = ' align="left"';
        else                                     $attr[$n] = '';
    }

    # Creating code spans before splitting the row is an easy way to
    # handle a code span containing pipes.
    $head      = $this->doCodeSpans($head);
    $headers   = preg_split('/ *[|] */', $head);
    $col_count = count($headers);

    # The header row may have more cells than the underline; give those
    # columns an empty alignment instead of reading an undefined offset.
    $attr = array_pad($attr, $col_count, '');

    # Write column headers.
    $text = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header)
        $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    # Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
        # Creating code spans before splitting the row is an easy way to
        # handle a code span containing pipes.
        $row = $this->doCodeSpans($row);

        # Split row by cell; never more cells than header columns, and
        # short rows are padded with empty cells.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        $text .= "<tr>\n";
        foreach ($row_cells as $n => $cell)
            $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
        $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
}
function doDefLists($text) {
    #
    # Form HTML definition lists.
    #
    # Every whole definition list found in $text is replaced by the
    # value returned from _doDefLists_callback().
    #
    $less_than_tab = $this->tab_width - 1;

    # Re-usable pattern to match any entire dl list:
    $whole_list = '
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$less_than_tab.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$less_than_tab.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
          )
        )
    '; // mx

    $text = preg_replace_callback('{
            (?:(?<=\n\n)|\A\n?)
            '.$whole_list.'
        }mx',
        array(&$this, '_doDefLists_callback'), $text);

    return $text;
}
function _doDefLists_callback($matches) {
#
# preg callback used by doDefLists: render one matched definition list.
#
# $matches[1] is the raw text of the whole list. Its items are converted
# by processDefListItems, the result is trimmed, wrapped in <dl>...</dl>
# and passed through hashBlock. Two newlines are appended to the hashed
# block.
#
    $items_html = trim($this->processDefListItems($matches[1]));
    $dl_html = "<dl>\n" . $items_html . "\n</dl>";

    return $this->hashBlock($dl_html) . "\n\n";
}
function processDefListItems($list_str) {
#
# Process the contents of a single definition list, splitting it
# into individual term and definition list items.
#
# $list_str - raw Markdown of one definition list.
# Returns the text with terms rewritten by
# _processDefListItems_callback_dt and definitions rewritten by
# _processDefListItems_callback_dd.
#
    $less_than_tab = $this->tab_width - 1;

    # Trim trailing blank lines down to a single newline.
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Pass 1: definition terms.
    # The lookahead for the definition mark uses the same indent limit
    # (tab width - 1) as every other pattern here, instead of a
    # hard-coded 3, so terms are still found when the tab width is not 4.
    $list_str = preg_replace_callback('{
        (?:\n\n+|\A\n?)                 # leading line
        (                               # definition terms = $1
            [ ]{0,'.$less_than_tab.'}   # leading whitespace
            (?![:][ ]|[ ])              # negative lookahead for a definition
                                        # mark (colon) or more whitespace.
            (?: \S.* \n)+?              # actual term (not whitespace).
        )
        (?=\n?[ ]{0,'.$less_than_tab.'}:[ ])    # lookahead for following line feed
                                        # with a definition mark.
        }xm',
        array(&$this, '_processDefListItems_callback_dt'), $list_str);

    # Pass 2: actual definitions. This must run after pass 1 because the
    # pattern uses the <dt> tags produced above as one of its stop marks.
    $list_str = preg_replace_callback('{
        \n(\n+)?                        # leading line = $1
        [ ]{0,'.$less_than_tab.'}       # whitespace before colon
        [:][ ]+                         # definition mark (colon)
        ((?s:.+?))                      # definition text = $2
        (?= \n+                         # stop at next definition mark,
            (?:                         # next term or end of text
                [ ]{0,'.$less_than_tab.'} [:][ ] |
                <dt> | \z
            )
        )
        }xm',
        array(&$this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
function _processDefListItems_callback_dt($matches) {
#
# preg callback used by processDefListItems for definition terms.
#
# $matches[1] holds one or more term lines. Each line is trimmed, run
# through the span gamut and wrapped in its own <dt> element, preceded
# by a newline. A final newline closes the returned string.
#
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }

    return $html . "\n";
}
function _processDefListItems_callback_dd($matches) {
#
# preg callback used by processDefListItems for definitions.
#
# $matches[1] - blank line(s) found before the definition, if any.
# $matches[2] - text of the definition.
#
# A definition preceded by a blank line, or containing one, is outdented
# and rendered with the block gamut; any other definition is right
# trimmed, outdented and rendered with the span gamut only.
# Returns the <dd> element surrounded by newlines.
#
    $definition = $matches[2];
    $is_block = $matches[1] || preg_match('/\n{2,}/', $definition);

    if (!$is_block) {
        $inline = $this->runSpanGamut($this->outdent(rtrim($definition)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    $block = $this->runBlockGamut($this->outdent($definition . "\n\n"));
    return "\n<dd>" . "\n" . $block . "\n" . "</dd>\n";
}
function doItalicsAndBold($text) {
#
# Apply strong and regular emphasis to $text.
#
# Redefined to change emphasis by underscore behaviour so that it does
# not work in the middle of a word. Asterisk markers carry no such
# alphanumeric restriction.
#
# Strong emphasis is handled first, by _doItalicAndBold_strong_callback;
# regular emphasis follows, by _doItalicAndBold_em_callback.
#
    $strong_patterns = array(
        '{
            (                       # $1: Marker
                (?<![a-zA-Z0-9])    # Not preceded by alphanum
                (?<!__)             # or by two marker chars.
                __
            )
            (?=\S)                  # Not followed by whitespace
            (?!__)                  # or two others marker chars.
            (                       # $2: Content
                (?>
                    [^_]+?          # Anthing not em markers.
                |
                                    # Balence any regular _ emphasis inside.
                    (?<![a-zA-Z0-9]) _ (?=\S) (.+?)
                    (?<=\S) _ (?![a-zA-Z0-9])
                |
                    ___+
                )+?
            )
            (?<=\S) __              # End mark not preceded by whitespace.
            (?![a-zA-Z0-9])         # Not followed by alphanum
            (?!__)                  # or two others marker chars.
        }sx',
        '{
            ( (?<!\*\*) \*\* )      # $1: Marker (not preceded by two *)
            (?=\S)                  # Not followed by whitespace
            (?!\1)                  # or two others marker chars.
            (                       # $2: Content
                (?>
                    [^*]+?          # Anthing not em markers.
                |
                                    # Balence any regular * emphasis inside.
                    \* (?=\S) (.+?) (?<=\S) \*
                )+?
            )
            (?<=\S) \*\*            # End mark not preceded by whitespace.
        }sx',
    );

    $em_patterns = array(
        '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
        '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
    );

    # <strong> must go first, then <em>.
    $text = preg_replace_callback($strong_patterns,
        array(&$this, '_doItalicAndBold_strong_callback'), $text);

    return preg_replace_callback($em_patterns,
        array(&$this, '_doItalicAndBold_em_callback'), $text);
}
function formParagraphs($text) {
#
# Wrap the paragraphs of $text in <p> tags and restore hashed HTML.
#
# Params:
#   $text - string to process with html <p> tags
#
# Returns the text with paragraphs joined by blank lines and passed
# through unhash.
#
    # Drop leading and trailing newlines, then cut on blank lines.
    $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
    $paragraphs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($paragraphs as $index => $raw) {
        $html = trim($this->runSpanGamut($raw));

        # A paragraph is left bare when its first 32 characters are a key
        # of html_blocks, or when the whole paragraph is a key of
        # html_cleans; everything else gets wrapped.
        $is_hashed = isset($this->html_blocks[substr($html, 0, 32)])
            || isset($this->html_cleans[$html]);

        $paragraphs[$index] = $is_hashed ? $html : "<p>$html</p>";
    }

    # Join the paragraphs, then remove any tag hashes still present.
    return $this->unhash(implode("\n\n", $paragraphs));
}
2452 ### Footnotes
function stripFootnotes($text) {
#
# Strip footnote definitions from $text.
#
# Every definition found is handed to _stripFootnotes_callback and is
# replaced in the text by that callback's return value. Returns the
# resulting text.
#
    $max_indent = $this->tab_width - 1;

    # Footnote definitions are in the form: [^id]: footnote text
    $definition_regex = '{
        ^[ ]{0,'.$max_indent.'}\[\^(.+?)\][ ]?: # note_id = $1
          [ \t]*
          \n?                   # maybe *one* newline
        (                       # text = $2 (no blank lines allowed)
            (?:
                .+              # actual text
            |
                \n              # newlines but
                (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                # by non-indented content
            )*
        )
        }xm';

    return preg_replace_callback(
        $definition_regex, array(&$this, '_stripFootnotes_callback'), $text);
}
function _stripFootnotes_callback($matches) {
#
# preg callback used by stripFootnotes: remember one footnote definition.
#
# $matches[1] - note id as written in the text.
# $matches[2] - footnote text; it is outdented before being stored.
#
# The note is stored under fn_id_prefix + id, which is the key
# _appendFootnotes_callback uses when it looks a footnote up. Storing
# the bare id made every footnote unresolvable as soon as a prefix
# was configured.
#
# Returns the empty string, which replaces the definition in the text.
#
    $note_id = $this->fn_id_prefix . $matches[1];
    $this->footnotes[$note_id] = $this->outdent($matches[2]);
    return ''; # String that will replace the block
}
function doFootnotes($text) {
#
# Replace every footnote reference [^id] in $text with a text token of
# the form  a NUL "fn:" id NUL z  and return the result.
#
    $token = "a\0fn:\\1\0z";

    return preg_replace('{\[\^(.+?)\]}', $token, $text);
}
function appendFootnotes($text) {
#
# Replace footnote tokens by their markers and append the footnote list.
#
# Each "a NUL fn:id NUL z" token in $text is handed to
# _appendFootnotes_callback. When that leaves entries in
# footnotes_ordered, a <div class="footnotes"> holding an ordered list
# of those notes is appended, each note ending with a backlink to its
# reference.
#
# Returns the resulting text.
#
    $text = preg_replace_callback('{a\0fn:(.*?)\0z}',
        array(&$this, '_appendFootnotes_callback'), $text);

    # Nothing referenced: the text is complete as it is. (An old
    # replacement of "a{fn:id}z" used to follow the list; nothing in this
    # chunk produces that token form and unmatched tokens are already
    # restored by the callback, so it has been dropped.)
    if (empty($this->footnotes_ordered)) {
        return $text;
    }

    $text .= "\n\n";
    $text .= "<div class=\"footnotes\">\n";
    $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
    $text .= "<ol>\n\n";

    # Attributes shared by all backlinks. Any "%%" in the class or title
    # is replaced by the footnote number further down.
    $attr = " rev=\"footnote\"";
    if ($this->fn_backlink_class != "") {
        $class = $this->fn_backlink_class;
        $class = $this->encodeAmpsAndAngles($class);
        $class = str_replace('"', '&quot;', $class);
        $attr .= " class=\"$class\"";
    }
    if ($this->fn_backlink_title != "") {
        $title = $this->fn_backlink_title;
        $title = $this->encodeAmpsAndAngles($title);
        $title = str_replace('"', '&quot;', $title);
        $attr .= " title=\"$title\"";
    }
    $num = 0;

    foreach ($this->footnotes_ordered as $note_id => $footnote) {
        $footnote .= "\n"; # Need to append newline before parsing.
        $footnote = $this->runBlockGamut("$footnote\n");

        $attr2 = str_replace("%%", ++$num, $attr);

        # The id comes from the document text, so encode it the same way
        # as the class and title above before placing it in attributes.
        # Array keys that look like integers come back as int: cast first.
        $id_attr = $this->encodeAmpsAndAngles((string) $note_id);
        $id_attr = str_replace('"', '&quot;', $id_attr);

        # Add backlink to last paragraph; create new paragraph if needed.
        $backlink = "<a href=\"#fnref:$id_attr\"$attr2>&#8617;</a>";
        if (preg_match('{</p>$}', $footnote)) {
            $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
        } else {
            $footnote .= "\n\n<p>$backlink</p>";
        }

        $text .= "<li id=\"fn:$id_attr\">\n";
        $text .= $footnote . "\n";
        $text .= "</li>\n\n";
    }

    $text .= "</ol>\n";
    $text .= "</div>";

    return $text;
}
function _appendFootnotes_callback($matches) {
#
# preg callback used by appendFootnotes: build one footnote marker.
#
# $matches[1] - id taken from the footnote token.
#
# A marker is created only if the id (with fn_id_prefix prepended) has a
# corresponding footnote *and* that footnote has not been used by
# another marker. Otherwise the original "[^id]" text is returned.
#
    $node_id = $this->fn_id_prefix . $matches[1];

    if (!isset($this->footnotes[$node_id])) {
        return "[^".$matches[1]."]";
    }

    # Transfer footnote content to the ordered list; its position there
    # is the number shown in the marker.
    $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
    unset($this->footnotes[$node_id]);
    $num = count($this->footnotes_ordered);

    $attr = " rel=\"footnote\"";
    if ($this->fn_link_class != "") {
        $class = $this->fn_link_class;
        $class = $this->encodeAmpsAndAngles($class);
        $class = str_replace('"', '&quot;', $class);
        $attr .= " class=\"$class\"";
    }
    if ($this->fn_link_title != "") {
        $title = $this->fn_link_title;
        $title = $this->encodeAmpsAndAngles($title);
        $title = str_replace('"', '&quot;', $title);
        $attr .= " title=\"$title\"";
    }
    $attr = str_replace("%%", $num, $attr);

    # The id comes from the document text, so encode it the same way as
    # the class and title above before placing it in attributes.
    $id_attr = $this->encodeAmpsAndAngles($node_id);
    $id_attr = str_replace('"', '&quot;', $id_attr);

    return
        "<sup id=\"fnref:$id_attr\">".
        "<a href=\"#fn:$id_attr\"$attr>$num</a>".
        "</sup>";
}
2588 ### Abbreviations ###
function stripAbbreviations($text) {
#
# Strip abbreviation definitions from $text.
#
# Every definition found is handed to _stripAbbreviations_callback and
# is replaced in the text by that callback's return value. Returns the
# resulting text.
#
    $max_indent = $this->tab_width - 1;

    # Abbreviation definitions are in the form: *[id]: description
    $definition_regex = '{
        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
        (.*)                    # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback(
        $definition_regex, array(&$this, '_stripAbbreviations_callback'), $text);
}
function _stripAbbreviations_callback($matches) {
#
# preg callback used by stripAbbreviations: remember one abbreviation.
#
# $matches[1] - the abbreviated word.
# $matches[2] - its description; stored trimmed.
#
# The regex-quoted word is appended to abbr_matches and the description
# is stored in abbr_desciptions under the word itself.
# Returns the empty string, which replaces the definition in the text.
#
    list(, $word, $description) = $matches;

    $this->abbr_matches[] = preg_quote($word);
    $this->abbr_desciptions[$word] = trim($description);

    return '';
}
function doAbbreviations($text) {
#
# Pass every occurrence of a known abbreviation in $text to
# _doAbbreviations_callback and return the resulting text.
#
# An occurrence only counts when it has no word character directly
# before or after it. The text is returned untouched when abbr_matches
# is empty.
#
    if (!$this->abbr_matches) {
        return $text;
    }

    $alternatives = implode('|', $this->abbr_matches);
    $abbr_regex = '{(?<!\w)(?:'. $alternatives .')(?!\w)}';

    return preg_replace_callback($abbr_regex,
        array(&$this, '_doAbbreviations_callback'), $text);
}
function _doAbbreviations_callback($matches) {
#
# preg callback used by doAbbreviations: render one abbreviation.
#
# $matches[0] - the abbreviation as found in the text.
#
# Returns a hashed <abbr> element, with a title attribute when the
# stored description is not the empty string. Text that has no entry in
# abbr_desciptions is returned unchanged.
#
    $abbr = $matches[0];
    if (!isset($this->abbr_desciptions[$abbr])) {
        return $abbr;
    }

    $desc = $this->abbr_desciptions[$abbr];

    # Compare against '' rather than using empty(), which would also
    # discard the description "0".
    if ((string) $desc === '') {
        return $this->hashSpan("<abbr>$abbr</abbr>");
    }

    $desc = $this->escapeSpecialCharsWithinTagAttributes($desc);

    # The description goes inside title="...": encode ampersands, angle
    # brackets and double quotes, as is done for the footnote class and
    # title attributes, so a quote cannot end the attribute early.
    # NOTE(review): assumes the output of the call above is not altered
    # by this encoding - confirm against that helper.
    $desc = $this->encodeAmpsAndAngles($desc);
    $desc = str_replace('"', '&quot;', $desc);

    return $this->hashSpan("<abbr title=\"$desc\">$abbr</abbr>");
}
2647 PHP Markdown Extra
2648 ==================
2650 Description
2651 -----------
2653 This is a PHP port of the original Markdown formatter written in Perl
2654 by John Gruber. This special "Extra" version of PHP Markdown features
2655 further enhancements to the syntax for making additional constructs
2656 such as tables and definition lists.
2658 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2659 easy-to-write structured text format into HTML. Markdown's text format
2660 is most similar to that of plain text email, and supports features such
2661 as headers, *emphasis*, code blocks, blockquotes, and links.
2663 Markdown's syntax is designed not as a generic markup language, but
2664 specifically to serve as a front-end to (X)HTML. You can use span-level
2665 HTML tags anywhere in a Markdown document, and you can use block level
2666 HTML tags (like <div> and <table>) as well.
2668 For more information about Markdown's syntax, see:
2670 <http://daringfireball.net/projects/markdown/>
2673 Bugs
2674 ----
2676 To file bug reports please send email to:
2678 <michel.fortin@michelf.com>
2680 Please include with your report: (1) the example input; (2) the output you
2681 expected; (3) the output Markdown actually produced.
2684 Version History
2685 ---------------
2687 See Readme file for details.
2689 Extra 1.1.2 (7 Feb 2007)
2691 Extra 1.1.1 (28 Dec 2006)
2693 Extra 1.1 (1 Dec 2006)
2695 Extra 1.0.1 (9 Dec 2005)
2697 Extra 1.0 (5 Sep 2005)
2700 Copyright and License
2701 ---------------------
2703 PHP Markdown & Extra
2704 Copyright (c) 2004-2007 Michel Fortin
2705 <http://www.michelf.com/>
2706 All rights reserved.
2708 Based on Markdown
2709 Copyright (c) 2003-2006 John Gruber
2710 <http://daringfireball.net/>
2711 All rights reserved.
2713 Redistribution and use in source and binary forms, with or without
2714 modification, are permitted provided that the following conditions are
2715 met:
2717 * Redistributions of source code must retain the above copyright notice,
2718 this list of conditions and the following disclaimer.
2720 * Redistributions in binary form must reproduce the above copyright
2721 notice, this list of conditions and the following disclaimer in the
2722 documentation and/or other materials provided with the distribution.
2724 * Neither the name "Markdown" nor the names of its contributors may
2725 be used to endorse or promote products derived from this software
2726 without specific prior written permission.
2728 This software is provided by the copyright holders and contributors "as
2729 is" and any express or implied warranties, including, but not limited
2730 to, the implied warranties of merchantability and fitness for a
2731 particular purpose are disclaimed. In no event shall the copyright owner
2732 or contributors be liable for any direct, indirect, incidental, special,
2733 exemplary, or consequential damages (including, but not limited to,
2734 procurement of substitute goods or services; loss of use, data, or
2735 profits; or business interruption) however caused and on any theory of
2736 liability, whether in contract, strict liability, or tort (including
2737 negligence or otherwise) arising in any way out of the use of this
2738 software, even if advised of the possibility of such damage.