MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / lib / markdown.php
blob6d43f8bcac140e52fbeac034f311b1e868da0e0d
1 <?php
3 # Markdown Extra - A text-to-HTML conversion tool for web writers
5 # PHP Markdown & Extra
6 # Copyright (c) 2004-2007 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
15 define( 'MARKDOWN_VERSION', "1.0.1h" ); # Fri 3 Aug 2007
16 define( 'MARKDOWNEXTRA_VERSION', "1.1.4" ); # Fri 3 Aug 2007
20 # Global default settings:
23 # Change to ">" for HTML output
24 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
26 # Define the width of a tab for code blocks.
27 define( 'MARKDOWN_TAB_WIDTH', 4 );
29 # Optional title attribute for footnote links and backlinks.
30 define( 'MARKDOWN_FN_LINK_TITLE', "" );
31 define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
33 # Optional class attribute for footnote links and backlinks.
34 define( 'MARKDOWN_FN_LINK_CLASS', "" );
35 define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
39 # WordPress settings:
42 # Change to false to remove Markdown from posts and/or comments.
43 define( 'MARKDOWN_WP_POSTS', true );
44 define( 'MARKDOWN_WP_COMMENTS', true );
48 ### Standard Function Interface ###
50 define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
function Markdown($text) {
    #
    # Standard entry point: convert Markdown-formatted $text and return the
    # parser's output. A single parser object, of the class named by the
    # MARKDOWN_PARSER_CLASS constant, is created on the first call and
    # reused by every later call.
    #
    static $parser = null;

    if ($parser === null) {
        $class_name = MARKDOWN_PARSER_CLASS;
        $parser = new $class_name;
    }

    $html = $parser->transform($text);
    return $html;
}
68 ### WordPress Plugin Interface ###
71 Plugin Name: Markdown Extra
72 Plugin URI: http://www.michelf.com/projects/php-markdown/
73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
74 Version: 1.1.4
75 Author: Michel Fortin
76 Author URI: http://www.michelf.com/
79 if (isset($wp_version)) {
80 # More details about how it works here:
81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
83 # Post content and excerpts
84 # - Remove WordPress paragraph generator.
85 # - Run Markdown on excerpt, then remove all tags.
86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
87 if (MARKDOWN_WP_POSTS) {
88 remove_filter('the_content', 'wpautop');
89 remove_filter('the_content_rss', 'wpautop');
90 remove_filter('the_excerpt', 'wpautop');
91 add_filter('the_content', 'Markdown', 6);
92 add_filter('the_content_rss', 'Markdown', 6);
93 add_filter('get_the_excerpt', 'Markdown', 6);
94 add_filter('get_the_excerpt', 'trim', 7);
95 add_filter('the_excerpt', 'mdwp_add_p');
96 add_filter('the_excerpt_rss', 'mdwp_strip_p');
98 remove_filter('content_save_pre', 'balanceTags', 50);
99 remove_filter('excerpt_save_pre', 'balanceTags', 50);
100 add_filter('the_content', 'balanceTags', 50);
101 add_filter('get_the_excerpt', 'balanceTags', 9);
104 # Comments
105 # - Remove WordPress paragraph generator.
106 # - Remove WordPress auto-link generator.
107 # - Scramble important tags before passing them to the kses filter.
108 # - Run Markdown on excerpt then remove paragraph tags.
109 if (MARKDOWN_WP_COMMENTS) {
110 remove_filter('comment_text', 'wpautop', 30);
111 remove_filter('comment_text', 'make_clickable');
112 add_filter('pre_comment_content', 'Markdown', 6);
113 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
114 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
115 add_filter('get_comment_text', 'Markdown', 6);
116 add_filter('get_comment_excerpt', 'Markdown', 6);
117 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
119 global $mdwp_hidden_tags, $mdwp_placeholders;
120 $mdwp_hidden_tags = explode(' ',
121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
122 $mdwp_placeholders = explode(' ', str_rot13(
123 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
124 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
function mdwp_add_p($text) {
    # Wrap $text in <p>...</p> unless it is empty or already begins with a
    # bare <p>, <ul>, <ol>, <dl>, <pre> or <blockquote> tag. Inside the
    # wrapped text, every run of two or more newlines becomes a paragraph
    # boundary.
    $starts_with_block = preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text);
    if ($starts_with_block) {
        return $text;
    }

    $wrapped = '<p>' . $text . '</p>';
    return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
}
function mdwp_strip_p($t) {
    # Delete every bare <p> and </p> tag (matched case-insensitively) from
    # $t, leaving the text between them untouched.
    $without_paragraph_tags = preg_replace('{</?p>}i', '', $t);
    return $without_paragraph_tags;
}
function mdwp_hide_tags($text) {
    # Replace each tag listed in the global $mdwp_hidden_tags with the
    # placeholder stored at the same position in $mdwp_placeholders.
    global $mdwp_hidden_tags, $mdwp_placeholders;

    $scrambled = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
    return $scrambled;
}
function mdwp_show_tags($text) {
    # Inverse of mdwp_hide_tags(): replace each placeholder from the global
    # $mdwp_placeholders with the tag stored at the same position in
    # $mdwp_hidden_tags.
    global $mdwp_hidden_tags, $mdwp_placeholders;

    $restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
    return $restored;
}
148 ### bBlog Plugin Info ###
function identify_modifier_markdown() {
    # bBlog plugin descriptor: returns the metadata array describing this
    # file as the "markdown" modifier. The reported version is the
    # MARKDOWNEXTRA_VERSION constant.
    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'PHP Markdown Extra';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = MARKDOWNEXTRA_VERSION;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    return $info;
}
164 ### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
    # Smarty modifier hook: hands $text to Markdown() and returns its
    # result unchanged.
    $html = Markdown($text);
    return $html;
}
171 ### Textile Compatibility Mode ###
173 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
    # Active only when this file has been renamed to "classTextile.php"
    # (compared case-insensitively against the last 16 characters of the
    # path).

    # Optionally pull in PHP SmartyPants; a failed include is silenced.
    @include_once 'smartypants.php';

    # Stand-in for the Textile class that routes text through Markdown.
    class Textile {
        # Run Markdown on $text when both $lite and $encode are left
        # empty, then SmartyPants when that function exists; return the
        # resulting text.
        function TextileThis($text, $lite='', $encode='') {
            $full_conversion = ($lite == '' && $encode == '');
            if ($full_conversion) {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }

        # Restricted variant. Restrictions are not supported: the call is
        # forwarded to TextileThis() and $noimage is ignored.
        function TextileRestricted($text, $lite='', $noimage='') {
            $converted = $this->TextileThis($text, $lite);
            return $converted;
        }

        # Returns $text untouched; present for TextPattern 4.0.3
        # compatibility.
        function blockLite($text) {
            return $text;
        }
    }
}
197 # Markdown Parser Class
200 class Markdown_Parser {
202 # Regex to match balanced [brackets].
203 # Needed to insert a maximum bracket depth while converting to PHP.
204 var $nested_brackets_depth = 6;
205 var $nested_brackets;
207 var $nested_url_parenthesis_depth = 4;
208 var $nested_url_parenthesis;
210 # Table of hash values for escaped characters:
211 var $escape_chars = '\`*_{}[]()>#+-.!';
212 // var $escape_table = array();
213 var $backslash_escape_table = array();
215 # Change to ">" for HTML output.
216 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
217 var $tab_width = MARKDOWN_TAB_WIDTH;
219 # Change to `true` to disallow markup or entities.
220 var $no_markup = false;
221 var $no_entities = false;
function Markdown_Parser() {
    #
    # Constructor. Calls _initDetab(), builds the two depth-limited nesting
    # patterns, fills the backslash-escape table and sorts the gamut arrays
    # by priority.
    #
    $this->_initDetab();

    # Balanced [brackets], nested up to nested_brackets_depth levels.
    $bracket_depth = $this->nested_brackets_depth;
    $this->nested_brackets =
        str_repeat('(?>[^\[\]]+|\[', $bracket_depth) .
        str_repeat('\])*', $bracket_depth);

    # Balanced (parentheses) inside URLs, nested up to
    # nested_url_parenthesis_depth levels.
    $paren_depth = $this->nested_url_parenthesis_depth;
    $this->nested_url_parenthesis =
        str_repeat('(?>[^()\s]+|\(', $paren_depth) .
        str_repeat('(?>\)))*', $paren_depth);

    # Map each backslash-escaped special character (e.g. "\*") to its
    # numeric character reference (e.g. "&#42;").
    $special_chars = preg_split('/(?!^|$)/', $this->escape_chars);
    foreach ($special_chars as $ch) {
        $this->backslash_escape_table['\\' . $ch] = '&#' . ord($ch) . ';';
    }

    # Order each gamut by ascending priority value; asort() keeps the
    # method-name keys attached to their priorities.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
252 # Internal hashes used during transformation.
253 var $urls = array();
254 var $titles = array();
255 var $html_blocks = array();
256 var $html_hashes = array(); # Contains both blocks and span hashes.
258 # Status flag to avoid invalid nesting.
259 var $in_anchor = false;
function transform($text) {
    #
    # Main conversion routine: takes Markdown source in $text and returns
    # the converted text followed by one newline. The steps below run in a
    # fixed order; the document gamut methods run last, in the order stored
    # in $this->document_gamut.
    #

    # Drop link definitions and hashed HTML left over from a previous
    # document, so that converting several documents with one parser does
    # not mix their data.
    $this->urls        = array();
    $this->titles      = array();
    $this->html_blocks = array();
    $this->html_hashes = array();

    # Convert DOS (\r\n) and Mac (\r) line endings to \n, and make sure
    # the text ends with two newlines.
    $source = str_replace(array("\r\n", "\r"), "\n", $text);
    $source .= "\n\n";

    # Tabs to spaces, then block-level HTML to hash keys.
    $source = $this->detab($source);
    $source = $this->hashHTMLBlocks($source);

    # Empty out lines that contain only spaces, so later patterns can
    # treat consecutive blank lines as plain runs of \n.
    $source = preg_replace('/^[ ]+$/m', '', $source);

    # Apply every document gamut method in turn.
    foreach (array_keys($this->document_gamut) as $step) {
        $source = $this->$step($source);
    }

    return $source . "\n";
}
305 var $document_gamut = array(
306 # Strip link definitions, store in hashes.
307 "stripLinkDefinitions" => 20,
309 "runBasicBlockGamut" => 30,
313 function stripLinkDefinitions($text) {
315 # Strips link definitions from text, stores the URLs and titles in
316 # hash references.
318 $less_than_tab = $this->tab_width - 1;
320 # Link defs are in the form: ^[id]: url "optional title"
321 $text = preg_replace_callback('{
322 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
323 [ ]*
324 \n? # maybe *one* newline
325 [ ]*
326 <?(\S+?)>? # url = $2
327 [ ]*
328 \n? # maybe one newline
329 [ ]*
331 (?<=\s) # lookbehind for whitespace
332 ["(]
333 (.*?) # title = $3
334 [")]
335 [ ]*
336 )? # title is optional
337 (?:\n+|\Z)
338 }xm',
339 array(&$this, '_stripLinkDefinitions_callback'),
340 $text);
341 return $text;
function _stripLinkDefinitions_callback($matches) {
    # Callback for stripLinkDefinitions(). Stores the URL ($matches[2]) and
    # the optional title ($matches[3]) under the lower-cased link id
    # ($matches[1]), and returns an empty string so the definition is
    # removed from the text.
    $id = strtolower($matches[1]);

    $this->urls[$id] = $this->encodeAmpsAndAngles($matches[2]);

    if (isset($matches[3])) {
        # Double quotes are escaped so the title can sit in an attribute.
        $this->titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
352 function hashHTMLBlocks($text) {
353 if ($this->no_markup) return $text;
355 $less_than_tab = $this->tab_width - 1;
357 # Hashify HTML blocks:
358 # We only want to do this for block-level HTML tags, such as headers,
359 # lists, and tables. That's because we still want to wrap <p>s around
360 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
361 # phrase emphasis, and spans. The list of tags we're looking for is
362 # hard-coded:
363 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
364 'script|noscript|form|fieldset|iframe|math|ins|del';
365 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
366 'script|noscript|form|fieldset|iframe|math';
368 # Regular expression for the content of a block tag.
369 $nested_tags_level = 4;
370 $attr = '
371 (?> # optional tag attributes
372 \s # starts with whitespace
374 [^>"/]+ # text outside quotes
376 /+(?!>) # slash not followed by ">"
378 "[^"]*" # text inside double quotes (tolerate ">")
380 \'[^\']*\' # text inside single quotes (tolerate ">")
384 $content =
385 str_repeat('
387 [^<]+ # content without tag
389 <\2 # nested opening tag
390 '.$attr.' # attributes
394 >', $nested_tags_level). # end of opening tag
395 '.*?'. # last level nested tag content
396 str_repeat('
397 </\2\s*> # closing nested tag
400 <(?!/\2\s*> # other tags with a different name
402 )*',
403 $nested_tags_level);
405 # First, look for nested blocks, e.g.:
406 # <div>
407 # <div>
408 # tags for inner block must be indented.
409 # </div>
410 # </div>
412 # The outermost tags must start at the left margin for this to match, and
413 # the inner nested divs must be indented.
414 # We need to do this before the next, more liberal match, because the next
415 # match will start at the first `<div>` and stop at the first `</div>`.
416 $text = preg_replace_callback('{
417 ( # save in $1
418 ^ # start of line (with /m)
419 <('.$block_tags_a.')# start tag = $2
420 '.$attr.'>\n # attributes followed by > and \n
421 '.$content.' # content, support nesting
422 </\2> # the matching end tag
423 [ ]* # trailing spaces/tabs
424 (?=\n+|\Z) # followed by a newline or end of document
426 }xmi',
427 array(&$this, '_hashHTMLBlocks_callback'),
428 $text);
431 # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
433 $text = preg_replace_callback('{
434 ( # save in $1
435 ^ # start of line (with /m)
436 <('.$block_tags_b.')# start tag = $2
437 '.$attr.'> # attributes followed by >
438 '.$content.' # content, support nesting
439 </\2> # the matching end tag
440 [ ]* # trailing spaces/tabs
441 (?=\n+|\Z) # followed by a newline or end of document
443 }xmi',
444 array(&$this, '_hashHTMLBlocks_callback'),
445 $text);
447 # Special case just for <hr />. It was easier to make a special case than
448 # to make the other regex more complicated.
449 $text = preg_replace_callback('{
451 (?<=\n\n) # Starting after a blank line
452 | # or
453 \A\n? # the beginning of the doc
455 ( # save in $1
456 [ ]{0,'.$less_than_tab.'}
457 <(hr) # start tag = $2
458 \b # word break
459 ([^<>])*? #
460 /?> # the matching end tag
461 [ ]*
462 (?=\n{2,}|\Z) # followed by a blank line or end of document
464 }xi',
465 array(&$this, '_hashHTMLBlocks_callback'),
466 $text);
468 # Special case for standalone HTML comments:
469 $text = preg_replace_callback('{
471 (?<=\n\n) # Starting after a blank line
472 | # or
473 \A\n? # the beginning of the doc
475 ( # save in $1
476 [ ]{0,'.$less_than_tab.'}
477 (?s:
478 <!-- .*? -->
480 [ ]*
481 (?=\n{2,}|\Z) # followed by a blank line or end of document
483 }x',
484 array(&$this, '_hashHTMLBlocks_callback'),
485 $text);
487 # PHP and ASP-style processor instructions (<? and <%)
488 $text = preg_replace_callback('{
490 (?<=\n\n) # Starting after a blank line
491 | # or
492 \A\n? # the beginning of the doc
494 ( # save in $1
495 [ ]{0,'.$less_than_tab.'}
496 (?s:
497 <([?%]) # $2
501 [ ]*
502 (?=\n{2,}|\Z) # followed by a blank line or end of document
504 }x',
505 array(&$this, '_hashHTMLBlocks_callback'),
506 $text);
508 return $text;
function _hashHTMLBlocks_callback($matches) {
    # Callback for hashHTMLBlocks(). Stores the matched block ($matches[1])
    # through hashBlock() and returns its key surrounded by blank lines.
    $block_key = $this->hashBlock($matches[1]);
    return "\n\n" . $block_key . "\n\n";
}
function hashBlock($text) {
    #
    # Store block-level markup and return the key that stands in for it.
    # Any hash keys already present inside $text are expanded first via
    # unhash(), so the stored value never contains nested keys.
    #
    $expanded = $this->unhash($text);

    # Key format: "B", the 0x1A control character, then the MD5 of the
    # expanded text.
    $key = "B\x1A" . md5($expanded);

    # Blocks are recorded in both tables; html_hashes holds blocks and
    # spans alike.
    $this->html_hashes[$key] = $expanded;
    $this->html_blocks[$key] = $expanded;

    return $key;
}
function hashSpan($text, $word_separator = false) {
    #
    # Store span-level markup and return the key that stands in for it.
    # Any hash keys already present inside $text are expanded first via
    # unhash(). When $word_separator is true the key is wrapped in colons
    # (":key:").
    #
    $expanded = $this->unhash($text);

    # Key format: "S", the 0x1A control character, then the MD5 of the
    # expanded text.
    $key = "S\x1A" . md5($expanded);
    if ($word_separator) {
        $key = ":" . $key . ":";
    }

    # Spans are recorded in html_hashes only.
    $this->html_hashes[$key] = $expanded;

    return $key;
}
555 var $block_gamut = array(
557 # These are all the transformations that form block-level
558 # tags like paragraphs, headers, and list items.
560 "doHeaders" => 10,
561 "doHorizontalRules" => 20,
563 "doLists" => 40,
564 "doCodeBlocks" => 50,
565 "doBlockQuotes" => 60,
function runBlockGamut($text) {
    #
    # Hash raw HTML blocks in $text, then run the basic block gamut on the
    # result. The hashing is repeated here (not only once per document)
    # because this method is called on fragments such as list items, whose
    # HTML blocks may have been indented during the first pass.
    #
    $hashed = $this->hashHTMLBlocks($text);
    return $this->runBasicBlockGamut($hashed);
}
function runBasicBlockGamut($text) {
    #
    # Apply every method named in $this->block_gamut to $text, in array
    # order, without hashing HTML blocks first; then form paragraphs.
    # Intended for text whose HTML blocks are already hashed.
    #
    foreach (array_keys($this->block_gamut) as $step) {
        $text = $this->$step($text);
    }

    # formParagraphs() runs last, after all block gamut methods.
    return $this->formParagraphs($text);
}
function doHorizontalRules($text) {
    # Replace lines made of three or more "*", "-" or "_" markers
    # (optionally separated by single spaces, indented by at most two
    # spaces) with a hashed <hr> element on its own line.
    $rule_patterns = array(
        '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ ]*$}mx',
        '{^[ ]{0,2}([ ]? -[ ]?){3,}[ ]*$}mx',
        '{^[ ]{0,2}([ ]? _[ ]?){3,}[ ]*$}mx'
    );

    # The <hr> is hashed up front, whether or not any pattern matches.
    $hr_key = $this->hashBlock("<hr$this->empty_element_suffix");
    $replacement = "\n" . $hr_key . "\n";

    return preg_replace($rule_patterns, $replacement, $text);
}
610 var $span_gamut = array(
612 # These are all the transformations that occur *within* block-level
613 # tags like paragraphs, headers, and list items.
615 "escapeSpecialCharsWithinTagAttributes" => -20,
616 "doCodeSpans" => -10,
617 "encodeBackslashEscapes" => -5,
619 # Process anchor and image tags. Images must come first,
620 # because ![foo][f] looks like an anchor.
621 "doImages" => 10,
622 "doAnchors" => 20,
624 # Make links out of things like `<http://example.com/>`
625 # Must come after doAnchors, because you can use < and >
626 # delimiters in inline links like [this](<url>).
627 "doAutoLinks" => 30,
628 "encodeAmpsAndAngles" => 40,
630 "doItalicsAndBold" => 50,
631 "doHardBreaks" => 60,
function runSpanGamut($text) {
    #
    # Apply every method named in $this->span_gamut to $text, in array
    # order, and return the result.
    #
    foreach (array_keys($this->span_gamut) as $step) {
        $text = $this->$step($text);
    }
    return $text;
}
function doHardBreaks($text) {
    # Turn "two or more spaces followed by a newline" into a hashed <br>
    # tag. The hashed markup includes the trailing newline.
    $break_key = $this->hashSpan("<br$this->empty_element_suffix\n");
    $with_breaks = preg_replace('/ {2,}\n/', $break_key, $text);
    return $with_breaks;
}
function escapeSpecialCharsWithinTagAttributes($text) {
    #
    # Protect HTML tags from later span-level processing: $text is split
    # with tokenizeHTML(), every token of type 'tag' is replaced by its
    # hashSpan() key, and all other tokens are copied through unchanged.
    # Returns $text untouched when $this->no_markup is set.
    #
    if ($this->no_markup) {
        return $text;
    }

    $rebuilt = '';
    foreach ($this->tokenizeHTML($text) as $token) {
        # $token[0] is the token type, $token[1] its text.
        if ($token[0] == 'tag') {
            $rebuilt .= $this->hashSpan($token[1]);
        } else {
            $rebuilt .= $token[1];
        }
    }

    return $rebuilt;
}
680 function doAnchors($text) {
682 # Turn Markdown link shortcuts into XHTML <a> tags.
684 if ($this->in_anchor) return $text;
685 $this->in_anchor = true;
688 # First, handle reference-style links: [link text] [id]
690 $text = preg_replace_callback('{
691 ( # wrap whole match in $1
693 ('.$this->nested_brackets.') # link text = $2
696 [ ]? # one optional space
697 (?:\n[ ]*)? # one optional newline followed by spaces
700 (.*?) # id = $3
703 }xs',
704 array(&$this, '_doAnchors_reference_callback'), $text);
707 # Next, inline-style links: [link text](url "optional title")
709 $text = preg_replace_callback('{
710 ( # wrap whole match in $1
712 ('.$this->nested_brackets.') # link text = $2
714 \( # literal paren
715 [ ]*
717 <(\S*)> # href = $3
719 ('.$this->nested_url_parenthesis.') # href = $4
721 [ ]*
722 ( # $5
723 ([\'"]) # quote char = $6
724 (.*?) # Title = $7
725 \6 # matching quote
726 [ ]* # ignore any spaces/tabs between closing quote and )
727 )? # title is optional
730 }xs',
731 array(&$this, '_DoAnchors_inline_callback'), $text);
734 # Last, handle reference-style shortcuts: [link text]
735 # These must come last in case you've also got [link test][1]
736 # or [link test](/foo)
738 // $text = preg_replace_callback('{
739 // ( # wrap whole match in $1
740 // \[
741 // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
742 // \]
743 // )
744 // }xs',
745 // array(&$this, '_doAnchors_reference_callback'), $text);
747 $this->in_anchor = false;
748 return $text;
function _doAnchors_reference_callback($matches) {
    # Callback for reference-style links. $matches[1] is the whole match,
    # $matches[2] the link text and $matches[3] the link id. Returns a
    # hashed <a> element when the id is a known link definition, otherwise
    # the original text unchanged.
    $whole_match = $matches[1];
    $link_text   = $matches[2];
    $link_id     = isset($matches[3]) ? $matches[3] : '';

    # An empty id, as in [this][], means the link text is the id.
    if ($link_id == "") {
        $link_id = $link_text;
    }

    # Ids are matched lower-cased, with embedded newlines turned into
    # single spaces.
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    if (!isset($this->urls[$link_id])) {
        return $whole_match;
    }

    $url = $this->encodeAmpsAndAngles($this->urls[$link_id]);
    $anchor = "<a href=\"$url\"";

    if (isset($this->titles[$link_id])) {
        $title = $this->encodeAmpsAndAngles($this->titles[$link_id]);
        $anchor .= " title=\"$title\"";
    }

    $anchor .= ">" . $this->runSpanGamut($link_text) . "</a>";

    return $this->hashSpan($anchor);
}
function _doAnchors_inline_callback($matches) {
    # Callback for inline-style links. $matches[2] is the link text,
    # $matches[3] the URL written as <url>, $matches[4] the URL written
    # bare, and $matches[7] the optional title. Returns the hashed <a>
    # element.
    $inner = $this->runSpanGamut($matches[2]);

    # Prefer the <url> form; fall back to the bare form when it is empty.
    $href = ($matches[3] == '') ? $matches[4] : $matches[3];
    $href = $this->encodeAmpsAndAngles($href);

    $anchor = "<a href=\"$href\"";

    if (isset($matches[7])) {
        $title = str_replace('"', '&quot;', $matches[7]);
        $title = $this->encodeAmpsAndAngles($title);
        $anchor .= " title=\"$title\"";
    }

    # NOTE(review): the link text goes through runSpanGamut() a second
    # time here, on top of the call at the top of this method. Kept to
    # preserve existing output; confirm whether the second pass is needed.
    $inner = $this->runSpanGamut($inner);
    $anchor .= ">" . $inner . "</a>";

    return $this->hashSpan($anchor);
}
806 function doImages($text) {
808 # Turn Markdown image shortcuts into <img> tags.
811 # First, handle reference-style labeled images: ![alt text][id]
813 $text = preg_replace_callback('{
814 ( # wrap whole match in $1
816 ('.$this->nested_brackets.') # alt text = $2
819 [ ]? # one optional space
820 (?:\n[ ]*)? # one optional newline followed by spaces
823 (.*?) # id = $3
827 }xs',
828 array(&$this, '_doImages_reference_callback'), $text);
831 # Next, handle inline images: ![alt text](url "optional title")
832 # Don't forget: encode * and _
834 $text = preg_replace_callback('{
835 ( # wrap whole match in $1
837 ('.$this->nested_brackets.') # alt text = $2
839 \s? # One optional whitespace character
840 \( # literal paren
841 [ ]*
843 <(\S*)> # src url = $3
845 ('.$this->nested_url_parenthesis.') # src url = $4
847 [ ]*
848 ( # $5
849 ([\'"]) # quote char = $6
850 (.*?) # title = $7
851 \6 # matching quote
852 [ ]*
853 )? # title is optional
856 }xs',
857 array(&$this, '_doImages_inline_callback'), $text);
859 return $text;
function _doImages_reference_callback($matches) {
    # Callback for reference-style images. $matches[1] is the whole match,
    # $matches[2] the alt text and $matches[3] the link id. Returns a
    # hashed <img> element when the id is a known link definition,
    # otherwise the original text unchanged.
    $whole_match = $matches[1];
    $alt_text    = $matches[2];

    # An empty id, as in ![this][], means the alt text is the id.
    $link_id = strtolower($matches[3]);
    if ($link_id == "") {
        $link_id = strtolower($alt_text);
    }

    # Unknown id: leave the source text intact.
    if (!isset($this->urls[$link_id])) {
        return $whole_match;
    }

    $alt_text = str_replace('"', '&quot;', $alt_text);
    $url      = $this->urls[$link_id];

    $img = "<img src=\"$url\" alt=\"$alt_text\"";
    if (isset($this->titles[$link_id])) {
        $title = $this->titles[$link_id];
        $img .= " title=\"$title\"";
    }
    $img .= $this->empty_element_suffix;

    return $this->hashSpan($img);
}
function _doImages_inline_callback($matches) {
    # Callback for inline images. $matches[2] is the alt text, $matches[3]
    # the URL written as <url>, $matches[4] the URL written bare, and
    # $matches[7] the optional title. Returns the hashed <img> element.
    $alt = str_replace('"', '&quot;', $matches[2]);

    # Prefer the <url> form; fall back to the bare form when it is empty.
    $src = ($matches[3] == '') ? $matches[4] : $matches[3];

    $img = "<img src=\"$src\" alt=\"$alt\"";

    if (isset($matches[7])) {
        # Double quotes are escaped so the title can sit in an attribute.
        $title = str_replace('"', '&quot;', $matches[7]);
        $img .= " title=\"$title\"";
    }

    $img .= $this->empty_element_suffix;

    return $this->hashSpan($img);
}
906 function doHeaders($text) {
907 # Setext-style headers:
908 # Header 1
909 # ========
911 # Header 2
912 # --------
914 $text = preg_replace_callback('{ ^(.+?)[ ]*\n=+[ ]*\n+ }mx',
915 array(&$this, '_doHeaders_callback_setext_h1'), $text);
916 $text = preg_replace_callback('{ ^(.+?)[ ]*\n-+[ ]*\n+ }mx',
917 array(&$this, '_doHeaders_callback_setext_h2'), $text);
919 # atx-style headers:
920 # # Header 1
921 # ## Header 2
922 # ## Header 2 with closing hashes ##
923 # ...
924 # ###### Header 6
926 $text = preg_replace_callback('{
927 ^(\#{1,6}) # $1 = string of #\'s
928 [ ]*
929 (.+?) # $2 = Header text
930 [ ]*
931 \#* # optional closing #\'s (not counted)
933 }xm',
934 array(&$this, '_doHeaders_callback_atx'), $text);
936 return $text;
function _doHeaders_callback_setext_h1($matches) {
    # Callback for "=====" underlined headers: runs the span gamut on the
    # header text ($matches[1]), wraps it in <h1> and returns the hashed
    # block padded with newlines.
    $inner = $this->runSpanGamut($matches[1]);
    $key = $this->hashBlock("<h1>" . $inner . "</h1>");
    return "\n" . $key . "\n\n";
}
function _doHeaders_callback_setext_h2($matches) {
    # Callback for "-----" underlined headers: runs the span gamut on the
    # header text ($matches[1]), wraps it in <h2> and returns the hashed
    # block padded with newlines.
    $inner = $this->runSpanGamut($matches[1]);
    $key = $this->hashBlock("<h2>" . $inner . "</h2>");
    return "\n" . $key . "\n\n";
}
function _doHeaders_callback_atx($matches) {
    # Callback for "#"-prefixed headers. The header level is the length of
    # the leading "#" run ($matches[1]); $matches[2] is the header text.
    # Returns the hashed <hN> block padded with newlines.
    $depth = strlen($matches[1]);
    $inner = $this->runSpanGamut($matches[2]);
    $key = $this->hashBlock("<h$depth>" . $inner . "</h$depth>");
    return "\n" . $key . "\n\n";
}
953 function doLists($text) {
955 # Form HTML ordered (numbered) and unordered (bulleted) lists.
957 $less_than_tab = $this->tab_width - 1;
959 # Re-usable patterns to match list item bullets and number markers:
960 $marker_ul = '[*+-]';
961 $marker_ol = '\d+[.]';
962 $marker_any = "(?:$marker_ul|$marker_ol)";
964 $markers = array($marker_ul, $marker_ol);
966 foreach ($markers as $marker) {
967 # Re-usable pattern to match any entire ul or ol list:
968 $whole_list = '
969 ( # $1 = whole list
970 ( # $2
971 [ ]{0,'.$less_than_tab.'}
972 ('.$marker.') # $3 = first list item marker
973 [ ]+
975 (?s:.+?)
976 ( # $4
979 \n{2,}
980 (?=\S)
981 (?! # Negative lookahead for another list item marker
982 [ ]*
983 '.$marker.'[ ]+
987 '; // mx
989 # We use a different prefix before nested lists than top-level lists.
990 # See extended comment in _ProcessListItems().
992 if ($this->list_level) {
993 $text = preg_replace_callback('{
995 '.$whole_list.'
996 }mx',
997 array(&$this, '_doLists_callback'), $text);
999 else {
1000 $text = preg_replace_callback('{
1001 (?:(?<=\n)\n|\A\n?) # Must eat the newline
1002 '.$whole_list.'
1003 }mx',
1004 array(&$this, '_doLists_callback'), $text);
1008 return $text;
function _doLists_callback($matches) {
    # Callback for doLists(). $matches[1] is the whole list and $matches[3]
    # its first item marker. Returns the hashed <ul> or <ol> block padded
    # with newlines.
    $bullet_pattern = '[*+-]';
    $number_pattern = '\d+[.]';

    # A first marker containing *, + or - makes this an unordered list;
    # the items are then split on that same family of markers.
    if (preg_match("/$bullet_pattern/", $matches[3])) {
        $tag = "ul";
        $item_marker = $bullet_pattern;
    } else {
        $tag = "ol";
        $item_marker = $number_pattern;
    }

    $items = $this->processListItems($matches[1] . "\n", $item_marker);

    $key = $this->hashBlock("<$tag>\n" . $items . "</$tag>");
    return "\n" . $key . "\n\n";
}
1028 var $list_level = 0;
function processListItems($list_str, $marker_any) {
    #
    # Split the text of one ordered or unordered list into its items and
    # return the converted items. $marker_any is the regex fragment that
    # matches this list's item markers.
    #
    # $this->list_level counts how deeply nested in lists we currently are:
    # it is incremented on entry and decremented on exit. doLists() reads it
    # to choose between its top-level and nested list patterns, so that a
    # wrapped paragraph line such as
    #
    #     I recommend upgrading to version
    #     8. Oops, now this line is treated
    #
    # stays a paragraph outside a list but starts a sub-list inside one.
    #
    $this->list_level++;

    # Collapse trailing blank lines down to one newline.
    $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_pattern = '{
        (\n)?                           # leading line = $1
        (^[ ]*)                         # leading whitespace = $2
        ('.$marker_any.') [ ]+          # list marker = $3
        ((?s:.+?))                      # list item text = $4
        (?:(\n+(?=\n))|\n)              # tailing blank line = $5
        (?= \n* (\z | \2 ('.$marker_any.') [ ]+))
        }xm';

    $items = preg_replace_callback(
        $item_pattern,
        array(&$this, '_processListItems_callback'),
        $trimmed);

    $this->list_level--;
    return $items;
}
function _processListItems_callback($matches) {
    # Callback for processListItems(). $matches[4] is the item text,
    # $matches[1] a leading newline and $matches[5] a trailing blank line
    # (either may be absent). Returns the item wrapped in <li>...</li>.
    $body = $matches[4];

    $has_leading_line   = !empty($matches[1]);
    $has_trailing_blank = !empty($matches[5]);

    if ($has_leading_line || $has_trailing_blank ||
        preg_match('/\n{2,}/', $body))
    {
        # Item is set off by blank lines or contains one: process its
        # content through the block gamut.
        $body = $this->runBlockGamut($this->outdent($body) . "\n");
    }
    else {
        # Tight item: look for sub-lists, strip trailing newlines, then
        # apply the span gamut only.
        $body = $this->doLists($this->outdent($body));
        $body = preg_replace('/\n+$/', '', $body);
        $body = $this->runSpanGamut($body);
    }

    return "<li>" . $body . "</li>\n";
}
1096 function doCodeBlocks($text) {
1098 # Process Markdown `<pre><code>` blocks.
1100 $text = preg_replace_callback('{
1101 (?:\n\n|\A)
1102 ( # $1 = the code block -- one or more lines, starting with a space/tab
1104 (?:[ ]{'.$this->tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
1105 .*\n+
1108 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1109 }xm',
1110 array(&$this, '_doCodeBlocks_callback'), $text);
1112 return $text;
function _doCodeBlocks_callback($matches) {
    # Callback for doCodeBlocks(). Outdents and encodes the matched code
    # ($matches[1]), strips leading and trailing newlines, and returns the
    # hashed <pre><code> block surrounded by blank lines.
    $code = $this->outdent($matches[1]);
    $code = $this->encodeCode($code);

    # Remove newlines at the very start and very end of the code.
    $code = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $code);

    $key = $this->hashBlock("<pre><code>" . $code . "\n</code></pre>");

    return "\n\n" . $key . "\n\n";
}
function doCodeSpans($text) {
    #
    # Convert backtick-delimited runs into <code> spans.
    #
    # * The closing delimiter must be a run of backticks of the same length
    #   as the opening one, so a longer delimiter allows literal backticks
    #   inside the span:
    #
    #       Just type ``foo `bar` baz`` at the prompt.
    #
    #   becomes
    #
    #       <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    #
    # * Spaces just inside the delimiters are stripped by the callback, so
    #   a span can begin or end with a backtick:
    #
    #       ... type `` `bar` `` ...
    #
    #   becomes
    #
    #       ... type <code>`bar`</code> ...
    #
    # * An opening backtick preceded by a backslash does not start a span.
    #
    $span_pattern = '@
        (?<!\\\)    # Character before opening ` can\'t be a backslash
        (`+)        # $1 = Opening run of `
        (.+?)       # $2 = The code block
        (?<!`)
        \1          # Matching closer
        (?!`)
        @xs';

    $converted = preg_replace_callback(
        $span_pattern,
        array(&$this, '_doCodeSpans_callback'),
        $text);

    return $converted;
}
function _doCodeSpans_callback($matches) {
	#
	# preg_replace_callback handler: build the <code> element for one span.
	# $matches[2] is the text between the backtick delimiters.
	#
	# Strip leading, then trailing, spaces.
	$inner = preg_replace(array('/^[ ]*/', '/[ ]*$/'), '', $matches[2]);
	$inner = $this->encodeCode($inner);

	return $this->hashSpan("<code>" . $inner . "</code>");
}
function encodeCode($_) {
	#
	# Escape the characters that must be literal inside code output.
	#
	# `&` is replaced first so that the ampersands introduced by the
	# `&lt;` / `&gt;` replacements are not escaped a second time. Existing
	# entities in the input are deliberately escaped too: inside code they
	# are plain text.
	#
	# (Replacing the Markdown escape-table characters is disabled here.)
	#
	return str_replace(
		array('&',     '<',    '>'),
		array('&amp;', '&lt;', '&gt;'),
		$_);
}
1196 function doItalicsAndBold($text) {
1197 # <strong> must go first:
1198 $text = preg_replace_callback('{
1199 ( # $1: Marker
1200 (?<!\*\*) \* | # (not preceded by two chars of
1201 (?<!__) _ # the same marker)
1204 (?=\S) # Not followed by whitespace
1205 (?!\1\1) # or two others marker chars.
1206 ( # $2: Content
1208 [^*_]+? # Anthing not em markers.
1210 # Balence any regular emphasis inside.
1211 \1 (?=\S) .+? (?<=\S) \1
1213 . # Allow unbalenced * and _.
1216 (?<=\S) \1\1 # End mark not preceded by whitespace.
1217 }sx',
1218 array(&$this, '_doItalicAndBold_strong_callback'), $text);
1219 # Then <em>:
1220 $text = preg_replace_callback(
1221 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
1222 array(&$this, '_doItalicAndBold_em_callback'), $text);
1224 return $text;
function _doItalicAndBold_em_callback($matches) {
	# Emphasis content ($matches[2]) gets the span transformations applied
	# before it is wrapped in <em> and hashed.
	$inner = $this->runSpanGamut($matches[2]);
	return $this->hashSpan("<em>" . $inner . "</em>");
}
function _doItalicAndBold_strong_callback($matches) {
	# Strong content ($matches[2]) gets the span transformations applied
	# before it is wrapped in <strong> and hashed.
	$inner = $this->runSpanGamut($matches[2]);
	return $this->hashSpan("<strong>" . $inner . "</strong>");
}
1238 function doBlockQuotes($text) {
1239 $text = preg_replace_callback('/
1240 ( # Wrap whole match in $1
1242 ^[ ]*>[ ]? # ">" at the start of a line
1243 .+\n # rest of the first line
1244 (.+\n)* # subsequent consecutive lines
1245 \n* # blanks
1248 /xm',
1249 array(&$this, '_doBlockQuotes_callback'), $text);
1251 return $text;
function _doBlockQuotes_callback($matches) {
	#
	# preg_replace_callback handler: convert one matched blockquote
	# ($matches[1]) to a hashed <blockquote> element.
	#
	$bq = $matches[1];

	# Trim one level of quoting, and empty out whitespace-only lines.
	$bq = preg_replace(array('/^[ ]*>[ ]?/m', '/^[ ]+$/m'), '', $bq);
	$bq = $this->runBlockGamut($bq);		# recurse

	# Indent every line of the converted content.
	$bq = preg_replace('/^/m', " ", $bq);

	# That indentation would end up inside <pre> content, so it is removed
	# again there. The callback name is spelled exactly like the method.
	$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		array(&$this, '_doBlockQuotes_callback2'), $bq);

	return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
function _doBlockQuotes_callback2($matches) {
	# Remove the leading indentation from every line of a <pre> section
	# ($matches[1]) found inside a blockquote.
	return preg_replace('/^ /m', '', $matches[1]);
}
function formParagraphs($text) {
	#
	#	Params:
	#		$text - string to process with html <p> tags
	#
	# Returns the text with every chunk that is not a hashed HTML block
	# wrapped in <p>...</p>, and every hashed HTML block swapped back in.
	#
	# Strip leading and trailing newlines, then split on blank lines.
	$text = preg_replace('/\A\n+/', '', $text);
	$text = preg_replace('/\n+\z/', '', $text);
	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# First pass: wrap <p> tags around everything that is not a block hash.
	#
	foreach (array_keys($grafs) as $i) {
		if (isset($this->html_blocks[$grafs[$i]])) continue;

		$para = $this->runSpanGamut($grafs[$i]);
		# Leading spaces are replaced by the opening tag.
		$para = preg_replace('/^([ ]*)/', "<p>", $para) . "</p>";
		$grafs[$i] = $this->unhash($para);
	}

	#
	# Second pass: unhashify HTML blocks.
	#
	foreach (array_keys($grafs) as $i) {
		$candidate = $grafs[$i];
		if (isset($this->html_blocks[$candidate])) {
			$grafs[$i] = $this->html_blocks[$candidate];
		}
	}

	return implode("\n\n", $grafs);
}
function encodeAmpsAndAngles($text) {
	#
	# Smart processing for ampersands and angle brackets that need to be
	# encoded.
	#
	# With $this->no_entities set, every `&` and `<` is encoded. Otherwise
	# only an `&` that does not start an entity, and a `<` that does not
	# look like the start of a tag, are encoded.
	#
	if ($this->no_entities) {
		return str_replace(array('&', '<'), array('&amp;', '&lt;'), $text);
	}

	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
	#   http://bumppo.net/projects/amputator/
	$bare_amp  = '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/';
	# Naked <'s
	$bare_less = '{<(?![a-z/?\$!%])}i';

	$text = preg_replace($bare_amp, '&amp;', $text);
	$text = preg_replace($bare_less, '&lt;', $text);

	return $text;
}
function encodeBackslashEscapes($text) {
	#
	#	Parameter:  String.
	#	Returns:    The string, after processing the backslash escape
	#	            sequences listed in $this->backslash_escape_table.
	#
	# Table order matters: escaped backslashes must be processed first, so
	# they are expected to be the first entry.
	#
	foreach (array_keys($this->backslash_escape_table) as $sequence) {
		$hashed = $this->hashSpan($this->backslash_escape_table[$sequence]);
		$text = str_replace($sequence, $hashed, $text);
	}
	return $text;
}
1381 function doAutoLinks($text) {
1382 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}',
1383 array(&$this, '_doAutoLinks_url_callback'), $text);
1385 # Email addresses: <address@domain.foo>
1386 $text = preg_replace_callback('{
1388 (?:mailto:)?
1390 [-.\w\x80-\xFF]+
1392 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1395 }xi',
1396 array(&$this, '_doAutoLinks_email_callback'), $text);
1398 return $text;
function _doAutoLinks_url_callback($matches) {
	# $matches[1] is the URL found between `<` and `>`; it is used both as
	# the href and as the link text.
	$url = $this->encodeAmpsAndAngles($matches[1]);
	return $this->hashSpan("<a href=\"$url\">$url</a>");
}
function _doAutoLinks_email_callback($matches) {
	# $matches[1] is the e-mail address; the obfuscated mailto link built
	# from it is hashed as a span.
	$link = $this->encodeEmailAddress($matches[1]);
	return $this->hashSpan($link);
}
function encodeEmailAddress($addr) {
	#
	#	Input: an email address, e.g. "foo@example.com"
	#
	#	Output: the email address as a mailto link, with the characters of
	#	the address written as a mix of raw characters, decimal entities and
	#	hex entities, in the hope of foiling address-harvesting spam bots.
	#	The same input always produces the same output.
	#
	#	Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
	#	With some optimizations by Milian Wolff.
	#
	$full = "mailto:" . $addr;
	$pieces = preg_split('/(?<!^)(?!$)/', $full);

	# Deterministic seed derived from the address itself.
	$seed = (int)abs(crc32($full) / strlen($full));

	$total = count($pieces);
	for ($i = 0; $i < $total; $i++) {
		$code = ord($pieces[$i]);

		# Non-ascii chars are passed through untouched.
		if ($code >= 128) continue;

		# Pseudo-random pick: roughly 10% raw, 45% hex, 45% dec.
		$roll = ($seed * (1 + $i)) % 100;

		# '@' is never left raw.
		if ($roll > 90 && $pieces[$i] != '@') continue;

		$pieces[$i] = ($roll < 45)
			? '&#x'.dechex($code).';'
			: '&#'.$code.';';
	}

	$href  = implode('', $pieces);
	$label = implode('', array_slice($pieces, 7));	# text without `mailto:`

	return "<a href=\"$href\">$label</a>";
}
function tokenizeHTML($str) {
	#
	#   Parameter:  String containing HTML + Markdown markup.
	#   Returns:    An array of the tokens comprising the input
	#               string. Each token is either a tag or a run of text
	#               between tags. Each element of the array is a
	#               two-element array; the first is either 'tag' or 'text';
	#               the second is the actual value.
	#   Note:       Markdown code spans are taken into account: no tag token
	#               is generated within a code span.
	#
	$tokens = array();

	while ($str != "") {
		#
		# Each loop iteration searches for either the next tag or the next
		# opening code span marker. If a code span marker is found, the
		# code span is extracted in its entirety and results in an extra
		# text token.
		#
		$parts = preg_split('{
			(
				(?<![`\\\\])
				`+						# code span marker
			|
				<!--    .*?     -->		# comment
			|
				<\?.*?\?> | <%.*?%>		# processing instruction
			|
				<[/!$]?[-a-zA-Z0-9:]+	# regular tags
				(?>
					\s
					(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
				)?
				>
			)
			}xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);

		# Create token from text preceding tag.
		if ($parts[0] != "") {
			$tokens[] = array('text', $parts[0]);
		}

		# Check if we reach the end.
		if (count($parts) < 3) {
			break;
		}

		# Create token from tag or code span.
		# Square-bracket offset: the `{0}` form is no longer valid PHP.
		if ($parts[1][0] == "`") {
			$tokens[] = array('text', $parts[1]);
			$str = $parts[2];

			# Skip the whole code span, pass as text token.
			if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/sm',
				$str, $matches))
			{
				$tokens[] = array('text', $matches[1]);
				$str = $matches[2];
			}
		} else {
			$tokens[] = array('tag', $parts[1]);
			$str = $parts[2];
		}
	}

	return $tokens;
}
function outdent($text) {
	#
	# Remove one level of line-leading tabs or spaces: one tab, or between
	# one and $this->tab_width spaces, at the start of every line.
	#
	$one_level = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
	return preg_replace($one_level, "", $text);
}
1531 # String length function for detab. `_initDetab` will create a function to
1532 # hanlde UTF-8 if the default function does not exist.
1533 var $utf8_strlen = 'mb_strlen';
function detab($text) {
	#
	# Replace tabs with the appropriate amount of space.
	#
	# Every line is cut at its tab characters and glued back together with
	# enough spaces before each piece to reach the next multiple of
	# $this->tab_width. Width is measured with the function named in
	# $this->utf8_strlen. Each line, including the last, is written back
	# followed by a newline.
	#
	$measure = $this->utf8_strlen;
	$width   = $this->tab_width;
	$out     = "";

	foreach (explode("\n", $text) as $line) {
		$pieces  = explode("\t", $line);
		$rebuilt = array_shift($pieces);	# text before the first tab

		foreach ($pieces as $piece) {
			$pad = $width - $measure($rebuilt, 'UTF-8') % $width;
			$rebuilt .= str_repeat(" ", $pad) . $piece;
		}

		$out .= "$rebuilt\n";
	}

	return $out;
}
function _initDetab() {
	#
	# Check for the availability of the function named in the `utf8_strlen`
	# property (initially `mb_strlen`). If it is not available, point the
	# property at a fallback that loosely counts the number of UTF-8
	# characters with a regular expression.
	#
	# The fallback is a guarded named function rather than the result of
	# create_function(), which no longer exists in current PHP. It is
	# called as $fn($text, 'UTF-8'); the second argument is ignored.
	#
	if (function_exists($this->utf8_strlen)) return;

	if (!function_exists('_markdown_utf8_strlen_fallback')) {
		function _markdown_utf8_strlen_fallback($text) {
			# One match per ASCII/continuation byte or per lead byte
			# followed by its continuation bytes.
			return preg_match_all(
				'/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
				$text, $m);
		}
	}

	$this->utf8_strlen = '_markdown_utf8_strlen_fallback';
}
function unhash($text) {
	#
	# Swap back in all the tags recorded in $this->html_hashes: every
	# occurrence of a hash key in $text is replaced by the stored text.
	#
	$keys     = array_keys($this->html_hashes);
	$contents = array_values($this->html_hashes);

	return str_replace($keys, $contents, $text);
}
1589 # Markdown Extra Parser Class
1592 class MarkdownExtra_Parser extends Markdown_Parser {
1594 # Prefix for footnote ids.
1595 var $fn_id_prefix = "";
1597 # Optional title attribute for footnote links and backlinks.
1598 var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
1599 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1601 # Optional class attribute for footnote links and backlinks.
1602 var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
1603 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
function MarkdownExtra_Parser() {
	#
	# Constructor function. Initialize the parser object.
	#
	# Everything here runs before the parent constructor on purpose: the
	# parent builds its escape table from $escape_chars and sorts the
	# gamut arrays.
	#
	$this->escape_chars .= ':|';

	# Extra document, block, and span transformations, keyed by the gamut
	# property they are merged into (method name => priority).
	$extra_gamuts = array(
		'document_gamut' => array(
			"stripFootnotes"     => 15,
			"stripAbbreviations" => 25,
			"appendFootnotes"    => 50,
			),
		'block_gamut' => array(
			"doTables"           => 15,
			"doDefLists"         => 45,
			),
		'span_gamut' => array(
			"doFootnotes"        => 5,
			"doAbbreviations"    => 70,
			),
		);

	foreach ($extra_gamuts as $gamut => $entries) {
		# Array union: entries already present keep their priority.
		$this->$gamut = $this->$gamut + $entries;
	}

	parent::Markdown_Parser();
}
1634 # Extra hashes used during extra transformations.
1635 var $footnotes = array();
1636 var $footnotes_ordered = array();
1637 var $abbr_desciptions = array();
1638 var $abbr_matches = array();
1639 var $html_cleans = array();
1641 # Status flag to avoid invalid nesting.
1642 var $in_footnote = false;
function transform($text) {
	#
	# Reset the Extra-specific state, then delegate to the parent
	# transform.
	#
	# Without the reset, data collected for one article would leak into
	# the next one when several articles are converted with the same
	# parser (e.g. an index page that shows the N most recent articles).
	#
	$per_document = array(
		'footnotes',
		'footnotes_ordered',
		'abbr_desciptions',
		'abbr_matches',
		'html_cleans',
		);
	foreach ($per_document as $property) {
		$this->$property = array();
	}

	return parent::transform($text);
}
1664 ### HTML Block Parser ###
1666 # Tags that are always treated as block tags:
1667 var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1669 # Tags treated as block tags only if the opening tag is alone on it's line:
1670 var $context_block_tags = 'script|noscript|math|ins|del';
1672 # Tags where markdown="1" default to span mode:
1673 var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1675 # Tags which must not have their contents modified, no matter where
1676 # they appear:
1677 var $clean_tags = 'script|math';
1679 # Tags that do not need to be closed.
1680 var $auto_close_tags = 'hr|img';
function hashHTMLBlocks($text) {
	#
	# Hashify HTML Blocks and "clean tags".
	#
	# Only block-level HTML is hashed, because paragraphs wrapped in
	# non-block-level tags (anchors, phrase emphasis, spans) must still
	# get their <p> tags.
	#
	# The work is done by two methods that call each other:
	# _hashHTMLBlocks_inMarkdown hands block tags to
	# _hashHTMLBlocks_inHTML, which calls back into
	# _hashHTMLBlocks_inMarkdown for the content of a tag carrying the
	# markdown="1" attribute.
	#
	# Only the processed text is of interest here; the "remaining text"
	# element of the result is discarded.
	$result = $this->_hashHTMLBlocks_inMarkdown($text);

	return $result[0];
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
									$enclosing_tag = '', $span = false)
{
	#
	# Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
	#
	# *   $indent is the number of spaces to be ignored when checking for
	#     code blocks. Without it, the indented content of a tag carrying
	#     markdown="1" that is itself nested in another indented tag would
	#     be mistaken for a Markdown code block. If you don't like this,
	#     just don't indent the tag on which you apply the markdown="1"
	#     attribute.
	#
	# *   If $enclosing_tag is not empty, stops at the first unmatched
	#     closing tag with that name. Nested tags supported.
	#
	# *   If $span is true, text inside must be treated as span. A
	#     span-level hash is added after each newline so that no block
	#     element is triggered.
	#
	# Returns an array of that form: ( processed text , remaining text )
	#
	if ($text === '') return array('', '');

	# Regex to check for the presence of newlines around a block tag.
	$newline_match_before = '/(?:^\n?|\n\n)*$/';
	$newline_match_after =
		'{
			^						# Start of text following the tag.
			(?:[ ]*<!--.*?-->)?		# Optional comment.
			[ ]*\n					# Must be followed by newline.
		}xs';

	# Regex to match any tag.
	$block_tag_match =
		'{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					(?:				# Tag name.
						'.$this->block_tags.'			|
						'.$this->context_block_tags.'	|
						'.$this->clean_tags.'			|
						(?!\s)'.$enclosing_tag.'
					)
					\s*				# Whitespace.
					(?>
						".*?"		|	# Double quotes (can contain `>`)
						\'.*?\'		|	# Single quotes (can contain `>`)
						.+?				# Anything but quotes and `>`.
					)*?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';

	$depth = 0;		# Current depth inside the tag tree.
	$parsed = "";	# Parsed text that will be returned.

	#
	# Loop through every tag until we find the closing tag of the parent
	# or loop until reaching the end of text if no parent tag specified.
	#
	do {
		#
		# Split the text using the first $block_tag_match pattern found.
		# Text before pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made
		# by the pattern.
		#
		$parts = preg_split($block_tag_match, $text, 2,
							PREG_SPLIT_DELIM_CAPTURE);

		# If in Markdown span mode, add a empty-string span-level hash
		# after each newline to prevent triggering any block element.
		if ($span) {
			$void = $this->hashSpan("", true) ;
			$newline = $this->hashSpan("", true) . "\n";
			$parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
		}

		$parsed .= $parts[0]; # Text before current tag.

		# If end of $text has been reached. Stop loop.
		if (count($parts) < 3) {
			$text = "";
			break;
		}

		$tag  = $parts[1]; # Tag to handle.
		$text = $parts[2]; # Remaining text after current tag.

		#
		# Check for: Tag inside code block or span
		#
		if (# Find current paragraph
			preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
			(
			# Then match in it either a code block...
			preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
						'(?!\n)$/', $matches[1], $x) ||
			# ...or unbalanced code span markers. (the regex matches balanced)
			!preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
						 $matches[1])
			))
		{
			# Tag is in code block or span and may not be a tag at all. So we
			# simply skip the first char (should be a `<`).
			$parsed .= $tag[0];
			$text = substr($tag, 1) . $text; # Put back $tag minus first char.
		}
		#
		# Check for: Opening Block level tag or
		#            Opening Content Block tag (like ins and del)
		#               used as a block tag (tag is alone on its line).
		#
		else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||
			(	preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&
				preg_match($newline_match_before, $parsed) &&
				preg_match($newline_match_after, $text)	)
			)
		{
			# Need to parse tag and following text using the HTML parser.
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

			# Make sure it stays outside of any paragraph by adding newlines.
			$parsed .= "\n\n$block_text\n\n";
		}
		#
		# Check for: Clean tag (like script, math)
		#            HTML Comments, processing instructions.
		#
		else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Need to parse tag and following text using the HTML parser.
			# (don't check for markdown attribute)
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

			$parsed .= $block_text;
		}
		#
		# Check for: Tag with same name as enclosing tag.
		#
		else if ($enclosing_tag !== '' &&
			# Same name as enclosing tag.
			preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
		{
			#
			# Increase/decrease nested tag count. A self-closed tag
			# (ending in `/>`) leaves the count alone.
			#
			if ($tag[1] == '/')						$depth--;
			else if ($tag[strlen($tag)-2] != '/')	$depth++;

			if ($depth < 0) {
				#
				# Going out of parent element. Clean up and break so we
				# return to the calling function.
				#
				$text = $tag . $text;
				break;
			}

			$parsed .= $tag;
		}
		else {
			$parsed .= $tag;
		}
	} while ($depth >= 0);

	return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
	#
	# Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
	#
	# *   Calls $hash_method to convert any blocks.
	# *   Stops when the first opening tag closes.
	# *   $md_attr indicate if the use of the `markdown="1"` attribute is
	#     allowed (it is not inside clean tags). Accepted values of the
	#     attribute are exactly `1`, `block` and `span`.
	#
	# Returns an array of that form: ( processed text , remaining text )
	#
	if ($text === '') return array('', '');

	# Regex to match `markdown` attribute inside of a tag.
	$markdown_attr_match = '
		{
			\s*			# Eat whitespace before the `markdown` attribute
			markdown
			\s*=\s*
			(?:
				(["\'])		# $1: quote delimiter
				(.*?)		# $2: attribute value
				\1			# matching delimiter
			|
				([^\s>]*)	# $3: unquoted attribute value
			)
			()				# $4: make $3 always defined (avoid warnings)
		}xs';

	# Regex to match any tag.
	$tag_match = '{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					[\w:$]+			# Tag name.
					\s*				# Whitespace.
					(?>
						".*?"		|	# Double quotes (can contain `>`)
						\'.*?\'		|	# Single quotes (can contain `>`)
						.+?				# Anything but quotes and `>`.
					)*?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';

	$original_text = $text;		# Save original text in case of failure.

	$depth		= 0;	# Current depth inside the tag tree.
	$block_text	= "";	# Temporary text holder for current text.
	$parsed		= "";	# Parsed text that will be returned.

	#
	# Get the name of the starting tag. It stays empty when the text does
	# not start with a named tag (e.g. a comment).
	#
	$base_tag_name = '';
	if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
		$base_tag_name = $matches[1];

	#
	# Loop through every tag until we find the corresponding closing tag.
	#
	do {
		#
		# Split the text using the first $tag_match pattern found.
		# Text before pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made
		# by the pattern.
		#
		$parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

		if (count($parts) < 3) {
			#
			# End of $text reached with unbalanced tag(s).
			# In that case, we return original text unchanged and pass the
			# first character as filtered to prevent an infinite loop in the
			# parent function.
			#
			return array($original_text[0], substr($original_text, 1));
		}

		$block_text .= $parts[0]; # Text before current tag.
		$tag         = $parts[1]; # Tag to handle.
		$text        = $parts[2]; # Remaining text after current tag.

		#
		# Check for: Auto-close tag (like <hr/>)
		#            Comments and Processing Instructions.
		#
		if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Just add the tag to the block as if it was text.
			$block_text .= $tag;
		}
		else {
			#
			# Increase/decrease nested tag count. Only do so if
			# the tag's name match base tag's.
			#
			if (preg_match("{^</?$base_tag_name\b}", $tag)) {
				if ($tag[1] == '/')						$depth--;
				else if ($tag[strlen($tag)-2] != '/')	$depth++;
			}

			#
			# Check for `markdown="1"` attribute and handle it.
			# The alternatives are grouped so that both anchors apply to
			# each of them; ungrouped, `block` would match anywhere in
			# the value.
			#
			if ($md_attr &&
				preg_match($markdown_attr_match, $tag, $attr_m) &&
				preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
			{
				# Remove `markdown` attribute from opening tag.
				$tag = preg_replace($markdown_attr_match, '', $tag);

				# Check if text inside this tag must be parsed in span mode.
				$this->mode = $attr_m[2] . $attr_m[3];
				$span_mode = $this->mode == 'span' || $this->mode != 'block' &&
					preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);

				# Calculate indent before tag.
				preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
				$indent = strlen($matches[1]);

				# End preceding block with this tag.
				$block_text .= $tag;
				$parsed .= $this->$hash_method($block_text);

				# Get enclosing tag name for the ParseMarkdown function.
				preg_match('/^<([\w:$]*)\b/', $tag, $matches);
				$tag_name = $matches[1];

				# Parse the content using the HTML-in-Markdown parser.
				list ($block_text, $text)
					= $this->_hashHTMLBlocks_inMarkdown($text, $indent,
													$tag_name, $span_mode);

				# Outdent markdown text.
				if ($indent > 0) {
					$block_text = preg_replace("/^[ ]{1,$indent}/m", "",
												$block_text);
				}

				# Append tag content to parsed text.
				if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
				else				$parsed .= "$block_text";

				# Start over a new block.
				$block_text = "";
			}
			else $block_text .= $tag;
		}

	} while ($depth > 0);

	#
	# Hash last block text that wasn't processed inside the loop.
	#
	$parsed .= $this->$hash_method($block_text);

	return array($parsed, $text);
}
function hashClean($text) {
	#
	# Hash a "clean" tag: store $text under a key and return that key,
	# which then stands in for the tag in the document.
	#
	# Hashes already present in $text are swapped back first, so the
	# stored text does not need to be unhashed again later.
	#
	$restored = $this->unhash($text);

	$key = "C\x1A" . md5($restored);
	$this->html_cleans[$key] = $this->html_hashes[$key] = $restored;

	return $key; # String that will replace the clean tag.
}
2076 function doHeaders($text) {
2078 # Redefined to add id attribute support.
2080 # Setext-style headers:
2081 # Header 1 {#header1}
2082 # ========
2084 # Header 2 {#header2}
2085 # --------
2087 $text = preg_replace_callback(
2088 '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n=+[ ]*\n+ }mx',
2089 array(&$this, '_doHeaders_callback_setext_h1'), $text);
2090 $text = preg_replace_callback(
2091 '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n-+[ ]*\n+ }mx',
2092 array(&$this, '_doHeaders_callback_setext_h2'), $text);
2094 # atx-style headers:
2095 # # Header 1 {#header1}
2096 # ## Header 2 {#header2}
2097 # ## Header 2 with closing hashes ## {#header3}
2098 # ...
2099 # ###### Header 6 {#header2}
2101 $text = preg_replace_callback('{
2102 ^(\#{1,6}) # $1 = string of #\'s
2103 [ ]*
2104 (.+?) # $2 = Header text
2105 [ ]*
2106 \#* # optional closing #\'s (not counted)
2107 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
2108 [ ]*
2110 }xm',
2111 array(&$this, '_doHeaders_callback_atx'), $text);
2113 return $text;
function _doHeaders_attr($attr) {
	# Build the ` id="..."` attribute for a header, or an empty string
	# when no id was given. "Empty" follows PHP's empty(): null, '' and
	# '0' all yield no attribute.
	return empty($attr) ? "" : " id=\"$attr\"";
}
function _doHeaders_callback_setext_h1($matches) {
	# Setext header underlined with `=`: $matches[1] is the header text,
	# $matches[2] the optional id (absent when none was given).
	$id   = isset($matches[2]) ? $matches[2] : null;
	$attr = $this->_doHeaders_attr($id);

	$html = "<h1$attr>" . $this->runSpanGamut($matches[1]) . "</h1>";
	return "\n" . $this->hashBlock($html) . "\n\n";
}
function _doHeaders_callback_setext_h2($matches) {
	# Setext header underlined with `-`: $matches[1] is the header text,
	# $matches[2] the optional id (absent when none was given).
	$id   = isset($matches[2]) ? $matches[2] : null;
	$attr = $this->_doHeaders_attr($id);

	$html = "<h2$attr>" . $this->runSpanGamut($matches[1]) . "</h2>";
	return "\n" . $this->hashBlock($html) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
	# atx header: the number of `#` in $matches[1] is the header level,
	# $matches[2] the header text, $matches[3] the optional id (absent
	# when none was given).
	$level = strlen($matches[1]);
	$id    = isset($matches[3]) ? $matches[3] : null;
	$attr  = $this->_doHeaders_attr($id);

	$html = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>";
	return "\n" . $this->hashBlock($html) . "\n\n";
}
2137 function doTables($text) {
2139 # Form HTML tables.
2141 $less_than_tab = $this->tab_width - 1;
2143 # Find tables with leading pipe.
2145 # | Header 1 | Header 2
2146 # | -------- | --------
2147 # | Cell 1 | Cell 2
2148 # | Cell 3 | Cell 4
2150 $text = preg_replace_callback('
2152 ^ # Start of a line
2153 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2154 [|] # Optional leading pipe (present)
2155 (.+) \n # $1: Header row (at least one pipe)
2157 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2158 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
2160 ( # $3: Cells
2162 [ ]* # Allowed whitespace.
2163 [|] .* \n # Row content.
2166 (?=\n|\Z) # Stop at final double newline.
2167 }xm',
2168 array(&$this, '_doTable_leadingPipe_callback'), $text);
2171 # Find tables without leading pipe.
2173 # Header 1 | Header 2
2174 # -------- | --------
2175 # Cell 1 | Cell 2
2176 # Cell 3 | Cell 4
2178 $text = preg_replace_callback('
2180 ^ # Start of a line
2181 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2182 (\S.*[|].*) \n # $1: Header row (at least one pipe)
2184 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2185 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
2187 ( # $3: Cells
2189 .* [|] .* \n # Row content
2192 (?=\n|\Z) # Stop at final double newline.
2193 }xm',
2194 array(&$this, '_DoTable_callback'), $text);
2196 return $text;
function _doTable_leadingPipe_callback($matches) {
	# Table written with a leading pipe on every line. The header and
	# underline groups were captured without theirs, so only the content
	# rows ($matches[3]) still need the leading pipe removed before the
	# regular table callback takes over.
	$rows = preg_replace('/^ *[|]/m', '', $matches[3]);

	return $this->_doTable_callback(
		array($matches[0], $matches[1], $matches[2], $rows));
}
function _doTable_callback($matches) {
	#
	# Build the HTML table for one matched table.
	#
	#   $matches[1]  header row
	#   $matches[2]  header underline (column alignment markers)
	#   $matches[3]  content rows, one per line
	#
	# Returns the hash of the table followed by a newline.
	#
	$head		= $matches[1];
	$underline	= $matches[2];
	$content	= $matches[3];

	# Remove any tailing pipes for each line.
	$head		= preg_replace('/[|] *$/m', '', $head);
	$underline	= preg_replace('/[|] *$/m', '', $underline);
	$content	= preg_replace('/[|] *$/m', '', $content);

	# Reading alignement from header underline.
	$attr = array();
	$separators	= preg_split('/ *[|] */', $underline);
	foreach ($separators as $n => $s) {
		if (preg_match('/^ *-+: *$/', $s))		$attr[$n] = ' align="right"';
		else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
		else if (preg_match('/^ *:-+ *$/', $s))	$attr[$n] = ' align="left"';
		else									$attr[$n] = '';
	}

	# Creating code spans before splitting the row is an easy way to
	# handle a code span containg pipes.
	$head	= $this->doCodeSpans($head);
	$headers	= preg_split('/ *[|] */', $head);
	$col_count	= count($headers);

	# The underline may describe fewer columns than the header row has;
	# the extra columns get no alignment instead of an undefined index.
	$attr = array_pad($attr, $col_count, '');

	# Write column headers.
	$text = "<table>\n";
	$text .= "<thead>\n";
	$text .= "<tr>\n";
	foreach ($headers as $n => $header)
		$text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
	$text .= "</tr>\n";
	$text .= "</thead>\n";

	# Split content by row.
	$rows = explode("\n", trim($content, "\n"));

	$text .= "<tbody>\n";
	foreach ($rows as $row) {
		# Creating code spans before splitting the row is an easy way to
		# handle a code span containg pipes.
		$row = $this->doCodeSpans($row);

		# Split row by cell: never more cells than columns, and padded
		# with empty cells when the row is short.
		$row_cells = preg_split('/ *[|] */', $row, $col_count);
		$row_cells = array_pad($row_cells, $col_count, '');

		$text .= "<tr>\n";
		foreach ($row_cells as $n => $cell)
			$text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
		$text .= "</tr>\n";
	}
	$text .= "</tbody>\n";
	$text .= "</table>";

	return $this->hashBlock($text) . "\n";
}
function doDefLists($text) {
#
# Form HTML definition lists.
#
# Finds every whole definition list in $text and replaces it with the
# result of _doDefLists_callback. Returns the transformed text.
#
	# A term or a definition colon may be indented by up to one column
	# less than a tab.
	$less_than_tab = $this->tab_width - 1;

	# Re-usable pattern to match any entire dl list:
	# one or more term lines, an optional blank line, a colon-introduced
	# definition, then everything up to the end of the text or up to a
	# blank-line gap that is followed by neither another term nor
	# another definition.
	$whole_list = '
		(								# $1 = whole list
		  (								# $2
			[ ]{0,'.$less_than_tab.'}
			((?>.*\S.*\n)+)				# $3 = defined term
			\n?
			[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
		  )
		  (?s:.+?)
		  (								# $4
			  \z
			|
			  \n{2,}
			  (?=\S)
			  (?!						# Negative lookahead for another term
				[ ]{0,'.$less_than_tab.'}
				(?: \S.*\n )+?			# defined term
				\n?
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
			  (?!						# Negative lookahead for another definition
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
		  )
		)
	'; // mx

	# A list must start at the very beginning of the text or right
	# after a blank line.
	$text = preg_replace_callback('{
			(?:(?<=\n\n)|\A\n?)
			'.$whole_list.'
		}mx',
		array(&$this, '_doDefLists_callback'), $text);

	return $text;
}
function _doDefLists_callback($matches) {
#
# Turn one matched definition list ($matches[1]) into a <dl> element.
# The items are converted by processDefListItems; the finished element
# is hashed as a block and followed by a blank line.
#
	$items = trim($this->processDefListItems($matches[1]));

	return $this->hashBlock("<dl>\n" . $items . "\n</dl>") . "\n\n";
}
function processDefListItems($list_str) {
#
#	Process the contents of a single definition list, splitting it
#	into individual term and definition list items.
#
#	Runs two passes over $list_str: the first replaces term lines via
#	_processDefListItems_callback_dt, the second replaces definitions
#	via _processDefListItems_callback_dd. Returns the resulting string.
#
	$less_than_tab = $this->tab_width - 1;

	# trim trailing blank lines:
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	# Process definition terms.
	# NOTE(review): the final lookahead hard-codes {0,3} spaces while the
	# rest of the pattern uses $less_than_tab; the two only agree when
	# tab_width is 4 - confirm whether that is intended.
	$list_str = preg_replace_callback('{
		(?:\n\n+|\A\n?)					# leading line
		(								# definition terms = $1
			[ ]{0,'.$less_than_tab.'}	# leading whitespace
			(?![:][ ]|[ ])				# negative lookahead for a definition
										#   mark (colon) or more whitespace.
			(?: \S.* \n)+?				# actual term (not whitespace).
		)
		(?=\n?[ ]{0,3}:[ ])				# lookahead for following line feed
										#   with a definition mark.
		}xm',
		array(&$this, '_processDefListItems_callback_dt'), $list_str);

	# Process actual definitions.
	# This pass runs after the terms were converted, which is why the
	# stop condition can look for a literal <dt> tag.
	$list_str = preg_replace_callback('{
		\n(\n+)?						# leading line = $1
		[ ]{0,'.$less_than_tab.'}		# whitespace before colon
		[:][ ]+							# definition mark (colon)
		((?s:.+?))						# definition text = $2
		(?= \n+ 						# stop at next definition mark,
			(?:							# next term or end of text
				[ ]{0,'.$less_than_tab.'} [:][ ]	|
				<dt> | \z
			)
		)
		}xm',
		array(&$this, '_processDefListItems_callback_dd'), $list_str);

	return $list_str;
}
function _processDefListItems_callback_dt($matches) {
#
# Convert a group of term lines ($matches[1]) into <dt> elements,
# one per line, each run through the span gamut. Every element is
# preceded by a newline and the result ends with a newline.
#
	$html = '';
	foreach (explode("\n", trim($matches[1])) as $line) {
		$html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
	}
	return $html . "\n";
}
function _processDefListItems_callback_dd($matches) {
#
# Convert one definition into a <dd> element.
#
#	$matches[1] - blank line(s) found before the definition, if any
#	$matches[2] - definition text
#
# A definition preceded by a blank line, or containing one, is treated
# as block-level content; any other definition is treated as a span.
#
	$blank_before = $matches[1];
	$body = $matches[2];

	$is_block = $blank_before || preg_match('/\n{2,}/', $body);

	if (!$is_block) {
		$body = $this->runSpanGamut($this->outdent(rtrim($body)));
	}
	else {
		$body = "\n" . $this->runBlockGamut($this->outdent($body . "\n\n")) . "\n";
	}

	return "\n<dd>" . $body . "</dd>\n";
}
function doItalicsAndBold($text) {
#
# Redefined to change emphasis by underscore behaviour so that it does not
# work in the middle of a word.
#
# Runs two passes over $text and returns the result: first the strong
# patterns (double markers), then the em patterns (single markers).
# Each pass has one pattern for underscores and one for asterisks;
# only the underscore patterns require non-alphanumeric neighbours.
#
	# <strong> must go first:
	$text = preg_replace_callback(array(
		'{
			(						# $1: Marker
				(?<![a-zA-Z0-9])	# Not preceded by alphanum
				(?<!__)				#	or by two marker chars.
				__
			)
			(?=\S) 					# Not followed by whitespace
			(?!__)					#   or two others marker chars.
			(						# $2: Content
				(?>
					[^_]+?			# Anthing not em markers.
				|
									# Balence any regular _ emphasis inside.
					(?<![a-zA-Z0-9]) _ (?=\S) (.+?)
					(?<=\S) _ (?![a-zA-Z0-9])
				|
					_+				# Allow unbalenced as last resort.
				)+?
			)
			(?<=\S) __				# End mark not preceded by whitespace.
			(?![a-zA-Z0-9])			# Not followed by alphanum
			(?!__)					#   or two others marker chars.
		}sx',
		'{
			( (?<!\*\*) \*\* )		# $1: Marker (not preceded by two *)
			(?=\S) 					# Not followed by whitespace
			(?!\1)					#   or two others marker chars.
			(						# $2: Content
				(?>
					[^*]+?			# Anthing not em markers.
				|
									# Balence any regular * emphasis inside.
					\* (?=\S) (.+?) (?<=\S) \*
				|
					\*				# Allow unbalenced as last resort.
				)+?
			)
			(?<=\S) \*\*			# End mark not preceded by whitespace.
		}sx',
		),
		array(&$this, '_doItalicAndBold_strong_callback'), $text);
	# Then <em>:
	$text = preg_replace_callback(array(
		'{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
		'{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',
		),
		array(&$this, '_doItalicAndBold_em_callback'), $text);

	return $text;
}
function formParagraphs($text) {
#
#	Params:
#		$text - string to process with html <p> tags
#
#	Splits $text at blank lines, runs each piece through the span gamut
#	and wraps it in <p> tags unless it is a key of html_blocks or
#	html_cleans. Returns the pieces joined by blank lines and unhashed.
#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+/', '', $text);
	$text = preg_replace('/\n+\z/', '', $text);

	$paragraphs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	foreach ($paragraphs as $index => $chunk) {
		$chunk = trim($this->runSpanGamut($chunk));

		# Clean tag hashes & block tag hashes are left alone.
		# Block hashes are looked up by the first 34 characters only -
		# presumably the length of a block hash token; confirm against
		# hashBlock.
		$is_hashed_html = isset($this->html_blocks[substr($chunk, 0, 34)])
			|| isset($this->html_cleans[$chunk]);

		$paragraphs[$index] = $is_hashed_html ? $chunk : "<p>$chunk</p>";
	}

	# Join the pieces in one text, then remove any tag hashes still
	# present in it.
	return $this->unhash(implode("\n\n", $paragraphs));
}
2487 ### Footnotes
function stripFootnotes($text) {
#
# Strips footnote definitions from text; each definition is handed to
# _stripFootnotes_callback, which stores its text in $this->footnotes
# and replaces the definition with an empty string.
#
# Returns $text without the footnote definitions.
#
	$less_than_tab = $this->tab_width - 1;

	# Footnote defs are in the form: [^id]: footnote text
	# The text may start on the line after the marker and continues
	# until a blank line followed by content indented by at most three
	# spaces, or until the next footnote marker.
	$text = preg_replace_callback('{
		^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:	# note_id = $1
		  [ ]*
		  \n?					# maybe *one* newline
		(						# text = $2 (no blank lines allowed)
			(?:
				.+				# actual text
			|
				\n				# newlines but
				(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
				(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
								# by non-indented content
			)*
		)
		}xm',
		array(&$this, '_stripFootnotes_callback'),
		$text);
	return $text;
}
function _stripFootnotes_callback($matches) {
#
# Store one footnote definition: the outdented text ($matches[2]) is
# kept in $this->footnotes under the prefixed note id ($matches[1]).
# Returns an empty string, so the definition disappears from the text.
#
	$key = $this->fn_id_prefix . $matches[1];
	$body = $this->outdent($matches[2]);

	$this->footnotes[$key] = $body;

	return '';
}
function doFootnotes($text) {
#
# Replace footnote references in $text [^id] with a special text-token
# which appendFootnotes later swaps for the actual footnote marker.
# Nothing is replaced while inside a footnote or an anchor.
#
	if ($this->in_footnote || $this->in_anchor) {
		return $text;
	}

	return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
function appendFootnotes($text) {
#
# Append footnote list to text.
#
# First swaps every footnote token in $text for its marker through
# _appendFootnotes_callback, which also fills footnotes_ordered.
# When at least one footnote was referenced, a <div class="footnotes">
# holding an ordered list of the footnotes is added after the text.
#
	$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
		array(&$this, '_appendFootnotes_callback'), $text);

	# No referenced footnote: nothing to append.
	if (empty($this->footnotes_ordered)) {
		return $text;
	}

	$text .= "\n\n"
		. "<div class=\"footnotes\">\n"
		. "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n"
		. "<ol>\n\n";

	# Attributes shared by every backlink; any "%%" in them is replaced
	# by the footnote number further down.
	$backlink_attr = " rev=\"footnote\"";
	if ($this->fn_backlink_class != "") {
		$css_class = $this->encodeAmpsAndAngles($this->fn_backlink_class);
		$css_class = str_replace('"', '&quot;', $css_class);
		$backlink_attr .= " class=\"$css_class\"";
	}
	if ($this->fn_backlink_title != "") {
		$tooltip = $this->encodeAmpsAndAngles($this->fn_backlink_title);
		$tooltip = str_replace('"', '&quot;', $tooltip);
		$backlink_attr .= " title=\"$tooltip\"";
	}

	$counter = 0;

	# Flag is raised while the footnote bodies are parsed; doFootnotes
	# leaves references alone while it is set.
	$this->in_footnote = true;

	foreach ($this->footnotes_ordered as $note_id => $note_text) {
		# Two newlines are appended before parsing.
		$note_html = $this->runBlockGamut($note_text . "\n\n");

		$numbered_attr = str_replace("%%", ++$counter, $backlink_attr);

		# Add backlink to last paragraph; create new paragraph if needed.
		$backlink = "<a href=\"#fnref:$note_id\"$numbered_attr>&#8617;</a>";
		if (preg_match('{</p>$}', $note_html)) {
			$note_html = substr($note_html, 0, -4) . "&#160;$backlink</p>";
		} else {
			$note_html .= "\n\n<p>$backlink</p>";
		}

		$text .= "<li id=\"fn:$note_id\">\n"
			. $note_html . "\n"
			. "</li>\n\n";
	}

	$this->in_footnote = false;

	$text .= "</ol>\n" . "</div>";

	return $text;
}
function _appendFootnotes_callback($matches) {
#
# Turn one footnote token into its superscript marker.
#
# A marker is created only if the id ($matches[1]) has a corresponding
# footnote *and* that footnote hasn't been used by another marker;
# otherwise the original "[^id]" text is put back.
#
	$key = $this->fn_id_prefix . $matches[1];

	if (!isset($this->footnotes[$key])) {
		return "[^".$matches[1]."]";
	}

	# Move the footnote content to the ordered list. Its position in
	# that list is the number shown in the marker.
	$this->footnotes_ordered[$key] = $this->footnotes[$key];
	unset($this->footnotes[$key]);
	$position = count($this->footnotes_ordered);

	$link_attr = " rel=\"footnote\"";
	if ($this->fn_link_class != "") {
		$css_class = $this->encodeAmpsAndAngles($this->fn_link_class);
		$css_class = str_replace('"', '&quot;', $css_class);
		$link_attr .= " class=\"$css_class\"";
	}
	if ($this->fn_link_title != "") {
		$tooltip = $this->encodeAmpsAndAngles($this->fn_link_title);
		$tooltip = str_replace('"', '&quot;', $tooltip);
		$link_attr .= " title=\"$tooltip\"";
	}
	# Any "%%" in the class or title is replaced by the footnote number.
	$link_attr = str_replace("%%", $position, $link_attr);

	return "<sup id=\"fnref:$key\">"
		. "<a href=\"#fn:$key\"$link_attr>$position</a>"
		. "</sup>";
}
2628 ### Abbreviations ###
function stripAbbreviations($text) {
#
# Strips abbreviation definitions from text; each one is handed to
# _stripAbbreviations_callback, which records it and replaces it with
# an empty string.
#
# Returns $text without the abbreviation definitions.
#
	$max_indent = $this->tab_width - 1;

	# Abbreviation defs are in the form: *[id]: description
	$definition_re = '{
		^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?:	# abbr_id = $1
		(.*)					# text = $2 (no blank lines allowed)
		}xm';

	return preg_replace_callback($definition_re,
		array(&$this, '_stripAbbreviations_callback'), $text);
}
function _stripAbbreviations_callback($matches) {
#
# Record one abbreviation definition. The regex-quoted word
# ($matches[1]) is added to abbr_matches and the trimmed description
# ($matches[2]) is stored in abbr_desciptions under the raw word.
# Returns an empty string, so the definition disappears from the text.
#
	$this->abbr_matches[] = preg_quote($matches[1]);
	$this->abbr_desciptions[$matches[1]] = trim($matches[2]);

	return '';
}
function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
# Returns $text untouched when no abbreviation has been recorded.
#
	if (!$this->abbr_matches) {
		return $text;
	}

	// cannot use the /x modifier because abbr_matches may
	// contain spaces:
	$alternatives = implode('|', $this->abbr_matches);
	$pattern = '{(?<![\w\x1A])(?:' . $alternatives . ')(?![\w\x1A])}';

	return preg_replace_callback($pattern,
		array(&$this, '_doAbbreviations_callback'), $text);
}
function _doAbbreviations_callback($matches) {
#
# Wrap one matched abbreviation ($matches[0]) in a hashed <abbr>
# element. The description stored in abbr_desciptions becomes the
# title attribute; an abbreviation with an empty description gets no
# title. Text with no stored description is returned unchanged.
#
	$abbr = $matches[0];
	if (!isset($this->abbr_desciptions[$abbr])) {
		return $abbr;
	}

	$desc = $this->abbr_desciptions[$abbr];

	# Compare against the empty string instead of using empty():
	# empty() is also true for the description "0", which would
	# silently lose its title attribute.
	if ((string) $desc === '') {
		return $this->hashSpan("<abbr>$abbr</abbr>");
	}

	$desc = $this->escapeSpecialCharsWithinTagAttributes($desc);
	return $this->hashSpan("<abbr title=\"$desc\">$abbr</abbr>");
}
2690 PHP Markdown Extra
2691 ==================
2693 Description
2694 -----------
2696 This is a PHP port of the original Markdown formatter written in Perl
2697 by John Gruber. This special "Extra" version of PHP Markdown features
2698 further enhancements to the syntax for making additional constructs
2699 such as tables and definition lists.
2701 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2702 easy-to-write structured text format into HTML. Markdown's text format
2703 is most similar to that of plain text email, and supports features such
2704 as headers, *emphasis*, code blocks, blockquotes, and links.
2706 Markdown's syntax is designed not as a generic markup language, but
2707 specifically to serve as a front-end to (X)HTML. You can use span-level
2708 HTML tags anywhere in a Markdown document, and you can use block level
2709 HTML tags (like <div> and <table>) as well.
2711 For more information about Markdown's syntax, see:
2713 <http://daringfireball.net/projects/markdown/>
2716 Bugs
2717 ----
2719 To file bug reports please send email to:
2721 <michel.fortin@michelf.com>
2723 Please include with your report: (1) the example input; (2) the output you
2724 expected; (3) the output Markdown actually produced.
2727 Version History
2728 ---------------
2730 See Readme file for details.
2732 Extra 1.1.4 (3 Aug 2007):
2734 Extra 1.1.3 (3 Jul 2007):
2736 Extra 1.1.2 (7 Feb 2007)
2738 Extra 1.1.1 (28 Dec 2006)
2740 Extra 1.1 (1 Dec 2006)
2742 Extra 1.0.1 (9 Dec 2005)
2744 Extra 1.0 (5 Sep 2005)
2747 Copyright and License
2748 ---------------------
2750 PHP Markdown & Extra
2751 Copyright (c) 2004-2007 Michel Fortin
2752 <http://www.michelf.com/>
2753 All rights reserved.
2755 Based on Markdown
2756 Copyright (c) 2003-2006 John Gruber
2757 <http://daringfireball.net/>
2758 All rights reserved.
2760 Redistribution and use in source and binary forms, with or without
2761 modification, are permitted provided that the following conditions are
2762 met:
2764 * Redistributions of source code must retain the above copyright notice,
2765 this list of conditions and the following disclaimer.
2767 * Redistributions in binary form must reproduce the above copyright
2768 notice, this list of conditions and the following disclaimer in the
2769 documentation and/or other materials provided with the distribution.
2771 * Neither the name "Markdown" nor the names of its contributors may
2772 be used to endorse or promote products derived from this software
2773 without specific prior written permission.
2775 This software is provided by the copyright holders and contributors "as
2776 is" and any express or implied warranties, including, but not limited
2777 to, the implied warranties of merchantability and fitness for a
2778 particular purpose are disclaimed. In no event shall the copyright owner
2779 or contributors be liable for any direct, indirect, incidental, special,
2780 exemplary, or consequential damages (including, but not limited to,
2781 procurement of substitute goods or services; loss of use, data, or
2782 profits; or business interruption) however caused and on any theory of
2783 liability, whether in contract, strict liability, or tort (including
2784 negligence or otherwise) arising in any way out of the use of this
2785 software, even if advised of the possibility of such damage.