3 # Markdown Extra - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004-2007 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
15 define( 'MARKDOWN_VERSION', "1.0.1h" ); # Fri 3 Aug 2007
16 define( 'MARKDOWNEXTRA_VERSION', "1.1.4" ); # Fri 3 Aug 2007
20 # Global default settings:
23 # Change to ">" for HTML output
24 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
26 # Define the width of a tab for code blocks.
27 define( 'MARKDOWN_TAB_WIDTH', 4 );
29 # Optional title attribute for footnote links and backlinks.
30 define( 'MARKDOWN_FN_LINK_TITLE', "" );
31 define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
33 # Optional class attribute for footnote links and backlinks.
34 define( 'MARKDOWN_FN_LINK_CLASS', "" );
35 define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
42 # Change to false to remove Markdown from posts and/or comments.
43 define( 'MARKDOWN_WP_POSTS', true );
44 define( 'MARKDOWN_WP_COMMENTS', true );
48 ### Standard Function Interface ###
50 define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
function Markdown($text) {
#
# Initialize the parser and return the result of its transform method.
#
# $text - Markdown source string to convert.
# Returns whatever the parser's transform() method returns for $text.
#
	# Setup static parser variable. The `static` declaration is what keeps
	# the instance alive between calls; without it $parser is a fresh, unset
	# local every time and a new parser gets constructed on each call.
	static $parser;
	if (!isset($parser)) {
		$parser_class = MARKDOWN_PARSER_CLASS;
		$parser = new $parser_class;
	}

	# Transform text using parser.
	return $parser->transform($text);
}
68 ### WordPress Plugin Interface ###
71 Plugin Name: Markdown Extra
72 Plugin URI: http://www.michelf.com/projects/php-markdown/
73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
76 Author URI: http://www.michelf.com/
79 if (isset($wp_version)) {
80 # More details about how it works here:
81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
83 # Post content and excerpts
84 # - Remove WordPress paragraph generator.
85 # - Run Markdown on excerpt, then remove all tags.
86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
87 if (MARKDOWN_WP_POSTS
) {
88 remove_filter('the_content', 'wpautop');
89 remove_filter('the_content_rss', 'wpautop');
90 remove_filter('the_excerpt', 'wpautop');
91 add_filter('the_content', 'Markdown', 6);
92 add_filter('the_content_rss', 'Markdown', 6);
93 add_filter('get_the_excerpt', 'Markdown', 6);
94 add_filter('get_the_excerpt', 'trim', 7);
95 add_filter('the_excerpt', 'mdwp_add_p');
96 add_filter('the_excerpt_rss', 'mdwp_strip_p');
98 remove_filter('content_save_pre', 'balanceTags', 50);
99 remove_filter('excerpt_save_pre', 'balanceTags', 50);
100 add_filter('the_content', 'balanceTags', 50);
101 add_filter('get_the_excerpt', 'balanceTags', 9);
105 # - Remove WordPress paragraph generator.
106 # - Remove WordPress auto-link generator.
107 # - Scramble important tags before passing them to the kses filter.
108 # - Run Markdown on excerpt then remove paragraph tags.
109 if (MARKDOWN_WP_COMMENTS
) {
110 remove_filter('comment_text', 'wpautop', 30);
111 remove_filter('comment_text', 'make_clickable');
112 add_filter('pre_comment_content', 'Markdown', 6);
113 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
114 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
115 add_filter('get_comment_text', 'Markdown', 6);
116 add_filter('get_comment_excerpt', 'Markdown', 6);
117 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
119 global $mdwp_hidden_tags, $mdwp_placeholders;
120 $mdwp_hidden_tags = explode(' ',
121 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
122 $mdwp_placeholders = explode(' ', str_rot13(
123 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
124 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
127 function mdwp_add_p($text) {
128 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
129 $text = '<p>'.$text.'</p>';
130 $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
function mdwp_strip_p($t) {
	# Drop every bare opening and closing <p> tag (case-insensitive). Tags
	# carrying attributes do not match the pattern and are left untouched.
	$stripped = preg_replace('{</?p>}i', '', $t);
	return $stripped;
}
function mdwp_hide_tags($text) {
	# Replace each tag listed in the global $mdwp_hidden_tags with the
	# placeholder found at the same position in $mdwp_placeholders.
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$masked = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
	return $masked;
}
function mdwp_show_tags($text) {
	# Inverse of mdwp_hide_tags(): replace each placeholder listed in the
	# global $mdwp_placeholders with the tag found at the same position in
	# $mdwp_hidden_tags.
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
	return $restored;
}
148 ### bBlog Plugin Info ###
150 function identify_modifier_markdown() {
152 'name' => 'markdown',
153 'type' => 'modifier',
154 'nicename' => 'PHP Markdown Extra',
155 'description' => 'A text-to-HTML conversion tool for web writers',
156 'authors' => 'Michel Fortin and John Gruber',
158 'version' => MARKDOWNEXTRA_VERSION
,
159 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
164 ### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
	# Smarty modifier entry point: hand the text to Markdown() and pass its
	# result back unchanged.
	$converted = Markdown($text);
	return $converted;
}
171 ### Textile Compatibility Mode ###
173 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
175 if (strcasecmp(substr(__FILE__
, -16), "classTextile.php") == 0) {
176 # Try to include PHP SmartyPants. Should be in the same directory.
177 @include_once
'smartypants.php';
178 # Fake Textile class. It calls Markdown instead.
180 function TextileThis($text, $lite='', $encode='') {
181 if ($lite == '' && $encode == '') $text = Markdown($text);
182 if (function_exists('SmartyPants')) $text = SmartyPants($text);
185 # Fake restricted version: restrictions are not supported for now.
function TextileRestricted($text, $lite='', $noimage='') {
	# Restrictions are not supported: $noimage is ignored and the call is
	# forwarded to TextileThis() with $text and $lite only.
	$converted = $this->TextileThis($text, $lite);
	return $converted;
}
189 # Workaround to ensure compatibility with TextPattern 4.0.3.
function blockLite($text) {
	# Identity function: hands $text back untouched.
	return $text;
}
197 # Markdown Parser Class
200 class Markdown_Parser
{
202 # Regex to match balanced [brackets].
203 # Needed to insert a maximum bracket depth while converting to PHP.
204 var $nested_brackets_depth = 6;
205 var $nested_brackets;
207 var $nested_url_parenthesis_depth = 4;
208 var $nested_url_parenthesis;
210 # Table of hash values for escaped characters:
211 var $escape_chars = '\`*_{}[]()>#+-.!';
212 // var $escape_table = array();
213 var $backslash_escape_table = array();
215 # Change to ">" for HTML output.
216 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX
;
217 var $tab_width = MARKDOWN_TAB_WIDTH
;
219 # Change to `true` to disallow markup or entities.
220 var $no_markup = false;
221 var $no_entities = false;
224 function Markdown_Parser() {
226 # Constructor function. Initialize appropriate member variables.
230 $this->nested_brackets
=
231 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth
).
232 str_repeat('\])*', $this->nested_brackets_depth
);
234 $this->nested_url_parenthesis
=
235 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth
).
236 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth
);
238 # Create an identical table but for escaped characters.
239 foreach (preg_split('/(?!^|$)/', $this->escape_chars
) as $char) {
240 $entity = "&#". ord($char). ";";
241 // $this->escape_table[$char] = $entity;
242 $this->backslash_escape_table
["\\$char"] = $entity;
245 # Sort document, block, and span gamut in ascending priority order.
246 asort($this->document_gamut
);
247 asort($this->block_gamut
);
248 asort($this->span_gamut
);
252 # Internal hashes used during transformation.
254 var $titles = array();
255 var $html_blocks = array();
256 var $html_hashes = array(); # Contains both blocks and span hashes.
258 # Status flag to avoid invalid nesting.
259 var $in_anchor = false;
262 function transform($text) {
264 # Main function. The order in which other subs are called here is
265 # essential. Link and image substitutions need to happen before
266 # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
267 # and <img> tags get encoded.
269 # Clear the global hashes. If we don't clear these, you get conflicts
270 # from other articles when generating a page which contains more than
271 # one article (e.g. an index page that shows the N most recent
273 $this->urls
= array();
274 $this->titles
= array();
275 $this->html_blocks
= array();
276 $this->html_hashes
= array();
278 # Standardize line endings:
279 # DOS to Unix and Mac to Unix
280 $text = str_replace(array("\r\n", "\r"), "\n", $text);
282 # Make sure $text ends with a couple of newlines:
285 # Convert all tabs to spaces.
286 $text = $this->detab($text);
288 # Turn block-level HTML blocks into hash entries
289 $text = $this->hashHTMLBlocks($text);
291 # Strip any lines consisting only of spaces and tabs.
292 # This makes subsequent regexen easier to write, because we can
293 # match consecutive blank lines with /\n+/ instead of something
294 # contorted like /[ ]*\n+/ .
295 $text = preg_replace('/^[ ]+$/m', '', $text);
297 # Run document gamut methods.
298 foreach ($this->document_gamut
as $method => $priority) {
299 $text = $this->$method($text);
305 var $document_gamut = array(
306 # Strip link definitions, store in hashes.
307 "stripLinkDefinitions" => 20,
309 "runBasicBlockGamut" => 30,
313 function stripLinkDefinitions($text) {
315 # Strips link definitions from text, stores the URLs and titles in
318 $less_than_tab = $this->tab_width
- 1;
320 # Link defs are in the form: ^[id]: url "optional title"
321 $text = preg_replace_callback('{
322 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
324 \n? # maybe *one* newline
326 <?(\S+?)>? # url = $2
328 \n? # maybe one newline
331 (?<=\s) # lookbehind for whitespace
336 )? # title is optional
339 array(&$this, '_stripLinkDefinitions_callback'),
function _stripLinkDefinitions_callback($matches) {
#
# preg_replace_callback() handler used by stripLinkDefinitions(): records
# one link definition and removes it from the text.
#
# $matches[1] - link id
# $matches[2] - url
# $matches[3] - title (only set when the definition has one)
#
# Returns the empty string that replaces the matched definition.
#
	$link_id = strtolower($matches[1]);
	$this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
	if (isset($matches[3])) {
		# The stored title is later written inside a double-quoted
		# title="..." attribute, so literal double quotes have to be
		# turned into the &quot; entity here.
		$this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
	}
	return ''; # String that will replace the block
}
352 function hashHTMLBlocks($text) {
353 if ($this->no_markup
) return $text;
355 $less_than_tab = $this->tab_width
- 1;
357 # Hashify HTML blocks:
358 # We only want to do this for block-level HTML tags, such as headers,
359 # lists, and tables. That's because we still want to wrap <p>s around
360 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
361 # phrase emphasis, and spans. The list of tags we're looking for is
363 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
364 'script|noscript|form|fieldset|iframe|math|ins|del';
365 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
366 'script|noscript|form|fieldset|iframe|math';
368 # Regular expression for the content of a block tag.
369 $nested_tags_level = 4;
371 (?> # optional tag attributes
372 \s # starts with whitespace
374 [^>"/]+ # text outside quotes
376 /+(?!>) # slash not followed by ">"
378 "[^"]*" # text inside double quotes (tolerate ">")
380 \'[^\']*\' # text inside single quotes (tolerate ">")
387 [^<]+ # content without tag
389 <\2 # nested opening tag
390 '.$attr.' # attributes
394 >', $nested_tags_level). # end of opening tag
395 '.*?'. # last level nested tag content
397 </\2\s*> # closing nested tag
400 <(?!/\2\s*> # other tags with a different name
405 # First, look for nested blocks, e.g.:
408 # tags for inner block must be indented.
412 # The outermost tags must start at the left margin for this to match, and
413 # the inner nested divs must be indented.
414 # We need to do this before the next, more liberal match, because the next
415 # match will start at the first `<div>` and stop at the first `</div>`.
416 $text = preg_replace_callback('{
418 ^ # start of line (with /m)
419 <('.$block_tags_a.')# start tag = $2
420 '.$attr.'>\n # attributes followed by > and \n
421 '.$content.' # content, support nesting
422 </\2> # the matching end tag
423 [ ]* # trailing spaces/tabs
424 (?=\n+|\Z) # followed by a newline or end of document
427 array(&$this, '_hashHTMLBlocks_callback'),
431 # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
433 $text = preg_replace_callback('{
435 ^ # start of line (with /m)
436 <('.$block_tags_b.')# start tag = $2
437 '.$attr.'> # attributes followed by >
438 '.$content.' # content, support nesting
439 </\2> # the matching end tag
440 [ ]* # trailing spaces/tabs
441 (?=\n+|\Z) # followed by a newline or end of document
444 array(&$this, '_hashHTMLBlocks_callback'),
447 # Special case just for <hr />. It was easier to make a special case than
448 # to make the other regex more complicated.
449 $text = preg_replace_callback('{
451 (?<=\n\n) # Starting after a blank line
453 \A\n? # the beginning of the doc
456 [ ]{0,'.$less_than_tab.'}
457 <(hr) # start tag = $2
460 /?> # the matching end tag
462 (?=\n{2,}|\Z) # followed by a blank line or end of document
465 array(&$this, '_hashHTMLBlocks_callback'),
468 # Special case for standalone HTML comments:
469 $text = preg_replace_callback('{
471 (?<=\n\n) # Starting after a blank line
473 \A\n? # the beginning of the doc
476 [ ]{0,'.$less_than_tab.'}
481 (?=\n{2,}|\Z) # followed by a blank line or end of document
484 array(&$this, '_hashHTMLBlocks_callback'),
487 # PHP and ASP-style processor instructions (<? and <%)
488 $text = preg_replace_callback('{
490 (?<=\n\n) # Starting after a blank line
492 \A\n? # the beginning of the doc
495 [ ]{0,'.$less_than_tab.'}
502 (?=\n{2,}|\Z) # followed by a blank line or end of document
505 array(&$this, '_hashHTMLBlocks_callback'),
510 function _hashHTMLBlocks_callback($matches) {
512 $key = $this->hashBlock($text);
513 return "\n\n$key\n\n";
function hashBlock($text) {
#
# Called whenever a block-level tag must be hashed. When a function inserts
# a block-level tag in $text, it passes through this function and is
# automatically escaped, which removes the need to call hashHTMLBlocks at
# every step.
#
# Returns the key that stands in for the block.
#
	# Swap back any tag hash found in $text first, so we do not have to
	# `unhash` multiple times at the end.
	$unhashed = $this->unhash($text);

	# Then hash the block and record it in both lookup tables.
	$block_key = "B\x1A" . md5($unhashed);
	$this->html_blocks[$block_key] = $unhashed;
	$this->html_hashes[$block_key] = $unhashed;

	return $block_key; # String that will replace the tag.
}
function hashSpan($text, $word_separator = false) {
#
# Called whenever a span-level element must be hashed. When a function
# inserts a span-level element in $text, it passes through this function
# and is automatically escaped, blocking invalid nested overlap. If the
# optional argument $word_separator is true, the hash key is wrapped in
# a colon on each side.
#
# Returns the key that stands in for the span.
#
	# Swap back any tag hash found in $text first, so we do not have to
	# `unhash` multiple times at the end.
	$unhashed = $this->unhash($text);

	# Then hash the span.
	$span_key = "S\x1A" . md5($unhashed);
	if ($word_separator) {
		$span_key = ":" . $span_key . ":";
	}

	$this->html_hashes[$span_key] = $unhashed;
	return $span_key; # String that will replace the span tag.
}
555 var $block_gamut = array(
557 # These are all the transformations that form block-level
558 # tags like paragraphs, headers, and list items.
561 "doHorizontalRules" => 20,
564 "doCodeBlocks" => 50,
565 "doBlockQuotes" => 60,
function runBlockGamut($text) {
#
# Run block gamut transformations.
#
	# Raw HTML in the Markdown source has to be hashed before doing
	# anything else. This needs to be done for each block, and not only at
	# the beginning in the Markdown function, since hashed blocks can be
	# part of list items and could have been indented. Indented blocks would
	# have been seen as a code block in a previous pass of hashHTMLBlocks.
	$hashed = $this->hashHTMLBlocks($text);

	return $this->runBasicBlockGamut($hashed);
}
582 function runBasicBlockGamut($text) {
584 # Run block gamut transformations, without hashing HTML blocks. This is
585 # useful when HTML blocks are known to be already hashed, like in the first
586 # whole-document pass.
588 foreach ($this->block_gamut
as $method => $priority) {
589 $text = $this->$method($text);
592 # Finally form paragraph and restore hashed blocks.
593 $text = $this->formParagraphs($text);
599 function doHorizontalRules($text) {
600 # Do Horizontal Rules:
602 array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ ]*$}mx',
603 '{^[ ]{0,2}([ ]? -[ ]?){3,}[ ]*$}mx',
604 '{^[ ]{0,2}([ ]? _[ ]?){3,}[ ]*$}mx'),
605 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
610 var $span_gamut = array(
612 # These are all the transformations that occur *within* block-level
613 # tags like paragraphs, headers, and list items.
615 "escapeSpecialCharsWithinTagAttributes" => -20,
616 "doCodeSpans" => -10,
617 "encodeBackslashEscapes" => -5,
619 # Process anchor and image tags. Images must come first,
620 # because ![foo][f] looks like an anchor.
624 # Make links out of things like `<http://example.com/>`
625 # Must come after doAnchors, because you can use < and >
626 # delimiters in inline links like [this](<url>).
628 "encodeAmpsAndAngles" => 40,
630 "doItalicsAndBold" => 50,
631 "doHardBreaks" => 60,
634 function runSpanGamut($text) {
636 # Run span gamut transformations.
638 foreach ($this->span_gamut
as $method => $priority) {
639 $text = $this->$method($text);
function doHardBreaks($text) {
	# A run of two or more spaces directly before a newline is replaced by
	# a hashed <br> tag (the hashed markup ends with a newline).
	$break_markup = "<br" . $this->empty_element_suffix . "\n";
	$break_key = $this->hashSpan($break_markup);
	return preg_replace('/ {2,}\n/', $break_key, $text);
}
653 function escapeSpecialCharsWithinTagAttributes($text) {
655 # Within tags -- meaning between < and > -- encode [\ ` * _] so they
656 # don't conflict with their use in Markdown for code, italics and strong.
657 # We're replacing each such character with its corresponding MD5 checksum
658 # value; this is likely overkill, but it should prevent us from colliding
659 # with the escape values by accident.
661 if ($this->no_markup
) return $text;
663 $tokens = $this->tokenizeHTML($text);
664 $text = ''; # rebuild $text from the tokens
666 foreach ($tokens as $cur_token) {
667 if ($cur_token[0] == 'tag') {
668 // $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
669 // $cur_token[1] = str_replace('`', $this->escape_table['`'], $cur_token[1]);
670 // $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
671 // $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
672 $cur_token[1] = $this->hashSpan($cur_token[1]);
674 $text .= $cur_token[1];
680 function doAnchors($text) {
682 # Turn Markdown link shortcuts into XHTML <a> tags.
684 if ($this->in_anchor
) return $text;
685 $this->in_anchor
= true;
688 # First, handle reference-style links: [link text] [id]
690 $text = preg_replace_callback('{
691 ( # wrap whole match in $1
693 ('.$this->nested_brackets
.') # link text = $2
696 [ ]? # one optional space
697 (?:\n[ ]*)? # one optional newline followed by spaces
704 array(&$this, '_doAnchors_reference_callback'), $text);
707 # Next, inline-style links: [link text](url "optional title")
709 $text = preg_replace_callback('{
710 ( # wrap whole match in $1
712 ('.$this->nested_brackets
.') # link text = $2
719 ('.$this->nested_url_parenthesis
.') # href = $4
723 ([\'"]) # quote char = $6
726 [ ]* # ignore any spaces/tabs between closing quote and )
727 )? # title is optional
731 array(&$this, '_DoAnchors_inline_callback'), $text);
734 # Last, handle reference-style shortcuts: [link text]
735 # These must come last in case you've also got [link test][1]
736 # or [link test](/foo)
738 // $text = preg_replace_callback('{
739 // ( # wrap whole match in $1
741 // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
745 // array(&$this, '_doAnchors_reference_callback'), $text);
747 $this->in_anchor
= false;
750 function _doAnchors_reference_callback($matches) {
751 $whole_match = $matches[1];
752 $link_text = $matches[2];
753 $link_id =& $matches[3];
755 if ($link_id == "") {
756 # for shortcut links like [this][] or [this].
757 $link_id = $link_text;
760 # lower-case and turn embedded newlines into spaces
761 $link_id = strtolower($link_id);
762 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
764 if (isset($this->urls
[$link_id])) {
765 $url = $this->urls
[$link_id];
766 $url = $this->encodeAmpsAndAngles($url);
768 $result = "<a href=\"$url\"";
769 if ( isset( $this->titles
[$link_id] ) ) {
770 $title = $this->titles
[$link_id];
771 $title = $this->encodeAmpsAndAngles($title);
772 $result .= " title=\"$title\"";
775 $link_text = $this->runSpanGamut($link_text);
776 $result .= ">$link_text</a>";
777 $result = $this->hashSpan($result);
780 $result = $whole_match;
784 function _doAnchors_inline_callback($matches) {
785 $whole_match = $matches[1];
786 $link_text = $this->runSpanGamut($matches[2]);
787 $url = $matches[3] == '' ?
$matches[4] : $matches[3];
788 $title =& $matches[7];
790 $url = $this->encodeAmpsAndAngles($url);
792 $result = "<a href=\"$url\"";
794 $title = str_replace('"', '"', $title);
795 $title = $this->encodeAmpsAndAngles($title);
796 $result .= " title=\"$title\"";
799 $link_text = $this->runSpanGamut($link_text);
800 $result .= ">$link_text</a>";
802 return $this->hashSpan($result);
806 function doImages($text) {
808 # Turn Markdown image shortcuts into <img> tags.
811 # First, handle reference-style labeled images: ![alt text][id]
813 $text = preg_replace_callback('{
814 ( # wrap whole match in $1
816 ('.$this->nested_brackets
.') # alt text = $2
819 [ ]? # one optional space
820 (?:\n[ ]*)? # one optional newline followed by spaces
828 array(&$this, '_doImages_reference_callback'), $text);
831 # Next, handle inline images: ![alt text](url "optional title")
832 # Don't forget: encode * and _
834 $text = preg_replace_callback('{
835 ( # wrap whole match in $1
837 ('.$this->nested_brackets
.') # alt text = $2
839 \s? # One optional whitespace character
843 <(\S*)> # src url = $3
845 ('.$this->nested_url_parenthesis
.') # src url = $4
849 ([\'"]) # quote char = $6
853 )? # title is optional
857 array(&$this, '_doImages_inline_callback'), $text);
861 function _doImages_reference_callback($matches) {
862 $whole_match = $matches[1];
863 $alt_text = $matches[2];
864 $link_id = strtolower($matches[3]);
866 if ($link_id == "") {
867 $link_id = strtolower($alt_text); # for shortcut links like ![this][].
870 $alt_text = str_replace('"', '"', $alt_text);
871 if (isset($this->urls
[$link_id])) {
872 $url = $this->urls
[$link_id];
873 $result = "<img src=\"$url\" alt=\"$alt_text\"";
874 if (isset($this->titles
[$link_id])) {
875 $title = $this->titles
[$link_id];
876 $result .= " title=\"$title\"";
878 $result .= $this->empty_element_suffix
;
879 $result = $this->hashSpan($result);
882 # If there's no such link ID, leave intact:
883 $result = $whole_match;
888 function _doImages_inline_callback($matches) {
889 $whole_match = $matches[1];
890 $alt_text = $matches[2];
891 $url = $matches[3] == '' ?
$matches[4] : $matches[3];
892 $title =& $matches[7];
894 $alt_text = str_replace('"', '"', $alt_text);
895 $result = "<img src=\"$url\" alt=\"$alt_text\"";
897 $title = str_replace('"', '"', $title);
898 $result .= " title=\"$title\""; # $title already quoted
900 $result .= $this->empty_element_suffix
;
902 return $this->hashSpan($result);
906 function doHeaders($text) {
907 # Setext-style headers:
914 $text = preg_replace_callback('{ ^(.+?)[ ]*\n=+[ ]*\n+ }mx',
915 array(&$this, '_doHeaders_callback_setext_h1'), $text);
916 $text = preg_replace_callback('{ ^(.+?)[ ]*\n-+[ ]*\n+ }mx',
917 array(&$this, '_doHeaders_callback_setext_h2'), $text);
922 # ## Header 2 with closing hashes ##
926 $text = preg_replace_callback('{
927 ^(\#{1,6}) # $1 = string of #\'s
929 (.+?) # $2 = Header text
931 \#* # optional closing #\'s (not counted)
934 array(&$this, '_doHeaders_callback_atx'), $text);
function _doHeaders_callback_setext_h1($matches) {
	# Setext-style level-1 header: run the span gamut on the captured
	# header text, wrap it in <h1> and hash the result as a block.
	$inner = $this->runSpanGamut($matches[1]);
	$key = $this->hashBlock("<h1>" . $inner . "</h1>");
	return "\n" . $key . "\n\n";
}
function _doHeaders_callback_setext_h2($matches) {
	# Setext-style level-2 header: run the span gamut on the captured
	# header text, wrap it in <h2> and hash the result as a block.
	$inner = $this->runSpanGamut($matches[1]);
	$key = $this->hashBlock("<h2>" . $inner . "</h2>");
	return "\n" . $key . "\n\n";
}
function _doHeaders_callback_atx($matches) {
	# $matches[1] is the leading run of #'s, whose length gives the header
	# level; $matches[2] is the header text.
	$level = strlen($matches[1]);
	$inner = $this->runSpanGamut($matches[2]);
	$key = $this->hashBlock("<h" . $level . ">" . $inner . "</h" . $level . ">");
	return "\n" . $key . "\n\n";
}
953 function doLists($text) {
955 # Form HTML ordered (numbered) and unordered (bulleted) lists.
957 $less_than_tab = $this->tab_width
- 1;
959 # Re-usable patterns to match list item bullets and number markers:
960 $marker_ul = '[*+-]';
961 $marker_ol = '\d+[.]';
962 $marker_any = "(?:$marker_ul|$marker_ol)";
964 $markers = array($marker_ul, $marker_ol);
966 foreach ($markers as $marker) {
967 # Re-usable pattern to match any entire ul or ol list:
971 [ ]{0,'.$less_than_tab.'}
972 ('.$marker.') # $3 = first list item marker
981 (?! # Negative lookahead for another list item marker
989 # We use a different prefix before nested lists than top-level lists.
990 # See extended comment in _ProcessListItems().
992 if ($this->list_level
) {
993 $text = preg_replace_callback('{
997 array(&$this, '_doLists_callback'), $text);
1000 $text = preg_replace_callback('{
1001 (?:(?<=\n)\n|\A\n?) # Must eat the newline
1004 array(&$this, '_doLists_callback'), $text);
1010 function _doLists_callback($matches) {
1011 # Re-usable patterns to match list item bullets and number markers:
1012 $marker_ul = '[*+-]';
1013 $marker_ol = '\d+[.]';
1014 $marker_any = "(?:$marker_ul|$marker_ol)";
1016 $list = $matches[1];
1017 $list_type = preg_match("/$marker_ul/", $matches[3]) ?
"ul" : "ol";
1019 $marker_any = ( $list_type == "ul" ?
$marker_ul : $marker_ol );
1022 $result = $this->processListItems($list, $marker_any);
1024 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1025 return "\n". $result ."\n\n";
1028 var $list_level = 0;
1030 function processListItems($list_str, $marker_any) {
1032 # Process the contents of a single ordered or unordered list, splitting it
1033 # into individual list items.
1035 # The $this->list_level global keeps track of when we're inside a list.
1036 # Each time we enter a list, we increment it; when we leave a list,
1037 # we decrement. If it's zero, we're not in a list anymore.
1039 # We do this because when we're not inside a list, we want to treat
1040 # something like this:
1042 # I recommend upgrading to version
1043 # 8. Oops, now this line is treated
1046 # As a single paragraph, despite the fact that the second line starts
1047 # with a digit-period-space sequence.
1049 # Whereas when we're inside a list (or sub-list), that line will be
1050 # treated as the start of a sub-list. What a kludge, huh? This is
1051 # an aspect of Markdown's syntax that's hard to parse perfectly
1052 # without resorting to mind-reading. Perhaps the solution is to
1053 # change the syntax rules such that sub-lists must start with a
1054 # starting cardinal number; e.g. "1." or "a.".
1056 $this->list_level++
;
1058 # trim trailing blank lines:
1059 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1061 $list_str = preg_replace_callback('{
1062 (\n)? # leading line = $1
1063 (^[ ]*) # leading whitespace = $2
1064 ('.$marker_any.') [ ]+ # list marker = $3
1065 ((?s:.+?)) # list item text = $4
1066 (?:(\n+(?=\n))|\n) # tailing blank line = $5
1067 (?= \n* (\z | \2 ('.$marker_any.') [ ]+))
1069 array(&$this, '_processListItems_callback'), $list_str);
1071 $this->list_level
--;
1074 function _processListItems_callback($matches) {
1075 $item = $matches[4];
1076 $leading_line =& $matches[1];
1077 $leading_space =& $matches[2];
1078 $tailing_blank_line =& $matches[5];
1080 if ($leading_line ||
$tailing_blank_line ||
1081 preg_match('/\n{2,}/', $item))
1083 $item = $this->runBlockGamut($this->outdent($item)."\n");
1086 # Recursion for sub-lists:
1087 $item = $this->doLists($this->outdent($item));
1088 $item = preg_replace('/\n+$/', '', $item);
1089 $item = $this->runSpanGamut($item);
1092 return "<li>" . $item . "</li>\n";
1096 function doCodeBlocks($text) {
1098 # Process Markdown `<pre><code>` blocks.
1100 $text = preg_replace_callback('{
1102 ( # $1 = the code block -- one or more lines, starting with a space/tab
1104 (?:[ ]{'.$this->tab_width
.'} | \t) # Lines must start with a tab or a tab-width of spaces
1108 ((?=^[ ]{0,'.$this->tab_width
.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1110 array(&$this, '_doCodeBlocks_callback'), $text);
1114 function _doCodeBlocks_callback($matches) {
1115 $codeblock = $matches[1];
1117 $codeblock = $this->encodeCode($this->outdent($codeblock));
1118 // $codeblock = $this->detab($codeblock);
1119 # trim leading newlines and trailing whitespace
1120 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
1122 $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";
1128 function doCodeSpans($text) {
1130 # * Backtick quotes are used for <code></code> spans.
1132 # * You can use multiple backticks as the delimiters if you want to
1133 # include literal backticks in the code span. So, this input:
1135 # Just type ``foo `bar` baz`` at the prompt.
1137 # Will translate to:
1139 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1141 # There's no arbitrary limit to the number of backticks you
1142 # can use as delimiters. If you need three consecutive backticks
1143 # in your code, use four for delimiters, etc.
1145 # * You can use spaces to get literal backticks at the edges:
1147 # ... type `` `bar` `` ...
1151 # ... type <code>`bar`</code> ...
1153 $text = preg_replace_callback('@
1154 (?<!\\\) # Character before opening ` can\'t be a backslash
1155 (`+) # $1 = Opening run of `
1156 (.+?) # $2 = The code block
1158 \1 # Matching closer
1161 array(&$this, '_doCodeSpans_callback'), $text);
1165 function _doCodeSpans_callback($matches) {
1167 $c = preg_replace('/^[ ]*/', '', $c); # leading whitespace
1168 $c = preg_replace('/[ ]*$/', '', $c); # trailing whitespace
1169 $c = $this->encodeCode($c);
1170 return $this->hashSpan("<code>$c</code>");
function encodeCode($_) {
#
# Encode/escape certain characters inside Markdown code runs.
# The point is that in code, these characters are literals,
# and lose their special Markdown meanings.
#
# $_ - raw text of a code span or code block.
# Returns the text with &, < and > replaced by their HTML entities.
#
	# Encode all ampersands; HTML entities are not entities within a
	# Markdown code span. This must run first so that the "&" of the
	# entities produced below is not encoded a second time.
	$_ = str_replace('&', '&amp;', $_);

	# Do the angle bracket song and dance:
	$_ = str_replace(array('<', '>'), array('&lt;', '&gt;'), $_);

	return $_;
}
1196 function doItalicsAndBold($text) {
1197 # <strong> must go first:
1198 $text = preg_replace_callback('{
1200 (?<!\*\*) \* | # (not preceded by two chars of
1201 (?<!__) _ # the same marker)
1204 (?=\S) # Not followed by whitespace
1205 (?!\1\1) # or two others marker chars.
1208 [^*_]+? # Anthing not em markers.
1210 # Balence any regular emphasis inside.
1211 \1 (?=\S) .+? (?<=\S) \1
1213 . # Allow unbalenced * and _.
1216 (?<=\S) \1\1 # End mark not preceded by whitespace.
1218 array(&$this, '_doItalicAndBold_strong_callback'), $text);
1220 $text = preg_replace_callback(
1221 '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
1222 array(&$this, '_doItalicAndBold_em_callback'), $text);
function _doItalicAndBold_em_callback($matches) {
	# Run the span gamut on the emphasized text ($matches[2]), wrap it in
	# <em> and hash the result as a span.
	$inner = $this->runSpanGamut($matches[2]);
	return $this->hashSpan("<em>" . $inner . "</em>");
}
function _doItalicAndBold_strong_callback($matches) {
	# Run the span gamut on the strongly emphasized text ($matches[2]),
	# wrap it in <strong> and hash the result as a span.
	$inner = $this->runSpanGamut($matches[2]);
	return $this->hashSpan("<strong>" . $inner . "</strong>");
}
1238 function doBlockQuotes($text) {
1239 $text = preg_replace_callback('/
1240 ( # Wrap whole match in $1
1242 ^[ ]*>[ ]? # ">" at the start of a line
1243 .+\n # rest of the first line
1244 (.+\n)* # subsequent consecutive lines
1249 array(&$this, '_doBlockQuotes_callback'), $text);
1253 function _doBlockQuotes_callback($matches) {
1255 # trim one level of quoting - trim whitespace-only lines
1256 $bq = preg_replace(array('/^[ ]*>[ ]?/m', '/^[ ]+$/m'), '', $bq);
1257 $bq = $this->runBlockGamut($bq); # recurse
1259 $bq = preg_replace('/^/m', " ", $bq);
1260 # These leading spaces cause problem with <pre> content,
1261 # so we need to fix that:
1262 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1263 array(&$this, '_DoBlockQuotes_callback2'), $bq);
1265 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1267 function _doBlockQuotes_callback2($matches) {
1269 $pre = preg_replace('/^ /m', '', $pre);
1274 function formParagraphs($text) {
1277 # $text - string to process with html <p> tags
1279 # Strip leading and trailing lines:
1280 $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
1282 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY
);
1287 foreach ($grafs as $key => $value) {
1288 if (!isset( $this->html_blocks
[$value] )) {
1289 $value = $this->runSpanGamut($value);
1290 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1292 $grafs[$key] = $this->unhash($value);
1297 # Unhashify HTML blocks
1299 foreach ($grafs as $key => $graf) {
1300 # Modify elements of @grafs in-place...
1301 if (isset($this->html_blocks
[$graf])) {
1302 $block = $this->html_blocks
[$graf];
1304 // if (preg_match('{
1306 // ( # $1 = <div> tag
1310 // markdown\s*=\s* ([\'"]) # $2 = attr quote char
1316 // ( # $3 = contents
1319 // (</div>) # $4 = closing tag
1321 // }xs', $block, $matches))
1323 // list(, $div_open, , $div_content, $div_close) = $matches;
1325 // # We can't call Markdown(), because that resets the hash;
1326 // # that initialization code should be pulled into its own sub, though.
1327 // $div_content = $this->hashHTMLBlocks($div_content);
1329 // # Run document gamut methods on the content.
1330 // foreach ($this->document_gamut as $method => $priority) {
1331 // $div_content = $this->$method($div_content);
1334 // $div_open = preg_replace(
1335 // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1337 // $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1339 $grafs[$key] = $graf;
1343 return implode("\n\n", $grafs);
1347 function encodeAmpsAndAngles($text) {
1348 # Smart processing for ampersands and angle brackets that need to be encoded.
1349 if ($this->no_entities
) {
1350 $text = str_replace('&', '&', $text);
1351 $text = str_replace('<', '<', $text);
1355 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1356 # http://bumppo.net/projects/amputator/
1357 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1361 $text = preg_replace('{<(?![a-z/?\$!%])}i', '<', $text);
1367 function encodeBackslashEscapes($text) {
1369 # Parameter: String.
1370 # Returns: The string, after processing the following backslash
1373 # Must process escaped backslashes first (should be first in list).
1374 foreach ($this->backslash_escape_table
as $search => $replacement) {
1375 $text = str_replace($search, $this->hashSpan($replacement), $text);
1381 function doAutoLinks($text) {
1382 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}',
1383 array(&$this, '_doAutoLinks_url_callback'), $text);
1385 # Email addresses: <address@domain.foo>
1386 $text = preg_replace_callback('{
1392 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1396 array(&$this, '_doAutoLinks_email_callback'), $text);
1400 function _doAutoLinks_url_callback($matches) {
1401 $url = $this->encodeAmpsAndAngles($matches[1]);
1402 $link = "<a href=\"$url\">$url</a>";
1403 return $this->hashSpan($link);
1405 function _doAutoLinks_email_callback($matches) {
1406 $address = $matches[1];
1407 $link = $this->encodeEmailAddress($address);
1408 return $this->hashSpan($link);
1412 function encodeEmailAddress($addr) {
1414 # Input: an email address, e.g. "foo@example.com"
1416 # Output: the email address as a mailto link, with each character
1417 # of the address encoded as either a decimal or hex entity, in
1418 # the hopes of foiling most address harvesting spam bots. E.g.:
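1420 # <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1421 # &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1422 # &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1423 # &#101;&#46;&#x63;&#111;&#x6d;</a></p>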
1425 # Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
1426 # With some optimizations by Milian Wolff.
1428 $addr = "mailto:" . $addr;
1429 $chars = preg_split('/(?<!^)(?!$)/', $addr);
1430 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1432 foreach ($chars as $key => $char) {
1434 # Ignore non-ascii chars.
1436 $r = ($seed * (1 +
$key)) %
100; # Pseudo-random function.
1437 # roughly 10% raw, 45% hex, 45% dec
1438 # '@' *must* be encoded. I insist.
1439 if ($r > 90 && $char != '@') /* do nothing */;
1440 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1441 else $chars[$key] = '&#'.$ord.';';
1445 $addr = implode('', $chars);
1446 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1447 $addr = "<a href=\"$addr\">$text</a>";
1453 function tokenizeHTML($str) {
1455 # Parameter: String containing HTML + Markdown markup.
1456 # Returns: An array of the tokens comprising the input
1457 # string. Each token is either a tag or a run of text
1458 # between tags. Each element of the array is a
1459 # two-element array; the first is either 'tag' or 'text';
1460 # the second is the actual value.
1461 # Note: Markdown code spans are taken into account: no tag token is
1462 # generated within a code span.
1466 while ($str != "") {
1468 # Each loop iteration searches for either the next tag or the next
1469 # opening code span marker. If a code span marker is found, the
1470 # code span is extracted in its entirety and will result in an extra
1473 $parts = preg_split('{
1476 `+ # code span marker
1478 <!-- .*? --> # comment
1480 <\?.*?\?> | <%.*?%> # processing instruction
1482 <[/!$]?[-a-zA-Z0-9:]+ # regular tags
1485 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1489 }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE
);
1491 # Create token from text preceding tag.
1492 if ($parts[0] != "") {
1493 $tokens[] = array('text', $parts[0]);
1496 # Check if we reach the end.
1497 if (count($parts) < 3) {
1501 # Create token from tag or code span.
1502 if ($parts[1]{0} == "`") {
1503 $tokens[] = array('text', $parts[1]);
1506 # Skip the whole code span, pass as text token.
1507 if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/sm',
1510 $tokens[] = array('text', $matches[1]);
1514 $tokens[] = array('tag', $parts[1]);
1523 function outdent($text) {
1525 # Remove one level of line-leading tabs or spaces
1527 return preg_replace("/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);
1531 # String length function for detab. `_initDetab` will create a function to
1532 # handle UTF-8 if the default function does not exist.
1533 var $utf8_strlen = 'mb_strlen';
1535 function detab($text) {
1537 # Replace tabs with the appropriate amount of space.
1539 # For each line we separate the line into blocks delimited by
1540 # tab characters. Then we reconstruct every line by adding the
1541 # appropriate number of spaces between each block.
1543 $strlen = $this->utf8_strlen
; # strlen function for UTF-8.
1544 $lines = explode("\n", $text);
1547 foreach ($lines as $line) {
1549 $blocks = explode("\t", $line);
1550 # Add each block to the line.
1552 unset($blocks[0]); # Do not add first block twice.
1553 foreach ($blocks as $block) {
1554 # Calculate amount of space, insert spaces, insert block.
1555 $amount = $this->tab_width
-
1556 $strlen($line, 'UTF-8') %
$this->tab_width
;
1557 $line .= str_repeat(" ", $amount) . $block;
1563 function _initDetab() {
1565 # Check for the availability of the function in the `utf8_strlen` property
1566 # (initially `mb_strlen`). If the function is not available, create a
1567 # function that will loosely count the number of UTF-8 characters with a
1568 # regular expression.
1570 if (function_exists($this->utf8_strlen
)) return;
1571 $this->utf8_strlen
= create_function('$text', 'return preg_match_all(
1572 "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1577 function unhash($text) {
1579 # Swap back in all the tags hashed by _HashHTMLBlocks.
1581 return str_replace(array_keys($this->html_hashes
),
1582 array_values($this->html_hashes
), $text);
1589 # Markdown Extra Parser Class
1592 class MarkdownExtra_Parser
extends Markdown_Parser
{
1594 # Prefix for footnote ids.
1595 var $fn_id_prefix = "";
1597 # Optional title attribute for footnote links and backlinks.
1598 var $fn_link_title = MARKDOWN_FN_LINK_TITLE
;
1599 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE
;
1601 # Optional class attribute for footnote links and backlinks.
1602 var $fn_link_class = MARKDOWN_FN_LINK_CLASS
;
1603 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS
;
1606 function MarkdownExtra_Parser() {
1608 # Constructor function. Initialize the parser object.
1610 # Add extra escapable characters before parent constructor
1611 # initializes the table.
1612 $this->escape_chars
.= ':|';
1614 # Insert extra document, block, and span transformations.
1615 # Parent constructor will do the sorting.
1616 $this->document_gamut +
= array(
1617 "stripFootnotes" => 15,
1618 "stripAbbreviations" => 25,
1619 "appendFootnotes" => 50,
1621 $this->block_gamut +
= array(
1625 $this->span_gamut +
= array(
1627 "doAbbreviations" => 70,
1630 parent
::Markdown_Parser();
1634 # Extra hashes used during extra transformations.
1635 var $footnotes = array();
1636 var $footnotes_ordered = array();
1637 var $abbr_desciptions = array();
1638 var $abbr_matches = array();
1639 var $html_cleans = array();
1641 # Status flag to avoid invalid nesting.
1642 var $in_footnote = false;
1645 function transform($text) {
1647 # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before
1648 # blank line stripping and added extra parameter to `runBlockGamut`.
1650 # Clear the global hashes. If we don't clear these, you get conflicts
1651 # from other articles when generating a page which contains more than
1652 # one article (e.g. an index page that shows the N most recent
1654 $this->footnotes
= array();
1655 $this->footnotes_ordered
= array();
1656 $this->abbr_desciptions
= array();
1657 $this->abbr_matches
= array();
1658 $this->html_cleans
= array();
1660 return parent
::transform($text);
1664 ### HTML Block Parser ###
1666 # Tags that are always treated as block tags:
1667 var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1669 # Tags treated as block tags only if the opening tag is alone on its line:
1670 var $context_block_tags = 'script|noscript|math|ins|del';
1672 # Tags where markdown="1" defaults to span mode:
1673 var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1675 # Tags which must not have their contents modified, no matter where
1677 var $clean_tags = 'script|math';
1679 # Tags that do not need to be closed.
1680 var $auto_close_tags = 'hr|img';
1683 function hashHTMLBlocks($text) {
1685 # Hashify HTML Blocks and "clean tags".
1687 # We only want to do this for block-level HTML tags, such as headers,
1688 # lists, and tables. That's because we still want to wrap <p>s around
1689 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1690 # phrase emphasis, and spans. The list of tags we're looking for is
1693 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1694 # _HashHTMLBlocks_InHTML when it encounters block tags. When the markdown="1"
1695 # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
1696 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1697 # These two functions are calling each other. It's recursive!
1700 # Call the HTML-in-Markdown hasher.
1702 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1706 function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1707 $enclosing_tag = '', $span = false)
1710 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1712 # * $indent is the number of spaces to be ignored when checking for code
1713 # blocks. This is important because if we don't take the indent into
1714 # account, something like this (which looks right) won't work as expected:
1717 # <div markdown="1">
1718 # Hello World. <-- Is this a Markdown code block or text?
1719 # </div> <-- Is this a Markdown code block or a real tag?
1722 # If you don't like this, just don't indent the tag on which
1723 # you apply the markdown="1" attribute.
1725 # * If $enclosing_tag is not empty, stops at the first unmatched closing
1726 # tag with that name. Nested tags supported.
1728 # * If $span is true, text inside must be treated as span. So any double
1729 # newline will be replaced by a single newline so that it does not create
1732 # Returns an array of that form: ( processed text , remaining text )
1734 if ($text === '') return array('', '');
1736 # Regex to check for the presence of newlines around a block tag.
1737 $newline_match_before = '/(?:^\n?|\n\n)*$/';
1738 $newline_match_after =
1740 ^ # Start of text following the tag.
1741 (?:[ ]*<!--.*?-->)? # Optional comment.
1742 [ ]*\n # Must be followed by newline.
1745 # Regex to match any tag.
1748 ( # $2: Capture hole tag.
1749 </? # Any opening or closing tag.
1751 '.$this->block_tags
.' |
1752 '.$this->context_block_tags
.' |
1753 '.$this->clean_tags
.' |
1754 (?!\s)'.$enclosing_tag.'
1758 ".*?" | # Double quotes (can contain `>`)
1759 \'.*?\' | # Single quotes (can contain `>`)
1760 .+? # Anything but quotes and `>`.
1764 <!-- .*? --> # HTML Comment
1766 <\?.*?\?> | <%.*?%> # Processing instruction
1768 <!\[CDATA\[.*?\]\]> # CData Block
1773 $depth = 0; # Current depth inside the tag tree.
1774 $parsed = ""; # Parsed text that will be returned.
1777 # Loop through every tag until we find the closing tag of the parent
1778 # or loop until reaching the end of text if no parent tag specified.
1782 # Split the text using the first $tag_match pattern found.
1783 # Text before pattern will be first in the array, text after
1784 # pattern will be at the end, and between will be any catches made
1787 $parts = preg_split($block_tag_match, $text, 2,
1788 PREG_SPLIT_DELIM_CAPTURE
);
1790 # If in Markdown span mode, add an empty-string span-level hash
1791 # after each newline to prevent triggering any block element.
1793 $void = $this->hashSpan("", true) ;
1794 $newline = $this->hashSpan("", true) . "\n";
1795 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1798 $parsed .= $parts[0]; # Text before current tag.
1800 # If end of $text has been reached. Stop loop.
1801 if (count($parts) < 3) {
1806 $tag = $parts[1]; # Tag to handle.
1807 $text = $parts[2]; # Remaining text after current tag.
1810 # Check for: Tag inside code block or span
1812 if (# Find current paragraph
1813 preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
1815 # Then match in it either a code block...
1816 preg_match('/^ {'.($indent+
4).'}.*(?>\n {'.($indent+
4).'}.*)*'.
1817 '(?!\n)$/', $matches[1], $x) ||
1818 # ...or unbalanced code span markers. (the regex matches balanced)
1819 !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
1823 # Tag is in code block or span and may not be a tag at all. So we
1824 # simply skip the first char (should be a `<`).
1826 $text = substr($tag, 1) . $text; # Put back $tag minus first char.
1829 # Check for: Opening Block level tag or
1830 # Opening Content Block tag (like ins and del)
1831 # used as a block tag (tag is alone on its line).
1833 else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||
1834 ( preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&
1835 preg_match($newline_match_before, $parsed) &&
1836 preg_match($newline_match_after, $text) )
1839 # Need to parse tag and following text using the HTML parser.
1840 list($block_text, $text) =
1841 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1843 # Make sure it stays outside of any paragraph by adding newlines.
1844 $parsed .= "\n\n$block_text\n\n";
1847 # Check for: Clean tag (like script, math)
1848 # HTML Comments, processing instructions.
1850 else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||
1851 $tag{1} == '!' ||
$tag{1} == '?')
1853 # Need to parse tag and following text using the HTML parser.
1854 # (don't check for markdown attribute)
1855 list($block_text, $text) =
1856 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1858 $parsed .= $block_text;
1861 # Check for: Tag with same name as enclosing tag.
1863 else if ($enclosing_tag !== '' &&
1864 # Same name as enclosing tag.
1865 preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
1868 # Increase/decrease nested tag count.
1870 if ($tag{1} == '/') $depth--;
1871 else if ($tag{strlen($tag)-2} != '/') $depth++
;
1875 # Going out of parent element. Clean up and break so we
1876 # return to the calling function.
1878 $text = $tag . $text;
1887 } while ($depth >= 0);
1889 return array($parsed, $text);
1891 function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
1893 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
1895 # * Calls $hash_method to convert any blocks.
1896 # * Stops when the first opening tag closes.
1897 # * $md_attr indicates whether the use of the `markdown="1"` attribute is allowed.
1898 # (it is not inside clean tags)
1900 # Returns an array of that form: ( processed text , remaining text )
1902 if ($text === '') return array('', '');
1904 # Regex to match `markdown` attribute inside of a tag.
1905 $markdown_attr_match = '
1907 \s* # Eat whitespace before the `markdown` attribute
1911 (["\']) # $1: quote delimiter
1912 (.*?) # $2: attribute value
1913 \1 # matching delimiter
1915 ([^\s>]*) # $3: unquoted attribute value
1917 () # $4: make $3 always defined (avoid warnings)
1920 # Regex to match any tag.
1922 ( # $2: Capture hole tag.
1923 </? # Any opening or closing tag.
1927 ".*?" | # Double quotes (can contain `>`)
1928 \'.*?\' | # Single quotes (can contain `>`)
1929 .+? # Anything but quotes and `>`.
1933 <!-- .*? --> # HTML Comment
1935 <\?.*?\?> | <%.*?%> # Processing instruction
1937 <!\[CDATA\[.*?\]\]> # CData Block
1941 $original_text = $text; # Save original text in case of faliure.
1943 $depth = 0; # Current depth inside the tag tree.
1944 $block_text = ""; # Temporary text holder for current text.
1945 $parsed = ""; # Parsed text that will be returned.
1948 # Get the name of the starting tag.
1950 if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
1951 $base_tag_name = $matches[1];
1954 # Loop through every tag until we find the corresponding closing tag.
1958 # Split the text using the first $tag_match pattern found.
1959 # Text before pattern will be first in the array, text after
1960 # pattern will be at the end, and between will be any catches made
1963 $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE
);
1965 if (count($parts) < 3) {
1967 # End of $text reached with unbalanced tag(s).
1968 # In that case, we return original text unchanged and pass the
1969 # first character as filtered to prevent an infinite loop in the
1972 return array($original_text{0}, substr($original_text, 1));
1975 $block_text .= $parts[0]; # Text before current tag.
1976 $tag = $parts[1]; # Tag to handle.
1977 $text = $parts[2]; # Remaining text after current tag.
1980 # Check for: Auto-close tag (like <hr/>)
1981 # Comments and Processing Instructions.
1983 if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) ||
1984 $tag{1} == '!' ||
$tag{1} == '?')
1986 # Just add the tag to the block as if it was text.
1987 $block_text .= $tag;
1991 # Increase/decrease nested tag count. Only do so if
1992 # the tag's name matches the base tag's.
1994 if (preg_match("{^</?$base_tag_name\b}", $tag)) {
1995 if ($tag{1} == '/') $depth--;
1996 else if ($tag{strlen($tag)-2} != '/') $depth++
;
2000 # Check for `markdown="1"` attribute and handle it.
2003 preg_match($markdown_attr_match, $tag, $attr_m) &&
2004 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2006 # Remove `markdown` attribute from opening tag.
2007 $tag = preg_replace($markdown_attr_match, '', $tag);
2009 # Check if text inside this tag must be parsed in span mode.
2010 $this->mode
= $attr_m[2] . $attr_m[3];
2011 $span_mode = $this->mode
== 'span' ||
$this->mode
!= 'block' &&
2012 preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);
2014 # Calculate indent before tag.
2015 preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
2016 $indent = strlen($matches[1]);
2018 # End preceding block with this tag.
2019 $block_text .= $tag;
2020 $parsed .= $this->$hash_method($block_text);
2022 # Get enclosing tag name for the ParseMarkdown function.
2023 preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2024 $tag_name = $matches[1];
2026 # Parse the content using the HTML-in-Markdown parser.
2027 list ($block_text, $text)
2028 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
2029 $tag_name, $span_mode);
2031 # Outdent markdown text.
2033 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2037 # Append tag content to parsed text.
2038 if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
2039 else $parsed .= "$block_text";
2041 # Start over a new block.
2044 else $block_text .= $tag;
2047 } while ($depth > 0);
2050 # Hash last block text that wasn't processed inside the loop.
2052 $parsed .= $this->$hash_method($block_text);
2054 return array($parsed, $text);
2058 function hashClean($text) {
2060 # Called whenever a "clean" tag must be hashed. When a function inserts a
2061 # "clean" tag in $text, it passes through this function and is automatically
2062 # escaped, blocking invalid nested overlap.
2064 # Swap back any tag hash found in $text so we do not have to `unhash`
2065 # multiple times at the end.
2066 $text = $this->unhash($text);
2068 # Then hash the tag.
2069 $key = "C\x1A". md5($text);
2070 $this->html_cleans
[$key] = $text;
2071 $this->html_hashes
[$key] = $text;
2072 return $key; # String that will replace the clean tag.
2076 function doHeaders($text) {
2078 # Redefined to add id attribute support.
2080 # Setext-style headers:
2081 # Header 1 {#header1}
2084 # Header 2 {#header2}
2087 $text = preg_replace_callback(
2088 '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n=+[ ]*\n+ }mx',
2089 array(&$this, '_doHeaders_callback_setext_h1'), $text);
2090 $text = preg_replace_callback(
2091 '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n-+[ ]*\n+ }mx',
2092 array(&$this, '_doHeaders_callback_setext_h2'), $text);
2094 # atx-style headers:
2095 # # Header 1 {#header1}
2096 # ## Header 2 {#header2}
2097 # ## Header 2 with closing hashes ## {#header3}
2099 # ###### Header 6 {#header2}
2101 $text = preg_replace_callback('{
2102 ^(\#{1,6}) # $1 = string of #\'s
2104 (.+?) # $2 = Header text
2106 \#* # optional closing #\'s (not counted)
2107 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
2111 array(&$this, '_doHeaders_callback_atx'), $text);
2115 function _doHeaders_attr($attr) {
2116 if (empty($attr)) return "";
2117 return " id=\"$attr\"";
2119 function _doHeaders_callback_setext_h1($matches) {
2120 $attr = $this->_doHeaders_attr($id =& $matches[2]);
2121 $block = "<h1$attr>".$this->runSpanGamut($matches[1])."</h1>";
2122 return "\n" . $this->hashBlock($block) . "\n\n";
2124 function _doHeaders_callback_setext_h2($matches) {
2125 $attr = $this->_doHeaders_attr($id =& $matches[2]);
2126 $block = "<h2$attr>".$this->runSpanGamut($matches[1])."</h2>";
2127 return "\n" . $this->hashBlock($block) . "\n\n";
2129 function _doHeaders_callback_atx($matches) {
2130 $level = strlen($matches[1]);
2131 $attr = $this->_doHeaders_attr($id =& $matches[3]);
2132 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2133 return "\n" . $this->hashBlock($block) . "\n\n";
2137 function doTables($text) {
2141 $less_than_tab = $this->tab_width
- 1;
2143 # Find tables with leading pipe.
2145 # | Header 1 | Header 2
2146 # | -------- | --------
2150 $text = preg_replace_callback('
2153 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2154 [|] # Optional leading pipe (present)
2155 (.+) \n # $1: Header row (at least one pipe)
2157 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2158 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
2162 [ ]* # Allowed whitespace.
2163 [|] .* \n # Row content.
2166 (?=\n|\Z) # Stop at final double newline.
2168 array(&$this, '_doTable_leadingPipe_callback'), $text);
2171 # Find tables without leading pipe.
2173 # Header 1 | Header 2
2174 # -------- | --------
2178 $text = preg_replace_callback('
2181 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2182 (\S.*[|].*) \n # $1: Header row (at least one pipe)
2184 [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
2185 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
2189 .* [|] .* \n # Row content
2192 (?=\n|\Z) # Stop at final double newline.
2194 array(&$this, '_DoTable_callback'), $text);
2198 function _doTable_leadingPipe_callback($matches) {
2199 $head = $matches[1];
2200 $underline = $matches[2];
2201 $content = $matches[3];
2203 # Remove leading pipe for each row.
2204 $content = preg_replace('/^ *[|]/m', '', $content);
2206 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2208 function _doTable_callback($matches) {
2209 $head = $matches[1];
2210 $underline = $matches[2];
2211 $content = $matches[3];
2213 # Remove any trailing pipes for each line.
2214 $head = preg_replace('/[|] *$/m', '', $head);
2215 $underline = preg_replace('/[|] *$/m', '', $underline);
2216 $content = preg_replace('/[|] *$/m', '', $content);
2218 # Reading alignment from header underline.
2219 $separators = preg_split('/ *[|] */', $underline);
2220 foreach ($separators as $n => $s) {
2221 if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"';
2222 else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
2223 else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
2224 else $attr[$n] = '';
2227 # Creating code spans before splitting the row is an easy way to
2228 # handle a code span containing pipes.
2229 $head = $this->doCodeSpans($head);
2230 $headers = preg_split('/ *[|] */', $head);
2231 $col_count = count($headers);
2233 # Write column headers.
2234 $text = "<table>\n";
2235 $text .= "<thead>\n";
2237 foreach ($headers as $n => $header)
2238 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2240 $text .= "</thead>\n";
2242 # Split content by row.
2243 $rows = explode("\n", trim($content, "\n"));
2245 $text .= "<tbody>\n";
2246 foreach ($rows as $row) {
2247 # Creating code spans before splitting the row is an easy way to
2248 # handle a code span containing pipes.
2249 $row = $this->doCodeSpans($row);
2251 # Split row by cell.
2252 $row_cells = preg_split('/ *[|] */', $row, $col_count);
2253 $row_cells = array_pad($row_cells, $col_count, '');
2256 foreach ($row_cells as $n => $cell)
2257 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2260 $text .= "</tbody>\n";
2261 $text .= "</table>";
2263 return $this->hashBlock($text) . "\n";
2267 function doDefLists($text) {
2269 # Form HTML definition lists.
2271 $less_than_tab = $this->tab_width
- 1;
2273 # Re-usable pattern to match any entire dl list:
2277 [ ]{0,'.$less_than_tab.'}
2278 ((?>.*\S.*\n)+) # $3 = defined term
2280 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2288 (?! # Negative lookahead for another term
2289 [ ]{0,'.$less_than_tab.'}
2290 (?: \S.*\n )+? # defined term
2292 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2294 (?! # Negative lookahead for another definition
2295 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2301 $text = preg_replace_callback('{
2305 array(&$this, '_doDefLists_callback'), $text);
2309 function _doDefLists_callback($matches) {
2310 # Re-usable patterns to match list item bullets and number markers:
2311 $list = $matches[1];
2313 # Turn double returns into triple returns, so that we can make a
2314 # paragraph for the last item in a list, if necessary:
2315 $result = trim($this->processDefListItems($list));
2316 $result = "<dl>\n" . $result . "\n</dl>";
2317 return $this->hashBlock($result) . "\n\n";
2321 function processDefListItems($list_str) {
2323 # Process the contents of a single definition list, splitting it
2324 # into individual term and definition list items.
2326 $less_than_tab = $this->tab_width
- 1;
2328 # trim trailing blank lines:
2329 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2331 # Process definition terms.
2332 $list_str = preg_replace_callback('{
2333 (?:\n\n+|\A\n?) # leading line
2334 ( # definition terms = $1
2335 [ ]{0,'.$less_than_tab.'} # leading whitespace
2336 (?![:][ ]|[ ]) # negative lookahead for a definition
2337 # mark (colon) or more whitespace.
2338 (?: \S.* \n)+? # actual term (not whitespace).
2340 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
2341 # with a definition mark.
2343 array(&$this, '_processDefListItems_callback_dt'), $list_str);
2345 # Process actual definitions.
2346 $list_str = preg_replace_callback('{
2347 \n(\n+)? # leading line = $1
2348 [ ]{0,'.$less_than_tab.'} # whitespace before colon
2349 [:][ ]+ # definition mark (colon)
2350 ((?s:.+?)) # definition text = $2
2351 (?= \n+ # stop at next definition mark,
2352 (?: # next term or end of text
2353 [ ]{0,'.$less_than_tab.'} [:][ ] |
2358 array(&$this, '_processDefListItems_callback_dd'), $list_str);
2362 function _processDefListItems_callback_dt($matches) {
2363 $terms = explode("\n", trim($matches[1]));
2365 foreach ($terms as $term) {
2366 $term = $this->runSpanGamut(trim($term));
2367 $text .= "\n<dt>" . $term . "</dt>";
2369 return $text . "\n";
2371 function _processDefListItems_callback_dd($matches) {
2372 $leading_line = $matches[1];
2375 if ($leading_line ||
preg_match('/\n{2,}/', $def)) {
2376 $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2377 $def = "\n". $def ."\n";
2381 $def = $this->runSpanGamut($this->outdent($def));
2384 return "\n<dd>" . $def . "</dd>\n";
2388 function doItalicsAndBold($text) {
2390 # Redefined to change emphasis by underscore behaviour so that it does not
2391 # work in the middle of a word.
2393 # <strong> must go first:
2394 $text = preg_replace_callback(array(
2397 (?<![a-zA-Z0-9]) # Not preceded by alphanum
2398 (?<!__) # or by two marker chars.
2401 (?=\S) # Not followed by whitespace
2402 (?!__) # or two others marker chars.
2405 [^_]+? # Anthing not em markers.
2407 # Balence any regular _ emphasis inside.
2408 (?<![a-zA-Z0-9]) _ (?=\S) (.+?)
2409 (?<=\S) _ (?![a-zA-Z0-9])
2411 _+ # Allow unbalenced as last resort.
2414 (?<=\S) __ # End mark not preceded by whitespace.
2415 (?![a-zA-Z0-9]) # Not followed by alphanum
2416 (?!__) # or two others marker chars.
2419 ( (?<!\*\*) \*\* ) # $1: Marker (not preceded by two *)
2420 (?=\S) # Not followed by whitespace
2421 (?!\1) # or two others marker chars.
2424 [^*]+? # Anthing not em markers.
2426 # Balence any regular * emphasis inside.
2427 \* (?=\S) (.+?) (?<=\S) \*
2429 \* # Allow unbalenced as last resort.
2432 (?<=\S) \*\* # End mark not preceded by whitespace.
2435 array(&$this, '_doItalicAndBold_strong_callback'), $text);
2437 $text = preg_replace_callback(array(
2438 '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
2439 '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',
2441 array(&$this, '_doItalicAndBold_em_callback'), $text);
function formParagraphs($text) {
#
# Splits $text into chunks separated by blank lines, applies span-level
# formatting to each chunk and wraps it in <p> tags, unless the chunk is a
# placeholder for previously hashed HTML.
#
#   Params:
#       $text - string to process with html <p> tags
#
# Returns the processed text with any remaining hash tokens expanded.
#
    # Strip leading and trailing lines:
    $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Wrap <p> tags and unhashify HTML blocks
    #
    foreach ($grafs as $key => $value) {
        $value = trim($this->runSpanGamut($value));

        # Check if this should be enclosed in a paragraph.
        # Clean tag hashes & block tag hashes are left alone: a chunk is not
        # wrapped when the whole chunk is a key of html_cleans, or when its
        # first 34 characters are a key of html_blocks.
        $clean_key = $value;
        $block_key = substr($value, 0, 34);

        $is_p = (!isset($this->html_blocks[$block_key]) &&
                 !isset($this->html_cleans[$clean_key]));

        if ($is_p) {
            $value = "<p>$value</p>";
        }
        $grafs[$key] = $value;
    }

    # Join grafs in one text, then unhash HTML tags.
    $text = implode("\n\n", $grafs);

    # Finish by removing any tag hashes still present in $text.
    $text = $this->unhash($text);

    return $text;
}
2489 function stripFootnotes($text) {
2491 # Strips footnote definitions from text; every match is handed to
# _stripFootnotes_callback, whose return value replaces the definition.
# A definition may be indented by at most tab_width - 1 spaces.
2494 $less_than_tab = $this->tab_width
- 1;
2496 # Footnote defs are in the form: [^id]: footnote text
2497 $text = preg_replace_callback('{
2498 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
2500 \n? # maybe *one* newline
2501 ( # text = $2 (no blank lines allowed)
2506 (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2507 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2508 # by non-indented content
2512 array(&$this, '_stripFootnotes_callback'),
function _stripFootnotes_callback($matches) {
#
# Callback recording one footnote definition.
#
#   $matches[1] - footnote id as written in the text
#   $matches[2] - footnote body
#
# The outdented body is stored in $this->footnotes under the id prefixed
# with fn_id_prefix. Returns the empty string.
#
    $key  = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);

    $this->footnotes[$key] = $body;

    # The definition itself is replaced by nothing.
    return '';
}
function doFootnotes($text) {
#
# Replace footnote references in $text [^id] with a special text-token of
# the form F\x1Afn:id\x1A:, which appendFootnotes() later replaces with the
# actual footnote marker.
#
# No replacement happens while in_footnote or in_anchor is set.
#
# Returns the (possibly modified) text.
#
    if (!$this->in_footnote && !$this->in_anchor) {
        $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
    }
    return $text;
}
function appendFootnotes($text) {
#
# Append footnote list to text.
#
# First every footnote token in $text is passed to
# _appendFootnotes_callback. If that left anything in footnotes_ordered, a
# <div class="footnotes"> holding an ordered list of the notes is appended;
# each note ends with a backlink to "#fnref:<id>".
#
# Any "%%" in the backlink class or title is replaced by the footnote
# number.
#
# Returns the text, with the footnote list appended when there is one.
#
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array(&$this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
        $text .= "<ol>\n\n";

        # Attributes shared by every backlink. Double quotes must become
        # &quot; or they would terminate the attribute value.
        $attr = " rev=\"footnote\"";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAmpsAndAngles($class);
            $class = str_replace('"', '&quot;', $class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAmpsAndAngles($title);
            $title = str_replace('"', '&quot;', $title);
            $attr .= " title=\"$title\"";
        }
        $num = 0;

        # Flag read by doFootnotes(), which does nothing while it is set.
        $this->in_footnote = true;

        foreach ($this->footnotes_ordered as $note_id => $footnote) {
            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");

            $attr2 = str_replace("%%", ++$num, $attr);

            # Add backlink to last paragraph; create new paragraph if needed.
            # Numeric character references keep the output independent of
            # this file's encoding.
            $backlink = "<a href=\"#fnref:$note_id\"$attr2>&#8617;</a>";
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $this->in_footnote = false;

        # Close the list and its wrapper.
        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
function _appendFootnotes_callback($matches) {
#
# Callback turning one footnote token into its superscript marker.
#
#   $matches[1] - footnote id taken from the token
#
# If a footnote is stored under that id (prefixed with fn_id_prefix), it is
# moved from footnotes to footnotes_ordered and a numbered
# <sup><a rel="footnote"> link is returned. Otherwise the literal reference
# text "[^id]" is returned.
#
# Any "%%" in the link class or title is replaced by the footnote number.
#
    $node_id = $this->fn_id_prefix . $matches[1];

    # Create footnote marker only if it has a corresponding footnote *and*
    # the footnote hasn't been used by another marker.
    if (isset($this->footnotes[$node_id])) {
        # Transfer footnote content to the ordered list.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        unset($this->footnotes[$node_id]);

        # The note was just appended, so the list size is its number.
        $num = count($this->footnotes_ordered);

        # Double quotes must become &quot; or they would terminate the
        # attribute value.
        $attr = " rel=\"footnote\"";
        if ($this->fn_link_class != "") {
            $class = $this->fn_link_class;
            $class = $this->encodeAmpsAndAngles($class);
            $class = str_replace('"', '&quot;', $class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_link_title != "") {
            $title = $this->fn_link_title;
            $title = $this->encodeAmpsAndAngles($title);
            $title = str_replace('"', '&quot;', $title);
            $attr .= " title=\"$title\"";
        }
        $attr = str_replace("%%", $num, $attr);

        return
            "<sup id=\"fnref:$node_id\">".
            "<a href=\"#fn:$node_id\"$attr>$num</a>".
            "</sup>";
    }

    # No footnote available for this id: put the reference text back.
    return "[^".$matches[1]."]";
}
2628 ### Abbreviations ###
function stripAbbreviations($text) {
#
# Strips abbreviation definitions from text; every match is handed to
# _stripAbbreviations_callback, whose return value replaces the definition.
# A definition may be indented by at most tab_width - 1 spaces.
#
# Returns the text without the definitions.
#
    $less_than_tab = $this->tab_width - 1;

    # Abbreviation defs are in the form: *[id]: description
    # The pattern needs /x (it contains whitespace and comments) and /m
    # (the ^ anchor must match at the start of any line).
    $text = preg_replace_callback('{
        ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
        (.*)                    # text = $2 (no blank lines allowed)
        }xm',
        array(&$this, '_stripAbbreviations_callback'),
        $text);
    return $text;
}
function _stripAbbreviations_callback($matches) {
#
# Callback recording one abbreviation definition.
#
#   $matches[1] - the abbreviation as written in the text
#   $matches[2] - its description
#
# The trimmed description is stored in abbr_desciptions under the
# abbreviation, and the abbreviation, escaped with preg_quote(), is appended
# to abbr_matches. Returns the empty string.
#
    $word        = $matches[1];
    $description = trim($matches[2]);

    $this->abbr_matches[]           = preg_quote($word);
    $this->abbr_desciptions[$word]  = $description;

    # The definition itself is replaced by nothing.
    return '';
}
2654 function doAbbreviations($text) {
2656 # Find defined abbreviations in text and wrap them in <abbr> elements.
# Nothing is searched for when abbr_matches is empty. The search pattern is
# an alternation of the abbr_matches entries; each match is handed to
# _doAbbreviations_callback.
2658 if ($this->abbr_matches
) {
2659 // cannot use the /x modifier because abbr_matches may contain spaces,
// which /x would ignore inside the pattern.
2661 $text = preg_replace_callback('{'.
2663 '(?:'. implode('|', $this->abbr_matches
) .')'.
2666 array(&$this, '_doAbbreviations_callback'), $text);
function _doAbbreviations_callback($matches) {
#
# Callback wrapping one matched abbreviation in an <abbr> element.
#
#   $matches[0] - the abbreviation found in the text
#
# With a non-empty description stored in abbr_desciptions, the description
# becomes the title attribute; with an empty one a bare <abbr> is produced.
# Both results go through hashSpan(). A match with no stored description is
# returned unchanged.
#
    $abbr = $matches[0];

    if (!isset($this->abbr_desciptions[$abbr])) {
        return $abbr;
    }

    $desc = $this->abbr_desciptions[$abbr];
    if ($desc == "") {
        return $this->hashSpan("<abbr>$abbr</abbr>");
    }

    $desc = $this->escapeSpecialCharsWithinTagAttributes($desc);
    return $this->hashSpan("<abbr title=\"$desc\">$abbr</abbr>");
}
2696 This is a PHP port of the original Markdown formatter written in Perl
2697 by John Gruber. This special "Extra" version of PHP Markdown features
2698 further enhancements to the syntax for making additional constructs
2699 such as tables and definition lists.
2701 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2702 easy-to-write structured text format into HTML. Markdown's text format
2703 is most similar to that of plain text email, and supports features such
2704 as headers, *emphasis*, code blocks, blockquotes, and links.
2706 Markdown's syntax is designed not as a generic markup language, but
2707 specifically to serve as a front-end to (X)HTML. You can use span-level
2708 HTML tags anywhere in a Markdown document, and you can use block level
2709 HTML tags (like <div> and <table>) as well.
2711 For more information about Markdown's syntax, see:
2713 <http://daringfireball.net/projects/markdown/>
2719 To file bug reports please send email to:
2721 <michel.fortin@michelf.com>
2723 Please include with your report: (1) the example input; (2) the output you
2724 expected; (3) the output Markdown actually produced.
2730 See Readme file for details.
2732 Extra 1.1.4 (3 Aug 2007):
2734 Extra 1.1.3 (3 Jul 2007):
2736 Extra 1.1.2 (7 Feb 2007)
2738 Extra 1.1.1 (28 Dec 2006)
2740 Extra 1.1 (1 Dec 2006)
2742 Extra 1.0.1 (9 Dec 2005)
2744 Extra 1.0 (5 Sep 2005)
2747 Copyright and License
2748 ---------------------
2750 PHP Markdown & Extra
2751 Copyright (c) 2004-2007 Michel Fortin
2752 <http://www.michelf.com/>
2753 All rights reserved.
2756 Copyright (c) 2003-2006 John Gruber
2757 <http://daringfireball.net/>
2758 All rights reserved.
2760 Redistribution and use in source and binary forms, with or without
2761 modification, are permitted provided that the following conditions are met:
2764 * Redistributions of source code must retain the above copyright notice,
2765 this list of conditions and the following disclaimer.
2767 * Redistributions in binary form must reproduce the above copyright
2768 notice, this list of conditions and the following disclaimer in the
2769 documentation and/or other materials provided with the distribution.
2771 * Neither the name "Markdown" nor the names of its contributors may
2772 be used to endorse or promote products derived from this software
2773 without specific prior written permission.
2775 This software is provided by the copyright holders and contributors "as
2776 is" and any express or implied warranties, including, but not limited
2777 to, the implied warranties of merchantability and fitness for a
2778 particular purpose are disclaimed. In no event shall the copyright owner
2779 or contributors be liable for any direct, indirect, incidental, special,
2780 exemplary, or consequential damages (including, but not limited to,
2781 procurement of substitute goods or services; loss of use, data, or
2782 profits; or business interruption) however caused and on any theory of
2783 liability, whether in contract, strict liability, or tort (including
2784 negligence or otherwise) arising in any way out of the use of this
2785 software, even if advised of the possibility of such damage.