3 # Markdown Extra - A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004-2008 Michel Fortin
7 # <http://www.michelf.com/projects/php-markdown/>
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
15 define( 'MARKDOWN_VERSION', "1.0.1m" ); # Sat 21 Jun 2008
16 define( 'MARKDOWNEXTRA_VERSION', "1.2.3" ); # Wed 31 Dec 2008
#
# Global default settings:
#
# Each default below is installed only when the constant has not been
# defined yet, so a host application can override any of them by calling
# define() before this file is loaded. The explicit defined() test replaces
# the former `@define(...)` form, which relied on the error-suppression
# operator to hide the "constant already defined" error.
#

# Change to ">" for HTML output
defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX') or define('MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");

# Define the width of a tab for code blocks.
defined('MARKDOWN_TAB_WIDTH') or define('MARKDOWN_TAB_WIDTH', 4);

# Optional title attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_TITLE') or define('MARKDOWN_FN_LINK_TITLE', "");
defined('MARKDOWN_FN_BACKLINK_TITLE') or define('MARKDOWN_FN_BACKLINK_TITLE', "");

# Optional class attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_CLASS') or define('MARKDOWN_FN_LINK_CLASS', "");
defined('MARKDOWN_FN_BACKLINK_CLASS') or define('MARKDOWN_FN_BACKLINK_CLASS', "");

# Change to false to remove Markdown from posts and/or comments.
defined('MARKDOWN_WP_POSTS') or define('MARKDOWN_WP_POSTS', true);
defined('MARKDOWN_WP_COMMENTS') or define('MARKDOWN_WP_COMMENTS', true);


### Standard Function Interface ###

# Name of the parser class instantiated by the function interface.
defined('MARKDOWN_PARSER_CLASS') or define('MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser');
52 function Markdown($text) {
54 # Initialize the parser and return the result of its transform method.
56 # Setup static parser variable.
58 if (!isset($parser)) {
59 $parser_class = MARKDOWN_PARSER_CLASS
;
60 $parser = new $parser_class;
63 # Transform text using parser.
64 return $parser->transform($text);
68 ### WordPress Plugin Interface ###
71 Plugin Name: Markdown Extra
72 Plugin URI: http://www.michelf.com/projects/php-markdown/
73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
76 Author URI: http://www.michelf.com/
79 if (isset($wp_version)) {
80 # More details about how it works here:
81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
83 # Post content and excerpts
84 # - Remove WordPress paragraph generator.
85 # - Run Markdown on excerpt, then remove all tags.
86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
87 if (MARKDOWN_WP_POSTS
) {
88 remove_filter('the_content', 'wpautop');
89 remove_filter('the_content_rss', 'wpautop');
90 remove_filter('the_excerpt', 'wpautop');
91 add_filter('the_content', 'mdwp_MarkdownPost', 6);
92 add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
93 add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
94 add_filter('get_the_excerpt', 'trim', 7);
95 add_filter('the_excerpt', 'mdwp_add_p');
96 add_filter('the_excerpt_rss', 'mdwp_strip_p');
98 remove_filter('content_save_pre', 'balanceTags', 50);
99 remove_filter('excerpt_save_pre', 'balanceTags', 50);
100 add_filter('the_content', 'balanceTags', 50);
101 add_filter('get_the_excerpt', 'balanceTags', 9);
104 # Add a footnote id prefix to posts when inside a loop.
105 function mdwp_MarkdownPost($text) {
108 $parser_class = MARKDOWN_PARSER_CLASS
;
109 $parser = new $parser_class;
111 if (is_single() ||
is_page() ||
is_feed()) {
112 $parser->fn_id_prefix
= "";
114 $parser->fn_id_prefix
= get_the_ID() . ".";
116 return $parser->transform($text);
120 # - Remove WordPress paragraph generator.
121 # - Remove WordPress auto-link generator.
122 # - Scramble important tags before passing them to the kses filter.
123 # - Run Markdown on excerpt then remove paragraph tags.
124 if (MARKDOWN_WP_COMMENTS
) {
125 remove_filter('comment_text', 'wpautop', 30);
126 remove_filter('comment_text', 'make_clickable');
127 add_filter('pre_comment_content', 'Markdown', 6);
128 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
129 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
130 add_filter('get_comment_text', 'Markdown', 6);
131 add_filter('get_comment_excerpt', 'Markdown', 6);
132 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
134 global $mdwp_hidden_tags, $mdwp_placeholders;
135 $mdwp_hidden_tags = explode(' ',
136 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
137 $mdwp_placeholders = explode(' ', str_rot13(
138 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
139 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
function mdwp_add_p($text) {
#
# Wrap an excerpt in <p> tags unless it is empty or already opens with a
# bare block-level tag (p, ul, ol, dl, pre, blockquote). Inside the wrapped
# text, every run of two or more newlines becomes a paragraph break.
# Returns the resulting text.
#
    $needs_no_wrapping = preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text);
    if ($needs_no_wrapping) {
        return $text;
    }

    $wrapped = '<p>'.$text.'</p>';
    return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
}
function mdwp_strip_p($t) {
#
# Remove every bare <p> and </p> tag (matched case-insensitively) from $t
# and return the remaining text. Paragraph tags carrying attributes do not
# match the pattern and are left in place.
#
    $paragraph_tag_re = '{</?p>}i';
    return preg_replace($paragraph_tag_re, '', $t);
}
function mdwp_hide_tags($text) {
#
# Replace each tag listed in the global $mdwp_hidden_tags with the entry
# at the same position in the global $mdwp_placeholders, and return the
# rewritten text.
#
    global $mdwp_placeholders, $mdwp_hidden_tags;

    $scrambled = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
    return $scrambled;
}
function mdwp_show_tags($text) {
#
# Inverse of mdwp_hide_tags: replace each entry of the global
# $mdwp_placeholders with the tag at the same position in the global
# $mdwp_hidden_tags, and return the rewritten text.
#
    global $mdwp_placeholders, $mdwp_hidden_tags;

    $restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
    return $restored;
}
163 ### bBlog Plugin Info ###
165 function identify_modifier_markdown() {
167 'name' => 'markdown',
168 'type' => 'modifier',
169 'nicename' => 'PHP Markdown Extra',
170 'description' => 'A text-to-HTML conversion tool for web writers',
171 'authors' => 'Michel Fortin and John Gruber',
173 'version' => MARKDOWNEXTRA_VERSION
,
174 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
179 ### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
#
# Smarty modifier entry point: passes $text to Markdown() and returns
# its result unchanged.
#
    $html = Markdown($text);
    return $html;
}
186 ### Textile Compatibility Mode ###
188 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
190 if (strcasecmp(substr(__FILE__
, -16), "classTextile.php") == 0) {
191 # Try to include PHP SmartyPants. Should be in the same directory.
192 @include_once
'smartypants.php';
193 # Fake Textile class. It calls Markdown instead.
195 function TextileThis($text, $lite='', $encode='') {
196 if ($lite == '' && $encode == '') $text = Markdown($text);
197 if (function_exists('SmartyPants')) $text = SmartyPants($text);
200 # Fake restricted version: restrictions are not supported for now.
201 function TextileRestricted($text, $lite='', $noimage='') {
202 return $this->TextileThis($text, $lite);
204 # Workaround to ensure compatibility with TextPattern 4.0.3.
205 function blockLite($text) { return $text; }
212 # Markdown Parser Class
215 class Markdown_Parser
{
217 # Regex to match balanced [brackets].
218 # Needed to insert a maximum bracket depth while converting to PHP.
219 var $nested_brackets_depth = 6;
220 var $nested_brackets_re;
222 var $nested_url_parenthesis_depth = 4;
223 var $nested_url_parenthesis_re;
225 # Table of hash values for escaped characters:
226 var $escape_chars = '\`*_{}[]()>#+-.!';
227 var $escape_chars_re;
229 # Change to ">" for HTML output.
230 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX
;
231 var $tab_width = MARKDOWN_TAB_WIDTH
;
233 # Change to `true` to disallow markup or entities.
234 var $no_markup = false;
235 var $no_entities = false;
237 # Predefined urls and titles for reference links and images.
238 var $predef_urls = array();
239 var $predef_titles = array();
242 function Markdown_Parser() {
244 # Constructor function. Initialize appropriate member variables.
247 $this->prepareItalicsAndBold();
249 $this->nested_brackets_re
=
250 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth
).
251 str_repeat('\])*', $this->nested_brackets_depth
);
253 $this->nested_url_parenthesis_re
=
254 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth
).
255 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth
);
257 $this->escape_chars_re
= '['.preg_quote($this->escape_chars
).']';
259 # Sort document, block, and span gamut in ascending priority order.
260 asort($this->document_gamut
);
261 asort($this->block_gamut
);
262 asort($this->span_gamut
);
266 # Internal hashes used during transformation.
268 var $titles = array();
269 var $html_hashes = array();
271 # Status flag to avoid invalid nesting.
272 var $in_anchor = false;
277 # Called before the transformation process starts to setup parser
280 # Clear global hashes.
281 $this->urls
= $this->predef_urls
;
282 $this->titles
= $this->predef_titles
;
283 $this->html_hashes
= array();
function teardown() {
#
# Called after the transformation process: resets the URL, title and
# HTML-hash tables to empty arrays so they stop holding on to memory.
#
    foreach (array('urls', 'titles', 'html_hashes') as $table) {
        $this->$table = array();
    }
}
299 function transform($text) {
301 # Main function. Performs some preprocessing on the input text
302 # and pass it through the document gamut.
306 # Remove UTF-8 BOM and marker character in input, if present.
307 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
309 # Standardize line endings:
310 # DOS to Unix and Mac to Unix
311 $text = preg_replace('{\r\n?}', "\n", $text);
313 # Make sure $text ends with a couple of newlines:
316 # Convert all tabs to spaces.
317 $text = $this->detab($text);
319 # Turn block-level HTML blocks into hash entries
320 $text = $this->hashHTMLBlocks($text);
322 # Strip any lines consisting only of spaces and tabs.
323 # This makes subsequent regexen easier to write, because we can
324 # match consecutive blank lines with /\n+/ instead of something
325 # contorted like /[ ]*\n+/ .
326 $text = preg_replace('/^[ ]+$/m', '', $text);
328 # Run document gamut methods.
329 foreach ($this->document_gamut
as $method => $priority) {
330 $text = $this->$method($text);
338 var $document_gamut = array(
339 # Strip link definitions, store in hashes.
340 "stripLinkDefinitions" => 20,
342 "runBasicBlockGamut" => 30,
346 function stripLinkDefinitions($text) {
348 # Strips link definitions from text, stores the URLs and titles in
351 $less_than_tab = $this->tab_width
- 1;
353 # Link defs are in the form: ^[id]: url "optional title"
354 $text = preg_replace_callback('{
355 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
357 \n? # maybe *one* newline
359 <?(\S+?)>? # url = $2
361 \n? # maybe one newline
364 (?<=\s) # lookbehind for whitespace
369 )? # title is optional
372 array(&$this, '_stripLinkDefinitions_callback'),
function _stripLinkDefinitions_callback($matches) {
#
# Callback for stripLinkDefinitions. Stores the URL ($2) and the optional
# title ($3) of a link definition under its lower-cased id ($1), then
# returns an empty string so the definition disappears from the text.
#
    $id = strtolower($matches[1]);

    $this->urls[$id] = $matches[2];

    # Bound by reference: when the optional title group is absent from
    # $matches this stores null instead of reading an undefined index.
    $this->titles[$id] =& $matches[3];

    return ''; # String that will replace the block
}
384 function hashHTMLBlocks($text) {
385 if ($this->no_markup
) return $text;
387 $less_than_tab = $this->tab_width
- 1;
389 # Hashify HTML blocks:
390 # We only want to do this for block-level HTML tags, such as headers,
391 # lists, and tables. That's because we still want to wrap <p>s around
392 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
393 # phrase emphasis, and spans. The list of tags we're looking for is
396 # * List "a" is made of tags which can be both inline or block-level.
397 # These will be treated block-level when the start tag is alone on
398 # its line, otherwise they're not matched here and will be taken as
400 # * List "b" is made of tags which are always block-level;
402 $block_tags_a_re = 'ins|del';
403 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
404 'script|noscript|form|fieldset|iframe|math';
406 # Regular expression for the content of a block tag.
407 $nested_tags_level = 4;
409 (?> # optional tag attributes
410 \s # starts with whitespace
412 [^>"/]+ # text outside quotes
414 /+(?!>) # slash not followed by ">"
416 "[^"]*" # text inside double quotes (tolerate ">")
418 \'[^\']*\' # text inside single quotes (tolerate ">")
425 [^<]+ # content without tag
427 <\2 # nested opening tag
428 '.$attr.' # attributes
432 >', $nested_tags_level). # end of opening tag
433 '.*?'. # last level nested tag content
435 </\2\s*> # closing nested tag
438 <(?!/\2\s*> # other tags with a different name
442 $content2 = str_replace('\2', '\3', $content);
444 # First, look for nested blocks, e.g.:
447 # tags for inner block must be indented.
451 # The outermost tags must start at the left margin for this to match, and
452 # the inner nested divs must be indented.
453 # We need to do this before the next, more liberal match, because the next
454 # match will start at the first `<div>` and stop at the first `</div>`.
455 $text = preg_replace_callback('{(?>
457 (?<=\n\n) # Starting after a blank line
459 \A\n? # the beginning of the doc
463 # Match from `\n<tag>` to `</tag>\n`, handling nested tags
466 [ ]{0,'.$less_than_tab.'}
467 <('.$block_tags_b_re.')# start tag = $2
468 '.$attr.'> # attributes followed by > and \n
469 '.$content.' # content, support nesting
470 </\2> # the matching end tag
471 [ ]* # trailing spaces/tabs
472 (?=\n+|\Z) # followed by a newline or end of document
474 | # Special version for tags of group a.
476 [ ]{0,'.$less_than_tab.'}
477 <('.$block_tags_a_re.')# start tag = $3
478 '.$attr.'>[ ]*\n # attributes followed by >
479 '.$content2.' # content, support nesting
480 </\3> # the matching end tag
481 [ ]* # trailing spaces/tabs
482 (?=\n+|\Z) # followed by a newline or end of document
484 | # Special case just for <hr />. It was easier to make a special
485 # case than to make the other regex more complicated.
487 [ ]{0,'.$less_than_tab.'}
488 <(hr) # start tag = $2
489 '.$attr.' # attributes
490 /?> # the matching end tag
492 (?=\n{2,}|\Z) # followed by a blank line or end of document
494 | # Special case for standalone HTML comments:
496 [ ]{0,'.$less_than_tab.'}
501 (?=\n{2,}|\Z) # followed by a blank line or end of document
503 | # PHP and ASP-style processor instructions (<? and <%)
505 [ ]{0,'.$less_than_tab.'}
512 (?=\n{2,}|\Z) # followed by a blank line or end of document
516 array(&$this, '_hashHTMLBlocks_callback'),
521 function _hashHTMLBlocks_callback($matches) {
523 $key = $this->hashBlock($text);
524 return "\n\n$key\n\n";
528 function hashPart($text, $boundary = 'X') {
530 # Called whenever a tag must be hashed when a function insert an atomic
531 # element in the text stream. Passing $text to through this function gives
532 # a unique text-token which will be reverted back when calling unhash.
534 # The $boundary argument specify what character should be used to surround
535 # the token. By convention, "B" is used for block elements that needs not
536 # to be wrapped into paragraph tags at the end, ":" is used for elements
537 # that are word separators and "X" is used in the general case.
539 # Swap back any tag hash found in $text so we do not have to `unhash`
540 # multiple times at the end.
541 $text = $this->unhash($text);
543 # Then hash the block.
545 $key = "$boundary\x1A" . ++
$i . $boundary;
546 $this->html_hashes
[$key] = $text;
547 return $key; # String that will replace the tag.
function hashBlock($text) {
#
# Convenience wrapper around hashPart for block-level content: hashes
# $text with the 'B' boundary character and returns the resulting token.
#
    $block_boundary = 'B';
    $token = $this->hashPart($text, $block_boundary);
    return $token;
}
559 var $block_gamut = array(
561 # These are all the transformations that form block-level
562 # tags like paragraphs, headers, and list items.
565 "doHorizontalRules" => 20,
568 "doCodeBlocks" => 50,
569 "doBlockQuotes" => 60,
function runBlockGamut($text) {
#
# Run block gamut transformations.
#
# Raw HTML in the Markdown source has to be hashed before anything else.
# This is repeated for each block rather than done once at the beginning,
# because hashed blocks can be part of list items and may have been
# indented there; such indented blocks would have been taken for code
# blocks by an earlier pass of hashHTMLBlocks.
#
    $hashed = $this->hashHTMLBlocks($text);
    return $this->runBasicBlockGamut($hashed);
}
function runBasicBlockGamut($text) {
#
# Run block gamut transformations without hashing HTML blocks first.
# Useful when the HTML blocks are known to be hashed already, as in the
# first whole-document pass.
#
    # Only the method names (the keys) are needed; the priorities were
    # used for ordering the table beforehand.
    $methods = array_keys($this->block_gamut);
    foreach ($methods as $method) {
        $text = $this->$method($text);
    }

    # Finally form paragraphs and restore hashed blocks.
    $text = $this->formParagraphs($text);

    return $text;
}
603 function doHorizontalRules($text) {
604 # Do Horizontal Rules:
607 ^[ ]{0,3} # Leading space
608 ([-*_]) # $1: First marker
609 (?> # Repeated marker group
610 [ ]{0,2} # Zero, one, or two spaces.
611 \1 # Marker character
612 ){2,} # Group repeated at least twice
613 [ ]* # Tailing spaces
616 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
621 var $span_gamut = array(
623 # These are all the transformations that occur *within* block-level
624 # tags like paragraphs, headers, and list items.
626 # Process character escapes, code spans, and inline HTML
630 # Process anchor and image tags. Images must come first,
631 # because ![foo][f] looks like an anchor.
635 # Make links out of things like `<http://example.com/>`
636 # Must come after doAnchors, because you can use < and >
637 # delimiters in inline links like [this](<url>).
639 "encodeAmpsAndAngles" => 40,
641 "doItalicsAndBold" => 50,
642 "doHardBreaks" => 60,
function runSpanGamut($text) {
#
# Run span gamut transformations: feeds $text through every method named
# by a key of $this->span_gamut, in the table's order, and returns the
# final text.
#
    $methods = array_keys($this->span_gamut);
    foreach ($methods as $method) {
        $text = $this->$method($text);
    }

    return $text;
}
function doHardBreaks($text) {
#
# Do hard breaks: every run of two or more spaces followed by a newline
# is replaced by whatever _doHardBreaks_callback returns.
#
    $hard_break_re = '/ {2,}\n/';
    $callback = array(&$this, '_doHardBreaks_callback');
    return preg_replace_callback($hard_break_re, $callback, $text);
}
function _doHardBreaks_callback($matches) {
#
# Callback for doHardBreaks: returns the hashed token for a <br> tag
# (closed with the configured empty-element suffix) plus a newline.
#
    $break = "<br" . $this->empty_element_suffix . "\n";
    return $this->hashPart($break);
}
667 function doAnchors($text) {
669 # Turn Markdown link shortcuts into XHTML <a> tags.
671 if ($this->in_anchor
) return $text;
672 $this->in_anchor
= true;
675 # First, handle reference-style links: [link text] [id]
677 $text = preg_replace_callback('{
678 ( # wrap whole match in $1
680 ('.$this->nested_brackets_re
.') # link text = $2
683 [ ]? # one optional space
684 (?:\n[ ]*)? # one optional newline followed by spaces
691 array(&$this, '_doAnchors_reference_callback'), $text);
694 # Next, inline-style links: [link text](url "optional title")
696 $text = preg_replace_callback('{
697 ( # wrap whole match in $1
699 ('.$this->nested_brackets_re
.') # link text = $2
706 ('.$this->nested_url_parenthesis_re
.') # href = $4
710 ([\'"]) # quote char = $6
713 [ ]* # ignore any spaces/tabs between closing quote and )
714 )? # title is optional
718 array(&$this, '_DoAnchors_inline_callback'), $text);
721 # Last, handle reference-style shortcuts: [link text]
722 # These must come last in case you've also got [link test][1]
723 # or [link test](/foo)
725 // $text = preg_replace_callback('{
726 // ( # wrap whole match in $1
728 // ([^\[\]]+) # link text = $2; can\'t contain [ or ]
732 // array(&$this, '_doAnchors_reference_callback'), $text);
734 $this->in_anchor
= false;
function _doAnchors_reference_callback($matches) {
#
# Callback for reference-style links. $matches[1] is the whole match,
# $matches[2] the link text and $matches[3] the (possibly empty) link id.
# Returns a hashed <a> tag when the id is known, or the untouched source
# text when it is not.
#
    $whole_match = $matches[1];
    $link_text   = $matches[2];
    $link_id     = isset($matches[3]) ? $matches[3] : "";

    # Shortcut links like [this][] or [this] use the link text as id.
    if ($link_id == "") {
        $link_id = $link_text;
    }

    # Lower-case and turn embedded newlines into spaces.
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    # Unknown id: leave the source text as it was.
    if (!isset($this->urls[$link_id])) {
        return $whole_match;
    }

    $href = $this->encodeAttribute($this->urls[$link_id]);
    $anchor = "<a href=\"$href\"";

    if (isset($this->titles[$link_id])) {
        $title_attr = $this->encodeAttribute($this->titles[$link_id]);
        $anchor .= " title=\"$title_attr\"";
    }

    $anchor .= ">" . $this->runSpanGamut($link_text) . "</a>";

    return $this->hashPart($anchor);
}
function _doAnchors_inline_callback($matches) {
#
# Callback for inline-style links: [link text](url "optional title").
# $matches[2] is the link text, $matches[3] the URL when written as <url>,
# $matches[4] the bare URL otherwise, and $matches[7] the optional title.
# Returns the hashed <a> tag.
#
    # The link text goes through the span gamut exactly once. It used to
    # be processed a second time just before being emitted, which redid
    # all the span work on text that was already transformed.
    $link_text = $this->runSpanGamut($matches[2]);
    $url       = $matches[3] == '' ? $matches[4] : $matches[3];
    $title     =& $matches[7];

    $url = $this->encodeAttribute($url);

    $result = "<a href=\"$url\"";
    # The title group is optional; only emit the attribute when it matched.
    if (isset($title)) {
        $title = $this->encodeAttribute($title);
        $result .= " title=\"$title\"";
    }

    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
792 function doImages($text) {
794 # Turn Markdown image shortcuts into <img> tags.
797 # First, handle reference-style labeled images: ![alt text][id]
799 $text = preg_replace_callback('{
800 ( # wrap whole match in $1
802 ('.$this->nested_brackets_re
.') # alt text = $2
805 [ ]? # one optional space
806 (?:\n[ ]*)? # one optional newline followed by spaces
814 array(&$this, '_doImages_reference_callback'), $text);
817 # Next, handle inline images: ![alt text](url "optional title")
818 # Don't forget: encode * and _
820 $text = preg_replace_callback('{
821 ( # wrap whole match in $1
823 ('.$this->nested_brackets_re
.') # alt text = $2
825 \s? # One optional whitespace character
829 <(\S*)> # src url = $3
831 ('.$this->nested_url_parenthesis_re
.') # src url = $4
835 ([\'"]) # quote char = $6
839 )? # title is optional
843 array(&$this, '_doImages_inline_callback'), $text);
function _doImages_reference_callback($matches) {
#
# Callback for reference-style images: ![alt text][id].
# $matches[1] is the whole match, $matches[2] the alt text and
# $matches[3] the (possibly empty) link id. Returns a hashed <img> tag
# when the id is known, or the untouched source text when it is not.
#
    $whole_match = $matches[1];
    $alt_text    = $matches[2];

    # Shortcut images like ![this][] use the alt text as id.
    $link_id = strtolower($matches[3]);
    if ($link_id == "") {
        $link_id = strtolower($alt_text);
    }

    $alt_text = $this->encodeAttribute($alt_text);

    # If there's no such link ID, leave intact.
    if (!isset($this->urls[$link_id])) {
        return $whole_match;
    }

    $src = $this->encodeAttribute($this->urls[$link_id]);
    $image = "<img src=\"$src\" alt=\"$alt_text\"";

    if (isset($this->titles[$link_id])) {
        $title_attr = $this->encodeAttribute($this->titles[$link_id]);
        $image .= " title=\"$title_attr\"";
    }

    $image .= $this->empty_element_suffix;

    return $this->hashPart($image);
}
function _doImages_inline_callback($matches) {
#
# Callback for inline images: ![alt text](url "optional title").
# $matches[2] is the alt text, $matches[3] the URL when written as <url>,
# $matches[4] the bare URL otherwise, and $matches[7] the optional title.
# Returns the hashed <img> tag.
#
    $raw_src = $matches[3] == '' ? $matches[4] : $matches[3];

    $alt = $this->encodeAttribute($matches[2]);
    $src = $this->encodeAttribute($raw_src);

    $image = "<img src=\"$src\" alt=\"$alt\"";

    # The title group is optional; only emit the attribute when it matched.
    if (isset($matches[7])) {
        $title_attr = $this->encodeAttribute($matches[7]);
        $image .= " title=\"$title_attr\"";
    }

    $image .= $this->empty_element_suffix;

    return $this->hashPart($image);
}
894 function doHeaders($text) {
895 # Setext-style headers:
902 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
903 array(&$this, '_doHeaders_callback_setext'), $text);
908 # ## Header 2 with closing hashes ##
912 $text = preg_replace_callback('{
913 ^(\#{1,6}) # $1 = string of #\'s
915 (.+?) # $2 = Header text
917 \#* # optional closing #\'s (not counted)
920 array(&$this, '_doHeaders_callback_atx'), $text);
function _doHeaders_callback_setext($matches) {
#
# Callback for Setext-style headers. $matches[1] is the header text and
# $matches[2] the underline made of "=" (level 1) or "-" (level 2).
# Returns the hashed header block surrounded by newlines, or the original
# text when the match is really an empty list item.
#
    # Terrible hack to check we haven't found an empty list item.
    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
        return $matches[0];

    # Square-bracket offset: the curly-brace form ($str{0}) is deprecated
    # in PHP 7.4 and no longer parses in PHP 8.
    $level = $matches[2][0] == '=' ? 1 : 2;
    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Callback for atx-style headers. The header level is the number of "#"
# characters in $matches[1]; $matches[2] is the header text. Returns the
# hashed header block surrounded by newlines.
#
    $depth   = strlen($matches[1]);
    $content = $this->runSpanGamut($matches[2]);
    $header  = "<h$depth>" . $content . "</h$depth>";

    return "\n" . $this->hashBlock($header) . "\n\n";
}
940 function doLists($text) {
942 # Form HTML ordered (numbered) and unordered (bulleted) lists.
944 $less_than_tab = $this->tab_width
- 1;
946 # Re-usable patterns to match list item bullets and number markers:
947 $marker_ul_re = '[*+-]';
948 $marker_ol_re = '\d+[.]';
949 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
951 $markers_relist = array($marker_ul_re, $marker_ol_re);
953 foreach ($markers_relist as $marker_re) {
954 # Re-usable pattern to match any entire ul or ol list:
958 [ ]{0,'.$less_than_tab.'}
959 ('.$marker_re.') # $3 = first list item marker
968 (?! # Negative lookahead for another list item marker
976 # We use a different prefix before nested lists than top-level lists.
977 # See extended comment in _ProcessListItems().
979 if ($this->list_level
) {
980 $text = preg_replace_callback('{
984 array(&$this, '_doLists_callback'), $text);
987 $text = preg_replace_callback('{
988 (?:(?<=\n)\n|\A\n?) # Must eat the newline
991 array(&$this, '_doLists_callback'), $text);
997 function _doLists_callback($matches) {
998 # Re-usable patterns to match list item bullets and number markers:
999 $marker_ul_re = '[*+-]';
1000 $marker_ol_re = '\d+[.]';
1001 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1003 $list = $matches[1];
1004 $list_type = preg_match("/$marker_ul_re/", $matches[3]) ?
"ul" : "ol";
1006 $marker_any_re = ( $list_type == "ul" ?
$marker_ul_re : $marker_ol_re );
1009 $result = $this->processListItems($list, $marker_any_re);
1011 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1012 return "\n". $result ."\n\n";
1015 var $list_level = 0;
1017 function processListItems($list_str, $marker_any_re) {
1019 # Process the contents of a single ordered or unordered list, splitting it
1020 # into individual list items.
1022 # The $this->list_level global keeps track of when we're inside a list.
1023 # Each time we enter a list, we increment it; when we leave a list,
1024 # we decrement. If it's zero, we're not in a list anymore.
1026 # We do this because when we're not inside a list, we want to treat
1027 # something like this:
1029 # I recommend upgrading to version
1030 # 8. Oops, now this line is treated
1033 # As a single paragraph, despite the fact that the second line starts
1034 # with a digit-period-space sequence.
1036 # Whereas when we're inside a list (or sub-list), that line will be
1037 # treated as the start of a sub-list. What a kludge, huh? This is
1038 # an aspect of Markdown's syntax that's hard to parse perfectly
1039 # without resorting to mind-reading. Perhaps the solution is to
1040 # change the syntax rules such that sub-lists must start with a
1041 # starting cardinal number; e.g. "1." or "a.".
1043 $this->list_level++
;
1045 # trim trailing blank lines:
1046 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1048 $list_str = preg_replace_callback('{
1049 (\n)? # leading line = $1
1050 (^[ ]*) # leading whitespace = $2
1051 ('.$marker_any_re.' # list marker and space = $3
1052 (?:[ ]+|(?=\n)) # space only required if item is not empty
1054 ((?s:.*?)) # list item text = $4
1055 (?:(\n+(?=\n))|\n) # tailing blank line = $5
1056 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1058 array(&$this, '_processListItems_callback'), $list_str);
1060 $this->list_level
--;
function _processListItems_callback($matches) {
#
# Callback for processListItems: turns one matched list item into an
# <li> element. $matches[1] is the leading blank line, $matches[2] the
# leading whitespace, $matches[3] the marker with its trailing space,
# $matches[4] the item text and $matches[5] the trailing blank line.
#
    $item         = $matches[4];
    $indent       = $matches[2];
    $marker_space = $matches[3];

    # An item is block-level when a blank line surrounds it or occurs
    # inside it. The trailing group may be missing from $matches.
    $is_block_item = !empty($matches[1])
        || !empty($matches[5])
        || preg_match('/\n{2,}/', $item);

    if (!$is_block_item) {
        # Recursion for sub-lists:
        $item = $this->doLists($this->outdent($item));
        $item = preg_replace('/\n+$/', '', $item);
        $item = $this->runSpanGamut($item);
    }
    else {
        # Replace marker with the appropriate whitespace indentation
        $item = $indent . str_repeat(' ', strlen($marker_space)) . $item;
        $item = $this->runBlockGamut($this->outdent($item)."\n");
    }

    return "<li>" . $item . "</li>\n";
}
1088 function doCodeBlocks($text) {
1090 # Process Markdown `<pre><code>` blocks.
1092 $text = preg_replace_callback('{
1094 ( # $1 = the code block -- one or more lines, starting with a space/tab
1096 [ ]{'.$this->tab_width
.'} # Lines must start with a tab or a tab-width of spaces
1100 ((?=^[ ]{0,'.$this->tab_width
.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1102 array(&$this, '_doCodeBlocks_callback'), $text);
function _doCodeBlocks_callback($matches) {
#
# Callback for doCodeBlocks: outdents the matched code ($matches[1]),
# escapes HTML special characters (quotes are left alone), trims the
# surrounding newlines and returns the hashed <pre><code> block between
# blank lines.
#
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

    # trim leading newlines and trailing newlines
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $html = "<pre><code>" . $code . "\n</code></pre>";
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
function makeCodeSpan($code) {
#
# Create a code span markup for $code. Called from handleSpanToken.
# The code is trimmed, HTML-escaped (quotes are left alone), wrapped in
# <code> tags and returned as a hashed token.
#
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    $span = "<code>" . $escaped . "</code>";
    return $this->hashPart($span);
}
1129 var $em_relist = array(
1130 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
1131 '*' => '(?<=\S)(?<!\*)\*(?!\*)',
1132 '_' => '(?<=\S)(?<!_)_(?!_)',
1134 var $strong_relist = array(
1135 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
1136 '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
1137 '__' => '(?<=\S)(?<!_)__(?!_)',
1139 var $em_strong_relist = array(
1140 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
1141 '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
1142 '___' => '(?<=\S)(?<!_)___(?!_)',
1144 var $em_strong_prepared_relist;
function prepareItalicsAndBold() {
#
# Prepare the regular expressions used to search for emphasis tokens.
# One expression is built for every combination of an $em_relist key and
# a $strong_relist key and stored in $em_strong_prepared_relist under the
# concatenation of the two keys.
#
    foreach ($this->em_relist as $em => $em_re) {
        foreach ($this->strong_relist as $strong => $strong_re) {
            $state = "$em$strong";

            # Allowed token expressions: the combined em+strong expression
            # comes first when one exists for this state.
            $alternatives = array($em_re, $strong_re);
            if (isset($this->em_strong_relist[$state])) {
                array_unshift($alternatives, $this->em_strong_relist[$state]);
            }

            # Master expression: one capturing group over all alternatives.
            $this->em_strong_prepared_relist[$state] =
                '{(' . implode('|', $alternatives) . ')}';
        }
    }
}
1168 function doItalicsAndBold($text) {
1169 $token_stack = array('');
1170 $text_stack = array('');
1173 $tree_char_em = false;
1177 # Get prepared regular expression for searching emphasis tokens
1178 # in current context.
1180 $token_re = $this->em_strong_prepared_relist
["$em$strong"];
1183 # Each loop iteration searches for the next emphasis token.
1184 # Each token is then passed to handleSpanToken.
1186 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE
);
1187 $text_stack[0] .= $parts[0];
1188 $token =& $parts[1];
1191 if (empty($token)) {
1192 # Reached end of text span: empty stack without emitting.
1193 # any more emphasis.
1194 while ($token_stack[0]) {
1195 $text_stack[1] .= array_shift($token_stack);
1196 $text_stack[0] .= array_shift($text_stack);
1201 $token_len = strlen($token);
1202 if ($tree_char_em) {
1203 # Reached closing marker while inside a three-char emphasis.
1204 if ($token_len == 3) {
1205 # Three-char closing marker, close em and strong.
1206 array_shift($token_stack);
1207 $span = array_shift($text_stack);
1208 $span = $this->runSpanGamut($span);
1209 $span = "<strong><em>$span</em></strong>";
1210 $text_stack[0] .= $this->hashPart($span);
1214 # Other closing marker: close one em or strong and
1215 # change current token state to match the other
1216 $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1217 $tag = $token_len == 2 ?
"strong" : "em";
1218 $span = $text_stack[0];
1219 $span = $this->runSpanGamut($span);
1220 $span = "<$tag>$span</$tag>";
1221 $text_stack[0] = $this->hashPart($span);
1222 $
$tag = ''; # $$tag stands for $em or $strong
1224 $tree_char_em = false;
1225 } else if ($token_len == 3) {
1227 # Reached closing marker for both em and strong.
1228 # Closing strong marker:
1229 for ($i = 0; $i < 2; ++
$i) {
1230 $shifted_token = array_shift($token_stack);
1231 $tag = strlen($shifted_token) == 2 ?
"strong" : "em";
1232 $span = array_shift($text_stack);
1233 $span = $this->runSpanGamut($span);
1234 $span = "<$tag>$span</$tag>";
1235 $text_stack[0] .= $this->hashPart($span);
1236 $
$tag = ''; # $$tag stands for $em or $strong
1239 # Reached opening three-char emphasis marker. Push on token
1240 # stack; will be handled by the special condition above.
1243 array_unshift($token_stack, $token);
1244 array_unshift($text_stack, '');
1245 $tree_char_em = true;
1247 } else if ($token_len == 2) {
1249 # Unwind any dangling emphasis marker:
1250 if (strlen($token_stack[0]) == 1) {
1251 $text_stack[1] .= array_shift($token_stack);
1252 $text_stack[0] .= array_shift($text_stack);
1254 # Closing strong marker:
1255 array_shift($token_stack);
1256 $span = array_shift($text_stack);
1257 $span = $this->runSpanGamut($span);
1258 $span = "<strong>$span</strong>";
1259 $text_stack[0] .= $this->hashPart($span);
1262 array_unshift($token_stack, $token);
1263 array_unshift($text_stack, '');
1267 # Here $token_len == 1
1269 if (strlen($token_stack[0]) == 1) {
1270 # Closing emphasis marker:
1271 array_shift($token_stack);
1272 $span = array_shift($text_stack);
1273 $span = $this->runSpanGamut($span);
1274 $span = "<em>$span</em>";
1275 $text_stack[0] .= $this->hashPart($span);
1278 $text_stack[0] .= $token;
1281 array_unshift($token_stack, $token);
1282 array_unshift($text_stack, '');
1287 return $text_stack[0];
function doBlockQuotes($text) {
#
# Convert runs of ">"-quoted lines into <blockquote> elements.
#
# A run is a quoted first line, its consecutive non-blank continuation
# lines and any trailing blank lines, repeated one or more times. Each
# run is handed to _doBlockQuotes_callback as $matches[1].
#
# Returns $text with every run replaced by the callback's result.
#
	$text = preg_replace_callback('/
		  (								# Wrap whole match in $1
			(?>
			  ^[ ]*>[ ]?			# ">" at the start of a line
				.+\n					# rest of the first line
			  (.+\n)*					# subsequent consecutive lines
			  \n*						# blanks
			)+
		  )
		/xm',
		array(&$this, '_doBlockQuotes_callback'), $text);

	return $text;
}
function _doBlockQuotes_callback($matches) {
#
# Callback for doBlockQuotes: turn one matched run of quoted lines into
# a hashed <blockquote> block.
#
# $matches[1] is the whole quoted run. Returns the hashed block token
# with a leading newline and two trailing newlines.
#
	$bq = $matches[1];

	# Trim one level of quoting and empty out whitespace-only lines.
	$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
	$bq = $this->runBlockGamut($bq);		# recurse

	# Indent the generated markup by prefixing every line.
	$bq = preg_replace('/^/m', " ", $bq);
	# These leading spaces cause problems with <pre> content,
	# so the second callback strips them back out inside <pre>.
	$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		array(&$this, '_doBlockQuotes_callback2'), $bq);

	return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Callback for _doBlockQuotes_callback: remove the blockquote indentation
# from a <pre> block, where leading whitespace is significant.
#
# $matches[1] is the <pre>...</pre> span (with any preceding whitespace).
# Returns it with one leading space removed from every line.
#
	$pre = $matches[1];
	$pre = preg_replace('/^ /m', '', $pre);
	return $pre;
}
function formParagraphs($text) {
#
#	Params:
#		$text - string to process with html <p> tags
#
#	Splits $text on blank lines. A chunk that consists solely of a block
#	hash token is replaced by the HTML stored for it in $this->html_hashes;
#	any other chunk is run through the span gamut, wrapped in <p>...</p>
#	and unhashed. Returns the chunks joined by blank lines.
#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# Wrap <p> tags and unhashify HTML blocks
	#
	foreach ($grafs as $key => $value) {
		if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
			# Is a paragraph.
			$value = $this->runSpanGamut($value);
			$value = preg_replace('/^([ ]*)/', "<p>", $value);
			$value .= "</p>";
			$grafs[$key] = $this->unhash($value);
		}
		else {
			# Is a hashed block: swap the stored HTML back in.
			# (A commented-out, never-executed experiment handling
			# <div markdown="1"> used to sit here; it has been dropped.)
			$grafs[$key] = $this->html_hashes[$value];
		}
	}

	return implode("\n\n", $grafs);
}
function encodeAttribute($text) {
#
# Encode text for a double-quoted HTML attribute. This function
# is *not* suitable for attributes enclosed in single quotes.
#
# Ampersands and "<" are encoded by encodeAmpsAndAngles; double quotes
# are then replaced by the &quot; entity. Returns the encoded text.
#
	$text = $this->encodeAmpsAndAngles($text);
	$text = str_replace('"', '&quot;', $text);
	return $text;
}
function encodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to
# be encoded. Valid character entities are left alone unless the
# no-entities mode is set.
#
# Returns $text with "&" (see above) and every "<" replaced by
# &amp; and &lt; respectively.
#
	if ($this->no_entities) {
		# Every ampersand is encoded, including those of entities.
		$text = str_replace('&', '&amp;', $text);
	} else {
		# Ampersand-encoding based entirely on Nat Irons's Amputator
		# MT plugin: <http://bumppo.net/projects/amputator/>
		# Only ampersands that do not start an entity are encoded.
		$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
							'&amp;', $text);
	}
	# Encode remaining <'s
	$text = str_replace('<', '&lt;', $text);

	return $text;
}
function doAutoLinks($text) {
#
# Turn <http://example.com/> style URLs and <address@domain.foo> style
# e-mail addresses into links. Each match is handed to the matching
# _doAutoLinks_*_callback with the URL or address in $matches[1].
#
# Returns $text with the automatic links replaced.
#
	$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
		array(&$this, '_doAutoLinks_url_callback'), $text);

	# Email addresses: <address@domain.foo>
	$text = preg_replace_callback('{
		<
		(?:mailto:)?
		(
			[-.\w\x80-\xFF]+
			\@
			[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
		)
		>
		}xi',
		array(&$this, '_doAutoLinks_email_callback'), $text);

	return $text;
}
function _doAutoLinks_url_callback($matches) {
#
# Callback for doAutoLinks: build the anchor for an automatic URL link.
# $matches[1] is the URL; it is attribute-encoded and used both as the
# href and as the link text. Returns the hashed anchor.
#
	$href = $this->encodeAttribute($matches[1]);
	return $this->hashPart('<a href="' . $href . '">' . $href . '</a>');
}
function _doAutoLinks_email_callback($matches) {
#
# Callback for doAutoLinks: build the obfuscated mailto link for the
# address in $matches[1]. Returns the hashed link.
#
	return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
function encodeEmailAddress($addr) {
#
#	Input: an email address, e.g. "foo@example.com"
#
#	Output: the email address as a mailto link, with each character
#		of the address encoded as either a decimal or hex entity, in
#		the hopes of foiling most address harvesting spam bots. E.g.
#		(illustrative, shortened):
#
#	  <a href="&#109;&#x61;&#105;&#x6c;...">&#x66;&#111;&#111;&#x40;...</a>
#
#	Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
#	With some optimizations by Milian Wolff.
#
	$addr = "mailto:" . $addr;
	# Split into single bytes.
	$chars = preg_split('/(?<!^)(?!$)/', $addr);
	$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

	foreach ($chars as $key => $char) {
		$ord = ord($char);
		# Ignore non-ascii chars.
		if ($ord < 128) {
			$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
			# roughly 10% raw, 45% hex, 45% dec
			# '@' *must* be encoded. I insist.
			if ($r > 90 && $char != '@') /* do nothing */;
			else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
			else              $chars[$key] = '&#'.$ord.';';
		}
	}

	$addr = implode('', $chars);
	$text = implode('', array_slice($chars, 7)); # text without `mailto:`
	$addr = "<a href=\"$addr\">$text</a>";

	return $addr;
}
function parseSpan($str) {
#
# Take the string $str and parse it into tokens, hashing embedded HTML,
# escaped characters and handling code spans.
#
# Returns the text with each recognised token replaced by whatever
# handleSpanToken returns for it; text between tokens is kept as is.
#
	$output = '';

	$span_re = '{
			(
				\\\\'.$this->escape_chars_re.'
			|
				(?<![`\\\\])
				`+						# code span marker
		'.( $this->no_markup ? '' : '
			|
				<!--    .*?     -->		# comment
			|
				<\?.*?\?> | <%.*?%>		# processing instruction
			|
				<[/!$]?[-a-zA-Z0-9:]+	# regular tags
				(?>
					\s
					(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
				)?
				>
		').'
			)
			}xs';

	while (1) {
		#
		# Each loop iteration searches for either the next tag, the next
		# opening code span marker, or the next escaped character.
		# Each token is then passed to handleSpanToken.
		#
		$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

		# Create token from text preceding tag.
		if ($parts[0] != "") {
			$output .= $parts[0];
		}

		# Check if we reach the end.
		if (isset($parts[1])) {
			# handleSpanToken takes the remaining text by reference and
			# may consume part of it (e.g. the body of a code span).
			$output .= $this->handleSpanToken($parts[1], $parts[2]);
			$str = $parts[2];
		}
		else {
			break;
		}
	}

	return $output;
}
function handleSpanToken($token, &$str) {
#
# Handle $token provided by parseSpan by determining its nature and
# returning the corresponding value that should replace it.
#
# $str is the text following the token, passed by reference: when a
# code span is closed, the consumed part is removed from it.
#
	# Square-bracket offsets are used because the curly-brace form
	# ($token{0}) is rejected by PHP 8.
	switch ($token[0]) {
		case "\\":
			# Backslash escape: emit the escaped character as an entity.
			return $this->hashPart("&#". ord($token[1]). ";");
		case "`":
			# Search for end marker in remaining text.
			if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
				$str, $matches))
			{
				$str = $matches[2];
				$codespan = $this->makeCodeSpan($matches[1]);
				return $this->hashPart($codespan);
			}
			return $token; // return as text since no ending marker found.
		default:
			# Inline HTML tag, comment or processing instruction.
			return $this->hashPart($token);
	}
}
function outdent($text) {
#
# Strip one indentation level from the start of every line: either a
# single tab or between one and $this->tab_width spaces.
#
	$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
	return preg_replace($indent_re, '', $text);
}
1589 # String length function for detab. `_initDetab` will create a function to
1590 # handle UTF-8 if the default function does not exist.
1591 var $utf8_strlen = 'mb_strlen';
function detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
# For each line we separate the line in blocks delimited by
# tab characters. Then we reconstruct every line by adding the
# appropriate number of spaces between each block.
#
# Only lines that contain a tab are passed to _detab_callback.
# Returns the text with tabs expanded.
#
	$text = preg_replace_callback('/^.*\t.*$/m',
		array(&$this, '_detab_callback'), $text);

	return $text;
}
function _detab_callback($matches) {
#
# Callback for detab: expand the tabs of one line ($matches[0]) so that
# each block following a tab starts on a multiple of $this->tab_width.
# Returns the expanded line.
#
	$line = $matches[0];
	$strlen = $this->utf8_strlen; # strlen function for UTF-8.

	# Split in blocks.
	$blocks = explode("\t", $line);
	# Add each block to the line, starting over from the first one.
	$line = $blocks[0];
	unset($blocks[0]); # Do not add first block twice.
	foreach ($blocks as $block) {
		# Calculate amount of space, insert spaces, insert block.
		$amount = $this->tab_width -
			$strlen($line, 'UTF-8') % $this->tab_width;
		$line .= str_repeat(" ", $amount) . $block;
	}
	return $line;
}
function _initDetab() {
#
# Check for the availability of the function in the `utf8_strlen` property
# (initially `mb_strlen`). If the function is not available, create a
# function that will loosely count the number of UTF-8 characters with a
# regular expression.
#
# NOTE(review): create_function() is deprecated as of PHP 7.2 and removed
# in PHP 8.0, so this fallback only works on older runtimes; it is kept
# because the rest of the file targets those runtimes.
#
	if (function_exists($this->utf8_strlen)) return;
	$this->utf8_strlen = create_function('$text', 'return preg_match_all(
		"/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
		$text, $m);');
}
function unhash($text) {
#
# Replace every hash token in $text (a marker character, \x1A, digits,
# then the same marker character) by the value stored for it.
#
	$token_re = '/(.)\x1A[0-9]+\1/';
	return preg_replace_callback($token_re,
		array(&$this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
#
# Callback for unhash: look up the whole matched token in html_hashes.
#
	$token = $matches[0];
	return $this->html_hashes[$token];
}
1652 # Markdown Extra Parser Class
1655 class MarkdownExtra_Parser
extends Markdown_Parser
{
1657 # Prefix for footnote ids.
1658 var $fn_id_prefix = "";
1660 # Optional title attribute for footnote links and backlinks.
1661 var $fn_link_title = MARKDOWN_FN_LINK_TITLE
;
1662 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE
;
1664 # Optional class attribute for footnote links and backlinks.
1665 var $fn_link_class = MARKDOWN_FN_LINK_CLASS
;
1666 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS
;
1668 # Predefined abbreviations.
1669 var $predef_abbr = array();
function MarkdownExtra_Parser() {
#
# Constructor function. Initialize the parser object.
#
	# Add extra escapable characters before parent constructor
	# initialize the table.
	$this->escape_chars .= ':|';

	# Insert extra document, block, and span transformations.
	# Parent constructor will do the sorting.
	# The values are the priorities used for that sorting.
	$this->document_gamut += array(
		"doFencedCodeBlocks" => 5,
		"stripFootnotes"     => 15,
		"stripAbbreviations" => 25,
		"appendFootnotes"    => 50,
		);
	$this->block_gamut += array(
		"doFencedCodeBlocks" => 5,
		"doTables"           => 15,
		"doDefLists"         => 45,
		);
	$this->span_gamut += array(
		"doFootnotes"        => 5,
		"doAbbreviations"    => 70,
		);

	parent::Markdown_Parser();
}
1702 # Extra variables used during extra transformations.
1703 var $footnotes = array();
1704 var $footnotes_ordered = array();
1705 var $abbr_desciptions = array();
1706 var $abbr_word_re = '';
1708 # Give the current footnote number.
1709 var $footnote_counter = 1;
function setup() {
#
# Setting up Extra-specific variables.
#
# Resets footnote and abbreviation state, then seeds the abbreviation
# tables from $this->predef_abbr (word => description).
#
	parent::setup();

	$this->footnotes = array();
	$this->footnotes_ordered = array();
	$this->abbr_desciptions = array();
	$this->abbr_word_re = '';
	$this->footnote_counter = 1;

	foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
		# Build an alternation of all abbreviation words.
		if ($this->abbr_word_re)
			$this->abbr_word_re .= '|';
		$this->abbr_word_re .= preg_quote($abbr_word);
		$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
	}
}
function teardown() {
#
# Clearing Extra-specific variables.
#
	$this->footnotes = array();
	$this->footnotes_ordered = array();
	$this->abbr_desciptions = array();
	$this->abbr_word_re = '';

	parent::teardown();
}
1745 ### HTML Block Parser ###
1747 # Tags that are always treated as block tags:
1748 var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1750 # Tags treated as block tags only if the opening tag is alone on its line:
1751 var $context_block_tags_re = 'script|noscript|math|ins|del';
1753 # Tags where markdown="1" default to span mode:
1754 var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1756 # Tags which must not have their contents modified, no matter where they appear:
1758 var $clean_tags_re = 'script|math';
1760 # Tags that do not need to be closed.
1761 var $auto_close_tags_re = 'hr|img';
function hashHTMLBlocks($text) {
#
# Hashify HTML Blocks and "clean tags".
#
# We only want to do this for block-level HTML tags, such as headers,
# lists, and tables. That's because we still want to wrap <p>s around
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
# phrase emphasis, and spans. The list of tags we're looking for is
# hard-coded (see $block_tags_re and the related properties).
#
# This works by calling _hashHTMLBlocks_inMarkdown, which then calls
# _hashHTMLBlocks_inHTML when it encounters block tags. When the
# markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
# calls back _hashHTMLBlocks_inMarkdown to handle the Markdown syntax
# within the tag. These two functions are calling each other. It's
# recursive!
#
	#
	# Call the HTML-in-Markdown hasher; only the processed text of the
	# returned ( processed text , remaining text ) pair is needed here.
	#
	list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);

	return $text;
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
									$enclosing_tag_re = '', $span = false)
{
#
# Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
#
# *   $indent is the number of spaces to be ignored when checking for code
#     blocks. This is important because if we don't take the indent into
#     account, something like this (which looks right) won't work as expected:
#
#     <div>
#         <div markdown="1">
#             Hello World.  <-- Is this a Markdown code block or text?
#         </div>  <-- Is this a Markdown code block or a real tag?
#     <div>
#
#     If you don't like this, just don't indent the tag on which
#     you apply the markdown="1" attribute.
#
# *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
#     tag with that name. Nested tags supported.
#
# *   If $span is true, text inside must be treated as span. So any double
#     newline will be replaced by a single newline so that it does not create
#     paragraphs.
#
# Returns an array of that form: ( processed text , remaining text )
#
	if ($text === '') return array('', '');

	# Regex to check for the presence of newlines around a block tag.
	$newline_before_re = '/(?:^\n?|\n\n)*$/';
	$newline_after_re =
		'{
			^						# Start of text following the tag.
			(?>[ ]*<!--.*?-->)?		# Optional comment.
			[ ]*\n					# Must be followed by newline.
		}xs';

	# Regex to match any tag.
	$block_tag_re =
		'{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					(?>				# Tag name.
						'.$this->block_tags_re.'			|
						'.$this->context_block_tags_re.'	|
						'.$this->clean_tags_re.'        	|
						(?!\s)'.$enclosing_tag_re.'
					)
					(?:
						(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
						(?>
							".*?"		|	# Double quotes (can contain `>`)
							\'.*?\'   	|	# Single quotes (can contain `>`)
							.+?				# Anything but quotes and `>`.
						)*?
					)?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			|
				# Code span marker
				`+
			'. ( !$span ? ' # If not in span.
			|
				# Indented code block
				(?> ^[ ]*\n? | \n[ ]*\n )
				[ ]{'.($indent+4).'}[^\n]* \n
				(?>
					(?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
				)*
			|
				# Fenced code block marker
				(?> ^ | \n )
				[ ]{'.($indent).'}~~~+[ ]*\n
			' : '' ). ' # End (if not is span).
			)
		}xs';

	$depth = 0;		# Current depth inside the tag tree.
	$parsed = "";	# Parsed text that will be returned.

	#
	# Loop through every tag until we find the closing tag of the parent
	# or loop until reaching the end of text if no parent tag specified.
	#
	do {
		#
		# Split the text using the first $block_tag_re pattern found.
		# Text before pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made
		# by the pattern.
		#
		$parts = preg_split($block_tag_re, $text, 2,
							PREG_SPLIT_DELIM_CAPTURE);

		# If in Markdown span mode, add an empty-string span-level hash
		# after each newline to prevent triggering any block element.
		if ($span) {
			$void = $this->hashPart("", ':');
			$newline = "$void\n";
			$parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
		}

		$parsed .= $parts[0]; # Text before current tag.

		# If end of $text has been reached. Stop loop.
		if (count($parts) < 3) {
			$text = "";
			break;
		}

		$tag  = $parts[1]; # Tag to handle.
		$text = $parts[2]; # Remaining text after current tag.

		# String offsets below use square brackets; the curly-brace
		# form ($tag{0}) is rejected by PHP 8.

		#
		# Check for: Code span marker
		#
		if ($tag[0] == "`") {
			# Find corresponding end marker.
			$tag_re = preg_quote($tag);
			if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
				$text, $matches))
			{
				# End marker found: pass text unchanged until marker.
				$parsed .= $tag . $matches[0];
				$text = substr($text, strlen($matches[0]));
			}
			else {
				# Unmatched marker: just skip it.
				$parsed .= $tag;
			}
		}
		#
		# Check for: Indented code block or fenced code block marker.
		#
		else if ($tag[0] == "\n" || $tag[0] == "~") {
			if ($tag[1] == "\n" || $tag[1] == " ") {
				# Indented code block: pass it unchanged, will be handled
				# later.
				$parsed .= $tag;
			}
			else {
				# Fenced code block marker: find matching end marker.
				$tag_re = preg_quote(trim($tag));
				if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text,
					$matches))
				{
					# End marker found: pass text unchanged until marker.
					$parsed .= $tag . $matches[0];
					$text = substr($text, strlen($matches[0]));
				}
				else {
					# No end marker: just skip it.
					$parsed .= $tag;
				}
			}
		}
		#
		# Check for: Opening Block level tag or
		#            Opening Context Block tag (like ins and del)
		#               used as a block tag (tag is alone on its line).
		#
		else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
			(	preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
				preg_match($newline_before_re, $parsed) &&
				preg_match($newline_after_re, $text)	)
			)
		{
			# Need to parse tag and following text using the HTML parser.
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

			# Make sure it stays outside of any paragraph by adding newlines.
			$parsed .= "\n\n$block_text\n\n";
		}
		#
		# Check for: Clean tag (like script, math)
		#            HTML Comments, processing instructions.
		#
		else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Need to parse tag and following text using the HTML parser.
			# (don't check for markdown attribute)
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

			$parsed .= $block_text;
		}
		#
		# Check for: Tag with same name as enclosing tag.
		#
		else if ($enclosing_tag_re !== '' &&
			# Same name as enclosing tag.
			preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
		{
			#
			# Increase/decrease nested tag count.
			# (A self-closing tag, ending in "/>", changes nothing.)
			#
			if ($tag[1] == '/')						$depth--;
			else if ($tag[strlen($tag)-2] != '/')	$depth++;

			if ($depth < 0) {
				#
				# Going out of parent element. Clean up and break so we
				# return to the calling function.
				#
				$text = $tag . $text;
				break;
			}

			$parsed .= $tag;
		}
		else {
			$parsed .= $tag;
		}
	} while ($depth >= 0);

	return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
#
# *   Calls $hash_method to convert any blocks.
# *   Stops when the first opening tag closes.
# *   $md_attr indicates if the use of the `markdown="1"` attribute is
#     allowed. (it is not inside clean tags)
#
# Returns an array of that form: ( processed text , remaining text )
#
	if ($text === '') return array('', '');

	# Regex to match `markdown` attribute inside of a tag.
	$markdown_attr_re = '
		{
			\s*			# Eat whitespace before the `markdown` attribute
			markdown
			\s*=\s*
			(?>
				(["\'])		# $1: quote delimiter
				(.*?)		# $2: attribute value
				\1			# matching delimiter
			|
				([^\s>]*)	# $3: unquoted attribute value
			)
			()				# $4: make $3 always defined (avoid warnings)
		}xs';

	# Regex to match any tag.
	$tag_re = '{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					[\w:$]+			# Tag name.
					(?:
						(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
						(?>
							".*?"		|	# Double quotes (can contain `>`)
							\'.*?\'   	|	# Single quotes (can contain `>`)
							.+?				# Anything but quotes and `>`.
						)*?
					)?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';

	$original_text = $text;		# Save original text in case of failure.

	$depth		= 0;	# Current depth inside the tag tree.
	$block_text	= "";	# Temporary text holder for current text.
	$parsed		= "";	# Parsed text that will be returned.

	#
	# Get the name of the starting tag.
	# (This pattern makes $base_tag_name_re safe without quoting.)
	#
	$base_tag_name_re = '';
	if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
		$base_tag_name_re = $matches[1];

	#
	# Loop through every tag until we find the corresponding closing tag.
	#
	do {
		#
		# Split the text using the first $tag_re pattern found.
		# Text before pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made
		# by the pattern.
		#
		$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

		if (count($parts) < 3) {
			#
			# End of $text reached with unbalanced tag(s).
			# In that case, we return original text unchanged and pass the
			# first character as filtered to prevent an infinite loop in the
			# parent function.
			#
			return array($original_text[0], substr($original_text, 1));
		}

		$block_text .= $parts[0]; # Text before current tag.
		$tag         = $parts[1]; # Tag to handle.
		$text        = $parts[2]; # Remaining text after current tag.

		#
		# Check for: Auto-close tag (like <hr/>)
		#			 Comments and Processing Instructions.
		#
		if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Just add the tag to the block as if it was text.
			$block_text .= $tag;
		}
		else {
			#
			# Increase/decrease nested tag count. Only do so if
			# the tag's name match base tag's.
			#
			if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
				if ($tag[1] == '/')						$depth--;
				else if ($tag[strlen($tag)-2] != '/')	$depth++;
			}

			#
			# Check for `markdown="1"` attribute and handle it.
			# The alternation is grouped so that the anchors apply to
			# every accepted value (1, block or span), not just the
			# first and last alternative.
			#
			if ($md_attr &&
				preg_match($markdown_attr_re, $tag, $attr_m) &&
				preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
			{
				# Remove `markdown` attribute from opening tag.
				$tag = preg_replace($markdown_attr_re, '', $tag);

				# Check if text inside this tag must be parsed in span mode.
				# The value is also kept on $this->mode as before.
				$this->mode = $attr_m[2] . $attr_m[3];
				$span_mode = $this->mode == 'span' || $this->mode != 'block' &&
					preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

				# Calculate indent before tag.
				if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
					$strlen = $this->utf8_strlen;
					$indent = $strlen($matches[1], 'UTF-8');
				} else {
					$indent = 0;
				}

				# End preceding block with this tag.
				$block_text .= $tag;
				$parsed .= $this->$hash_method($block_text);

				# Get enclosing tag name for the ParseMarkdown function.
				# (This pattern makes $tag_name_re safe without quoting.)
				preg_match('/^<([\w:$]*)\b/', $tag, $matches);
				$tag_name_re = $matches[1];

				# Parse the content using the HTML-in-Markdown parser.
				list ($block_text, $text)
					= $this->_hashHTMLBlocks_inMarkdown($text, $indent,
						$tag_name_re, $span_mode);

				# Outdent markdown text.
				if ($indent > 0) {
					$block_text = preg_replace("/^[ ]{1,$indent}/m", "",
												$block_text);
				}

				# Append tag content to parsed text.
				if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
				else				$parsed .= "$block_text";

				# Start over a new block.
				$block_text = "";
			}
			else $block_text .= $tag;
		}

	} while ($depth > 0);

	#
	# Hash last block text that wasn't processed inside the loop.
	#
	$parsed .= $this->$hash_method($block_text);

	return array($parsed, $text);
}
function hashClean($text) {
#
# Hash a "clean" tag. Whenever a function inserts such a tag in the
# text, it goes through here so that it is escaped automatically,
# which blocks invalid nested overlap.
#
	$boundary = 'C';
	return $this->hashPart($text, $boundary);
}
function doHeaders($text) {
#
# Redefined to add id attribute support.
#
# Returns $text with every header replaced by the result of the
# matching _doHeaders_callback_* method.
#
	# Setext-style headers:
	#	  Header 1  {#header1}
	#	  ========
	#
	#	  Header 2  {#header2}
	#	  --------
	#
	$text = preg_replace_callback(
		'{
			(^.+?)								# $1: Header text
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?	# $2: Id attribute
			[ ]*\n(=+|-+)[ ]*\n+				# $3: Header footer
		}mx',
		array(&$this, '_doHeaders_callback_setext'), $text);

	# atx-style headers:
	#	# Header 1        {#header1}
	#	## Header 2       {#header2}
	#	## Header 2 with closing hashes ##  {#header3}
	#	...
	#	###### Header 6   {#header2}
	#
	$text = preg_replace_callback('{
			^(\#{1,6})	# $1 = string of #\'s
			[ ]*
			(.+?)		# $2 = Header text
			[ ]*
			\#*			# optional closing #\'s (not counted)
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
			[ ]*
			\n+
		}xm',
		array(&$this, '_doHeaders_callback_atx'), $text);

	return $text;
}
function _doHeaders_attr($attr) {
#
# Build the id attribute for a header tag. Returns an empty string when
# $attr is empty (in PHP's empty() sense), otherwise ` id="..."` with a
# leading space.
#
	return empty($attr) ? "" : ' id="' . $attr . '"';
}
function _doHeaders_callback_setext($matches) {
#
# Callback for doHeaders (Setext style).
#
# $matches[1] is the header text, $matches[2] the optional id and
# $matches[3] the underline ("=" gives <h1>, "-" gives <h2>).
# Returns the hashed header block surrounded by newlines.
#
	# A single "-" under a line that itself starts with "- " is left
	# untouched rather than turned into a header.
	if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
		return $matches[0];

	# Square-bracket offset: the curly-brace form is rejected by PHP 8.
	$level = $matches[3][0] == '=' ? 1 : 2;
	$id    = isset($matches[2]) ? $matches[2] : '';
	$attr  = $this->_doHeaders_attr($id);
	$block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Callback for doHeaders (atx style).
#
# The number of "#" characters in $matches[1] gives the header level,
# $matches[2] is the header text and $matches[3], when present, the id.
# Returns the hashed header block surrounded by newlines.
#
	$depth   = strlen($matches[1]);
	$id      = isset($matches[3]) ? $matches[3] : null;
	$id_attr = $this->_doHeaders_attr($id);
	$content = $this->runSpanGamut($matches[2]);
	$html    = "<h" . $depth . $id_attr . ">" . $content . "</h" . $depth . ">";
	return "\n" . $this->hashBlock($html) . "\n\n";
}
function doTables($text) {
#
# Form HTML tables.
#
# Two passes: tables whose rows start with a pipe, then tables without
# a leading pipe. Returns $text with each table replaced by the result
# of the corresponding callback.
#
	$less_than_tab = $this->tab_width - 1;
	#
	# Find tables with leading pipe.
	#
	#	| Header 1 | Header 2
	#	| -------- | --------
	#	| Cell 1   | Cell 2
	#	| Cell 3   | Cell 4
	#
	$text = preg_replace_callback('
		{
			^							# Start of a line
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			[|]							# Optional leading pipe (present)
			(.+) \n						# $1: Header row (at least one pipe)

			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			[|] ([ ]*[-:]+[-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					[ ]*				# Allowed whitespace.
					[|] .* \n			# Row content.
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm',
		array(&$this, '_doTable_leadingPipe_callback'), $text);

	#
	# Find tables without leading pipe.
	#
	#	Header 1 | Header 2
	#	-------- | --------
	#	Cell 1   | Cell 2
	#	Cell 3   | Cell 4
	#
	$text = preg_replace_callback('
		{
			^							# Start of a line
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			(\S.*[|].*) \n				# $1: Header row (at least one pipe)

			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			([-:]+[ ]*[|][-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					.* [|] .* \n		# Row content
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm',
		array(&$this, '_doTable_callback'), $text);

	return $text;
}
function _doTable_leadingPipe_callback($matches) {
#
# Callback for doTables (leading-pipe form): strip the leading pipe of
# every body row, then delegate to _doTable_callback with the same
# match layout (whole match, header row, underline, body rows).
#
	list($whole, $header_row, $rule, $body) = $matches;

	# Remove leading pipe for each row.
	$body = preg_replace('/^ *[|]/m', '', $body);

	return $this->_doTable_callback(array($whole, $header_row, $rule, $body));
}
function _doTable_callback($matches) {
#
# Callback for doTables: render one table as HTML.
#
# $matches[1] is the header row, $matches[2] the header underline (which
# also carries the column alignment) and $matches[3] the body rows.
# Returns the hashed <table> block followed by a newline.
#
	$head		= $matches[1];
	$underline	= $matches[2];
	$content	= $matches[3];

	# Remove any trailing pipes for each line.
	$head		= preg_replace('/[|] *$/m', '', $head);
	$underline	= preg_replace('/[|] *$/m', '', $underline);
	$content	= preg_replace('/[|] *$/m', '', $content);

	# Reading alignment from header underline.
	$attr = array();
	$separators	= preg_split('/ *[|] */', $underline);
	foreach ($separators as $n => $s) {
		if (preg_match('/^ *-+: *$/', $s))		$attr[$n] = ' align="right"';
		else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
		else if (preg_match('/^ *:-+ *$/', $s))	$attr[$n] = ' align="left"';
		else									$attr[$n] = '';
	}

	# Parsing span elements, including code spans, character escapes,
	# and inline HTML tags, so that pipes inside those get ignored.
	$head		= $this->parseSpan($head);
	$headers	= preg_split('/ *[|] */', $head);
	$col_count	= count($headers);

	# Write column headers.
	$text = "<table>\n";
	$text .= "<thead>\n";
	$text .= "<tr>\n";
	foreach ($headers as $n => $header) {
		# The header row may have more columns than the underline.
		$align = isset($attr[$n]) ? $attr[$n] : '';
		$text .= " <th$align>".$this->runSpanGamut(trim($header))."</th>\n";
	}
	$text .= "</tr>\n";
	$text .= "</thead>\n";

	# Split content by row.
	$rows = explode("\n", trim($content, "\n"));

	$text .= "<tbody>\n";
	foreach ($rows as $row) {
		# Parsing span elements, including code spans, character escapes,
		# and inline HTML tags, so that pipes inside those get ignored.
		$row = $this->parseSpan($row);

		# Split row by cell; extra cells are merged into the last
		# column and missing ones are padded with empty cells.
		$row_cells = preg_split('/ *[|] */', $row, $col_count);
		$row_cells = array_pad($row_cells, $col_count, '');

		$text .= "<tr>\n";
		foreach ($row_cells as $n => $cell) {
			$align = isset($attr[$n]) ? $attr[$n] : '';
			$text .= " <td$align>".$this->runSpanGamut(trim($cell))."</td>\n";
		}
		$text .= "</tr>\n";
	}
	$text .= "</tbody>\n";
	$text .= "</table>";

	return $this->hashBlock($text) . "\n";
}
function doDefLists($text) {
#
# Form HTML definition lists.
#
# Returns $text with every definition list replaced by the result of
# _doDefLists_callback, which receives the whole list as $matches[1].
#
	$less_than_tab = $this->tab_width - 1;

	# Re-usable pattern to match any entire dl list:
	$whole_list_re = '(?>
		(								# $1 = whole list
		  (								# $2
			[ ]{0,'.$less_than_tab.'}
			((?>.*\S.*\n)+)				# $3 = defined term
			\n?
			[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
		  )
		  (?s:.+?)
		  (								# $4
			  \z
			|
			  \n{2,}
			  (?=\S)
			  (?!						# Negative lookahead for another term
				[ ]{0,'.$less_than_tab.'}
				(?: \S.*\n )+?			# defined term
				\n?
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
			  (?!						# Negative lookahead for another definition
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
		  )
		)
	)'; // mx

	$text = preg_replace_callback('{
			(?>\A\n?|(?<=\n\n))
			'.$whole_list_re.'
		}mx',
		array(&$this, '_doDefLists_callback'), $text);

	return $text;
}
function _doDefLists_callback($matches) {
#
# Callback for doDefLists: $matches[1] is one whole definition list.
# Its items are processed, trimmed and wrapped in <dl>...</dl>; the
# hashed block is returned followed by two newlines.
#
	$items = trim($this->processDefListItems($matches[1]));
	$html  = "<dl>\n" . $items . "\n</dl>";
	return $this->hashBlock($html) . "\n\n";
}
function processDefListItems($list_str) {
#
# Process the contents of a single definition list, splitting it
# into individual term and definition list items.
#
# Returns $list_str with terms replaced by <dt> elements and
# definitions by <dd> elements.
#
	$less_than_tab = $this->tab_width - 1;

	# trim trailing blank lines:
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	# Process definition terms.
	# (The look-ahead uses $less_than_tab, like every other definition
	# mark pattern here, instead of a hard-coded 3.)
	$list_str = preg_replace_callback('{
		(?>\A\n?|\n\n+)					# leading line
		(								# definition terms = $1
			[ ]{0,'.$less_than_tab.'}	# leading whitespace
			(?![:][ ]|[ ])				# negative lookahead for a definition
										#   mark (colon) or more whitespace.
			(?> \S.* \n)+?				# actual term (not whitespace).
		)
		(?=\n?[ ]{0,'.$less_than_tab.'}:[ ])	# lookahead for following line feed
										#   with a definition mark.
		}xm',
		array(&$this, '_processDefListItems_callback_dt'), $list_str);

	# Process actual definitions.
	$list_str = preg_replace_callback('{
		\n(\n+)?						# leading line = $1
		(								# marker space = $2
			[ ]{0,'.$less_than_tab.'}	# whitespace before colon
			[:][ ]+						# definition mark (colon)
		)
		((?s:.+?))						# definition text = $3
		(?= \n+ 						# stop at next definition mark,
			(?:							# next term or end of text
				[ ]{0,'.$less_than_tab.'} [:][ ]	|
				<dt> | \z
			)
		)
		}xm',
		array(&$this, '_processDefListItems_callback_dd'), $list_str);

	return $list_str;
}
function _processDefListItems_callback_dt($matches) {
#
# Callback for processDefListItems: $matches[1] holds one or more
# terms, one per line. Each is run through the span gamut and wrapped
# in <dt>. Returns the <dt> elements, each preceded by a newline,
# followed by a final newline.
#
	$terms = explode("\n", trim($matches[1]));
	$text = '';
	foreach ($terms as $term) {
		$term = $this->runSpanGamut(trim($term));
		$text .= "\n<dt>" . $term . "</dt>";
	}
	return $text . "\n";
}
function _processDefListItems_callback_dd($matches) {
#
# Callback for processDefListItems: render one definition as <dd>.
#
# $matches[1] is the blank line(s) before the definition mark (if any),
# $matches[2] the marker with its surrounding whitespace and
# $matches[3] the definition text.
#
	$leading_line	= $matches[1];
	$marker_space	= $matches[2];
	$def			= $matches[3];

	if ($leading_line || preg_match('/\n{2,}/', $def)) {
		# Preceded by a blank line or containing one: block-level content.
		# Replace marker with the appropriate whitespace indentation
		$def = str_repeat(' ', strlen($marker_space)) . $def;
		$def = $this->runBlockGamut($this->outdent($def . "\n\n"));
		$def = "\n". $def ."\n";
	}
	else {
		# Compact definition: span-level content only.
		$def = rtrim($def);
		$def = $this->runSpanGamut($this->outdent($def));
	}

	return "\n<dd>" . $def . "</dd>\n";
}
function doFencedCodeBlocks($text) {
#
# Adding the fenced code block syntax to regular Markdown:
#
# ~~~
# Code block
# ~~~
#
# Params:
#	$text - Markdown source.
#
# Returns $text with each fenced block replaced by the output of
# _doFencedCodeBlocks_callback.
#
	# (The unused $less_than_tab local was removed: the pattern below
	# does not depend on the tab width.)
	$text = preg_replace_callback('{
			(?:\n|\A)
			# 1: Opening marker
			(
				~{3,} # Marker: three tilde or more.
			)
			[ ]* \n # Whitespace and newline following marker.

			# 2: Content
			(
				(?>
					(?!\1 [ ]* \n)	# Not a closing marker.
					.*\n+
				)+
			)

			# Closing marker.
			\1 [ ]* \n
		}xm',
		array(&$this, '_doFencedCodeBlocks_callback'), $text);

	return $text;
}
function _doFencedCodeBlocks_callback($matches) {
#
# Callback for the fenced-code pattern; $matches[2] is the block content.
# Returns the hashed <pre><code> block surrounded by blank lines.
#
	# Escape markup characters but leave quotes untouched.
	$escaped = htmlspecialchars($matches[2], ENT_NOQUOTES);

	# Newlines at the very start of the content are handed to
	# _doFencedCodeBlocks_newlines for replacement.
	$escaped = preg_replace_callback('/^\n+/',
		array(&$this, '_doFencedCodeBlocks_newlines'), $escaped);

	$html = "<pre><code>" . $escaped . "</code></pre>";
	return "\n\n" . $this->hashBlock($html) . "\n\n";
}
function _doFencedCodeBlocks_newlines($matches) {
#
# Returns one <br> element (closed with the configured empty-element
# suffix) per character of the matched newline run.
#
	$line_break = "<br" . $this->empty_element_suffix;
	$how_many   = strlen($matches[0]);
	return str_repeat($line_break, $how_many);
}
#
# Emphasis patterns, redefined so that underscore emphasis does not
# trigger in the middle of a word.
#
# In each table the '' entry is the pattern for an opening marker and the
# other entries, keyed by the marker text, are the patterns for the
# matching closing marker.
#

# Single marker: * or _
var $em_relist = array(
	''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S)(?![.,:;]\s)',
	'*' => '(?<=\S)(?<!\*)\*(?!\*)',
	'_' => '(?<=\S)(?<!_)_(?![a-zA-Z0-9_])',
	);

# Double marker: ** or __
var $strong_relist = array(
	''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S)(?![.,:;]\s)',
	'**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
	'__' => '(?<=\S)(?<!_)__(?![a-zA-Z0-9_])',
	);

# Triple marker: *** or ___
var $em_strong_relist = array(
	''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S)(?![.,:;]\s)',
	'***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
	'___' => '(?<=\S)(?<!_)___(?![a-zA-Z0-9_])',
	);
function formParagraphs($text) {
#
#	Params:
#		$text - string to process with html <p> tags
#
#	Returns the text with each paragraph span-processed and wrapped in
#	<p> tags where needed, and with all hashes expanded.
#
	# Drop blank lines at both ends, then cut on runs of blank lines.
	$text   = preg_replace('/\A\n+|\n+\z/', '', $text);
	$chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	$wrapped = array();
	foreach ($chunks as $index => $chunk) {
		$chunk = trim($this->runSpanGamut($chunk));

		# Left unwrapped: a chunk that starts with a block hash
		# (B\x1A<n>B), or that starts with a clean-tag hash (C\x1A<n>C)
		# which also ends the chunk. Note that the "$" anchor belongs to
		# the second alternative only.
		if (preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $chunk)) {
			$wrapped[$index] = $chunk;
		} else {
			$wrapped[$index] = "<p>$chunk</p>";
		}
	}

	# Reassemble with blank lines between chunks, then expand whatever
	# hashes are still present.
	return $this->unhash(implode("\n\n", $wrapped));
}
function stripFootnotes($text) {
#
# Strips footnote definitions from text. Each definition is handed to
# _stripFootnotes_callback, which stores its content and replaces the
# definition with an empty string.
#
# Params:
#	$text - Markdown source.
#
# Returns $text without the footnote definitions.
#
	# A definition may be indented by at most (tab width - 1) spaces; more
	# than that counts as indented (continuation) content.
	$less_than_tab = $this->tab_width - 1;

	# Footnote defs are in the form:
	#
	#	[^id]: text of the note, which may continue on following lines
	#
	# The "non-indented content" lookahead uses $less_than_tab (it used to
	# hard-code 3) so that it agrees with the opening-line pattern when
	# the tab width is not 4.
	$text = preg_replace_callback('{
		^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:	# note_id = $1
		  [ ]*
		  \n?					# maybe *one* newline
		(						# text = $2 (no blank lines allowed)
			(?:
				.+				# actual text
			|
				\n				# newlines but
				(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
				(?!\n+[ ]{0,'.$less_than_tab.'}\S)# ensure line is not blank
								# and followed by non-indented content
			)*
		)
		}xm',
		array(&$this, '_stripFootnotes_callback'),
		$text);
	return $text;
}
function _stripFootnotes_callback($matches) {
#
# Callback for the footnote-definition pattern: records the outdented
# note text ($matches[2]) in $this->footnotes under the prefixed note id
# ($matches[1]).
#
	$key  = $this->fn_id_prefix . $matches[1];
	$body = $this->outdent($matches[2]);
	$this->footnotes[$key] = $body;

	# The definition itself is replaced by nothing.
	return '';
}
function doFootnotes($text) {
#
# Replace footnote references in $text [^id] with a special text-token
# which will be replaced by the actual footnote marker in appendFootnotes.
#
	# References are left as they are while $this->in_anchor is set.
	if ($this->in_anchor) {
		return $text;
	}
	return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
function appendFootnotes($text) {
#
# Append footnote list to text.
#
# Params:
#	$text - document text containing the F\x1Afn:<id>\x1A: tokens
#	        inserted by doFootnotes.
#
# Returns $text with every token replaced through
# _appendFootnotes_callback and, when at least one footnote was
# referenced, a <div class="footnotes"> list appended.
#
	$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
		array(&$this, '_appendFootnotes_callback'), $text);

	if (empty($this->footnotes_ordered)) {
		return $text;
	}

	$text .= "\n\n";
	$text .= "<div class=\"footnotes\">\n";
	# Use the parser's own suffix rather than the global constant, like
	# the other empty elements this class emits.
	$text .= "<hr". $this->empty_element_suffix ."\n";
	$text .= "<ol>\n\n";

	# Attribute template shared by all backlinks. A "%%" in the class or
	# title is a placeholder for the footnote number, so the template
	# must stay intact from one footnote to the next.
	$attr_template = " rev=\"footnote\"";
	if ($this->fn_backlink_class != "") {
		$class = $this->fn_backlink_class;
		$class = $this->encodeAttribute($class);
		$attr_template .= " class=\"$class\"";
	}
	if ($this->fn_backlink_title != "") {
		$title = $this->fn_backlink_title;
		$title = $this->encodeAttribute($title);
		$attr_template .= " title=\"$title\"";
	}
	$num = 0;

	# Consume the queue from the front instead of iterating over it: the
	# callback below appends to $this->footnotes_ordered when a footnote
	# body itself references another footnote.
	while (!empty($this->footnotes_ordered)) {
		$footnote = reset($this->footnotes_ordered);
		$note_id = key($this->footnotes_ordered);
		unset($this->footnotes_ordered[$note_id]);

		$footnote .= "\n"; # Need to append newline before parsing.
		$footnote = $this->runBlockGamut("$footnote\n");
		$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
			array(&$this, '_appendFootnotes_callback'), $footnote);

		# Substitute into a per-footnote copy. Overwriting the template
		# here would consume the "%%" placeholder on the first footnote
		# and give every later backlink the number 1.
		$num++;
		$attr = str_replace("%%", $num, $attr_template);
		$note_id = $this->encodeAttribute($note_id);

		# Add backlink to last paragraph; create new paragraph if needed.
		# The arrow and the non-breaking space are written as numeric
		# entities so the output does not depend on this file's encoding.
		$backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
		if (preg_match('{</p>$}', $footnote)) {
			$footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
		} else {
			$footnote .= "\n\n<p>$backlink</p>";
		}

		$text .= "<li id=\"fn:$note_id\">\n";
		$text .= $footnote . "\n";
		$text .= "</li>\n\n";
	}

	$text .= "</ol>\n";
	$text .= "</div>";

	return $text;
}
function _appendFootnotes_callback($matches) {
#
# Callback for footnote reference tokens; $matches[1] is the note id as
# written in the document.
#
# Returns the <sup> marker linking to the note, or the literal "[^id]"
# text when no stored footnote is available under that id.
#
	$node_id = $this->fn_id_prefix . $matches[1];

	# A marker is created only if it has a corresponding footnote *and*
	# the footnote hasn't been used by another marker; otherwise the
	# reference is put back as plain text.
	if (!isset($this->footnotes[$node_id])) {
		return "[^".$matches[1]."]";
	}

	# Transfer footnote content to the ordered list.
	$this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
	unset($this->footnotes[$node_id]);

	# The marker shows the counter's current value; the counter is then
	# advanced for the next marker.
	$num = $this->footnote_counter++;

	$attr = " rel=\"footnote\"";
	if ($this->fn_link_class != "") {
		$class = $this->encodeAttribute($this->fn_link_class);
		$attr .= " class=\"$class\"";
	}
	if ($this->fn_link_title != "") {
		$title = $this->encodeAttribute($this->fn_link_title);
		$attr .= " title=\"$title\"";
	}

	# "%%" in the class or title stands for the footnote number.
	$attr = str_replace("%%", $num, $attr);
	$node_id = $this->encodeAttribute($node_id);

	$link = "<a href=\"#fn:$node_id\"$attr>$num</a>";
	return "<sup id=\"fnref:$node_id\">" . $link . "</sup>";
}
2761 ### Abbreviations ###
function stripAbbreviations($text) {
#
# Strips abbreviation definitions from text. Each one is handed to
# _stripAbbreviations_callback, which records it and replaces the
# definition with an empty string.
#
	$max_indent = $this->tab_width - 1;

	# Abbreviation defs are in the form: *[word]: description
	$definition_re = '{
		^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?:	# abbr_id = $1
		(.*)					# text = $2 (no blank lines allowed)
		}xm';

	return preg_replace_callback($definition_re,
		array(&$this, '_stripAbbreviations_callback'),
		$text);
}
function _stripAbbreviations_callback($matches) {
#
# Callback for the abbreviation-definition pattern.
#	$matches[1] - the abbreviated word
#	$matches[2] - its description
#
# Adds the (regex-quoted) word to the $this->abbr_word_re alternation and
# stores the trimmed description in $this->abbr_desciptions. Returns the
# empty string that replaces the definition.
#
	$abbr_word = $matches[1];
	$abbr_desc = $matches[2];

	# Compare against the empty string instead of testing truthiness: a
	# pattern consisting of just "0" is falsy in PHP, and the next word
	# would then be appended to it without the "|" separator.
	if ((string) $this->abbr_word_re !== '')
		$this->abbr_word_re .= '|';
	$this->abbr_word_re .= preg_quote($abbr_word);
	$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
	return ''; # String that will replace the block
}
function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
#
# Params:
#	$text - text to scan.
#
# Returns $text with each occurrence replaced by the output of
# _doAbbreviations_callback; $text is returned untouched when no
# abbreviation is defined.
#
	# Compare against the empty string instead of testing truthiness, so
	# that a pattern consisting of just "0" is not mistaken for "nothing
	# defined".
	if ((string) $this->abbr_word_re !== '') {
		// cannot use the /x modifier because abbr_word_re may
		// contain significant spaces:
		$text = preg_replace_callback('{'.
			'(?<![\w\x1A])'.
			'(?:'.$this->abbr_word_re.')'.
			'(?![\w\x1A])'.
			'}',
			array(&$this, '_doAbbreviations_callback'), $text);
	}
	return $text;
}
function _doAbbreviations_callback($matches) {
#
# Callback for an abbreviation occurrence; $matches[0] is the word found.
#
# Returns the hashed <abbr> element (with a title attribute when a
# description is stored), or the word unchanged when it has no entry in
# $this->abbr_desciptions.
#
	$word = $matches[0];

	if (!isset($this->abbr_desciptions[$word])) {
		return $matches[0];
	}

	$title = $this->abbr_desciptions[$word];
	if (empty($title)) {
		# No description: emit a bare element.
		return $this->hashPart("<abbr>$word</abbr>");
	}

	$title = $this->encodeAttribute($title);
	return $this->hashPart("<abbr title=\"$title\">$word</abbr>");
}
2831 This is a PHP port of the original Markdown formatter written in Perl
2832 by John Gruber. This special "Extra" version of PHP Markdown features
2833 further enhancements to the syntax for making additional constructs
2834 such as tables and definition lists.
2836 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2837 easy-to-write structured text format into HTML. Markdown's text format
2838 is most similar to that of plain text email, and supports features such
2839 as headers, *emphasis*, code blocks, blockquotes, and links.
2841 Markdown's syntax is designed not as a generic markup language, but
2842 specifically to serve as a front-end to (X)HTML. You can use span-level
2843 HTML tags anywhere in a Markdown document, and you can use block-level
2844 HTML tags (like <div> and <table>) as well.
2846 For more information about Markdown's syntax, see:
2848 <http://daringfireball.net/projects/markdown/>
2854 To file bug reports please send email to:
2856 <michel.fortin@michelf.com>
2858 Please include with your report: (1) the example input; (2) the output you
2859 expected; (3) the output Markdown actually produced.
2865 See the readme file for detailed release notes for this version.
2868 Copyright and License
2869 ---------------------
2871 PHP Markdown & Extra
2872 Copyright (c) 2004-2008 Michel Fortin
2873 <http://www.michelf.com/>
2874 All rights reserved.
2877 Copyright (c) 2003-2006 John Gruber
2878 <http://daringfireball.net/>
2879 All rights reserved.
2881 Redistribution and use in source and binary forms, with or without
2882 modification, are permitted provided that the following conditions are
2885 * Redistributions of source code must retain the above copyright notice,
2886 this list of conditions and the following disclaimer.
2888 * Redistributions in binary form must reproduce the above copyright
2889 notice, this list of conditions and the following disclaimer in the
2890 documentation and/or other materials provided with the distribution.
2892 * Neither the name "Markdown" nor the names of its contributors may
2893 be used to endorse or promote products derived from this software
2894 without specific prior written permission.
2896 This software is provided by the copyright holders and contributors "as
2897 is" and any express or implied warranties, including, but not limited
2898 to, the implied warranties of merchantability and fitness for a
2899 particular purpose are disclaimed. In no event shall the copyright owner
2900 or contributors be liable for any direct, indirect, incidental, special,
2901 exemplary, or consequential damages (including, but not limited to,
2902 procurement of substitute goods or services; loss of use, data, or
2903 profits; or business interruption) however caused and on any theory of
2904 liability, whether in contract, strict liability, or tort (including
2905 negligence or otherwise) arising in any way out of the use of this
2906 software, even if advised of the possibility of such damage.