lib/markdown.php

   1 <?php
   2 #
   3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown & Extra
   6 # Copyright (c) 2004-2007 Michel Fortin
   7 # <http://www.michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13
  14
  15 define( 'MARKDOWN_VERSION',       "1.0.1h" ); # Fri 3 Aug 2007
  16 define( 'MARKDOWNEXTRA_VERSION',  "1.1.4" );  # Fri 3 Aug 2007
  17
  18
  19 #
  20 # Global default settings:
  21 #
  22
  23 # Change to ">" for HTML output
  24 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  25
  26 # Define the width of a tab for code blocks.
  27 define( 'MARKDOWN_TAB_WIDTH',     4 );
  28
  29 # Optional title attribute for footnote links and backlinks.
  30 define( 'MARKDOWN_FN_LINK_TITLE',         "" );
  31 define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
  32
  33 # Optional class attribute for footnote links and backlinks.
  34 define( 'MARKDOWN_FN_LINK_CLASS',         "" );
  35 define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
  36
  37
  38 #
  39 # WordPress settings:
  40 #
  41
  42 # Change to false to remove Markdown from posts and/or comments.
  43 define( 'MARKDOWN_WP_POSTS',      true );
  44 define( 'MARKDOWN_WP_COMMENTS',   true );
  45
  46
  47
  48 ### Standard Function Interface ###
  49
  50 define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
  51
  52 function Markdown($text) {
  53 #
  54 # Initialize the parser and return the result of its transform method.
  55 #
  56         # Setup static parser variable.
  57         static $parser;
  58         if (!isset($parser)) {
  59                 $parser_class = MARKDOWN_PARSER_CLASS;
  60                 $parser = new $parser_class;
  61         }
  62
  63         # Transform text using parser.
  64         return $parser->transform($text);
  65 }
  66
  67
  68 ### WordPress Plugin Interface ###
  69
  70 /*
  71 Plugin Name: Markdown Extra
  72 Plugin URI: http://www.michelf.com/projects/php-markdown/
  73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  74 Version: 1.1.4
  75 Author: Michel Fortin
  76 Author URI: http://www.michelf.com/
  77 */
  78
  79 if (isset($wp_version)) {
  80         # More details about how it works here:
  81         # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
  82
  83         # Post content and excerpts
  84         # - Remove WordPress paragraph generator.
  85         # - Run Markdown on excerpt, then remove all tags.
  86         # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
  87         if (MARKDOWN_WP_POSTS) {
  88                 remove_filter('the_content',     'wpautop');
  89         remove_filter('the_content_rss', 'wpautop');
  90                 remove_filter('the_excerpt',     'wpautop');
  91                 add_filter('the_content',     'Markdown', 6);
  92         add_filter('the_content_rss', 'Markdown', 6);
  93                 add_filter('get_the_excerpt', 'Markdown', 6);
  94                 add_filter('get_the_excerpt', 'trim', 7);
  95                 add_filter('the_excerpt',     'mdwp_add_p');
  96                 add_filter('the_excerpt_rss', 'mdwp_strip_p');
  97
  98                 remove_filter('content_save_pre',  'balanceTags', 50);
  99                 remove_filter('excerpt_save_pre',  'balanceTags', 50);
 100                 add_filter('the_content',         'balanceTags', 50);
 101                 add_filter('get_the_excerpt', 'balanceTags', 9);
 102         }
 103
 104         # Comments
 105         # - Remove WordPress paragraph generator.
 106         # - Remove WordPress auto-link generator.
 107         # - Scramble important tags before passing them to the kses filter.
 108         # - Run Markdown on excerpt then remove paragraph tags.
 109         if (MARKDOWN_WP_COMMENTS) {
 110                 remove_filter('comment_text', 'wpautop', 30);
 111                 remove_filter('comment_text', 'make_clickable');
 112                 add_filter('pre_comment_content', 'Markdown', 6);
 113                 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
 114                 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
 115                 add_filter('get_comment_text',    'Markdown', 6);
 116                 add_filter('get_comment_excerpt', 'Markdown', 6);
 117                 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
 118
 119                 global $mdwp_hidden_tags, $mdwp_placeholders;
 120                 $mdwp_hidden_tags = explode(' ',
 121                         '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
 122                 $mdwp_placeholders = explode(' ', str_rot13(
 123                         'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
 124                         'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
 125         }
 126
 127         function mdwp_add_p($text) {
 128                 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
 129                         $text = '<p>'.$text.'</p>';
 130                         $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
 131                 }
 132                 return $text;
 133         }
 134
 135         function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
 136
 137         function mdwp_hide_tags($text) {
 138                 global $mdwp_hidden_tags, $mdwp_placeholders;
 139                 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
 140         }
 141         function mdwp_show_tags($text) {
 142                 global $mdwp_hidden_tags, $mdwp_placeholders;
 143                 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
 144         }
 145 }
 146
 147
 148 ### bBlog Plugin Info ###
 149
 150 function identify_modifier_markdown() {
 151         return array(
 152                 'name' => 'markdown',
 153                 'type' => 'modifier',
 154                 'nicename' => 'PHP Markdown Extra',
 155                 'description' => 'A text-to-HTML conversion tool for web writers',
 156                 'authors' => 'Michel Fortin and John Gruber',
 157                 'licence' => 'GPL',
 158                 'version' => MARKDOWNEXTRA_VERSION,
 159                 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
 160                 );
 161 }
 162
 163
 164 ### Smarty Modifier Interface ###
 165
 166 function smarty_modifier_markdown($text) {
 167         return Markdown($text);
 168 }
 169
 170
 171 ### Textile Compatibility Mode ###
 172
 173 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
 174
 175 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
 176         # Try to include PHP SmartyPants. Should be in the same directory.
 177         @include_once 'smartypants.php';
 178         # Fake Textile class. It calls Markdown instead.
 179         class Textile {
 180                 function TextileThis($text, $lite='', $encode='') {
 181                         if ($lite == '' && $encode == '')    $text = Markdown($text);
 182                         if (function_exists('SmartyPants'))  $text = SmartyPants($text);
 183                         return $text;
 184                 }
 185                 # Fake restricted version: restrictions are not supported for now.
 186                 function TextileRestricted($text, $lite='', $noimage='') {
 187                         return $this->TextileThis($text, $lite);
 188                 }
 189                 # Workaround to ensure compatibility with TextPattern 4.0.3.
 190                 function blockLite($text) { return $text; }
 191         }
 192 }
 193
 194
 195
 196 #
 197 # Markdown Parser Class
 198 #
 199
 200 class Markdown_Parser {
 201
  202         # Regex to match balanced [brackets].
  203         # Needed to insert a maximum bracket depth while converting to PHP.
             # The constructor builds $nested_brackets by repeating a sub-pattern
             # $nested_brackets_depth times.
  204         var $nested_brackets_depth = 6;
  205         var $nested_brackets;
  206
             # Same scheme for balanced (parentheses): the constructor builds
             # $nested_url_parenthesis from $nested_url_parenthesis_depth.
  207         var $nested_url_parenthesis_depth = 4;
  208         var $nested_url_parenthesis;
  209
  210         # Table of hash values for escaped characters:
             # the constructor maps "\<char>" to "&#<ord>;" in
             # $backslash_escape_table for every character of $escape_chars.
  211         var $escape_chars = '\`*_{}[]()>#+-.!';
  212 //      var $escape_table = array();
  213         var $backslash_escape_table = array();
  214
  215         # Change to ">" for HTML output.
             # Both values default to the MARKDOWN_* constants of the same name.
  216         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
  217         var $tab_width = MARKDOWN_TAB_WIDTH;
  218
  219         # Change to `true` to disallow markup or entities.
  220         var $no_markup = false;
  221         var $no_entities = false;
 222
 223
 224         function Markdown_Parser() {
 225         #
 226         # Constructor function. Initialize appropriate member variables.
 227         #
 228                 $this->_initDetab();
 229
 230                 $this->nested_brackets =
 231                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 232                         str_repeat('\])*', $this->nested_brackets_depth);
 233
 234                 $this->nested_url_parenthesis =
 235                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
 236                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
 237
 238                 # Create an identical table but for escaped characters.
 239                 foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
 240                         $entity = "&#". ord($char). ";";
 241 //                      $this->escape_table[$char] = $entity;
 242                         $this->backslash_escape_table["\\$char"] = $entity;
 243                 }
 244
 245                 # Sort document, block, and span gamut in ascendent priority order.
 246                 asort($this->document_gamut);
 247                 asort($this->block_gamut);
 248                 asort($this->span_gamut);
 249         }
 250
 251
  252         # Internal hashes used during transformation.
             # transform() resets all four arrays at the start of every call.
             # $urls and $titles are filled, keyed by lower-cased link id, by
             # _stripLinkDefinitions_callback(). hashBlock() stores its text in
             # both $html_blocks and $html_hashes; hashSpan() only in $html_hashes.
  253         var $urls = array();
  254         var $titles = array();
  255         var $html_blocks = array();
  256         var $html_hashes = array(); # Contains both blocks and span hashes.
  257
  258         # Status flag to avoid invalid nesting.
             # doAnchors() returns its input untouched while this flag is true.
  259         var $in_anchor = false;
 260
 261
 262         function transform($text) {
 263         #
 264         # Main function. The order in which other subs are called here is
 265         # essential. Link and image substitutions need to happen before
 266         # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
 267         # and <img> tags get encoded.
 268         #
 269                 # Clear the global hashes. If we don't clear these, you get conflicts
 270                 # from other articles when generating a page which contains more than
 271                 # one article (e.g. an index page that shows the N most recent
 272                 # articles):
 273                 $this->urls = array();
 274                 $this->titles = array();
 275                 $this->html_blocks = array();
 276                 $this->html_hashes = array();
 277
 278                 # Standardize line endings:
 279                 #   DOS to Unix and Mac to Unix
 280                 $text = str_replace(array("\r\n", "\r"), "\n", $text);
 281
 282                 # Make sure $text ends with a couple of newlines:
 283                 $text .= "\n\n";
 284
 285                 # Convert all tabs to spaces.
 286                 $text = $this->detab($text);
 287
 288                 # Turn block-level HTML blocks into hash entries
 289                 $text = $this->hashHTMLBlocks($text);
 290
 291                 # Strip any lines consisting only of spaces and tabs.
 292                 # This makes subsequent regexen easier to write, because we can
 293                 # match consecutive blank lines with /\n+/ instead of something
 294                 # contorted like /[ ]*\n+/ .
 295                 $text = preg_replace('/^[ ]+$/m', '', $text);
 296
 297                 # Run document gamut methods.
 298                 foreach ($this->document_gamut as $method => $priority) {
 299                         $text = $this->$method($text);
 300                 }
 301
 302                 return $text . "\n";
 303         }
 304
             # Document-level passes: method name => priority. The constructor
             # sorts the table by priority and transform() calls each method in
             # that order as $this->$method($text).
  305         var $document_gamut = array(
  306                 # Strip link definitions, store in hashes.
  307                 "stripLinkDefinitions" => 20,
  308
  309                 "runBasicBlockGamut"   => 30,
  310                 );
 311
 312
 313         function stripLinkDefinitions($text) {
 314         #
 315         # Strips link definitions from text, stores the URLs and titles in
 316         # hash references.
 317         #
 318                 $less_than_tab = $this->tab_width - 1;
 319
 320                 # Link defs are in the form: ^[id]: url "optional title"
 321                 $text = preg_replace_callback('{
 322                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 323                                                           [ ]*
 324                                                           \n?                           # maybe *one* newline
 325                                                           [ ]*
 326                                                         <?(\S+?)>?                      # url = $2
 327                                                           [ ]*
 328                                                           \n?                           # maybe one newline
 329                                                           [ ]*
 330                                                         (?:
 331                                                                 (?<=\s)                 # lookbehind for whitespace
 332                                                                 ["(]
 333                                                                 (.*?)                   # title = $3
 334                                                                 [")]
 335                                                                 [ ]*
 336                                                         )?      # title is optional
 337                                                         (?:\n+|\Z)
 338                         }xm',
 339                         array(&$this, '_stripLinkDefinitions_callback'),
 340                         $text);
 341                 return $text;
 342         }
 343         function _stripLinkDefinitions_callback($matches) {
 344                 $link_id = strtolower($matches[1]);
 345                 $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
 346                 if (isset($matches[3]))
 347                         $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
 348                 return ''; # String that will replace the block
 349         }
 350
 351
 352         function hashHTMLBlocks($text) {
 353                 if ($this->no_markup)  return $text;
 354
 355                 $less_than_tab = $this->tab_width - 1;
 356
 357                 # Hashify HTML blocks:
 358                 # We only want to do this for block-level HTML tags, such as headers,
 359                 # lists, and tables. That's because we still want to wrap <p>s around
 360                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 361                 # phrase emphasis, and spans. The list of tags we're looking for is
 362                 # hard-coded:
 363                 $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 364                                                 'script|noscript|form|fieldset|iframe|math|ins|del';
 365                 $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 366                                                 'script|noscript|form|fieldset|iframe|math';
 367
 368                 # Regular expression for the content of a block tag.
 369                 $nested_tags_level = 4;
 370                 $attr = '
 371                         (?>                             # optional tag attributes
 372                           \s                    # starts with whitespace
 373                           (?>
 374                                 [^>"/]+         # text outside quotes
 375                           |
 376                                 /+(?!>)         # slash not followed by ">"
 377                           |
 378                                 "[^"]*"         # text inside double quotes (tolerate ">")
 379                           |
 380                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
 381                           )*
 382                         )?
 383                         ';
 384                 $content =
 385                         str_repeat('
 386                                 (?>
 387                                   [^<]+                 # content without tag
 388                                 |
 389                                   <\2                   # nested opening tag
 390                                         '.$attr.'       # attributes
 391                                         (?:
 392                                           />
 393                                         |
 394                                           >', $nested_tags_level).      # end of opening tag
 395                                           '.*?'.                                        # last level nested tag content
 396                         str_repeat('
 397                                           </\2\s*>      # closing nested tag
 398                                         )
 399                                   |
 400                                         <(?!/\2\s*>     # other tags with a different name
 401                                   )
 402                                 )*',
 403                                 $nested_tags_level);
 404
 405                 # First, look for nested blocks, e.g.:
 406                 #       <div>
 407                 #               <div>
 408                 #               tags for inner block must be indented.
 409                 #               </div>
 410                 #       </div>
 411                 #
 412                 # The outermost tags must start at the left margin for this to match, and
 413                 # the inner nested divs must be indented.
 414                 # We need to do this before the next, more liberal match, because the next
 415                 # match will start at the first `<div>` and stop at the first `</div>`.
 416                 $text = preg_replace_callback('{
 417                                         (                                               # save in $1
 418                                                 ^                                       # start of line  (with /m)
 419                                                 <('.$block_tags_a.')# start tag = $2
 420                                                 '.$attr.'>\n            # attributes followed by > and \n
 421                                                 '.$content.'            # content, support nesting
 422                                                 </\2>                           # the matching end tag
 423                                                 [ ]*                            # trailing spaces/tabs
 424                                                 (?=\n+|\Z)      # followed by a newline or end of document
 425                                         )
 426                         }xmi',
 427                         array(&$this, '_hashHTMLBlocks_callback'),
 428                         $text);
 429
 430                 #
 431                 # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
 432                 #
 433                 $text = preg_replace_callback('{
 434                                         (                                               # save in $1
 435                                                 ^                                       # start of line  (with /m)
 436                                                 <('.$block_tags_b.')# start tag = $2
 437                                                 '.$attr.'>                      # attributes followed by >
 438                                                 '.$content.'            # content, support nesting
 439                                                 </\2>                           # the matching end tag
 440                                                 [ ]*                            # trailing spaces/tabs
 441                                                 (?=\n+|\Z)      # followed by a newline or end of document
 442                                         )
 443                         }xmi',
 444                         array(&$this, '_hashHTMLBlocks_callback'),
 445                         $text);
 446
 447                 # Special case just for <hr />. It was easier to make a special case than
 448                 # to make the other regex more complicated.
 449                 $text = preg_replace_callback('{
 450                                         (?:
 451                                                 (?<=\n\n)               # Starting after a blank line
 452                                                 |                               # or
 453                                                 \A\n?                   # the beginning of the doc
 454                                         )
 455                                         (                                               # save in $1
 456                                                 [ ]{0,'.$less_than_tab.'}
 457                                                 <(hr)                           # start tag = $2
 458                                                 \b                                      # word break
 459                                                 ([^<>])*?                       #
 460                                                 /?>                                     # the matching end tag
 461                                                 [ ]*
 462                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
 463                                         )
 464                         }xi',
 465                         array(&$this, '_hashHTMLBlocks_callback'),
 466                         $text);
 467
 468                 # Special case for standalone HTML comments:
 469                 $text = preg_replace_callback('{
 470                                 (?:
 471                                         (?<=\n\n)               # Starting after a blank line
 472                                         |                               # or
 473                                         \A\n?                   # the beginning of the doc
 474                                 )
 475                                 (                                               # save in $1
 476                                         [ ]{0,'.$less_than_tab.'}
 477                                         (?s:
 478                                                 <!-- .*? -->
 479                                         )
 480                                         [ ]*
 481                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 482                                 )
 483                         }x',
 484                         array(&$this, '_hashHTMLBlocks_callback'),
 485                         $text);
 486
 487                 # PHP and ASP-style processor instructions (<? and <%)
 488                 $text = preg_replace_callback('{
 489                                 (?:
 490                                         (?<=\n\n)               # Starting after a blank line
 491                                         |                               # or
 492                                         \A\n?                   # the beginning of the doc
 493                                 )
 494                                 (                                               # save in $1
 495                                         [ ]{0,'.$less_than_tab.'}
 496                                         (?s:
 497                                                 <([?%])                 # $2
 498                                                 .*?
 499                                                 \2>
 500                                         )
 501                                         [ ]*
 502                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 503                                 )
 504                         }x',
 505                         array(&$this, '_hashHTMLBlocks_callback'),
 506                         $text);
 507
 508                 return $text;
 509         }
 510         function _hashHTMLBlocks_callback($matches) {
 511                 $text = $matches[1];
 512                 $key  = $this->hashBlock($text);
 513                 return "\n\n$key\n\n";
 514         }
 515
 516
 517         function hashBlock($text) {
 518         #
 519         # Called whenever a tag must be hashed when a function insert a block-level
 520         # tag in $text, it pass through this function and is automaticaly escaped,
 521         # which remove the need to call _HashHTMLBlocks at every step.
 522         #
 523                 # Swap back any tag hash found in $text so we do not have to `unhash`
 524                 # multiple times at the end.
 525                 $text = $this->unhash($text);
 526
 527                 # Then hash the block.
 528                 $key = "B\x1A". md5($text);
 529                 $this->html_hashes[$key] = $text;
 530                 $this->html_blocks[$key] = $text;
 531                 return $key; # String that will replace the tag.
 532         }
 533
 534
 535         function hashSpan($text, $word_separator = false) {
 536         #
 537         # Called whenever a tag must be hashed when a function insert a span-level
 538         # element in $text, it pass through this function and is automaticaly
 539         # escaped, blocking invalid nested overlap. If optional argument
 540         # $word_separator is true, surround the hash value by spaces.
 541         #
 542                 # Swap back any tag hash found in $text so we do not have to `unhash`
 543                 # multiple times at the end.
 544                 $text = $this->unhash($text);
 545
 546                 # Then hash the span.
 547                 $key = "S\x1A". md5($text);
 548                 if ($word_separator) $key = ":$key:";
 549
 550                 $this->html_hashes[$key] = $text;
 551                 return $key; # String that will replace the span tag.
 552         }
 553
 554
             # Block-level passes: method name => priority. The constructor sorts
             # the table by priority; runBasicBlockGamut() calls each method in
             # that order and then calls formParagraphs().
  555         var $block_gamut = array(
  556         #
  557         # These are all the transformations that form block-level
  558         # tags like paragraphs, headers, and list items.
  559         #
  560                 "doHeaders"         => 10,
  561                 "doHorizontalRules" => 20,
  562
  563                 "doLists"           => 40,
  564                 "doCodeBlocks"      => 50,
  565                 "doBlockQuotes"     => 60,
  566                 );
 567
 568         function runBlockGamut($text) {
 569         #
 570         # Run block gamut tranformations.
 571         #
 572                 # We need to escape raw HTML in Markdown source before doing anything
 573                 # else. This need to be done for each block, and not only at the
 574                 # begining in the Markdown function since hashed blocks can be part of
 575                 # list items and could have been indented. Indented blocks would have
 576                 # been seen as a code block in a previous pass of hashHTMLBlocks.
 577                 $text = $this->hashHTMLBlocks($text);
 578
 579                 return $this->runBasicBlockGamut($text);
 580         }
 581
 582         function runBasicBlockGamut($text) {
 583         #
 584         # Run block gamut tranformations, without hashing HTML blocks. This is
 585         # useful when HTML blocks are known to be already hashed, like in the first
 586         # whole-document pass.
 587         #
 588                 foreach ($this->block_gamut as $method => $priority) {
 589                         $text = $this->$method($text);
 590                 }
 591
 592                 # Finally form paragraph and restore hashed blocks.
 593                 $text = $this->formParagraphs($text);
 594
 595                 return $text;
 596         }
 597
 598
 599         function doHorizontalRules($text) {
 600                 # Do Horizontal Rules:
 601                 return preg_replace(
 602                         array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ ]*$}mx',
 603                                   '{^[ ]{0,2}([ ]? -[ ]?){3,}[ ]*$}mx',
 604                                   '{^[ ]{0,2}([ ]? _[ ]?){3,}[ ]*$}mx'),
 605                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 606                         $text);
 607         }
 608
 609
             # Span-level passes: method name => priority. The constructor sorts
             # the table by priority and runSpanGamut() calls each method in that
             # order as $this->$method($text).
  610         var $span_gamut = array(
  611         #
  612         # These are all the transformations that occur *within* block-level
  613         # tags like paragraphs, headers, and list items.
  614         #
  615                 "escapeSpecialCharsWithinTagAttributes" => -20,
  616                 "doCodeSpans"                                                   => -10,
  617                 "encodeBackslashEscapes"                                =>  -5,
  618
  619                 # Process anchor and image tags. Images must come first,
  620                 # because ![foo][f] looks like an anchor.
  621                 "doImages"            =>  10,
  622                 "doAnchors"           =>  20,
  623
  624                 # Make links out of things like `<http://example.com/>`
  625                 # Must come after doAnchors, because you can use < and >
  626                 # delimiters in inline links like [this](<url>).
  627                 "doAutoLinks"         =>  30,
  628                 "encodeAmpsAndAngles" =>  40,
  629
  630                 "doItalicsAndBold"    =>  50,
  631                 "doHardBreaks"        =>  60,
  632                 );
 633
 634         function runSpanGamut($text) {
 635         #
 636         # Run span gamut tranformations.
 637         #
 638                 foreach ($this->span_gamut as $method => $priority) {
 639                         $text = $this->$method($text);
 640                 }
 641
 642                 return $text;
 643         }
 644
 645
 646         function doHardBreaks($text) {
 647                 # Do hard breaks:
 648                 $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n");
 649                 return preg_replace('/ {2,}\n/', $br_tag, $text);
 650         }
 651
 652
 653         function escapeSpecialCharsWithinTagAttributes($text) {
 654         #
 655         # Within tags -- meaning between < and > -- encode [\ ` * _] so they
 656         # don't conflict with their use in Markdown for code, italics and strong.
 657         # We're replacing each such character with its corresponding MD5 checksum
 658         # value; this is likely overkill, but it should prevent us from colliding
 659         # with the escape values by accident.
 660         #
 661                 if ($this->no_markup)  return $text;
 662
 663                 $tokens = $this->tokenizeHTML($text);
 664                 $text = '';   # rebuild $text from the tokens
 665
 666                 foreach ($tokens as $cur_token) {
 667                         if ($cur_token[0] == 'tag') {
 668 //                              $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
 669 //                              $cur_token[1] = str_replace('`', $this->escape_table['`'], $cur_token[1]);
 670 //                              $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
 671 //                              $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
 672                                 $cur_token[1] = $this->hashSpan($cur_token[1]);
 673                         }
 674                         $text .= $cur_token[1];
 675                 }
 676                 return $text;
 677         }
 678
 679
 680         function doAnchors($text) {
 681         #
 682         # Turn Markdown link shortcuts into XHTML <a> tags.
 683         #
 684                 if ($this->in_anchor) return $text;
 685                 $this->in_anchor = true;
 686
 687                 #
 688                 # First, handle reference-style links: [link text] [id]
 689                 #
 690                 $text = preg_replace_callback('{
 691                         (                                       # wrap whole match in $1
 692                           \[
 693                                 ('.$this->nested_brackets.')    # link text = $2
 694                           \]
 695
 696                           [ ]?                          # one optional space
 697                           (?:\n[ ]*)?           # one optional newline followed by spaces
 698
 699                           \[
 700                                 (.*?)           # id = $3
 701                           \]
 702                         )
 703                         }xs',
 704                         array(&$this, '_doAnchors_reference_callback'), $text);
 705
 706                 #
 707                 # Next, inline-style links: [link text](url "optional title")
 708                 #
 709                 $text = preg_replace_callback('{
 710                         (                               # wrap whole match in $1
 711                           \[
 712                                 ('.$this->nested_brackets.')    # link text = $2
 713                           \]
 714                           \(                    # literal paren
 715                                 [ ]*
 716                                 (?:
 717                                         <(\S*)> # href = $3
 718                                 |
 719                                         ('.$this->nested_url_parenthesis.')     # href = $4
 720                                 )
 721                                 [ ]*
 722                                 (                       # $5
 723                                   ([\'"])       # quote char = $6
 724                                   (.*?)         # Title = $7
 725                                   \6            # matching quote
 726                                   [ ]*  # ignore any spaces/tabs between closing quote and )
 727                                 )?                      # title is optional
 728                           \)
 729                         )
 730                         }xs',
 731                         array(&$this, '_DoAnchors_inline_callback'), $text);
 732
 733                 #
 734                 # Last, handle reference-style shortcuts: [link text]
 735                 # These must come last in case you've also got [link test][1]
 736                 # or [link test](/foo)
 737                 #
 738 //              $text = preg_replace_callback('{
 739 //                      (                                       # wrap whole match in $1
 740 //                        \[
 741 //                              ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 742 //                        \]
 743 //                      )
 744 //                      }xs',
 745 //                      array(&$this, '_doAnchors_reference_callback'), $text);
 746
 747                 $this->in_anchor = false;
 748                 return $text;
 749         }
 750         function _doAnchors_reference_callback($matches) {
 751                 $whole_match =  $matches[1];
 752                 $link_text   =  $matches[2];
 753                 $link_id     =& $matches[3];
 754
 755                 if ($link_id == "") {
 756                         # for shortcut links like [this][] or [this].
 757                         $link_id = $link_text;
 758                 }
 759
 760                 # lower-case and turn embedded newlines into spaces
 761                 $link_id = strtolower($link_id);
 762                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 763
 764                 if (isset($this->urls[$link_id])) {
 765                         $url = $this->urls[$link_id];
 766                         $url = $this->encodeAmpsAndAngles($url);
 767
 768                         $result = "<a href=\"$url\"";
 769                         if ( isset( $this->titles[$link_id] ) ) {
 770                                 $title = $this->titles[$link_id];
 771                                 $title = $this->encodeAmpsAndAngles($title);
 772                                 $result .=  " title=\"$title\"";
 773                         }
 774
 775                         $link_text = $this->runSpanGamut($link_text);
 776                         $result .= ">$link_text</a>";
 777                         $result = $this->hashSpan($result);
 778                 }
 779                 else {
 780                         $result = $whole_match;
 781                 }
 782                 return $result;
 783         }
 784         function _doAnchors_inline_callback($matches) {
 785                 $whole_match    =  $matches[1];
 786                 $link_text              =  $this->runSpanGamut($matches[2]);
 787                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
 788                 $title                  =& $matches[7];
 789
 790                 $url = $this->encodeAmpsAndAngles($url);
 791
 792                 $result = "<a href=\"$url\"";
 793                 if (isset($title)) {
 794                         $title = str_replace('"', '&quot;', $title);
 795                         $title = $this->encodeAmpsAndAngles($title);
 796                         $result .=  " title=\"$title\"";
 797                 }
 798
 799                 $link_text = $this->runSpanGamut($link_text);
 800                 $result .= ">$link_text</a>";
 801
 802                 return $this->hashSpan($result);
 803         }
 804
 805
 806         function doImages($text) {
 807         #
 808         # Turn Markdown image shortcuts into <img> tags.
 809         #
 810                 #
 811                 # First, handle reference-style labeled images: ![alt text][id]
 812                 #
 813                 $text = preg_replace_callback('{
 814                         (                               # wrap whole match in $1
 815                           !\[
 816                                 ('.$this->nested_brackets.')            # alt text = $2
 817                           \]
 818
 819                           [ ]?                          # one optional space
 820                           (?:\n[ ]*)?           # one optional newline followed by spaces
 821
 822                           \[
 823                                 (.*?)           # id = $3
 824                           \]
 825
 826                         )
 827                         }xs',
 828                         array(&$this, '_doImages_reference_callback'), $text);
 829
 830                 #
 831                 # Next, handle inline images:  ![alt text](url "optional title")
 832                 # Don't forget: encode * and _
 833                 #
 834                 $text = preg_replace_callback('{
 835                         (                               # wrap whole match in $1
 836                           !\[
 837                                 ('.$this->nested_brackets.')            # alt text = $2
 838                           \]
 839                           \s?                   # One optional whitespace character
 840                           \(                    # literal paren
 841                                 [ ]*
 842                                 (?:
 843                                         <(\S*)> # src url = $3
 844                                 |
 845                                         ('.$this->nested_url_parenthesis.')     # src url = $4
 846                                 )
 847                                 [ ]*
 848                                 (                       # $5
 849                                   ([\'"])       # quote char = $6
 850                                   (.*?)         # title = $7
 851                                   \6            # matching quote
 852                                   [ ]*
 853                                 )?                      # title is optional
 854                           \)
 855                         )
 856                         }xs',
 857                         array(&$this, '_doImages_inline_callback'), $text);
 858
 859                 return $text;
 860         }
 861         function _doImages_reference_callback($matches) {
 862                 $whole_match = $matches[1];
 863                 $alt_text    = $matches[2];
 864                 $link_id     = strtolower($matches[3]);
 865
 866                 if ($link_id == "") {
 867                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 868                 }
 869
 870                 $alt_text = str_replace('"', '&quot;', $alt_text);
 871                 if (isset($this->urls[$link_id])) {
 872                         $url = $this->urls[$link_id];
 873                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 874                         if (isset($this->titles[$link_id])) {
 875                                 $title = $this->titles[$link_id];
 876                                 $result .=  " title=\"$title\"";
 877                         }
 878                         $result .= $this->empty_element_suffix;
 879                         $result = $this->hashSpan($result);
 880                 }
 881                 else {
 882                         # If there's no such link ID, leave intact:
 883                         $result = $whole_match;
 884                 }
 885
 886                 return $result;
 887         }
 888         function _doImages_inline_callback($matches) {
 889                 $whole_match    = $matches[1];
 890                 $alt_text               = $matches[2];
 891                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
 892                 $title                  =& $matches[7];
 893
 894                 $alt_text = str_replace('"', '&quot;', $alt_text);
 895                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 896                 if (isset($title)) {
 897                         $title = str_replace('"', '&quot;', $title);
 898                         $result .=  " title=\"$title\""; # $title already quoted
 899                 }
 900                 $result .= $this->empty_element_suffix;
 901
 902                 return $this->hashSpan($result);
 903         }
 904
 905
 906         function doHeaders($text) {
 907                 # Setext-style headers:
 908                 #         Header 1
 909                 #         ========
 910                 #
 911                 #         Header 2
 912                 #         --------
 913                 #
 914                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n=+[ ]*\n+ }mx',
 915                         array(&$this, '_doHeaders_callback_setext_h1'), $text);
 916                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n-+[ ]*\n+ }mx',
 917                         array(&$this, '_doHeaders_callback_setext_h2'), $text);
 918
 919                 # atx-style headers:
 920                 #       # Header 1
 921                 #       ## Header 2
 922                 #       ## Header 2 with closing hashes ##
 923                 #       ...
 924                 #       ###### Header 6
 925                 #
 926                 $text = preg_replace_callback('{
 927                                 ^(\#{1,6})      # $1 = string of #\'s
 928                                 [ ]*
 929                                 (.+?)           # $2 = Header text
 930                                 [ ]*
 931                                 \#*                     # optional closing #\'s (not counted)
 932                                 \n+
 933                         }xm',
 934                         array(&$this, '_doHeaders_callback_atx'), $text);
 935
 936                 return $text;
 937         }
 938         function _doHeaders_callback_setext_h1($matches) {
 939                 $block = "<h1>".$this->runSpanGamut($matches[1])."</h1>";
 940                 return "\n" . $this->hashBlock($block) . "\n\n";
 941         }
 942         function _doHeaders_callback_setext_h2($matches) {
 943                 $block = "<h2>".$this->runSpanGamut($matches[1])."</h2>";
 944                 return "\n" . $this->hashBlock($block) . "\n\n";
 945         }
 946         function _doHeaders_callback_atx($matches) {
 947                 $level = strlen($matches[1]);
 948                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 949                 return "\n" . $this->hashBlock($block) . "\n\n";
 950         }
 951
 952
 953         function doLists($text) {
 954         #
 955         # Form HTML ordered (numbered) and unordered (bulleted) lists.
 956         #
 957                 $less_than_tab = $this->tab_width - 1;
 958
 959                 # Re-usable patterns to match list item bullets and number markers:
 960                 $marker_ul  = '[*+-]';
 961                 $marker_ol  = '\d+[.]';
 962                 $marker_any = "(?:$marker_ul|$marker_ol)";
 963
 964                 $markers = array($marker_ul, $marker_ol);
 965
 966                 foreach ($markers as $marker) {
 967                         # Re-usable pattern to match any entirel ul or ol list:
 968                         $whole_list = '
 969                                 (                                                               # $1 = whole list
 970                                   (                                                             # $2
 971                                         [ ]{0,'.$less_than_tab.'}
 972                                         ('.$marker.')                           # $3 = first list item marker
 973                                         [ ]+
 974                                   )
 975                                   (?s:.+?)
 976                                   (                                                             # $4
 977                                           \z
 978                                         |
 979                                           \n{2,}
 980                                           (?=\S)
 981                                           (?!                                           # Negative lookahead for another list item marker
 982                                                 [ ]*
 983                                                 '.$marker.'[ ]+
 984                                           )
 985                                   )
 986                                 )
 987                         '; // mx
 988
 989                         # We use a different prefix before nested lists than top-level lists.
 990                         # See extended comment in _ProcessListItems().
 991
 992                         if ($this->list_level) {
 993                                 $text = preg_replace_callback('{
 994                                                 ^
 995                                                 '.$whole_list.'
 996                                         }mx',
 997                                         array(&$this, '_doLists_callback'), $text);
 998                         }
 999                         else {
1000                                 $text = preg_replace_callback('{
1001                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
1002                                                 '.$whole_list.'
1003                                         }mx',
1004                                         array(&$this, '_doLists_callback'), $text);
1005                         }
1006                 }
1007
1008                 return $text;
1009         }
1010         function _doLists_callback($matches) {
1011                 # Re-usable patterns to match list item bullets and number markers:
1012                 $marker_ul  = '[*+-]';
1013                 $marker_ol  = '\d+[.]';
1014                 $marker_any = "(?:$marker_ul|$marker_ol)";
1015
1016                 $list = $matches[1];
1017                 $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
1018
1019                 $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
1020
1021                 $list .= "\n";
1022                 $result = $this->processListItems($list, $marker_any);
1023
1024                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1025                 return "\n". $result ."\n\n";
1026         }
1027
1028         var $list_level = 0;
1029
1030         function processListItems($list_str, $marker_any) {
1031         #
1032         #       Process the contents of a single ordered or unordered list, splitting it
1033         #       into individual list items.
1034         #
1035                 # The $this->list_level global keeps track of when we're inside a list.
1036                 # Each time we enter a list, we increment it; when we leave a list,
1037                 # we decrement. If it's zero, we're not in a list anymore.
1038                 #
1039                 # We do this because when we're not inside a list, we want to treat
1040                 # something like this:
1041                 #
1042                 #               I recommend upgrading to version
1043                 #               8. Oops, now this line is treated
1044                 #               as a sub-list.
1045                 #
1046                 # As a single paragraph, despite the fact that the second line starts
1047                 # with a digit-period-space sequence.
1048                 #
1049                 # Whereas when we're inside a list (or sub-list), that line will be
1050                 # treated as the start of a sub-list. What a kludge, huh? This is
1051                 # an aspect of Markdown's syntax that's hard to parse perfectly
1052                 # without resorting to mind-reading. Perhaps the solution is to
1053                 # change the syntax rules such that sub-lists must start with a
1054                 # starting cardinal number; e.g. "1." or "a.".
1055
1056                 $this->list_level++;
1057
1058                 # trim trailing blank lines:
1059                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1060
1061                 $list_str = preg_replace_callback('{
1062                         (\n)?                                                   # leading line = $1
1063                         (^[ ]*)                                         # leading whitespace = $2
1064                         ('.$marker_any.') [ ]+          # list marker = $3
1065                         ((?s:.+?))                                              # list item text   = $4
1066                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
1067                         (?= \n* (\z | \2 ('.$marker_any.') [ ]+))
1068                         }xm',
1069                         array(&$this, '_processListItems_callback'), $list_str);
1070
1071                 $this->list_level--;
1072                 return $list_str;
1073         }
1074         function _processListItems_callback($matches) {
1075                 $item = $matches[4];
1076                 $leading_line =& $matches[1];
1077                 $leading_space =& $matches[2];
1078                 $tailing_blank_line =& $matches[5];
1079
1080                 if ($leading_line || $tailing_blank_line ||
1081                         preg_match('/\n{2,}/', $item))
1082                 {
1083                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1084                 }
1085                 else {
1086                         # Recursion for sub-lists:
1087                         $item = $this->doLists($this->outdent($item));
1088                         $item = preg_replace('/\n+$/', '', $item);
1089                         $item = $this->runSpanGamut($item);
1090                 }
1091
1092                 return "<li>" . $item . "</li>\n";
1093         }
1094
1095
1096         function doCodeBlocks($text) {
1097         #
1098         #       Process Markdown `<pre><code>` blocks.
1099         #
1100                 $text = preg_replace_callback('{
1101                                 (?:\n\n|\A)
1102                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1103                                   (?:
1104                                         (?:[ ]{'.$this->tab_width.'} | \t)  # Lines must start with a tab or a tab-width of spaces
1105                                         .*\n+
1106                                   )+
1107                                 )
1108                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1109                         }xm',
1110                         array(&$this, '_doCodeBlocks_callback'), $text);
1111
1112                 return $text;
1113         }
1114         function _doCodeBlocks_callback($matches) {
1115                 $codeblock = $matches[1];
1116
1117                 $codeblock = $this->encodeCode($this->outdent($codeblock));
1118 //              $codeblock = $this->detab($codeblock);
1119                 # trim leading newlines and trailing whitespace
1120                 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
1121
1122                 $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";
1123
1124                 return $result;
1125         }
1126
1127
1128         function doCodeSpans($text) {
1129         #
1130         #       *       Backtick quotes are used for <code></code> spans.
1131         #
1132         #       *       You can use multiple backticks as the delimiters if you want to
1133         #               include literal backticks in the code span. So, this input:
1134         #
1135         #                 Just type ``foo `bar` baz`` at the prompt.
1136         #
1137         #               Will translate to:
1138         #
1139         #                 <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1140         #
1141         #               There's no arbitrary limit to the number of backticks you
1142         #               can use as delimters. If you need three consecutive backticks
1143         #               in your code, use four for delimiters, etc.
1144         #
1145         #       *       You can use spaces to get literal backticks at the edges:
1146         #
1147         #                 ... type `` `bar` `` ...
1148         #
1149         #               Turns to:
1150         #
1151         #                 ... type <code>`bar`</code> ...
1152         #
1153                 $text = preg_replace_callback('@
1154                                 (?<!\\\)        # Character before opening ` can\'t be a backslash
1155                                 (`+)            # $1 = Opening run of `
1156                                 (.+?)           # $2 = The code block
1157                                 (?<!`)
1158                                 \1                      # Matching closer
1159                                 (?!`)
1160                         @xs',
1161                         array(&$this, '_doCodeSpans_callback'), $text);
1162
1163                 return $text;
1164         }
1165         function _doCodeSpans_callback($matches) {
1166                 $c = $matches[2];
1167                 $c = preg_replace('/^[ ]*/', '', $c); # leading whitespace
1168                 $c = preg_replace('/[ ]*$/', '', $c); # trailing whitespace
1169                 $c = $this->encodeCode($c);
1170                 return $this->hashSpan("<code>$c</code>");
1171         }
1172
1173
1174         function encodeCode($_) {
1175         #
1176         # Encode/escape certain characters inside Markdown code runs.
1177         # The point is that in code, these characters are literals,
1178         # and lose their special Markdown meanings.
1179         #
1180                 # Encode all ampersands; HTML entities are not
1181                 # entities within a Markdown code span.
1182                 $_ = str_replace('&', '&amp;', $_);
1183
1184                 # Do the angle bracket song and dance:
1185                 $_ = str_replace(array('<',    '>'),
1186                                                  array('&lt;', '&gt;'), $_);
1187
1188                 # Now, escape characters that are magic in Markdown:
1189 //              $_ = str_replace(array_keys($this->escape_table),
1190 //                                               array_values($this->escape_table), $_);
1191
1192                 return $_;
1193         }
1194
1195
1196         function doItalicsAndBold($text) {
1197                 # <strong> must go first:
1198                 $text = preg_replace_callback('{
1199                                 (                                               # $1: Marker
1200                                         (?<!\*\*) \* |          #     (not preceded by two chars of
1201                                         (?<!__)   _                     #      the same marker)
1202                                 )
1203                                 \1
1204                                 (?=\S)                                  # Not followed by whitespace
1205                                 (?!\1\1)                                #   or two others marker chars.
1206                                 (                                               # $2: Content
1207                                         (?>
1208                                                 [^*_]+?                 # Anthing not em markers.
1209                                         |
1210                                                                                 # Balence any regular emphasis inside.
1211                                                 \1 (?=\S) .+? (?<=\S) \1
1212                                         |
1213                                                 .                               # Allow unbalenced * and _.
1214                                         )+?
1215                                 )
1216                                 (?<=\S) \1\1                    # End mark not preceded by whitespace.
1217                         }sx',
1218                         array(&$this, '_doItalicAndBold_strong_callback'), $text);
1219                 # Then <em>:
1220                 $text = preg_replace_callback(
1221                         '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
1222                         array(&$this, '_doItalicAndBold_em_callback'), $text);
1223
1224                 return $text;
1225         }
1226         function _doItalicAndBold_em_callback($matches) {
1227                 $text = $matches[2];
1228                 $text = $this->runSpanGamut($text);
1229                 return $this->hashSpan("<em>$text</em>");
1230         }
1231         function _doItalicAndBold_strong_callback($matches) {
1232                 $text = $matches[2];
1233                 $text = $this->runSpanGamut($text);
1234                 return $this->hashSpan("<strong>$text</strong>");
1235         }
1236
1237
1238         function doBlockQuotes($text) {
1239                 $text = preg_replace_callback('/
1240                           (                                                             # Wrap whole match in $1
1241                                 (
1242                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1243                                         .+\n                                    # rest of the first line
1244                                   (.+\n)*                                       # subsequent consecutive lines
1245                                   \n*                                           # blanks
1246                                 )+
1247                           )
1248                         /xm',
1249                         array(&$this, '_doBlockQuotes_callback'), $text);
1250
1251                 return $text;
1252         }
1253         function _doBlockQuotes_callback($matches) {
1254                 $bq = $matches[1];
1255                 # trim one level of quoting - trim whitespace-only lines
1256                 $bq = preg_replace(array('/^[ ]*>[ ]?/m', '/^[ ]+$/m'), '', $bq);
1257                 $bq = $this->runBlockGamut($bq);                # recurse
1258
1259                 $bq = preg_replace('/^/m', "  ", $bq);
1260                 # These leading spaces cause problem with <pre> content,
1261                 # so we need to fix that:
1262                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1263                         array(&$this, '_DoBlockQuotes_callback2'), $bq);
1264
1265                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1266         }
1267         function _doBlockQuotes_callback2($matches) {
1268                 $pre = $matches[1];
1269                 $pre = preg_replace('/^  /m', '', $pre);
1270                 return $pre;
1271         }
1272
1273
1274         function formParagraphs($text) {
1275         #
1276         #       Params:
1277         #               $text - string to process with html <p> tags
1278         #
1279                 # Strip leading and trailing lines:
1280                 $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
1281
1282                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1283
1284                 #
1285                 # Wrap <p> tags.
1286                 #
1287                 foreach ($grafs as $key => $value) {
1288                         if (!isset( $this->html_blocks[$value] )) {
1289                                 $value = $this->runSpanGamut($value);
1290                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1291                                 $value .= "</p>";
1292                                 $grafs[$key] = $this->unhash($value);
1293                         }
1294                 }
1295
1296                 #
1297                 # Unhashify HTML blocks
1298                 #
1299                 foreach ($grafs as $key => $graf) {
1300                         # Modify elements of @grafs in-place...
1301                         if (isset($this->html_blocks[$graf])) {
1302                                 $block = $this->html_blocks[$graf];
1303                                 $graf = $block;
1304 //                              if (preg_match('{
1305 //                                      \A
1306 //                                      (                                                       # $1 = <div> tag
1307 //                                        <div  \s+
1308 //                                        [^>]*
1309 //                                        \b
1310 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1311 //                                        1
1312 //                                        \2
1313 //                                        [^>]*
1314 //                                        >
1315 //                                      )
1316 //                                      (                                                       # $3 = contents
1317 //                                      .*
1318 //                                      )
1319 //                                      (</div>)                                        # $4 = closing tag
1320 //                                      \z
1321 //                                      }xs', $block, $matches))
1322 //                              {
1323 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1324 //
1325 //                                      # We can't call Markdown(), because that resets the hash;
1326 //                                      # that initialization code should be pulled into its own sub, though.
1327 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1328 //
1329 //                                      # Run document gamut methods on the content.
1330 //                                      foreach ($this->document_gamut as $method => $priority) {
1331 //                                              $div_content = $this->$method($div_content);
1332 //                                      }
1333 //
1334 //                                      $div_open = preg_replace(
1335 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1336 //
1337 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1338 //                              }
1339                                 $grafs[$key] = $graf;
1340                         }
1341                 }
1342
1343                 return implode("\n\n", $grafs);
1344         }
1345
1346
1347         function encodeAmpsAndAngles($text) {
1348         # Smart processing for ampersands and angle brackets that need to be encoded.
1349                 if ($this->no_entities) {
1350                         $text = str_replace('&', '&amp;', $text);
1351                         $text = str_replace('<', '&lt;', $text);
1352                         return $text;
1353                 }
1354
1355                 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1356                 #   http://bumppo.net/projects/amputator/
1357                 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1358                                                          '&amp;', $text);;
1359
1360                 # Encode naked <'s
1361                 $text = preg_replace('{<(?![a-z/?\$!%])}i', '&lt;', $text);
1362
1363                 return $text;
1364         }
1365
1366
1367         function encodeBackslashEscapes($text) {
1368         #
1369         #       Parameter:  String.
1370         #       Returns:    The string, with after processing the following backslash
1371         #                               escape sequences.
1372         #
1373                 # Must process escaped backslashes first (should be first in list).
1374                 foreach ($this->backslash_escape_table as $search => $replacement) {
1375                         $text = str_replace($search, $this->hashSpan($replacement), $text);
1376                 }
1377                 return $text;
1378         }
1379
1380
1381         function doAutoLinks($text) {
1382                 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}',
1383                         array(&$this, '_doAutoLinks_url_callback'), $text);
1384
1385                 # Email addresses: <address@domain.foo>
1386                 $text = preg_replace_callback('{
1387                         <
1388                         (?:mailto:)?
1389                         (
1390                                 [-.\w\x80-\xFF]+
1391                                 \@
1392                                 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1393                         )
1394                         >
1395                         }xi',
1396                         array(&$this, '_doAutoLinks_email_callback'), $text);
1397
1398                 return $text;
1399         }
1400         function _doAutoLinks_url_callback($matches) {
1401                 $url = $this->encodeAmpsAndAngles($matches[1]);
1402                 $link = "<a href=\"$url\">$url</a>";
1403                 return $this->hashSpan($link);
1404         }
1405         function _doAutoLinks_email_callback($matches) {
1406                 $address = $matches[1];
1407                 $link = $this->encodeEmailAddress($address);
1408                 return $this->hashSpan($link);
1409         }
1410
1411
1412         function encodeEmailAddress($addr) {
1413         #
1414         #       Input: an email address, e.g. "foo@example.com"
1415         #
1416         #       Output: the email address as a mailto link, with each character
1417         #               of the address encoded as either a decimal or hex entity, in
1418         #               the hopes of foiling most address harvesting spam bots. E.g.:
1419         #
1420         #         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1421         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1422         #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1423         #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1424         #
1425         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1426         #   With some optimizations by Milian Wolff.
1427         #
1428                 $addr = "mailto:" . $addr;
1429                 $chars = preg_split('/(?<!^)(?!$)/', $addr);
1430                 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1431
1432                 foreach ($chars as $key => $char) {
1433                         $ord = ord($char);
1434                         # Ignore non-ascii chars.
1435                         if ($ord < 128) {
1436                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1437                                 # roughly 10% raw, 45% hex, 45% dec
1438                                 # '@' *must* be encoded. I insist.
1439                                 if ($r > 90 && $char != '@') /* do nothing */;
1440                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1441                                 else              $chars[$key] = '&#'.$ord.';';
1442                         }
1443                 }
1444
1445                 $addr = implode('', $chars);
1446                 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1447                 $addr = "<a href=\"$addr\">$text</a>";
1448
1449                 return $addr;
1450         }
1451
1452
1453         function tokenizeHTML($str) {
1454         #
1455         #   Parameter:  String containing HTML + Markdown markup.
1456         #   Returns:    An array of the tokens comprising the input
1457         #               string. Each token is either a tag or a run of text
1458         #               between tags. Each element of the array is a
1459         #               two-element array; the first is either 'tag' or 'text';
1460         #               the second is the actual value.
1461         #   Note:       Markdown code spans are taken into account: no tag token is
1462         #               generated within a code span.
1463         #
1464                 $tokens = array();
1465
1466                 while ($str != "") {
1467                         #
1468                         # Each loop iteration seach for either the next tag or the next
1469                         # openning code span marker. If a code span marker is found, the
1470                         # code span is extracted in entierty and will result in an extra
1471                         # text token.
1472                         #
1473                         $parts = preg_split('{
1474                                 (
1475                                         (?<![`\\\\])
1476                                         `+                                              # code span marker
1477                                 |
1478                                         <!--    .*?     -->             # comment
1479                                 |
1480                                         <\?.*?\?> | <%.*?%>             # processing instruction
1481                                 |
1482                                         <[/!$]?[-a-zA-Z0-9:]+   # regular tags
1483                                         (?:
1484                                                 \s
1485                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1486                                         )?
1487                                         >
1488                                 )
1489                                 }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1490
1491                         # Create token from text preceding tag.
1492                         if ($parts[0] != "") {
1493                                 $tokens[] = array('text', $parts[0]);
1494                         }
1495
1496                         # Check if we reach the end.
1497                         if (count($parts) < 3) {
1498                                 break;
1499                         }
1500
1501                         # Create token from tag or code span.
1502                         if ($parts[1]{0} == "`") {
1503                                 $tokens[] = array('text', $parts[1]);
1504                                 $str = $parts[2];
1505
1506                                 # Skip the whole code span, pass as text token.
1507                                 if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/sm',
1508                                         $str, $matches))
1509                                 {
1510                                         $tokens[] = array('text', $matches[1]);
1511                                         $str = $matches[2];
1512                                 }
1513                         } else {
1514                                 $tokens[] = array('tag', $parts[1]);
1515                                 $str = $parts[2];
1516                         }
1517                 }
1518
1519                 return $tokens;
1520         }
1521
1522
1523         function outdent($text) {
1524         #
1525         # Remove one level of line-leading tabs or spaces
1526         #
1527                 return preg_replace("/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);
1528         }
1529
1530
1531         # String length function for detab. `_initDetab` will create a function to
1532         # handle UTF-8 if the default function does not exist.
1533         var $utf8_strlen = 'mb_strlen';
1534
1535         function detab($text) {
1536         #
1537         # Replace tabs with the appropriate amount of space.
1538         #
1539                 # For each line we separate the line in blocks delemited by
1540                 # tab characters. Then we reconstruct every line by adding the
1541                 # appropriate number of space between each blocks.
1542
1543                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1544                 $lines = explode("\n", $text);
1545                 $text = "";
1546
1547                 foreach ($lines as $line) {
1548                         # Split in blocks.
1549                         $blocks = explode("\t", $line);
1550                         # Add each blocks to the line.
1551                         $line = $blocks[0];
1552                         unset($blocks[0]); # Do not add first block twice.
1553                         foreach ($blocks as $block) {
1554                                 # Calculate amount of space, insert spaces, insert block.
1555                                 $amount = $this->tab_width -
1556                                         $strlen($line, 'UTF-8') % $this->tab_width;
1557                                 $line .= str_repeat(" ", $amount) . $block;
1558                         }
1559                         $text .= "$line\n";
1560                 }
1561                 return $text;
1562         }
1563         function _initDetab() {
1564         #
1565         # Check for the availability of the function in the `utf8_strlen` property
1566         # (initially `mb_strlen`). If the function is not available, create a
1567         # function that will loosely count the number of UTF-8 characters with a
1568         # regular expression.
1569         #
1570                 if (function_exists($this->utf8_strlen)) return;
1571                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1572                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1573                         $text, $m);');
1574         }
1575
1576
1577         function unhash($text) {
1578         #
1579         # Swap back in all the tags hashed by _HashHTMLBlocks.
1580         #
1581                 return str_replace(array_keys($this->html_hashes),
1582                                                    array_values($this->html_hashes), $text);
1583         }
1584
1585 }
1586
1587
1588 #
1589 # Markdown Extra Parser Class
1590 #
1591
1592 class MarkdownExtra_Parser extends Markdown_Parser {
1593
1594         # Prefix for footnote ids. Empty by default.
1595         var $fn_id_prefix = "";
1596
1597         # Optional title attribute for footnote links and backlinks.
             # Defaults come from the MARKDOWN_FN_LINK_TITLE and
             # MARKDOWN_FN_BACKLINK_TITLE constants defined at the top of this file.
1598         var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
1599         var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1600
1601         # Optional class attribute for footnote links and backlinks.
             # Defaults come from the MARKDOWN_FN_LINK_CLASS and
             # MARKDOWN_FN_BACKLINK_CLASS constants defined at the top of this file.
1602         var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
1603         var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
1604
1605
1606         function MarkdownExtra_Parser() {
1607         #
1608         # Constructor function. Initialize the parser object.
1609         #
1610                 # Add extra escapable characters before parent constructor
1611                 # initialize the table.
1612                 $this->escape_chars .= ':|';
1613
1614                 # Insert extra document, block, and span transformations.
1615                 # Parent constructor will do the sorting.
1616                 $this->document_gamut += array(
1617                         "stripFootnotes"     => 15,
1618                         "stripAbbreviations" => 25,
1619                         "appendFootnotes"    => 50,
1620                         );
1621                 $this->block_gamut += array(
1622                         "doTables"           => 15,
1623                         "doDefLists"         => 45,
1624                         );
1625                 $this->span_gamut += array(
1626                         "doFootnotes"        => 5,
1627                         "doAbbreviations"    => 70,
1628                         );
1629
1630                 parent::Markdown_Parser();
1631         }
1632
1633
1634         # Extra hashes used during extra transformations.
             # All five are reset to empty arrays at the start of every `transform` call.
             # Note: `abbr_desciptions` is misspelled, but that is the property's
             # actual name -- use it as written.
1635         var $footnotes = array();
1636         var $footnotes_ordered = array();
1637         var $abbr_desciptions = array();
1638         var $abbr_matches = array();
1639         var $html_cleans = array();
1640
1641         # Status flag to avoid invalid nesting.
1642         var $in_footnote = false;
1643
1644
1645         function transform($text) {
1646         #
1647         # Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before
1648         # blank line stripping and added extra parameter to `runBlockGamut`.
1649         #
1650                 # Clear the global hashes. If we don't clear these, you get conflicts
1651                 # from other articles when generating a page which contains more than
1652                 # one article (e.g. an index page that shows the N most recent
1653                 # articles):
1654                 $this->footnotes = array();
1655                 $this->footnotes_ordered = array();
1656                 $this->abbr_desciptions = array();
1657                 $this->abbr_matches = array();
1658                 $this->html_cleans = array();
1659
1660                 return parent::transform($text);
1661         }
1662
1663
1664         ### HTML Block Parser ###
1665
             # Each list below is a `|`-separated regex alternation of tag names.
             # `block_tags`, `context_block_tags` and `clean_tags` are inserted as-is
             # into the tag-matching patterns of `_hashHTMLBlocks_inMarkdown`.

1666         # Tags that are always treated as block tags:
1667         var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1668
1669         # Tags treated as block tags only if the opening tag is alone on its line:
1670         var $context_block_tags = 'script|noscript|math|ins|del';
1671
1672         # Tags where markdown="1" defaults to span mode:
1673         var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1674
1675         # Tags which must not have their contents modified, no matter where
1676         # they appear:
1677         var $clean_tags = 'script|math';
1678
1679         # Tags that do not need to be closed.
1680         var $auto_close_tags = 'hr|img';
1681
1682
1683         function hashHTMLBlocks($text) {
1684         #
1685         # Hashify HTML Blocks and "clean tags".
1686         #
1687         # We only want to do this for block-level HTML tags, such as headers,
1688         # lists, and tables. That's because we still want to wrap <p>s around
1689         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1690         # phrase emphasis, and spans. The list of tags we're looking for is
1691         # hard-coded.
1692         #
1693         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1694         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1695         # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
1696         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1697         # These two functions are calling each other. It's recursive!
1698         #
1699                 #
1700                 # Call the HTML-in-Markdown hasher.
1701                 #
1702                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1703
1704                 return $text;
1705         }
1706         function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1707                                                                                 $enclosing_tag = '', $span = false)
1708         {
1709         #
1710         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1711         #
1712         # *   $indent is the number of space to be ignored when checking for code
1713         #     blocks. This is important because if we don't take the indent into
1714         #     account, something like this (which looks right) won't work as expected:
1715         #
1716         #     <div>
1717         #         <div markdown="1">
1718         #         Hello World.  <-- Is this a Markdown code block or text?
1719         #         </div>  <-- Is this a Markdown code block or a real tag?
1720         #     <div>
1721         #
1722         #     If you don't like this, just don't indent the tag on which
1723         #     you apply the markdown="1" attribute.
1724         #
1725         # *   If $enclosing_tag is not empty, stops at the first unmatched closing
1726         #     tag with that name. Nested tags supported.
1727         #
1728         # *   If $span is true, text inside must treated as span. So any double
1729         #     newline will be replaced by a single newline so that it does not create
1730         #     paragraphs.
1731         #
1732         # Returns an array of that form: ( processed text , remaining text )
1733         #
1734                 if ($text === '') return array('', '');
1735
1736                 # Regex to check for the presense of newlines around a block tag.
1737                 $newline_match_before = '/(?:^\n?|\n\n)*$/';
1738                 $newline_match_after =
1739                         '{
1740                                 ^                                               # Start of text following the tag.
1741                                 (?:[ ]*<!--.*?-->)?             # Optional comment.
1742                                 [ ]*\n                                  # Must be followed by newline.
1743                         }xs';
1744
1745                 # Regex to match any tag.
1746                 $block_tag_match =
1747                         '{
1748                                 (                                       # $2: Capture hole tag.
1749                                         </?                                     # Any opening or closing tag.
1750                                                 (?:                             # Tag name.
1751                                                         '.$this->block_tags.'                   |
1752                                                         '.$this->context_block_tags.'   |
1753                                                         '.$this->clean_tags.'           |
1754                                                         (?!\s)'.$enclosing_tag.'
1755                                                 )
1756                                                 \s*                             # Whitespace.
1757                                                 (?>
1758                                                         ".*?"           |       # Double quotes (can contain `>`)
1759                                                         \'.*?\'         |       # Single quotes (can contain `>`)
1760                                                         .+?                             # Anything but quotes and `>`.
1761                                                 )*?
1762                                         >                                       # End of tag.
1763                                 |
1764                                         <!--    .*?     -->     # HTML Comment
1765                                 |
1766                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1767                                 |
1768                                         <!\[CDATA\[.*?\]\]>     # CData Block
1769                                 )
1770                         }xs';
1771
1772
1773                 $depth = 0;             # Current depth inside the tag tree.
1774                 $parsed = "";   # Parsed text that will be returned.
1775
1776                 #
1777                 # Loop through every tag until we find the closing tag of the parent
1778                 # or loop until reaching the end of text if no parent tag specified.
1779                 #
1780                 do {
1781                         #
1782                         # Split the text using the first $tag_match pattern found.
1783                         # Text before  pattern will be first in the array, text after
1784                         # pattern will be at the end, and between will be any catches made
1785                         # by the pattern.
1786                         #
1787                         $parts = preg_split($block_tag_match, $text, 2,
1788                                                                 PREG_SPLIT_DELIM_CAPTURE);
1789
1790                         # If in Markdown span mode, add a empty-string span-level hash
1791                         # after each newline to prevent triggering any block element.
1792                         if ($span) {
1793                                 $void = $this->hashSpan("", true) ;
1794                                 $newline = $this->hashSpan("", true) . "\n";
1795                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1796                         }
1797
1798                         $parsed .= $parts[0]; # Text before current tag.
1799
1800                         # If end of $text has been reached. Stop loop.
1801                         if (count($parts) < 3) {
1802                                 $text = "";
1803                                 break;
1804                         }
1805
1806                         $tag  = $parts[1]; # Tag to handle.
1807                         $text = $parts[2]; # Remaining text after current tag.
1808
1809                         #
1810                         # Check for: Tag inside code block or span
1811                         #
1812                         if (# Find current paragraph
1813                                 preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
1814                                 (
1815                                 # Then match in it either a code block...
1816                                 preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
1817                                                         '(?!\n)$/', $matches[1], $x) ||
1818                                 # ...or unbalenced code span markers. (the regex matches balenced)
1819                                 !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
1820                                                          $matches[1])
1821                                 ))
1822                         {
1823                                 # Tag is in code block or span and may not be a tag at all. So we
1824                                 # simply skip the first char (should be a `<`).
1825                                 $parsed .= $tag{0};
1826                                 $text = substr($tag, 1) . $text; # Put back $tag minus first char.
1827                         }
1828                         #
1829                         # Check for: Opening Block level tag or
1830                         #            Opening Content Block tag (like ins and del)
1831                         #               used as a block tag (tag is alone on it's line).
1832                         #
1833                         else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||
1834                                 (       preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&
1835                                         preg_match($newline_match_before, $parsed) &&
1836                                         preg_match($newline_match_after, $text) )
1837                                 )
1838                         {
1839                                 # Need to parse tag and following text using the HTML parser.
1840                                 list($block_text, $text) =
1841                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1842
1843                                 # Make sure it stays outside of any paragraph by adding newlines.
1844                                 $parsed .= "\n\n$block_text\n\n";
1845                         }
1846                         #
1847                         # Check for: Clean tag (like script, math)
1848                         #            HTML Comments, processing instructions.
1849                         #
1850                         else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||
1851                                 $tag{1} == '!' || $tag{1} == '?')
1852                         {
1853                                 # Need to parse tag and following text using the HTML parser.
1854                                 # (don't check for markdown attribute)
1855                                 list($block_text, $text) =
1856                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1857
1858                                 $parsed .= $block_text;
1859                         }
1860                         #
1861                         # Check for: Tag with same name as enclosing tag.
1862                         #
1863                         else if ($enclosing_tag !== '' &&
1864                                 # Same name as enclosing tag.
1865                                 preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
1866                         {
1867                                 #
1868                                 # Increase/decrease nested tag count.
1869                                 #
1870                                 if ($tag{1} == '/')                                             $depth--;
1871                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
1872
1873                                 if ($depth < 0) {
1874                                         #
1875                                         # Going out of parent element. Clean up and break so we
1876                                         # return to the calling function.
1877                                         #
1878                                         $text = $tag . $text;
1879                                         break;
1880                                 }
1881
1882                                 $parsed .= $tag;
1883                         }
1884                         else {
1885                                 $parsed .= $tag;
1886                         }
1887                 } while ($depth >= 0);
1888
1889                 return array($parsed, $text);
1890         }
1891         function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
1892         #
1893         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
1894         #
1895         # *   Calls $hash_method to convert any blocks.
1896         # *   Stops when the first opening tag closes.
1897         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
1898         #     (it is not inside clean tags)
1899         #
1900         # Returns an array of that form: ( processed text , remaining text )
1901         #
1902                 if ($text === '') return array('', '');
1903
1904                 # Regex to match `markdown` attribute inside of a tag.
1905                 $markdown_attr_match = '
1906                         {
1907                                 \s*                     # Eat whitespace before the `markdown` attribute
1908                                 markdown
1909                                 \s*=\s*
1910                                 (?:
1911                                         (["\'])         # $1: quote delimiter
1912                                         (.*?)           # $2: attribute value
1913                                         \1                      # matching delimiter
1914                                 |
1915                                         ([^\s>]*)       # $3: unquoted attribute value
1916                                 )
1917                                 ()                              # $4: make $3 always defined (avoid warnings)
1918                         }xs';
1919
1920                 # Regex to match any tag.
1921                 $tag_match = '{
1922                                 (                                       # $2: Capture hole tag.
1923                                         </?                                     # Any opening or closing tag.
1924                                                 [\w:$]+                 # Tag name.
1925                                                 \s*                             # Whitespace.
1926                                                 (?>
1927                                                         ".*?"           |       # Double quotes (can contain `>`)
1928                                                         \'.*?\'         |       # Single quotes (can contain `>`)
1929                                                         .+?                             # Anything but quotes and `>`.
1930                                                 )*?
1931                                         >                                       # End of tag.
1932                                 |
1933                                         <!--    .*?     -->     # HTML Comment
1934                                 |
1935                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1936                                 |
1937                                         <!\[CDATA\[.*?\]\]>     # CData Block
1938                                 )
1939                         }xs';
1940
1941                 $original_text = $text;         # Save original text in case of faliure.
1942
1943                 $depth          = 0;    # Current depth inside the tag tree.
1944                 $block_text     = "";   # Temporary text holder for current text.
1945                 $parsed         = "";   # Parsed text that will be returned.
1946
1947                 #
1948                 # Get the name of the starting tag.
1949                 #
1950                 if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
1951                         $base_tag_name = $matches[1];
1952
1953                 #
1954                 # Loop through every tag until we find the corresponding closing tag.
1955                 #
1956                 do {
1957                         #
1958                         # Split the text using the first $tag_match pattern found.
1959                         # Text before  pattern will be first in the array, text after
1960                         # pattern will be at the end, and between will be any catches made
1961                         # by the pattern.
1962                         #
1963                         $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1964
1965                         if (count($parts) < 3) {
1966                                 #
1967                                 # End of $text reached with unbalenced tag(s).
1968                                 # In that case, we return original text unchanged and pass the
1969                                 # first character as filtered to prevent an infinite loop in the
1970                                 # parent function.
1971                                 #
1972                                 return array($original_text{0}, substr($original_text, 1));
1973                         }
1974
1975                         $block_text .= $parts[0]; # Text before current tag.
1976                         $tag         = $parts[1]; # Tag to handle.
1977                         $text        = $parts[2]; # Remaining text after current tag.
1978
1979                         #
1980                         # Check for: Auto-close tag (like <hr/>)
1981                         #                        Comments and Processing Instructions.
1982                         #
1983                         if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) ||
1984                                 $tag{1} == '!' || $tag{1} == '?')
1985                         {
1986                                 # Just add the tag to the block as if it was text.
1987                                 $block_text .= $tag;
1988                         }
1989                         else {
1990                                 #
1991                                 # Increase/decrease nested tag count. Only do so if
1992                                 # the tag's name match base tag's.
1993                                 #
1994                                 if (preg_match("{^</?$base_tag_name\b}", $tag)) {
1995                                         if ($tag{1} == '/')                                             $depth--;
1996                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
1997                                 }
1998
1999                                 #
2000                                 # Check for `markdown="1"` attribute and handle it.
2001                                 #
2002                                 if ($md_attr &&
2003                                         preg_match($markdown_attr_match, $tag, $attr_m) &&
2004                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2005                                 {
2006                                         # Remove `markdown` attribute from opening tag.
2007                                         $tag = preg_replace($markdown_attr_match, '', $tag);
2008
2009                                         # Check if text inside this tag must be parsed in span mode.
2010                                         $this->mode = $attr_m[2] . $attr_m[3];
2011                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2012                                                 preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);
2013
2014                                         # Calculate indent before tag.
2015                                         preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
2016                                         $indent = strlen($matches[1]);
2017
2018                                         # End preceding block with this tag.
2019                                         $block_text .= $tag;
2020                                         $parsed .= $this->$hash_method($block_text);
2021
2022                                         # Get enclosing tag name for the ParseMarkdown function.
2023                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2024                                         $tag_name = $matches[1];
2025
2026                                         # Parse the content using the HTML-in-Markdown parser.
2027                                         list ($block_text, $text)
2028                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
2029                                                                                                                 $tag_name, $span_mode);
2030
2031                                         # Outdent markdown text.
2032                                         if ($indent > 0) {
2033                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2034                                                                                                         $block_text);
2035                                         }
2036
2037                                         # Append tag content to parsed text.
2038                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
2039                                         else                            $parsed .= "$block_text";
2040
2041                                         # Start over a new block.
2042                                         $block_text = "";
2043                                 }
2044                                 else $block_text .= $tag;
2045                         }
2046
2047                 } while ($depth > 0);
2048
2049                 #
2050                 # Hash last block text that wasn't processed inside the loop.
2051                 #
2052                 $parsed .= $this->$hash_method($block_text);
2053
2054                 return array($parsed, $text);
2055         }
2056
2057
2058         function hashClean($text) {
2059         #
2060         # Called whenever a tag must be hashed when a function insert a "clean" tag
2061         # in $text, it pass through this function and is automaticaly escaped,
2062         # blocking invalid nested overlap.
2063         #
2064                 # Swap back any tag hash found in $text so we do not have to `unhash`
2065                 # multiple times at the end.
2066                 $text = $this->unhash($text);
2067
2068                 # Then hash the tag.
2069                 $key = "C\x1A". md5($text);
2070                 $this->html_cleans[$key] = $text;
2071                 $this->html_hashes[$key] = $text;
2072                 return $key; # String that will replace the clean tag.
2073         }
2074
2075
2076         function doHeaders($text) {
2077         #
2078         # Redefined to add id attribute support.
2079         #
2080                 # Setext-style headers:
2081                 #         Header 1  {#header1}
2082                 #         ========
2083                 #
2084                 #         Header 2  {#header2}
2085                 #         --------
2086                 #
2087                 $text = preg_replace_callback(
2088                         '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n=+[ ]*\n+ }mx',
2089                         array(&$this, '_doHeaders_callback_setext_h1'), $text);
2090                 $text = preg_replace_callback(
2091                         '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ ]*\n-+[ ]*\n+ }mx',
2092                         array(&$this, '_doHeaders_callback_setext_h2'), $text);
2093
2094                 # atx-style headers:
2095                 #       # Header 1        {#header1}
2096                 #       ## Header 2       {#header2}
2097                 #       ## Header 2 with closing hashes ##  {#header3}
2098                 #       ...
2099                 #       ###### Header 6   {#header2}
2100                 #
2101                 $text = preg_replace_callback('{
2102                                 ^(\#{1,6})      # $1 = string of #\'s
2103                                 [ ]*
2104                                 (.+?)           # $2 = Header text
2105                                 [ ]*
2106                                 \#*                     # optional closing #\'s (not counted)
2107                                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
2108                                 [ ]*
2109                                 \n+
2110                         }xm',
2111                         array(&$this, '_doHeaders_callback_atx'), $text);
2112
2113                 return $text;
2114         }
2115         function _doHeaders_attr($attr) {
2116                 if (empty($attr))  return "";
2117                 return " id=\"$attr\"";
2118         }
2119         function _doHeaders_callback_setext_h1($matches) {
2120                 $attr  = $this->_doHeaders_attr($id =& $matches[2]);
2121                 $block = "<h1$attr>".$this->runSpanGamut($matches[1])."</h1>";
2122                 return "\n" . $this->hashBlock($block) . "\n\n";
2123         }
2124         function _doHeaders_callback_setext_h2($matches) {
2125                 $attr  = $this->_doHeaders_attr($id =& $matches[2]);
2126                 $block = "<h2$attr>".$this->runSpanGamut($matches[1])."</h2>";
2127                 return "\n" . $this->hashBlock($block) . "\n\n";
2128         }
2129         function _doHeaders_callback_atx($matches) {
2130                 $level = strlen($matches[1]);
2131                 $attr  = $this->_doHeaders_attr($id =& $matches[3]);
2132                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2133                 return "\n" . $this->hashBlock($block) . "\n\n";
2134         }
2135
2136
2137         function doTables($text) {
2138         #
2139         # Form HTML tables.
2140         #
2141                 $less_than_tab = $this->tab_width - 1;
2142                 #
2143                 # Find tables with leading pipe.
2144                 #
2145                 #       | Header 1 | Header 2
2146                 #       | -------- | --------
2147                 #       | Cell 1   | Cell 2
2148                 #       | Cell 3   | Cell 4
2149                 #
2150                 $text = preg_replace_callback('
2151                         {
2152                                 ^                                                       # Start of a line
2153                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2154                                 [|]                                                     # Optional leading pipe (present)
2155                                 (.+) \n                                         # $1: Header row (at least one pipe)
2156
2157                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2158                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
2159
2160                                 (                                                       # $3: Cells
2161                                         (?:
2162                                                 [ ]*                            # Allowed whitespace.
2163                                                 [|] .* \n                       # Row content.
2164                                         )*
2165                                 )
2166                                 (?=\n|\Z)                                       # Stop at final double newline.
2167                         }xm',
2168                         array(&$this, '_doTable_leadingPipe_callback'), $text);
2169
2170                 #
2171                 # Find tables without leading pipe.
2172                 #
2173                 #       Header 1 | Header 2
2174                 #       -------- | --------
2175                 #       Cell 1   | Cell 2
2176                 #       Cell 3   | Cell 4
2177                 #
2178                 $text = preg_replace_callback('
2179                         {
2180                                 ^                                                       # Start of a line
2181                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2182                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
2183
2184                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2185                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
2186
2187                                 (                                                       # $3: Cells
2188                                         (?:
2189                                                 .* [|] .* \n            # Row content
2190                                         )*
2191                                 )
2192                                 (?=\n|\Z)                                       # Stop at final double newline.
2193                         }xm',
2194                         array(&$this, '_DoTable_callback'), $text);
2195
2196                 return $text;
2197         }
2198         function _doTable_leadingPipe_callback($matches) {
2199                 $head           = $matches[1];
2200                 $underline      = $matches[2];
2201                 $content        = $matches[3];
2202
2203                 # Remove leading pipe for each row.
2204                 $content        = preg_replace('/^ *[|]/m', '', $content);
2205
2206                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2207         }
2208         function _doTable_callback($matches) {
2209                 $head           = $matches[1];
2210                 $underline      = $matches[2];
2211                 $content        = $matches[3];
2212
2213                 # Remove any tailing pipes for each line.
2214                 $head           = preg_replace('/[|] *$/m', '', $head);
2215                 $underline      = preg_replace('/[|] *$/m', '', $underline);
2216                 $content        = preg_replace('/[|] *$/m', '', $content);
2217
2218                 # Reading alignement from header underline.
2219                 $separators     = preg_split('/ *[|] */', $underline);
2220                 foreach ($separators as $n => $s) {
2221                         if (preg_match('/^ *-+: *$/', $s))              $attr[$n] = ' align="right"';
2222                         else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
2223                         else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
2224                         else                                                                    $attr[$n] = '';
2225                 }
2226
2227                 # Creating code spans before splitting the row is an easy way to
2228                 # handle a code span containg pipes.
2229                 $head   = $this->doCodeSpans($head);
2230                 $headers        = preg_split('/ *[|] */', $head);
2231                 $col_count      = count($headers);
2232
2233                 # Write column headers.
2234                 $text = "<table>\n";
2235                 $text .= "<thead>\n";
2236                 $text .= "<tr>\n";
2237                 foreach ($headers as $n => $header)
2238                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2239                 $text .= "</tr>\n";
2240                 $text .= "</thead>\n";
2241
2242                 # Split content by row.
2243                 $rows = explode("\n", trim($content, "\n"));
2244
2245                 $text .= "<tbody>\n";
2246                 foreach ($rows as $row) {
2247                         # Creating code spans before splitting the row is an easy way to
2248                         # handle a code span containg pipes.
2249                         $row = $this->doCodeSpans($row);
2250
2251                         # Split row by cell.
2252                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
2253                         $row_cells = array_pad($row_cells, $col_count, '');
2254
2255                         $text .= "<tr>\n";
2256                         foreach ($row_cells as $n => $cell)
2257                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2258                         $text .= "</tr>\n";
2259                 }
2260                 $text .= "</tbody>\n";
2261                 $text .= "</table>";
2262
2263                 return $this->hashBlock($text) . "\n";
2264         }
2265
2266
2267         function doDefLists($text) {
2268         #
2269         # Form HTML definition lists.
2270         #
2271                 $less_than_tab = $this->tab_width - 1;
2272
2273                 # Re-usable pattern to match any entire dl list:
2274                 $whole_list = '
2275                         (                                                               # $1 = whole list
2276                           (                                                             # $2
2277                                 [ ]{0,'.$less_than_tab.'}
2278                                 ((?>.*\S.*\n)+)                         # $3 = defined term
2279                                 \n?
2280                                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2281                           )
2282                           (?s:.+?)
2283                           (                                                             # $4
2284                                   \z
2285                                 |
2286                                   \n{2,}
2287                                   (?=\S)
2288                                   (?!                                           # Negative lookahead for another term
2289                                         [ ]{0,'.$less_than_tab.'}
2290                                         (?: \S.*\n )+?                  # defined term
2291                                         \n?
2292                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2293                                   )
2294                                   (?!                                           # Negative lookahead for another definition
2295                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2296                                   )
2297                           )
2298                         )
2299                 '; // mx
2300
2301                 $text = preg_replace_callback('{
2302                                 (?:(?<=\n\n)|\A\n?)
2303                                 '.$whole_list.'
2304                         }mx',
2305                         array(&$this, '_doDefLists_callback'), $text);
2306
2307                 return $text;
2308         }
2309         function _doDefLists_callback($matches) {
2310                 # Re-usable patterns to match list item bullets and number markers:
2311                 $list = $matches[1];
2312
2313                 # Turn double returns into triple returns, so that we can make a
2314                 # paragraph for the last item in a list, if necessary:
2315                 $result = trim($this->processDefListItems($list));
2316                 $result = "<dl>\n" . $result . "\n</dl>";
2317                 return $this->hashBlock($result) . "\n\n";
2318         }
2319
2320
2321         function processDefListItems($list_str) {
2322         #
2323         #       Process the contents of a single definition list, splitting it
2324         #       into individual term and definition list items.
2325         #
2326                 $less_than_tab = $this->tab_width - 1;
2327
2328                 # trim trailing blank lines:
2329                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2330
2331                 # Process definition terms.
2332                 $list_str = preg_replace_callback('{
2333                         (?:\n\n+|\A\n?)                                 # leading line
2334                         (                                                               # definition terms = $1
2335                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
2336                                 (?![:][ ]|[ ])                          # negative lookahead for a definition
2337                                                                                         #   mark (colon) or more whitespace.
2338                                 (?: \S.* \n)+?                          # actual term (not whitespace).
2339                         )
2340                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed
2341                                                                                         #   with a definition mark.
2342                         }xm',
2343                         array(&$this, '_processDefListItems_callback_dt'), $list_str);
2344
2345                 # Process actual definitions.
2346                 $list_str = preg_replace_callback('{
2347                         \n(\n+)?                                                # leading line = $1
2348                         [ ]{0,'.$less_than_tab.'}               # whitespace before colon
2349                         [:][ ]+                                                 # definition mark (colon)
2350                         ((?s:.+?))                                              # definition text = $2
2351                         (?= \n+                                                 # stop at next definition mark,
2352                                 (?:                                                     # next term or end of text
2353                                         [ ]{0,'.$less_than_tab.'} [:][ ]        |
2354                                         <dt> | \z
2355                                 )
2356                         )
2357                         }xm',
2358                         array(&$this, '_processDefListItems_callback_dd'), $list_str);
2359
2360                 return $list_str;
2361         }
2362         function _processDefListItems_callback_dt($matches) {
2363                 $terms = explode("\n", trim($matches[1]));
2364                 $text = '';
2365                 foreach ($terms as $term) {
2366                         $term = $this->runSpanGamut(trim($term));
2367                         $text .= "\n<dt>" . $term . "</dt>";
2368                 }
2369                 return $text . "\n";
2370         }
2371         function _processDefListItems_callback_dd($matches) {
2372                 $leading_line   = $matches[1];
2373                 $def                    = $matches[2];
2374
2375                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
2376                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2377                         $def = "\n". $def ."\n";
2378                 }
2379                 else {
2380                         $def = rtrim($def);
2381                         $def = $this->runSpanGamut($this->outdent($def));
2382                 }
2383
2384                 return "\n<dd>" . $def . "</dd>\n";
2385         }
2386
2387
2388         function doItalicsAndBold($text) {
2389         #
2390         # Redefined to change emphasis by underscore behaviour so that it does not
2391         # work in the middle of a word.
2392         #
2393                 # <strong> must go first:
2394                 $text = preg_replace_callback(array(
2395                         '{
2396                                 (                                               # $1: Marker
2397                                         (?<![a-zA-Z0-9])        # Not preceded by alphanum
2398                                         (?<!__)                         #       or by two marker chars.
2399                                         __
2400                                 )
2401                                 (?=\S)                                  # Not followed by whitespace
2402                                 (?!__)                                  #   or two others marker chars.
2403                                 (                                               # $2: Content
2404                                         (?>
2405                                                 [^_]+?                  # Anthing not em markers.
2406                                         |
2407                                                                                 # Balence any regular _ emphasis inside.
2408                                                 (?<![a-zA-Z0-9]) _ (?=\S) (.+?)
2409                                                 (?<=\S) _ (?![a-zA-Z0-9])
2410                                         |
2411                                                 _+                              # Allow unbalenced as last resort.
2412                                         )+?
2413                                 )
2414                                 (?<=\S) __                              # End mark not preceded by whitespace.
2415                                 (?![a-zA-Z0-9])                 # Not followed by alphanum
2416                                 (?!__)                                  #   or two others marker chars.
2417                         }sx',
2418                         '{
2419                                 ( (?<!\*\*) \*\* )              # $1: Marker (not preceded by two *)
2420                                 (?=\S)                                  # Not followed by whitespace
2421                                 (?!\1)                                  #   or two others marker chars.
2422                                 (                                               # $2: Content
2423                                         (?>
2424                                                 [^*]+?                  # Anthing not em markers.
2425                                         |
2426                                                                                 # Balence any regular * emphasis inside.
2427                                                 \* (?=\S) (.+?) (?<=\S) \*
2428                                         |
2429                                                 \*                              # Allow unbalenced as last resort.
2430                                         )+?
2431                                 )
2432                                 (?<=\S) \*\*                    # End mark not preceded by whitespace.
2433                         }sx',
2434                         ),
2435                         array(&$this, '_doItalicAndBold_strong_callback'), $text);
2436                 # Then <em>:
2437                 $text = preg_replace_callback(array(
2438                         '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
2439                         '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',
2440                         ),
2441                         array(&$this, '_doItalicAndBold_em_callback'), $text);
2442
2443                 return $text;
2444         }
2445
2446
2447         function formParagraphs($text) {
2448         #
2449         #       Params:
2450         #               $text - string to process with html <p> tags
2451         #
2452                 # Strip leading and trailing lines:
2453                 $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
2454
2455                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2456
2457                 #
2458                 # Wrap <p> tags and unhashify HTML blocks
2459                 #
2460                 foreach ($grafs as $key => $value) {
2461                         $value = trim($this->runSpanGamut($value));
2462
2463                         # Check if this should be enclosed in a paragraph.
2464                         # Clean tag hashes & block tag hashes are left alone.
2465                         $clean_key = $value;
2466                         $block_key = substr($value, 0, 34);
2467
2468                         $is_p = (!isset($this->html_blocks[$block_key]) &&
2469                                          !isset($this->html_cleans[$clean_key]));
2470
2471                         if ($is_p) {
2472                                 $value = "<p>$value</p>";
2473                         }
2474                         $grafs[$key] = $value;
2475                 }
2476
2477                 # Join grafs in one text, then unhash HTML tags.
2478                 $text = implode("\n\n", $grafs);
2479
2480                 # Finish by removing any tag hashes still present in $text.
2481                 $text = $this->unhash($text);
2482
2483                 return $text;
2484         }
2485
2486
2487         ### Footnotes
2488
2489         function stripFootnotes($text) {
2490         #
2491         # Strips link definitions from text, stores the URLs and titles in
2492         # hash references.
2493         #
2494                 $less_than_tab = $this->tab_width - 1;
2495
2496                 # Link defs are in the form: [^id]: url "optional title"
2497                 $text = preg_replace_callback('{
2498                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
2499                           [ ]*
2500                           \n?                                   # maybe *one* newline
2501                         (                                               # text = $2 (no blank lines allowed)
2502                                 (?:
2503                                         .+                              # actual text
2504                                 |
2505                                         \n                              # newlines but
2506                                         (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2507                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2508                                                                         # by non-indented content
2509                                 )*
2510                         )
2511                         }xm',
2512                         array(&$this, '_stripFootnotes_callback'),
2513                         $text);
2514                 return $text;
2515         }
2516         function _stripFootnotes_callback($matches) {
2517                 $note_id = $this->fn_id_prefix . $matches[1];
2518                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
2519                 return ''; # String that will replace the block
2520         }
2521
2522
2523         function doFootnotes($text) {
2524         #
2525         # Replace footnote references in $text [^id] with a special text-token
2526         # which will be can be
2527         #
2528                 if (!$this->in_footnote && !$this->in_anchor) {
2529                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2530                 }
2531                 return $text;
2532         }
2533
2534
2535         function appendFootnotes($text) {
2536         #
2537         # Append footnote list to text.
2538         #
2539
2540                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
2541                         array(&$this, '_appendFootnotes_callback'), $text);
2542
2543                 if (!empty($this->footnotes_ordered)) {
2544                         $text .= "\n\n";
2545                         $text .= "<div class=\"footnotes\">\n";
2546                         $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
2547                         $text .= "<ol>\n\n";
2548
2549                         $attr = " rev=\"footnote\"";
2550                         if ($this->fn_backlink_class != "") {
2551                                 $class = $this->fn_backlink_class;
2552                                 $class = $this->encodeAmpsAndAngles($class);
2553                                 $class = str_replace('"', '&quot;', $class);
2554                                 $attr .= " class=\"$class\"";
2555                         }
2556                         if ($this->fn_backlink_title != "") {
2557                                 $title = $this->fn_backlink_title;
2558                                 $title = $this->encodeAmpsAndAngles($title);
2559                                 $title = str_replace('"', '&quot;', $title);
2560                                 $attr .= " title=\"$title\"";
2561                         }
2562                         $num = 0;
2563
2564                         $this->in_footnote = true;
2565
2566                         foreach ($this->footnotes_ordered as $note_id => $footnote) {
2567                                 $footnote .= "\n"; # Need to append newline before parsing.
2568                                 $footnote = $this->runBlockGamut("$footnote\n");
2569
2570                                 $attr2 = str_replace("%%", ++$num, $attr);
2571
2572                                 # Add backlink to last paragraph; create new paragraph if needed.
2573                                 $backlink = "<a href=\"#fnref:$note_id\"$attr2>&#8617;</a>";
2574                                 if (preg_match('{</p>$}', $footnote)) {
2575                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
2576                                 } else {
2577                                         $footnote .= "\n\n<p>$backlink</p>";
2578                                 }
2579
2580                                 $text .= "<li id=\"fn:$note_id\">\n";
2581                                 $text .= $footnote . "\n";
2582                                 $text .= "</li>\n\n";
2583                         }
2584
2585                         $this->in_footnote = false;
2586
2587                         $text .= "</ol>\n";
2588                         $text .= "</div>";
2589                 }
2590                 return $text;
2591         }
2592         function _appendFootnotes_callback($matches) {
2593                 $node_id = $this->fn_id_prefix . $matches[1];
2594
2595                 # Create footnote marker only if it has a corresponding footnote *and*
2596                 # the footnote hasn't been used by another marker.
2597                 if (isset($this->footnotes[$node_id])) {
2598                         # Transfert footnote content to the ordered list.
2599                         $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
2600                         unset($this->footnotes[$node_id]);
2601
2602                         $num = count($this->footnotes_ordered);
2603                         $attr = " rel=\"footnote\"";
2604                         if ($this->fn_link_class != "") {
2605                                 $class = $this->fn_link_class;
2606                                 $class = $this->encodeAmpsAndAngles($class);
2607                                 $class = str_replace('"', '&quot;', $class);
2608                                 $attr .= " class=\"$class\"";
2609                         }
2610                         if ($this->fn_link_title != "") {
2611                                 $title = $this->fn_link_title;
2612                                 $title = $this->encodeAmpsAndAngles($title);
2613                                 $title = str_replace('"', '&quot;', $title);
2614                                 $attr .= " title=\"$title\"";
2615                         }
2616                         $attr = str_replace("%%", $num, $attr);
2617
2618                         return
2619                                 "<sup id=\"fnref:$node_id\">".
2620                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
2621                                 "</sup>";
2622                 }
2623
2624                 return "[^".$matches[1]."]";
2625         }
2626
2627
2628         ### Abbreviations ###
2629
2630         function stripAbbreviations($text) {
2631         #
2632         # Strips abbreviations from text, stores titles in hash references.
2633         #
2634                 $less_than_tab = $this->tab_width - 1;
2635
2636                 # Link defs are in the form: [id]*: url "optional title"
2637                 $text = preg_replace_callback('{
2638                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
2639                         (.*)                                    # text = $2 (no blank lines allowed)
2640                         }xm',
2641                         array(&$this, '_stripAbbreviations_callback'),
2642                         $text);
2643                 return $text;
2644         }
2645         function _stripAbbreviations_callback($matches) {
2646                 $abbr_word = $matches[1];
2647                 $abbr_desc = $matches[2];
2648                 $this->abbr_matches[] = preg_quote($abbr_word);
2649                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
2650                 return ''; # String that will replace the block
2651         }
2652
2653
2654         function doAbbreviations($text) {
2655         #
2656         # Find defined abbreviations in text and wrap them in <abbr> elements.
2657         #
2658                 if ($this->abbr_matches) {
2659                         // cannot use the /x modifier because abbr_matches may
2660                         // contain spaces:
2661                         $text = preg_replace_callback('{'.
2662                                 '(?<![\w\x1A])'.
2663                                 '(?:'. implode('|', $this->abbr_matches) .')'.
2664                                 '(?![\w\x1A])'.
2665                                 '}',
2666                                 array(&$this, '_doAbbreviations_callback'), $text);
2667                 }
2668                 return $text;
2669         }
2670         function _doAbbreviations_callback($matches) {
2671                 $abbr = $matches[0];
2672                 if (isset($this->abbr_desciptions[$abbr])) {
2673                         $desc = $this->abbr_desciptions[$abbr];
2674                         if (empty($desc)) {
2675                                 return $this->hashSpan("<abbr>$abbr</abbr>");
2676                         } else {
2677                                 $desc = $this->escapeSpecialCharsWithinTagAttributes($desc);
2678                                 return $this->hashSpan("<abbr title=\"$desc\">$abbr</abbr>");
2679                         }
2680                 } else {
2681                         return $matches[0];
2682                 }
2683         }
2684
2685 }
2686
2687
2688 /*
2689
2690 PHP Markdown Extra
2691 ==================
2692
2693 Description
2694 -----------
2695
2696 This is a PHP port of the original Markdown formatter written in Perl
2697 by John Gruber. This special "Extra" version of PHP Markdown features
2698 further enhancements to the syntax for making additional constructs
2699 such as tables and definition list.
2700
2701 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2702 easy-to-write structured text format into HTML. Markdown's text format
2703 is most similar to that of plain text email, and supports features such
2704 as headers, *emphasis*, code blocks, blockquotes, and links.
2705
2706 Markdown's syntax is designed not as a generic markup language, but
2707 specifically to serve as a front-end to (X)HTML. You can use span-level
2708 HTML tags anywhere in a Markdown document, and you can use block level
2709 HTML tags (like <div> and <table> as well).
2710
2711 For more information about Markdown's syntax, see:
2712
2713 <http://daringfireball.net/projects/markdown/>
2714
2715
2716 Bugs
2717 ----
2718
2719 To file bug reports please send email to:
2720
2721 <michel.fortin@michelf.com>
2722
2723 Please include with your report: (1) the example input; (2) the output you
2724 expected; (3) the output Markdown actually produced.
2725
2726
2727 Version History
2728 ---------------
2729
2730 See Readme file for details.
2731
2732 Extra 1.1.4 (3 Aug 2007):
2733
2734 Extra 1.1.3 (3 Jul 2007):
2735
2736 Extra 1.1.2 (7 Feb 2007)
2737
2738 Extra 1.1.1 (28 Dec 2006)
2739
2740 Extra 1.1 (1 Dec 2006)
2741
2742 Extra 1.0.1 (9 Dec 2005)
2743
2744 Extra 1.0 (5 Sep 2005)
2745
2746
2747 Copyright and License
2748 ---------------------
2749
2750 PHP Markdown & Extra
2751 Copyright (c) 2004-2007 Michel Fortin
2752 <http://www.michelf.com/>
2753 All rights reserved.
2754
2755 Based on Markdown
2756 Copyright (c) 2003-2006 John Gruber
2757 <http://daringfireball.net/>
2758 All rights reserved.
2759
2760 Redistribution and use in source and binary forms, with or without
2761 modification, are permitted provided that the following conditions are
2762 met:
2763
2764 *       Redistributions of source code must retain the above copyright notice,
2765         this list of conditions and the following disclaimer.
2766
2767 *       Redistributions in binary form must reproduce the above copyright
2768         notice, this list of conditions and the following disclaimer in the
2769         documentation and/or other materials provided with the distribution.
2770
2771 *       Neither the name "Markdown" nor the names of its contributors may
2772         be used to endorse or promote products derived from this software
2773         without specific prior written permission.
2774
2775 This software is provided by the copyright holders and contributors "as
2776 is" and any express or implied warranties, including, but not limited
2777 to, the implied warranties of merchantability and fitness for a
2778 particular purpose are disclaimed. In no event shall the copyright owner
2779 or contributors be liable for any direct, indirect, incidental, special,
2780 exemplary, or consequential damages (including, but not limited to,
2781 procurement of substitute goods or services; loss of use, data, or
2782 profits; or business interruption) however caused and on any theory of
2783 liability, whether in contract, strict liability, or tort (including
2784 negligence or otherwise) arising in any way out of the use of this
2785 software, even if advised of the possibility of such damage.
2786
2787 */
2788 ?>