lib/markdown.php

   1 <?php
   2 #
   3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown & Extra
   6 # Copyright (c) 2004-2007 Michel Fortin
   7 # <http://www.michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13
  14
  15 define( 'MARKDOWN_VERSION',       "1.0.1f" ); # Wed 7 Feb 2007
  16 define( 'MARKDOWNEXTRA_VERSION',  "1.1.2" );  # Wed 7 Feb 2007
  17
  18
  19 #
  20 # Global default settings:
  21 #
  22
  23 # Change to ">" for HTML output
  24 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  25
  26 # Define the width of a tab for code blocks.
  27 define( 'MARKDOWN_TAB_WIDTH',     4 );
  28
  29 # Optional title attribute for footnote links and backlinks.
  30 define( 'MARKDOWN_FN_LINK_TITLE',         "" );
  31 define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
  32
  33 # Optional class attribute for footnote links and backlinks.
  34 define( 'MARKDOWN_FN_LINK_CLASS',         "" );
  35 define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
  36
  37
  38 #
  39 # WordPress settings:
  40 #
  41
  42 # Change to false to remove Markdown from posts and/or comments.
  43 define( 'MARKDOWN_WP_POSTS',      true );
  44 define( 'MARKDOWN_WP_COMMENTS',   true );
  45
  46
  47
  48 ### Standard Function Interface ###
  49
  50 define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
  51
  52 function Markdown($text) {
  53 #
  54 # Initialize the parser and return the result of its transform method.
  55 #
  56         # Setup static parser variable.
  57         static $parser;
  58         if (!isset($parser)) {
  59                 $parser_class = MARKDOWN_PARSER_CLASS;
  60                 $parser = new $parser_class;
  61         }
  62
  63         # Transform text using parser.
  64         return $parser->transform($text);
  65 }
  66
  67
  68 ### WordPress Plugin Interface ###
  69
  70 /*
  71 Plugin Name: Markdown Extra
  72 Plugin URI: http://www.michelf.com/projects/php-markdown/
  73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
  74 Version: 1.1.2
  75 Author: Michel Fortin
  76 Author URI: http://www.michelf.com/
  77 */
  78
  79 if (isset($wp_version)) {
  80         # More details about how it works here:
  81         # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
  82
  83         # Post content and excerpts
  84         # - Remove WordPress paragraph generator.
  85         # - Run Markdown on excerpt, then remove all tags.
  86         # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
  87         if (MARKDOWN_WP_POSTS) {
  88                 remove_filter('the_content',  'wpautop');
  89                 remove_filter('the_excerpt',  'wpautop');
  90                 add_filter('the_content',     'Markdown', 6);
  91                 add_filter('get_the_excerpt', 'Markdown', 6);
  92                 add_filter('get_the_excerpt', 'trim', 7);
  93                 add_filter('the_excerpt',     'mdwp_add_p');
  94                 add_filter('the_excerpt_rss', 'mdwp_strip_p');
  95
  96                 remove_filter('content_save_pre',  'balanceTags', 50);
  97                 remove_filter('excerpt_save_pre',  'balanceTags', 50);
  98                 add_filter('the_content',         'balanceTags', 50);
  99                 add_filter('get_the_excerpt', 'balanceTags', 9);
 100         }
 101
 102         # Comments
 103         # - Remove WordPress paragraph generator.
 104         # - Remove WordPress auto-link generator.
 105         # - Scramble important tags before passing them to the kses filter.
 106         # - Run Markdown on excerpt then remove paragraph tags.
 107         if (MARKDOWN_WP_COMMENTS) {
 108                 remove_filter('comment_text', 'wpautop');
 109                 remove_filter('comment_text', 'make_clickable');
 110                 add_filter('pre_comment_content', 'Markdown', 6);
 111                 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
 112                 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
 113                 add_filter('get_comment_text',    'Markdown', 6);
 114                 add_filter('get_comment_excerpt', 'Markdown', 6);
 115                 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
 116
 117                 global $markdown_hidden_tags;
 118                 $markdown_hidden_tags = array(
 119                         '<p>'   => md5('<p>'),          '</p>'  => md5('</p>'),
 120                         '<pre>' => md5('<pre>'),        '</pre>'=> md5('</pre>'),
 121                         '<ol>'  => md5('<ol>'),         '</ol>' => md5('</ol>'),
 122                         '<ul>'  => md5('<ul>'),         '</ul>' => md5('</ul>'),
 123                         '<li>'  => md5('<li>'),         '</li>' => md5('</li>'),
 124                         );
 125         }
 126
 127         function mdwp_add_p($text) {
 128                 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
 129                         $text = '<p>'.$text.'</p>';
 130                         $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
 131                 }
 132                 return $text;
 133         }
 134
 135         function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
 136
 137         function mdwp_hide_tags($text) {
 138                 global $markdown_hidden_tags;
 139                 return str_replace(array_keys($markdown_hidden_tags),
 140                                                         array_values($markdown_hidden_tags), $text);
 141         }
 142         function mdwp_show_tags($text) {
 143                 global $markdown_hidden_tags;
 144                 return str_replace(array_values($markdown_hidden_tags),
 145                                                         array_keys($markdown_hidden_tags), $text);
 146         }
 147 }
 148
 149
 150 ### bBlog Plugin Info ###
 151
 152 function identify_modifier_markdown() {
 153         return array(
 154                 'name' => 'markdown',
 155                 'type' => 'modifier',
 156                 'nicename' => 'PHP Markdown Extra',
 157                 'description' => 'A text-to-HTML conversion tool for web writers',
 158                 'authors' => 'Michel Fortin and John Gruber',
 159                 'licence' => 'GPL',
 160                 'version' => MARKDOWNEXTRA_VERSION,
 161                 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
 162                 );
 163 }
 164
 165
 166 ### Smarty Modifier Interface ###
 167
 168 function smarty_modifier_markdown($text) {
 169         return Markdown($text);
 170 }
 171
 172
 173 ### Textile Compatibility Mode ###
 174
 175 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
 176
 177 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
 178         # Try to include PHP SmartyPants. Should be in the same directory.
 179         @include_once 'smartypants.php';
 180         # Fake Textile class. It calls Markdown instead.
 181         class Textile {
 182                 function TextileThis($text, $lite='', $encode='') {
 183                         if ($lite == '' && $encode == '')    $text = Markdown($text);
 184                         if (function_exists('SmartyPants'))  $text = SmartyPants($text);
 185                         return $text;
 186                 }
 187                 # Fake restricted version: restrictions are not supported for now.
 188                 function TextileRestricted($text, $lite='', $noimage='') {
 189                         return $this->TextileThis($text, $lite);
 190                 }
 191                 # Workaround to ensure compatibility with TextPattern 4.0.3.
 192                 function blockLite($text) { return $text; }
 193         }
 194 }
 195
 196
 197
 198 #
 199 # Markdown Parser Class
 200 #
 201
 202 class Markdown_Parser {
 203
 204         # Regex to match balanced [brackets].
 205         # Needed to insert a maximum bracked depth while converting to PHP.
 206         var $nested_brackets_depth = 6;
 207         var $nested_brackets;
 208
 209         # Table of hash values for escaped characters:
 210         var $escape_chars = '\`*_{}[]()>#+-.!';
 211         var $escape_table = array();
 212         var $backslash_escape_table = array();
 213
 214         # Change to ">" for HTML output.
 215         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
 216         var $tab_width = MARKDOWN_TAB_WIDTH;
 217
 218
 219         function Markdown_Parser() {
 220         #
 221         # Constructor function. Initialize appropriate member variables.
 222         #
 223                 $this->_initDetab();
 224
 225                 $this->nested_brackets =
 226                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 227                         str_repeat('\])*', $this->nested_brackets_depth);
 228
 229                 # Create an identical table but for escaped characters.
 230                 foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
 231                         $hash = md5($char);
 232                         $this->escape_table[$char] = $hash;
 233                         $this->backslash_escape_table["\\$char"] = $hash;
 234                 }
 235
 236                 # Sort document, block, and span gamut in ascendent priority order.
 237                 asort($this->document_gamut);
 238                 asort($this->block_gamut);
 239                 asort($this->span_gamut);
 240         }
 241
 242
 243         # Internal hashes used during transformation.
 244         var $urls = array();
 245         var $titles = array();
 246         var $html_blocks = array();
 247         var $html_hashes = array(); # Contains both blocks and span hashes.
 248
 249
 250         function transform($text) {
 251         #
 252         # Main function. The order in which other subs are called here is
 253         # essential. Link and image substitutions need to happen before
 254         # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
 255         # and <img> tags get encoded.
 256         #
 257                 # Clear the global hashes. If we don't clear these, you get conflicts
 258                 # from other articles when generating a page which contains more than
 259                 # one article (e.g. an index page that shows the N most recent
 260                 # articles):
 261                 $this->urls = array();
 262                 $this->titles = array();
 263                 $this->html_blocks = array();
 264                 $this->html_hashes = array();
 265
 266                 # Standardize line endings:
 267                 #   DOS to Unix and Mac to Unix
 268                 $text = str_replace(array("\r\n", "\r"), "\n", $text);
 269
 270                 # Make sure $text ends with a couple of newlines:
 271                 $text .= "\n\n";
 272
 273                 # Convert all tabs to spaces.
 274                 $text = $this->detab($text);
 275
 276                 # Turn block-level HTML blocks into hash entries
 277                 $text = $this->hashHTMLBlocks($text);
 278
 279                 # Strip any lines consisting only of spaces and tabs.
 280                 # This makes subsequent regexen easier to write, because we can
 281                 # match consecutive blank lines with /\n+/ instead of something
 282                 # contorted like /[ \t]*\n+/ .
 283                 $text = preg_replace('/^[ \t]+$/m', '', $text);
 284
 285                 # Run document gamut methods.
 286                 foreach ($this->document_gamut as $method => $priority) {
 287                         $text = $this->$method($text);
 288                 }
 289
 290                 return $text . "\n";
 291         }
 292
 293         var $document_gamut = array(
 294                 # Strip link definitions, store in hashes.
 295                 "stripLinkDefinitions" => 20,
 296
 297                 "runBasicBlockGamut"   => 30,
 298                 "unescapeSpecialChars" => 90,
 299                 );
 300
 301
 302         function stripLinkDefinitions($text) {
 303         #
 304         # Strips link definitions from text, stores the URLs and titles in
 305         # hash references.
 306         #
 307                 $less_than_tab = $this->tab_width - 1;
 308
 309                 # Link defs are in the form: ^[id]: url "optional title"
 310                 $text = preg_replace_callback('{
 311                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 312                                                           [ \t]*
 313                                                           \n?                           # maybe *one* newline
 314                                                           [ \t]*
 315                                                         <?(\S+?)>?                      # url = $2
 316                                                           [ \t]*
 317                                                           \n?                           # maybe one newline
 318                                                           [ \t]*
 319                                                         (?:
 320                                                                 (?<=\s)                 # lookbehind for whitespace
 321                                                                 ["(]
 322                                                                 (.*?)                   # title = $3
 323                                                                 [")]
 324                                                                 [ \t]*
 325                                                         )?      # title is optional
 326                                                         (?:\n+|\Z)
 327                         }xm',
 328                         array(&$this, '_stripLinkDefinitions_callback'),
 329                         $text);
 330                 return $text;
 331         }
 332         function _stripLinkDefinitions_callback($matches) {
 333                 $link_id = strtolower($matches[1]);
 334                 $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
 335                 if (isset($matches[3]))
 336                         $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
 337                 return ''; # String that will replace the block
 338         }
 339
 340
        function hashHTMLBlocks($text) {
        #
        # Find raw block-level HTML in $text and hand each match to
        # _hashHTMLBlocks_callback(); the callback's return value replaces
        # the match. Five passes are made, in this order: strictly formatted
        # block tags, block tags in general, <hr>, standalone HTML comments,
        # and <? ... ?> / <% ... %> processor instructions.
        # Returns the text after all five passes.
        #
                # Maximum number of leading spaces allowed before <hr>, comments
                # and processor instructions (one less than the tab width).
                $less_than_tab = $this->tab_width - 1;

                # Hashify HTML blocks:
                # We only want to do this for block-level HTML tags, such as headers,
                # lists, and tables. That's because we still want to wrap <p>s around
                # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
                # phrase emphasis, and spans. The list of tags we're looking for is
                # hard-coded:
                #
                # List A (used by the first pass) is list B plus `ins` and `del`.
                $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                                                'script|noscript|form|fieldset|iframe|math|ins|del';
                $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                                                'script|noscript|form|fieldset|iframe|math';

                # Regular expression for the content of a block tag.
                # $nested_tags_level is how many times the nesting sub-pattern is
                # repeated when $content is built below.
                $nested_tags_level = 4;
                # Sub-pattern for the optional attributes of an opening tag.
                $attr = '
                        (?>                             # optional tag attributes
                          \s                    # starts with whitespace
                          (?>
                                [^>"/]+         # text outside quotes
                          |
                                /+(?!>)         # slash not followed by ">"
                          |
                                "[^"]*"         # text inside double quotes (tolerate ">")
                          |
                                \'[^\']*\'      # text inside single quotes (tolerate ">")
                          )*
                        )?
                        ';
                # $content is assembled from three parts: the opening half of the
                # nesting sub-pattern repeated $nested_tags_level times, a lazy
                # `.*?` for the innermost level, and the closing half repeated the
                # same number of times. `\2` refers to the tag name captured by the
                # patterns that embed $content.
                $content =
                        str_repeat('
                                (?>
                                  [^<]+                 # content without tag
                                |
                                  <\2                   # nested opening tag
                                        '.$attr.'       # attributes
                                        (?:
                                          />
                                        |
                                          >', $nested_tags_level).      # end of opening tag
                                          '.*?'.                                        # last level nested tag content
                        str_repeat('
                                          </\2\s*>      # closing nested tag
                                        )
                                  |
                                        <(?!/\2\s*>     # other tags with a different name
                                  )
                                )*',
                                $nested_tags_level);

                # First, look for nested blocks, e.g.:
                #       <div>
                #               <div>
                #               tags for inner block must be indented.
                #               </div>
                #       </div>
                #
                # The outermost tags must start at the left margin for this to match, and
                # the inner nested divs must be indented.
                # We need to do this before the next, more liberal match, because the next
                # match will start at the first `<div>` and stop at the first `</div>`.
                $text = preg_replace_callback('{
                                        (                                               # save in $1
                                                ^                                       # start of line  (with /m)
                                                <('.$block_tags_a.')# start tag = $2
                                                '.$attr.'>\n            # attributes followed by > and \n
                                                '.$content.'            # content, support nesting
                                                </\2>                           # the matching end tag
                                                [ \t]*                          # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document
                                        )
                        }xm',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                #
                # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
                #
                # Unlike the first pass, this one uses list B and does not require
                # a newline right after the opening tag.
                $text = preg_replace_callback('{
                                        (                                               # save in $1
                                                ^                                       # start of line  (with /m)
                                                <('.$block_tags_b.')# start tag = $2
                                                '.$attr.'>                      # attributes followed by >
                                                '.$content.'            # content, support nesting
                                                </\2>                           # the matching end tag
                                                [ \t]*                          # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document
                                        )
                        }xm',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                # Special case just for <hr />. It was easier to make a special case than
                # to make the other regex more complicated.
                $text = preg_replace_callback('{
                                        (?:
                                                (?<=\n\n)               # Starting after a blank line
                                                |                               # or
                                                \A\n?                   # the beginning of the doc
                                        )
                                        (                                               # save in $1
                                                [ ]{0,'.$less_than_tab.'}
                                                <(hr)                           # start tag = $2
                                                \b                                      # word break
                                                ([^<>])*?                       #
                                                /?>                                     # the matching end tag
                                                [ \t]*
                                                (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                        )
                        }x',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                # Special case for standalone HTML comments:
                $text = preg_replace_callback('{
                                (?:
                                        (?<=\n\n)               # Starting after a blank line
                                        |                               # or
                                        \A\n?                   # the beginning of the doc
                                )
                                (                                               # save in $1
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <!-- .*? -->
                                        )
                                        [ \t]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                )
                        }x',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                # PHP and ASP-style processor instructions (<? and <%)
                $text = preg_replace_callback('{
                                (?:
                                        (?<=\n\n)               # Starting after a blank line
                                        |                               # or
                                        \A\n?                   # the beginning of the doc
                                )
                                (                                               # save in $1
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <([?%])                 # $2
                                                .*?
                                                \2>
                                        )
                                        [ \t]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                )
                        }x',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                return $text;
        }
 497         function _hashHTMLBlocks_callback($matches) {
 498                 $text = $matches[1];
 499                 $key  = $this->hashBlock($text);
 500                 return "\n\n$key\n\n";
 501         }
 502
 503
 504         function hashBlock($text) {
 505         #
 506         # Called whenever a tag must be hashed when a function insert a block-level
 507         # tag in $text, it pass through this function and is automaticaly escaped,
 508         # which remove the need to call _HashHTMLBlocks at every step.
 509         #
 510                 # Swap back any tag hash found in $text so we do not have to `unhash`
 511                 # multiple times at the end.
 512                 $text = $this->unhash($text);
 513
 514                 # Then hash the block.
 515                 $key = md5($text);
 516                 $this->html_hashes[$key] = $text;
 517                 $this->html_blocks[$key] = $text;
 518                 return $key; # String that will replace the tag.
 519         }
 520
 521
 522         function hashSpan($text) {
 523         #
 524         # Called whenever a tag must be hashed when a function insert a span-level
 525         # element in $text, it pass through this function and is automaticaly
 526         # escaped, blocking invalid nested overlap.
 527         #
 528                 # Swap back any tag hash found in $text so we do not have to `unhash`
 529                 # multiple times at the end.
 530                 $text = $this->unhash($text);
 531
 532                 # Then hash the span.
 533                 $key = md5($text);
 534                 $this->html_hashes[$key] = $text;
 535                 return $key; # String that will replace the span tag.
 536         }
 537
 538
 539         var $block_gamut = array(
 540         #
 541         # These are all the transformations that form block-level
 542         # tags like paragraphs, headers, and list items.
 543         #
 544                 "doHeaders"         => 10,
 545                 "doHorizontalRules" => 20,
 546
 547                 "doLists"           => 40,
 548                 "doCodeBlocks"      => 50,
 549                 "doBlockQuotes"     => 60,
 550                 );
 551
 552         function runBlockGamut($text) {
 553         #
 554         # Run block gamut tranformations.
 555         #
 556                 # We need to escape raw HTML in Markdown source before doing anything
 557                 # else. This need to be done for each block, and not only at the
 558                 # begining in the Markdown function since hashed blocks can be part of
 559                 # list items and could have been indented. Indented blocks would have
 560                 # been seen as a code block in a previous pass of hashHTMLBlocks.
 561                 $text = $this->hashHTMLBlocks($text);
 562
 563                 return $this->runBasicBlockGamut($text);
 564         }
 565
 566         function runBasicBlockGamut($text) {
 567         #
 568         # Run block gamut tranformations, without hashing HTML blocks. This is
 569         # useful when HTML blocks are known to be already hashed, like in the first
 570         # whole-document pass.
 571         #
 572                 foreach ($this->block_gamut as $method => $priority) {
 573                         $text = $this->$method($text);
 574                 }
 575
 576                 # Finally form paragraph and restore hashed blocks.
 577                 $text = $this->formParagraphs($text);
 578
 579                 return $text;
 580         }
 581
 582
 583         function doHorizontalRules($text) {
 584                 # Do Horizontal Rules:
 585                 return preg_replace(
 586                         array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
 587                                   '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
 588                                   '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
 589                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 590                         $text);
 591         }
 592
 593
 594         var $span_gamut = array(
 595         #
 596         # These are all the transformations that occur *within* block-level
 597         # tags like paragraphs, headers, and list items.
 598         #
 599                 "escapeSpecialCharsWithinTagAttributes" => -20,
 600                 "doCodeSpans"                                                   => -10,
 601                 "encodeBackslashEscapes"                                =>  -5,
 602
 603                 # Process anchor and image tags. Images must come first,
 604                 # because ![foo][f] looks like an anchor.
 605                 "doImages"            =>  10,
 606                 "doAnchors"           =>  20,
 607
 608                 # Make links out of things like `<http://example.com/>`
 609                 # Must come after doAnchors, because you can use < and >
 610                 # delimiters in inline links like [this](<url>).
 611                 "doAutoLinks"         =>  30,
 612                 "encodeAmpsAndAngles" =>  40,
 613
 614                 "doItalicsAndBold"    =>  50,
 615                 "doHardBreaks"        =>  60,
 616                 );
 617
 618         function runSpanGamut($text) {
 619         #
 620         # Run span gamut tranformations.
 621         #
 622                 foreach ($this->span_gamut as $method => $priority) {
 623                         $text = $this->$method($text);
 624                 }
 625
 626                 return $text;
 627         }
 628
 629
 630         function doHardBreaks($text) {
 631                 # Do hard breaks:
 632                 $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n");
 633                 return preg_replace('/ {2,}\n/', $br_tag, $text);
 634         }
 635
 636
 637         function escapeSpecialCharsWithinTagAttributes($text) {
 638         #
 639         # Within tags -- meaning between < and > -- encode [\ ` * _] so they
 640         # don't conflict with their use in Markdown for code, italics and strong.
 641         # We're replacing each such character with its corresponding MD5 checksum
 642         # value; this is likely overkill, but it should prevent us from colliding
 643         # with the escape values by accident.
 644         #
 645                 $tokens = $this->tokenizeHTML($text);
 646                 $text = '';   # rebuild $text from the tokens
 647
 648                 foreach ($tokens as $cur_token) {
 649                         if ($cur_token[0] == 'tag') {
 650                                 $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
 651                                 $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]);
 652                                 $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
 653                                 $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
 654                         }
 655                         $text .= $cur_token[1];
 656                 }
 657                 return $text;
 658         }
 659
 660
 661         function doAnchors($text) {
 662         #
 663         # Turn Markdown link shortcuts into XHTML <a> tags.
 664         #
 665                 #
 666                 # First, handle reference-style links: [link text] [id]
 667                 #
 668                 $text = preg_replace_callback('{
 669                         (                                       # wrap whole match in $1
 670                           \[
 671                                 ('.$this->nested_brackets.')    # link text = $2
 672                           \]
 673
 674                           [ ]?                          # one optional space
 675                           (?:\n[ ]*)?           # one optional newline followed by spaces
 676
 677                           \[
 678                                 (.*?)           # id = $3
 679                           \]
 680                         )
 681                         }xs',
 682                         array(&$this, '_doAnchors_reference_callback'), $text);
 683
 684                 #
 685                 # Next, inline-style links: [link text](url "optional title")
 686                 #
 687                 $text = preg_replace_callback('{
 688                         (                               # wrap whole match in $1
 689                           \[
 690                                 ('.$this->nested_brackets.')    # link text = $2
 691                           \]
 692                           \(                    # literal paren
 693                                 [ \t]*
 694                                 <?(.*?)>?       # href = $3
 695                                 [ \t]*
 696                                 (                       # $4
 697                                   ([\'"])       # quote char = $5
 698                                   (.*?)         # Title = $6
 699                                   \5            # matching quote
 700                                   [ \t]*        # ignore any spaces/tabs between closing quote and )
 701                                 )?                      # title is optional
 702                           \)
 703                         )
 704                         }xs',
 705                         array(&$this, '_DoAnchors_inline_callback'), $text);
 706
 707                 #
 708                 # Last, handle reference-style shortcuts: [link text]
 709                 # These must come last in case you've also got [link test][1]
 710                 # or [link test](/foo)
 711                 #
 712 //              $text = preg_replace_callback('{
 713 //                      (                                       # wrap whole match in $1
 714 //                        \[
 715 //                              ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
 716 //                        \]
 717 //                      )
 718 //                      }xs',
 719 //                      array(&$this, '_doAnchors_reference_callback'), $text);
 720
 721                 return $text;
 722         }
 723         function _doAnchors_reference_callback($matches) {
 724                 $whole_match =  $matches[1];
 725                 $link_text   =  $matches[2];
 726                 $link_id     =& $matches[3];
 727
 728                 if ($link_id == "") {
 729                         # for shortcut links like [this][] or [this].
 730                         $link_id = $link_text;
 731                 }
 732
 733                 # lower-case and turn embedded newlines into spaces
 734                 $link_id = strtolower($link_id);
 735                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 736
 737                 if (isset($this->urls[$link_id])) {
 738                         $url = $this->urls[$link_id];
 739                         $url = $this->encodeAmpsAndAngles($url);
 740
 741                         $result = "<a href=\"$url\"";
 742                         if ( isset( $this->titles[$link_id] ) ) {
 743                                 $title = $this->titles[$link_id];
 744                                 $title = $this->encodeAmpsAndAngles($title);
 745                                 $result .=  " title=\"$title\"";
 746                         }
 747
 748                         $link_text = $this->runSpanGamut($link_text);
 749                         $result .= ">$link_text</a>";
 750                         $result = $this->hashSpan($result);
 751                 }
 752                 else {
 753                         $result = $whole_match;
 754                 }
 755                 return $result;
 756         }
 757         function _doAnchors_inline_callback($matches) {
 758                 $whole_match    =  $matches[1];
 759                 $link_text              =  $this->runSpanGamut($matches[2]);
 760                 $url                    =  $matches[3];
 761                 $title                  =& $matches[6];
 762
 763                 $url = $this->encodeAmpsAndAngles($url);
 764
 765                 $result = "<a href=\"$url\"";
 766                 if (isset($title)) {
 767                         $title = str_replace('"', '&quot;', $title);
 768                         $title = $this->encodeAmpsAndAngles($title);
 769                         $result .=  " title=\"$title\"";
 770                 }
 771
 772                 $link_text = $this->runSpanGamut($link_text);
 773                 $result .= ">$link_text</a>";
 774
 775                 return $this->hashSpan($result);
 776         }
 777
 778
 779         function doImages($text) {
 780         #
 781         # Turn Markdown image shortcuts into <img> tags.
 782         #
 783                 #
 784                 # First, handle reference-style labeled images: ![alt text][id]
 785                 #
 786                 $text = preg_replace_callback('{
 787                         (                               # wrap whole match in $1
 788                           !\[
 789                                 ('.$this->nested_brackets.')            # alt text = $2
 790                           \]
 791
 792                           [ ]?                          # one optional space
 793                           (?:\n[ ]*)?           # one optional newline followed by spaces
 794
 795                           \[
 796                                 (.*?)           # id = $3
 797                           \]
 798
 799                         )
 800                         }xs',
 801                         array(&$this, '_doImages_reference_callback'), $text);
 802
 803                 #
 804                 # Next, handle inline images:  ![alt text](url "optional title")
 805                 # Don't forget: encode * and _
 806                 #
 807                 $text = preg_replace_callback('{
 808                         (                               # wrap whole match in $1
 809                           !\[
 810                                 ('.$this->nested_brackets.')            # alt text = $2
 811                           \]
 812                           \s?                   # One optional whitespace character
 813                           \(                    # literal paren
 814                                 [ \t]*
 815                                 <?(\S+?)>?      # src url = $3
 816                                 [ \t]*
 817                                 (                       # $4
 818                                   ([\'"])       # quote char = $5
 819                                   (.*?)         # title = $6
 820                                   \5            # matching quote
 821                                   [ \t]*
 822                                 )?                      # title is optional
 823                           \)
 824                         )
 825                         }xs',
 826                         array(&$this, '_doImages_inline_callback'), $text);
 827
 828                 return $text;
 829         }
 830         function _doImages_reference_callback($matches) {
 831                 $whole_match = $matches[1];
 832                 $alt_text    = $matches[2];
 833                 $link_id     = strtolower($matches[3]);
 834
 835                 if ($link_id == "") {
 836                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 837                 }
 838
 839                 $alt_text = str_replace('"', '&quot;', $alt_text);
 840                 if (isset($this->urls[$link_id])) {
 841                         $url = $this->urls[$link_id];
 842                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 843                         if (isset($this->titles[$link_id])) {
 844                                 $title = $this->titles[$link_id];
 845                                 $result .=  " title=\"$title\"";
 846                         }
 847                         $result .= $this->empty_element_suffix;
 848                         $result = $this->hashSpan($result);
 849                 }
 850                 else {
 851                         # If there's no such link ID, leave intact:
 852                         $result = $whole_match;
 853                 }
 854
 855                 return $result;
 856         }
 857         function _doImages_inline_callback($matches) {
 858                 $whole_match    = $matches[1];
 859                 $alt_text               = $matches[2];
 860                 $url                    = $matches[3];
 861                 $title                  =& $matches[6];
 862
 863                 $alt_text = str_replace('"', '&quot;', $alt_text);
 864                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 865                 if (isset($title)) {
 866                         $title = str_replace('"', '&quot;', $title);
 867                         $result .=  " title=\"$title\""; # $title already quoted
 868                 }
 869                 $result .= $this->empty_element_suffix;
 870
 871                 return $this->hashSpan($result);
 872         }
 873
 874
 875         function doHeaders($text) {
 876                 # Setext-style headers:
 877                 #         Header 1
 878                 #         ========
 879                 #
 880                 #         Header 2
 881                 #         --------
 882                 #
 883                 $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
 884                         array(&$this, '_doHeaders_callback_setext_h1'), $text);
 885                 $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
 886                         array(&$this, '_doHeaders_callback_setext_h2'), $text);
 887
 888                 # atx-style headers:
 889                 #       # Header 1
 890                 #       ## Header 2
 891                 #       ## Header 2 with closing hashes ##
 892                 #       ...
 893                 #       ###### Header 6
 894                 #
 895                 $text = preg_replace_callback('{
 896                                 ^(\#{1,6})      # $1 = string of #\'s
 897                                 [ \t]*
 898                                 (.+?)           # $2 = Header text
 899                                 [ \t]*
 900                                 \#*                     # optional closing #\'s (not counted)
 901                                 \n+
 902                         }xm',
 903                         array(&$this, '_doHeaders_callback_atx'), $text);
 904
 905                 return $text;
 906         }
 907         function _doHeaders_callback_setext_h1($matches) {
 908                 $block = "<h1>".$this->runSpanGamut($matches[1])."</h1>";
 909                 return "\n" . $this->hashBlock($block) . "\n\n";
 910         }
 911         function _doHeaders_callback_setext_h2($matches) {
 912                 $block = "<h2>".$this->runSpanGamut($matches[1])."</h2>";
 913                 return "\n" . $this->hashBlock($block) . "\n\n";
 914         }
 915         function _doHeaders_callback_atx($matches) {
 916                 $level = strlen($matches[1]);
 917                 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 918                 return "\n" . $this->hashBlock($block) . "\n\n";
 919         }
 920
 921
 922         function doLists($text) {
 923         #
 924         # Form HTML ordered (numbered) and unordered (bulleted) lists.
 925         #
 926                 $less_than_tab = $this->tab_width - 1;
 927
 928                 # Re-usable patterns to match list item bullets and number markers:
 929                 $marker_ul  = '[*+-]';
 930                 $marker_ol  = '\d+[.]';
 931                 $marker_any = "(?:$marker_ul|$marker_ol)";
 932
 933                 $markers = array($marker_ul, $marker_ol);
 934
 935                 foreach ($markers as $marker) {
 936                         # Re-usable pattern to match any entirel ul or ol list:
 937                         $whole_list = '
 938                                 (                                                               # $1 = whole list
 939                                   (                                                             # $2
 940                                         [ ]{0,'.$less_than_tab.'}
 941                                         ('.$marker.')                           # $3 = first list item marker
 942                                         [ \t]+
 943                                   )
 944                                   (?s:.+?)
 945                                   (                                                             # $4
 946                                           \z
 947                                         |
 948                                           \n{2,}
 949                                           (?=\S)
 950                                           (?!                                           # Negative lookahead for another list item marker
 951                                                 [ \t]*
 952                                                 '.$marker.'[ \t]+
 953                                           )
 954                                   )
 955                                 )
 956                         '; // mx
 957
 958                         # We use a different prefix before nested lists than top-level lists.
 959                         # See extended comment in _ProcessListItems().
 960
 961                         if ($this->list_level) {
 962                                 $text = preg_replace_callback('{
 963                                                 ^
 964                                                 '.$whole_list.'
 965                                         }mx',
 966                                         array(&$this, '_doLists_callback'), $text);
 967                         }
 968                         else {
 969                                 $text = preg_replace_callback('{
 970                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
 971                                                 '.$whole_list.'
 972                                         }mx',
 973                                         array(&$this, '_doLists_callback'), $text);
 974                         }
 975                 }
 976
 977                 return $text;
 978         }
 979         function _doLists_callback($matches) {
 980                 # Re-usable patterns to match list item bullets and number markers:
 981                 $marker_ul  = '[*+-]';
 982                 $marker_ol  = '\d+[.]';
 983                 $marker_any = "(?:$marker_ul|$marker_ol)";
 984
 985                 $list = $matches[1];
 986                 $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
 987
 988                 $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
 989
 990                 $list .= "\n";
 991                 $result = $this->processListItems($list, $marker_any);
 992
 993                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 994                 return "\n". $result ."\n\n";
 995         }
 996
 997         var $list_level = 0;
 998
 999         function processListItems($list_str, $marker_any) {
1000         #
1001         #       Process the contents of a single ordered or unordered list, splitting it
1002         #       into individual list items.
1003         #
1004                 # The $this->list_level global keeps track of when we're inside a list.
1005                 # Each time we enter a list, we increment it; when we leave a list,
1006                 # we decrement. If it's zero, we're not in a list anymore.
1007                 #
1008                 # We do this because when we're not inside a list, we want to treat
1009                 # something like this:
1010                 #
1011                 #               I recommend upgrading to version
1012                 #               8. Oops, now this line is treated
1013                 #               as a sub-list.
1014                 #
1015                 # As a single paragraph, despite the fact that the second line starts
1016                 # with a digit-period-space sequence.
1017                 #
1018                 # Whereas when we're inside a list (or sub-list), that line will be
1019                 # treated as the start of a sub-list. What a kludge, huh? This is
1020                 # an aspect of Markdown's syntax that's hard to parse perfectly
1021                 # without resorting to mind-reading. Perhaps the solution is to
1022                 # change the syntax rules such that sub-lists must start with a
1023                 # starting cardinal number; e.g. "1." or "a.".
1024
1025                 $this->list_level++;
1026
1027                 # trim trailing blank lines:
1028                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1029
1030                 $list_str = preg_replace_callback('{
1031                         (\n)?                                                   # leading line = $1
1032                         (^[ \t]*)                                               # leading whitespace = $2
1033                         ('.$marker_any.') [ \t]+                # list marker = $3
1034                         ((?s:.+?))                                              # list item text   = $4
1035                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
1036                         (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
1037                         }xm',
1038                         array(&$this, '_processListItems_callback'), $list_str);
1039
1040                 $this->list_level--;
1041                 return $list_str;
1042         }
1043         function _processListItems_callback($matches) {
1044                 $item = $matches[4];
1045                 $leading_line =& $matches[1];
1046                 $leading_space =& $matches[2];
1047                 $tailing_blank_line =& $matches[5];
1048
1049                 if ($leading_line || $tailing_blank_line ||
1050                         preg_match('/\n{2,}/', $item))
1051                 {
1052                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1053                 }
1054                 else {
1055                         # Recursion for sub-lists:
1056                         $item = $this->doLists($this->outdent($item));
1057                         $item = preg_replace('/\n+$/', '', $item);
1058                         $item = $this->runSpanGamut($item);
1059                 }
1060
1061                 return "<li>" . $item . "</li>\n";
1062         }
1063
1064
1065         function doCodeBlocks($text) {
1066         #
1067         #       Process Markdown `<pre><code>` blocks.
1068         #
1069                 $text = preg_replace_callback('{
1070                                 (?:\n\n|\A)
1071                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1072                                   (?:
1073                                         (?:[ ]{'.$this->tab_width.'} | \t)  # Lines must start with a tab or a tab-width of spaces
1074                                         .*\n+
1075                                   )+
1076                                 )
1077                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1078                         }xm',
1079                         array(&$this, '_doCodeBlocks_callback'), $text);
1080
1081                 return $text;
1082         }
1083         function _doCodeBlocks_callback($matches) {
1084                 $codeblock = $matches[1];
1085
1086                 $codeblock = $this->encodeCode($this->outdent($codeblock));
1087         //      $codeblock = $this->detab($codeblock);
1088                 # trim leading newlines and trailing whitespace
1089                 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
1090
1091                 $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";
1092
1093                 return $result;
1094         }
1095
1096
1097         function doCodeSpans($text) {
1098         #
1099         #       *       Backtick quotes are used for <code></code> spans.
1100         #
1101         #       *       You can use multiple backticks as the delimiters if you want to
1102         #               include literal backticks in the code span. So, this input:
1103         #
1104         #                 Just type ``foo `bar` baz`` at the prompt.
1105         #
1106         #               Will translate to:
1107         #
1108         #                 <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1109         #
1110         #               There's no arbitrary limit to the number of backticks you
1111         #               can use as delimters. If you need three consecutive backticks
1112         #               in your code, use four for delimiters, etc.
1113         #
1114         #       *       You can use spaces to get literal backticks at the edges:
1115         #
1116         #                 ... type `` `bar` `` ...
1117         #
1118         #               Turns to:
1119         #
1120         #                 ... type <code>`bar`</code> ...
1121         #
1122                 $text = preg_replace_callback('@
1123                                 (?<!\\\)        # Character before opening ` can\'t be a backslash
1124                                 (`+)            # $1 = Opening run of `
1125                                 (.+?)           # $2 = The code block
1126                                 (?<!`)
1127                                 \1                      # Matching closer
1128                                 (?!`)
1129                         @xs',
1130                         array(&$this, '_doCodeSpans_callback'), $text);
1131
1132                 return $text;
1133         }
1134         function _doCodeSpans_callback($matches) {
1135                 $c = $matches[2];
1136                 $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
1137                 $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
1138                 $c = $this->encodeCode($c);
1139                 return $this->hashSpan("<code>$c</code>");
1140         }
1141
1142
1143         function encodeCode($_) {
1144         #
1145         # Encode/escape certain characters inside Markdown code runs.
1146         # The point is that in code, these characters are literals,
1147         # and lose their special Markdown meanings.
1148         #
1149                 # Encode all ampersands; HTML entities are not
1150                 # entities within a Markdown code span.
1151                 $_ = str_replace('&', '&amp;', $_);
1152
1153                 # Do the angle bracket song and dance:
1154                 $_ = str_replace(array('<',    '>'),
1155                                                  array('&lt;', '&gt;'), $_);
1156
1157                 # Now, escape characters that are magic in Markdown:
1158 //              $_ = str_replace(array_keys($this->escape_table),
1159 //                                               array_values($this->escape_table), $_);
1160
1161                 return $_;
1162         }
1163
1164
1165         function doItalicsAndBold($text) {
1166                 # <strong> must go first:
1167                 $text = preg_replace_callback('{
1168                                 (                                               # $1: Marker
1169                                         (?<!\*\*) \* |          #     (not preceded by two chars of
1170                                         (?<!__)   _                     #      the same marker)
1171                                 )
1172                                 \1
1173                                 (?=\S)                                  # Not followed by whitespace
1174                                 (?!\1\1)                                #   or two others marker chars.
1175                                 (                                               # $2: Content
1176                                         (?:
1177                                                 [^*_]+?                 # Anthing not em markers.
1178                                         |
1179                                                                                 # Balence any regular emphasis inside.
1180                                                 \1 (?=\S) .+? (?<=\S) \1
1181                                         |
1182                                                 (?! \1 ) .              # Allow unbalenced * and _.
1183                                         )+?
1184                                 )
1185                                 (?<=\S) \1\1                    # End mark not preceded by whitespace.
1186                         }sx',
1187                         array(&$this, '_doItalicAndBold_strong_callback'), $text);
1188                 # Then <em>:
1189                 $text = preg_replace_callback(
1190                         '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
1191                         array(&$this, '_doItalicAndBold_em_callback'), $text);
1192
1193                 return $text;
1194         }
1195         function _doItalicAndBold_em_callback($matches) {
1196                 $text = $matches[2];
1197                 $text = $this->runSpanGamut($text);
1198                 return $this->hashSpan("<em>$text</em>");
1199         }
1200         function _doItalicAndBold_strong_callback($matches) {
1201                 $text = $matches[2];
1202                 $text = $this->runSpanGamut($text);
1203                 return $this->hashSpan("<strong>$text</strong>");
1204         }
1205
1206
1207         function doBlockQuotes($text) {
1208                 $text = preg_replace_callback('/
1209                           (                                                             # Wrap whole match in $1
1210                                 (
1211                                   ^[ \t]*>[ \t]?                        # ">" at the start of a line
1212                                         .+\n                                    # rest of the first line
1213                                   (.+\n)*                                       # subsequent consecutive lines
1214                                   \n*                                           # blanks
1215                                 )+
1216                           )
1217                         /xm',
1218                         array(&$this, '_doBlockQuotes_callback'), $text);
1219
1220                 return $text;
1221         }
1222         function _doBlockQuotes_callback($matches) {
1223                 $bq = $matches[1];
1224                 # trim one level of quoting - trim whitespace-only lines
1225                 $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
1226                 $bq = $this->runBlockGamut($bq);                # recurse
1227
1228                 $bq = preg_replace('/^/m', "  ", $bq);
1229                 # These leading spaces cause problem with <pre> content,
1230                 # so we need to fix that:
1231                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1232                         array(&$this, '_DoBlockQuotes_callback2'), $bq);
1233
1234                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1235         }
1236         function _doBlockQuotes_callback2($matches) {
1237                 $pre = $matches[1];
1238                 $pre = preg_replace('/^  /m', '', $pre);
1239                 return $pre;
1240         }
1241
1242
function formParagraphs($text) {
#
#   Cut $text into paragraphs at blank lines and wrap in <p> tags every
#   paragraph that is not a key of $this->html_blocks.
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Returns the paragraphs joined with a blank line between them, where
#   each html_blocks key has been replaced by the block stored under it.
#
    # Strip leading and trailing newlines, then split on every run of two
    # or more newlines.
    $text = trim($text, "\n");
    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # First pass: wrap <p> tags around everything that is not an HTML
    # block key.
    #
    foreach ($grafs as $key => $value) {
        if (isset($this->html_blocks[$value])) {
            continue;
        }

        $spans = $this->runSpanGamut($value);
        # Leading spaces and tabs are dropped in favor of the opening tag.
        $paragraph = "<p>" . ltrim($spans, " \t") . "</p>";
        $grafs[$key] = $this->unhash($paragraph);
    }

    #
    # Second pass: unhashify HTML blocks.
    #
    foreach ($grafs as $key => $graf) {
        if (isset($this->html_blocks[$graf])) {
            $grafs[$key] = $this->html_blocks[$graf];
        }
    }

    return implode("\n\n", $grafs);
}
1314
1315
function encodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to be
# encoded: an `&` that does not begin an entity becomes `&amp;`, and a `<`
# that is not followed by a letter or one of `/ ? $ ! %` becomes `&lt;`.
#
    $patterns = array(
        # Ampersand-encoding based entirely on Nat Irons's Amputator MT
        # plugin: http://bumppo.net/projects/amputator/
        '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
        # Naked <'s
        '{<(?![a-z/?\$!%])}i',
    );
    $replacements = array('&amp;', '&lt;');

    # The patterns are applied one after the other, in array order.
    return preg_replace($patterns, $replacements, $text);
}
1329
1330
function encodeBackslashEscapes($text) {
#
#   Parameter:  String.
#   Returns:    The string, after replacing every key of
#               $this->backslash_escape_table found in it with the
#               matching value.
#
    $table = $this->backslash_escape_table;
    $sequences = array_keys($table);
    $encoded   = array_values($table);

    # str_replace works through the pairs in table order, so escaped
    # backslashes must come first in the table.
    return str_replace($sequences, $encoded, $text);
}
1341
1342
function doAutoLinks($text) {
#
# Turn <scheme:...> URLs (http, https, ftp, dict) and <address@domain.foo>
# email addresses into links. Each match is handed to one of the two
# callbacks below.
#
    # URLs: <http://example.com/>
    $url_pattern = '{<((https?|ftp|dict):[^\'">\s]+)>}';
    $url_handler = array(&$this, '_doAutoLinks_url_callback');
    $text = preg_replace_callback($url_pattern, $url_handler, $text);

    # Email addresses: <address@domain.foo>
    $email_pattern = '{
        <
        (?:mailto:)?
        (
            [-.\w\x80-\xFF]+
            \@
            [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
        )
        >
        }xi';
    $email_handler = array(&$this, '_doAutoLinks_email_callback');

    return preg_replace_callback($email_pattern, $email_handler, $text);
}
function _doAutoLinks_url_callback($matches) {
#
# $matches[1] is the URL found between the angle brackets. It is used both
# as the href and as the link text, then the link is stored with hashSpan.
#
    $url = $this->encodeAmpsAndAngles($matches[1]);
    return $this->hashSpan("<a href=\"$url\">$url</a>");
}
function _doAutoLinks_email_callback($matches) {
#
# $matches[1] is the address without the optional `mailto:` prefix. The
# link built by encodeEmailAddress is stored with hashSpan.
#
    $address = $this->unescapeSpecialChars($matches[1]);
    return $this->hashSpan($this->encodeEmailAddress($address));
}
1373
1374
function encodeEmailAddress($addr) {
#
#   Input: an email address, e.g. "foo@example.com"
#
#   Output: the email address as a mailto link, with most characters of
#       the address (and of the `mailto:` prefix in the href) written as
#       a decimal or hexadecimal entity, in the hopes of foiling most
#       address harvesting spam bots. The choice made for each character
#       is derived from a checksum of the address, so the same address
#       always gives the same output.
#
#   Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
#   With some optimizations by Milian Wolff.
#
    $addr = "mailto:" . $addr;
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

    foreach ($chars as $key => $char) {
        $ord = ord($char);

        # Non-ascii chars are left untouched.
        if ($ord >= 128) {
            continue;
        }

        # Pseudo-random value between 0 and 99 for this position.
        $r = ($seed * (1 + $key)) % 100;

        # roughly 10% raw, 45% hex, 45% dec
        # '@' *must* be encoded. I insist.
        if ($r > 90 && $char != '@') {
            continue;
        }

        $chars[$key] = ($r < 45)
            ? '&#x'.dechex($ord).';'
            : '&#'.$ord.';';
    }

    $href  = implode('', $chars);
    $label = implode('', array_slice($chars, 7)); # text without `mailto:`

    return "<a href=\"$href\">$label</a>";
}
1414
1415
function unescapeSpecialChars($text) {
#
# Swap back in all the special characters we've hidden: every value of
# $this->escape_table found in $text is replaced by its key.
#
    $table = $this->escape_table;
    $hidden   = array_values($table);
    $original = array_keys($table);

    return str_replace($hidden, $original, $text);
}
1423
1424
function tokenizeHTML($str) {
#
#   Parameter:  String containing HTML + Markdown markup.
#   Returns:    An array of the tokens comprising the input
#               string. Each token is either a tag or a run of text
#               between tags. Each element of the array is a
#               two-element array; the first is either 'tag' or 'text';
#               the second is the actual value.
#   Note:       Markdown code spans are taken into account: no tag token is
#               generated within a code span. An opening code span marker
#               and the span that follows it (up to and including the
#               closing marker) are returned as two 'text' tokens.
#
    $tokens = array();

    while ($str != "") {
        #
        # Each loop iteration searches for either the next tag or the next
        # opening code span marker. If a code span marker is found, the
        # code span is extracted in its entirety and will result in an
        # extra text token.
        #
        $parts = preg_split('{
            (
                (?<![`\\\\])
                `+                      # code span marker
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                (?:
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
            )
            }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Create token from text preceding tag.
        if ($parts[0] != "") {
            $tokens[] = array('text', $parts[0]);
        }

        # Check if we reach the end.
        if (count($parts) < 3) {
            break;
        }

        # Create token from tag or code span.
        # (Square-bracket offset: the curly-brace form no longer parses
        # on PHP 8.)
        if ($parts[1][0] == "`") {
            $tokens[] = array('text', $parts[1]);
            $str = $parts[2];

            # Skip the whole code span, pass as text token. The span ends
            # at the FIRST run of backticks that is exactly as long as the
            # opening marker: the match is lazy, and the closing run may
            # be neither preceded nor followed by another backtick. When
            # no closing marker exists nothing is skipped.
            if (preg_match('/^(.*?(?<!`)'.$parts[1].'(?!`))(.*)$/sm',
                $str, $matches))
            {
                $tokens[] = array('text', $matches[1]);
                $str = $matches[2];
            }
        } else {
            $tokens[] = array('tag', $parts[1]);
            $str = $parts[2];
        }
    }

    return $tokens;
}
1493
1494
function outdent($text) {
#
# Remove one level of line-leading indentation: at the start of every
# line, either one tab or between one and $this->tab_width spaces.
#
    $one_level = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';

    return preg_replace($one_level, "", $text);
}
1501
1502
# Name of the string length function called by `detab`. `_initDetab` will
# create a function to handle UTF-8 if the default function does not exist.
var $utf8_strlen = 'mb_strlen';
1506
function detab($text) {
#
# Replace tabs with the appropriate amount of space.
#
# Every line is cut at its tab characters and rebuilt piece by piece: before
# each piece after the first, enough spaces are added to bring the rebuilt
# line to the next multiple of $this->tab_width. Every line of the result
# ends with a newline.
#
    $strlen = $this->utf8_strlen; # best strlen function for UTF-8.
    $width  = $this->tab_width;
    $result = "";

    foreach (explode("\n", $text) as $line) {
        # Split in blocks; the first block starts the rebuilt line.
        $blocks  = explode("\t", $line);
        $rebuilt = array_shift($blocks);

        foreach ($blocks as $block) {
            # Calculate amount of space, insert spaces, insert block.
            $padding = $width - $strlen($rebuilt, 'UTF-8') % $width;
            $rebuilt .= str_repeat(" ", $padding) . $block;
        }

        $result .= $rebuilt . "\n";
    }

    return $result;
}
function _initDetab() {
#
# Make sure the `utf8_strlen` property names a function that exists.
#
# When the function it currently names (probably `mb_strlen`) is not
# available, the property is pointed at `Markdown_UTF8_strlen`, and that
# function is declared here unless it already exists. It loosely counts
# the number of UTF-8 characters with a regular expression.
#
    if (!function_exists($this->utf8_strlen)) {
        $this->utf8_strlen = 'Markdown_UTF8_strlen';

        if (!function_exists($this->utf8_strlen)) {
            function Markdown_UTF8_strlen($text) {
                # preg_match_all returns the number of matches; each match
                # is one byte below 0xC0, or a lead byte with its
                # continuation bytes.
                return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
                    $text, $m);
            }
        }
    }
}
1551
1552
function unhash($text) {
#
# Swap back in everything that was hashed: every key of $this->html_hashes
# found in $text is replaced by the value stored under it.
#
    $hashes = $this->html_hashes;
    $keys     = array_keys($hashes);
    $contents = array_values($hashes);

    return str_replace($keys, $contents, $text);
}
1560
1561 }
1562
1563
1564 #
1565 # Markdown Extra Parser Class
1566 #
1567
1568 class MarkdownExtra_Parser extends Markdown_Parser {
1569
# Prefix for footnote ids. Empty by default.
var $fn_id_prefix = "";

# Optional title attribute for footnote links and backlinks. The defaults
# come from the MARKDOWN_FN_LINK_TITLE and MARKDOWN_FN_BACKLINK_TITLE
# constants defined at the top of this file.
var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;

# Optional class attribute for footnote links and backlinks. The defaults
# come from the MARKDOWN_FN_LINK_CLASS and MARKDOWN_FN_BACKLINK_CLASS
# constants defined at the top of this file.
var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
1580
1581
function __construct() {
#
# Constructor function. Initialize the parser object.
#
# Declared under the `__construct` name so that `new` runs it on every PHP
# version: PHP 8 no longer treats a method named after its class as the
# constructor. It is declared before the class-named method below on
# purpose.
#
    # Add extra escapable characters before parent constructor
    # initialize the table.
    $this->escape_chars .= ':|';

    # Insert extra document, block, and span transformations.
    # Parent constructor will do the sorting.
    $this->document_gamut += array(
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
        );
    $this->block_gamut += array(
        "doTables"           => 15,
        "doDefLists"         => 45,
        );
    $this->span_gamut += array(
        "doFootnotes"        =>  4,
        "doAbbreviations"    =>  5,
        );

    parent::Markdown_Parser();
}

function MarkdownExtra_Parser() {
#
# PHP 4 style constructor, kept so that PHP 4 and any code calling this
# method by name keep working. All the work is done by __construct().
#
    $this->__construct();
}
1608
1609
# Extra hashes used during extra transformations. `transform` resets all of
# them to an empty array before each run.
var $footnotes = array();
var $footnotes_ordered = array();
var $abbr_desciptions = array();
var $abbr_matches = array();
var $html_cleans = array();
1616
1617
function transform($text) {
#
# Empty the hashes used by the extra transformations, then let the parent
# class transform $text and return its result.
#
    # Clear the global hashes. If we don't clear these, you get conflicts
    # from other articles when generating a page which contains more than
    # one article (e.g. an index page that shows the N most recent
    # articles):
    $hashes = array(
        'footnotes',
        'footnotes_ordered',
        'abbr_desciptions',
        'abbr_matches',
        'html_cleans',
    );
    foreach ($hashes as $hash) {
        $this->$hash = array();
    }

    return parent::transform($text);
}
1635
1636
1637         ### HTML Block Parser ###
1638
# Each of these lists is a regular expression alternation of tag names.

# Tags that are always treated as block tags:
var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';

# Tags treated as block tags only if the opening tag is alone on its line:
var $context_block_tags = 'script|noscript|math|ins|del';

# Tags where markdown="1" defaults to span mode:
var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

# Tags which must not have their contents modified, no matter where
# they appear:
var $clean_tags = 'script|math';

# Tags that do not need to be closed.
var $auto_close_tags = 'hr|img';
1654
1655
function hashHTMLBlocks($text) {
#
# Hashify HTML Blocks and "clean tags".
#
# We only want to do this for block-level HTML tags, such as headers,
# lists, and tables. That's because we still want to wrap <p>s around
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
# phrase emphasis, and spans. The list of tags we're looking for is
# hard-coded.
#
# The work is done by _hashHTMLBlocks_inMarkdown, which hands the text over
# to _hashHTMLBlocks_inHTML whenever it encounters a block tag.
#
    # The HTML-in-Markdown hasher returns ( processed text , remaining
    # text ); only the processed text is of interest here.
    $result = $this->_hashHTMLBlocks_inMarkdown($text);

    return $result[0];
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag = '', $span = false)
{
#
# Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
#
# *   $indent is the number of space to be ignored when checking for code
#     blocks. This is important because if we don't take the indent into
#     account, something like this (which looks right) won't work as expected:
#
#     <div>
#         <div markdown="1">
#         Hello World.  <-- Is this a Markdown code block or text?
#         </div>  <-- Is this a Markdown code block or a real tag?
#     <div>
#
#     If you don't like this, just don't indent the tag on which
#     you apply the markdown="1" attribute.
#
# *   If $enclosing_tag is not empty, stops at the first unmatched closing
#     tag with that name. Nested tags supported.
#
# *   If $span is true, text inside must be treated as span. An empty
#     span-level hash is added before every newline so that it does not
#     create paragraphs or trigger any other block element.
#
# Returns an array of that form: ( processed text , remaining text )
#
# Characters of $tag are read with square-bracket offsets; the curly-brace
# form no longer parses on PHP 8.
#
    if ($text === '') return array('', '');

    # Regex to check for the presence of newlines around a block tag.
    $newline_match_before = '/(?:^\n?|\n\n)*$/';
    $newline_match_after =
        '{
            ^                       # Start of text following the tag.
            (?:[ ]*<!--.*?-->)?     # Optional comment.
            [ ]*\n                  # Must be followed by newline.
        }xs';

    # Regex to match any tag.
    $block_tag_match =
        '{
            (                       # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    (?:             # Tag name.
                        '.$this->block_tags.'           |
                        '.$this->context_block_tags.'   |
                        '.$this->clean_tags.'           |
                        (?!\s)'.$enclosing_tag.'
                    )
                    \s*             # Whitespace.
                    (?:
                        ".*?"       |   # Double quotes (can contain `>`)
                        \'.*?\'     |   # Single quotes (can contain `>`)
                        .+?             # Anything but quotes and `>`.
                    )*?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';


    $depth = 0;     # Current depth inside the tag tree.
    $parsed = "";   # Parsed text that will be returned.

    #
    # Loop through every tag until we find the closing tag of the parent
    # or loop until reaching the end of text if no parent tag specified.
    #
    do {
        #
        # Split the text using the first $tag_match pattern found.
        # Text before  pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($block_tag_match, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        # If in Markdown span mode, add a empty-string span-level hash
        # after each newline to prevent triggering any block element.
        if ($span) {
            $newline = $this->hashSpan("") . "\n";
            $parts[0] = str_replace("\n", $newline, $parts[0]);
        }

        $parsed .= $parts[0]; # Text before current tag.

        # If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; # Tag to handle.
        $text = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Tag inside code block or span
        #
        if (# Find current paragraph
            preg_match('/(?>^\n?|\n\n)((?>.\n?)+?)$/', $parsed, $matches) &&
            (
            # Then match in it either a code block...
            preg_match('/^ {'.($indent+4).'}.*(?>\n {'.($indent+4).'}.*)*'.
                        '(?!\n)$/', $matches[1], $x) ||
            # ...or unbalanced code span markers. (the regex matches balanced)
            !preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
                         $matches[1])
            ))
        {
            # Tag is in code block or span and may not be a tag at all. So we
            # simply skip the first char (should be a `<`).
            $parsed .= $tag[0];
            $text = substr($tag, 1) . $text; # Put back $tag minus first char.
        }
        #
        # Check for: Opening Block level tag or
        #            Opening Content Block tag (like ins and del)
        #               used as a block tag (tag is alone on its line).
        #
        else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||
            (   preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&
                preg_match($newline_match_before, $parsed) &&
                preg_match($newline_match_after, $text) )
            )
        {
            # Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            # Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        #
        # Check for: Clean tag (like script, math)
        #            HTML Comments, processing instructions.
        #
        else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Need to parse tag and following text using the HTML parser.
            # (don't check for markdown attribute)
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $parsed .= $block_text;
        }
        #
        # Check for: Tag with same name as enclosing tag.
        #
        else if ($enclosing_tag !== '' &&
            # Same name as enclosing tag.
            preg_match("{^</?(?:$enclosing_tag)\b}", $tag))
        {
            #
            # Increase/decrease nested tag count. A closing tag goes one
            # level up; an opening tag goes one level down unless it is
            # self-closing (its second-to-last character is a slash).
            #
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                #
                # Going out of parent element. Clean up and break so we
                # return to the calling function.
                #
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
1863         function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
1864         #
1865         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
1866         #
1867         # *   Calls $hash_method to convert any blocks.
1868         # *   Stops when the first opening tag closes.
1869         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
1870         #     (it is not inside clean tags)
1871         #
1872         # Returns an array of that form: ( processed text , remaining text )
1873         #
1874                 if ($text === '') return array('', '');
1875
1876                 # Regex to match `markdown` attribute inside of a tag.
1877                 $markdown_attr_match = '
1878                         {
1879                                 \s*                     # Eat whitespace before the `markdown` attribute
1880                                 markdown
1881                                 \s*=\s*
1882                                 (["\'])         # $1: quote delimiter
1883                                 (.*?)           # $2: attribute value
1884                                 \1                      # matching delimiter
1885                         }xs';
1886
1887                 # Regex to match any tag.
1888                 $tag_match = '{
1889                                 (                                       # $2: Capture hole tag.
1890                                         </?                                     # Any opening or closing tag.
1891                                                 [\w:$]+                 # Tag name.
1892                                                 \s*                             # Whitespace.
1893                                                 (?:
1894                                                         ".*?"           |       # Double quotes (can contain `>`)
1895                                                         \'.*?\'         |       # Single quotes (can contain `>`)
1896                                                         .+?                             # Anything but quotes and `>`.
1897                                                 )*?
1898                                         >                                       # End of tag.
1899                                 |
1900                                         <!--    .*?     -->     # HTML Comment
1901                                 |
1902                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1903                                 |
1904                                         <!\[CDATA\[.*?\]\]>     # CData Block
1905                                 )
1906                         }xs';
1907
1908                 $original_text = $text;         # Save original text in case of faliure.
1909
1910                 $depth          = 0;    # Current depth inside the tag tree.
1911                 $block_text     = "";   # Temporary text holder for current text.
1912                 $parsed         = "";   # Parsed text that will be returned.
1913
1914                 #
1915                 # Get the name of the starting tag.
1916                 #
1917                 if (preg_match("/^<([\w:$]*)\b/", $text, $matches))
1918                         $base_tag_name = $matches[1];
1919
1920                 #
1921                 # Loop through every tag until we find the corresponding closing tag.
1922                 #
1923                 do {
1924                         #
1925                         # Split the text using the first $tag_match pattern found.
1926                         # Text before  pattern will be first in the array, text after
1927                         # pattern will be at the end, and between will be any catches made
1928                         # by the pattern.
1929                         #
1930                         $parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1931
1932                         if (count($parts) < 3) {
1933                                 #
1934                                 # End of $text reached with unbalenced tag(s).
1935                                 # In that case, we return original text unchanged and pass the
1936                                 # first character as filtered to prevent an infinite loop in the
1937                                 # parent function.
1938                                 #
1939                                 return array($original_text{0}, substr($original_text, 1));
1940                         }
1941
1942                         $block_text .= $parts[0]; # Text before current tag.
1943                         $tag         = $parts[1]; # Tag to handle.
1944                         $text        = $parts[2]; # Remaining text after current tag.
1945
1946                         #
1947                         # Check for: Auto-close tag (like <hr/>)
1948                         #                        Comments and Processing Instructions.
1949                         #
1950                         if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) ||
1951                                 $tag{1} == '!' || $tag{1} == '?')
1952                         {
1953                                 # Just add the tag to the block as if it was text.
1954                                 $block_text .= $tag;
1955                         }
1956                         else {
1957                                 #
1958                                 # Increase/decrease nested tag count. Only do so if
1959                                 # the tag's name match base tag's.
1960                                 #
1961                                 if (preg_match("{^</?$base_tag_name\b}", $tag)) {
1962                                         if ($tag{1} == '/')                                             $depth--;
1963                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
1964                                 }
1965
1966                                 #
1967                                 # Check for `markdown="1"` attribute and handle it.
1968                                 #
1969                                 if ($md_attr &&
1970                                         preg_match($markdown_attr_match, $tag, $attr_matches) &&
1971                                         preg_match('/^1|block|span$/', $attr_matches[2]))
1972                                 {
1973                                         # Remove `markdown` attribute from opening tag.
1974                                         $tag = preg_replace($markdown_attr_match, '', $tag);
1975
1976                                         # Check if text inside this tag must be parsed in span mode.
1977                                         $this->mode = $attr_matches[2];
1978                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
1979                                                 preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);
1980
1981                                         # Calculate indent before tag.
1982                                         preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
1983                                         $indent = strlen($matches[1]);
1984
1985                                         # End preceding block with this tag.
1986                                         $block_text .= $tag;
1987                                         $parsed .= $this->$hash_method($block_text);
1988
1989                                         # Get enclosing tag name for the ParseMarkdown function.
1990                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
1991                                         $tag_name = $matches[1];
1992
1993                                         # Parse the content using the HTML-in-Markdown parser.
1994                                         list ($block_text, $text)
1995                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
1996                                                                                                                 $tag_name, $span_mode);
1997
1998                                         # Outdent markdown text.
1999                                         if ($indent > 0) {
2000                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2001                                                                                                         $block_text);
2002                                         }
2003
2004                                         # Append tag content to parsed text.
2005                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
2006                                         else                            $parsed .= "$block_text";
2007
2008                                         # Start over a new block.
2009                                         $block_text = "";
2010                                 }
2011                                 else $block_text .= $tag;
2012                         }
2013
2014                 } while ($depth > 0);
2015
2016                 #
2017                 # Hash last block text that wasn't processed inside the loop.
2018                 #
2019                 $parsed .= $this->$hash_method($block_text);
2020
2021                 return array($parsed, $text);
2022         }
2023
2024
2025         function hashClean($text) {
2026         #
2027         # Called whenever a tag must be hashed when a function insert a "clean" tag
2028         # in $text, it pass through this function and is automaticaly escaped,
2029         # blocking invalid nested overlap.
2030         #
2031                 # Swap back any tag hash found in $text so we do not have to `unhash`
2032                 # multiple times at the end.
2033                 $text = $this->unhash($text);
2034
2035                 # Then hash the tag.
2036                 $key = md5($text);
2037                 $this->html_cleans[$key] = $text;
2038                 $this->html_hashes[$key] = $text;
2039                 return $key; # String that will replace the clean tag.
2040         }
2041
2042
2043         function doHeaders($text) {
2044         #
2045         # Redefined to add id attribute support.
2046         #
2047                 # Setext-style headers:
2048                 #         Header 1  {#header1}
2049                 #         ========
2050                 #
2051                 #         Header 2  {#header2}
2052                 #         --------
2053                 #
2054                 $text = preg_replace_callback(
2055                         '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n=+[ \t]*\n+ }mx',
2056                         array(&$this, '_doHeaders_callback_setext_h1'), $text);
2057                 $text = preg_replace_callback(
2058                         '{ (^.+?) (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? [ \t]*\n-+[ \t]*\n+ }mx',
2059                         array(&$this, '_doHeaders_callback_setext_h2'), $text);
2060
2061                 # atx-style headers:
2062                 #       # Header 1        {#header1}
2063                 #       ## Header 2       {#header2}
2064                 #       ## Header 2 with closing hashes ##  {#header3}
2065                 #       ...
2066                 #       ###### Header 6   {#header2}
2067                 #
2068                 $text = preg_replace_callback('{
2069                                 ^(\#{1,6})      # $1 = string of #\'s
2070                                 [ \t]*
2071                                 (.+?)           # $2 = Header text
2072                                 [ \t]*
2073                                 \#*                     # optional closing #\'s (not counted)
2074                                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
2075                                 [ \t]*
2076                                 \n+
2077                         }xm',
2078                         array(&$this, '_doHeaders_callback_atx'), $text);
2079
2080                 return $text;
2081         }
2082         function _doHeaders_attr($attr) {
2083                 if (empty($attr))  return "";
2084                 return " id=\"$attr\"";
2085         }
2086         function _doHeaders_callback_setext_h1($matches) {
2087                 $attr  = $this->_doHeaders_attr($id =& $matches[2]);
2088                 $block = "<h1$attr>".$this->runSpanGamut($matches[1])."</h1>";
2089                 return "\n" . $this->hashBlock($block) . "\n\n";
2090         }
2091         function _doHeaders_callback_setext_h2($matches) {
2092                 $attr  = $this->_doHeaders_attr($id =& $matches[2]);
2093                 $block = "<h2$attr>".$this->runSpanGamut($matches[1])."</h2>";
2094                 return "\n" . $this->hashBlock($block) . "\n\n";
2095         }
2096         function _doHeaders_callback_atx($matches) {
2097                 $level = strlen($matches[1]);
2098                 $attr  = $this->_doHeaders_attr($id =& $matches[3]);
2099                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2100                 return "\n" . $this->hashBlock($block) . "\n\n";
2101         }
2102
2103
2104         function doTables($text) {
2105         #
2106         # Form HTML tables.
2107         #
2108                 $less_than_tab = $this->tab_width - 1;
2109                 #
2110                 # Find tables with leading pipe.
2111                 #
2112                 #       | Header 1 | Header 2
2113                 #       | -------- | --------
2114                 #       | Cell 1   | Cell 2
2115                 #       | Cell 3   | Cell 4
2116                 #
2117                 $text = preg_replace_callback('
2118                         {
2119                                 ^                                                       # Start of a line
2120                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2121                                 [|]                                                     # Optional leading pipe (present)
2122                                 (.+) \n                                         # $1: Header row (at least one pipe)
2123
2124                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2125                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
2126
2127                                 (                                                       # $3: Cells
2128                                         (?:
2129                                                 [ ]*                            # Allowed whitespace.
2130                                                 [|] .* \n                       # Row content.
2131                                         )*
2132                                 )
2133                                 (?=\n|\Z)                                       # Stop at final double newline.
2134                         }xm',
2135                         array(&$this, '_doTable_leadingPipe_callback'), $text);
2136
2137                 #
2138                 # Find tables without leading pipe.
2139                 #
2140                 #       Header 1 | Header 2
2141                 #       -------- | --------
2142                 #       Cell 1   | Cell 2
2143                 #       Cell 3   | Cell 4
2144                 #
2145                 $text = preg_replace_callback('
2146                         {
2147                                 ^                                                       # Start of a line
2148                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2149                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
2150
2151                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
2152                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
2153
2154                                 (                                                       # $3: Cells
2155                                         (?:
2156                                                 .* [|] .* \n            # Row content
2157                                         )*
2158                                 )
2159                                 (?=\n|\Z)                                       # Stop at final double newline.
2160                         }xm',
2161                         array(&$this, '_DoTable_callback'), $text);
2162
2163                 return $text;
2164         }
2165         function _doTable_leadingPipe_callback($matches) {
2166                 $head           = $matches[1];
2167                 $underline      = $matches[2];
2168                 $content        = $matches[3];
2169
2170                 # Remove leading pipe for each row.
2171                 $content        = preg_replace('/^ *[|]/m', '', $content);
2172
2173                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2174         }
2175         function _doTable_callback($matches) {
2176                 $head           = $matches[1];
2177                 $underline      = $matches[2];
2178                 $content        = $matches[3];
2179
2180                 # Remove any tailing pipes for each line.
2181                 $head           = preg_replace('/[|] *$/m', '', $head);
2182                 $underline      = preg_replace('/[|] *$/m', '', $underline);
2183                 $content        = preg_replace('/[|] *$/m', '', $content);
2184
2185                 # Reading alignement from header underline.
2186                 $separators     = preg_split('/ *[|] */', $underline);
2187                 foreach ($separators as $n => $s) {
2188                         if (preg_match('/^ *-+: *$/', $s))              $attr[$n] = ' align="right"';
2189                         else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
2190                         else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
2191                         else                                                                    $attr[$n] = '';
2192                 }
2193
2194                 # Creating code spans before splitting the row is an easy way to
2195                 # handle a code span containg pipes.
2196                 $head   = $this->doCodeSpans($head);
2197                 $headers        = preg_split('/ *[|] */', $head);
2198                 $col_count      = count($headers);
2199
2200                 # Write column headers.
2201                 $text = "<table>\n";
2202                 $text .= "<thead>\n";
2203                 $text .= "<tr>\n";
2204                 foreach ($headers as $n => $header)
2205                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2206                 $text .= "</tr>\n";
2207                 $text .= "</thead>\n";
2208
2209                 # Split content by row.
2210                 $rows = explode("\n", trim($content, "\n"));
2211
2212                 $text .= "<tbody>\n";
2213                 foreach ($rows as $row) {
2214                         # Creating code spans before splitting the row is an easy way to
2215                         # handle a code span containg pipes.
2216                         $row = $this->doCodeSpans($row);
2217
2218                         # Split row by cell.
2219                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
2220                         $row_cells = array_pad($row_cells, $col_count, '');
2221
2222                         $text .= "<tr>\n";
2223                         foreach ($row_cells as $n => $cell)
2224                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2225                         $text .= "</tr>\n";
2226                 }
2227                 $text .= "</tbody>\n";
2228                 $text .= "</table>";
2229
2230                 return $this->hashBlock($text) . "\n";
2231         }
2232
2233
2234         function doDefLists($text) {
2235         #
2236         # Form HTML definition lists.
2237         #
2238                 $less_than_tab = $this->tab_width - 1;
2239
2240                 # Re-usable pattern to match any entire dl list:
2241                 $whole_list = '
2242                         (                                                               # $1 = whole list
2243                           (                                                             # $2
2244                                 [ ]{0,'.$less_than_tab.'}
2245                                 ((?>.*\S.*\n)+)                         # $3 = defined term
2246                                 \n?
2247                                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2248                           )
2249                           (?s:.+?)
2250                           (                                                             # $4
2251                                   \z
2252                                 |
2253                                   \n{2,}
2254                                   (?=\S)
2255                                   (?!                                           # Negative lookahead for another term
2256                                         [ ]{0,'.$less_than_tab.'}
2257                                         (?: \S.*\n )+?                  # defined term
2258                                         \n?
2259                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2260                                   )
2261                                   (?!                                           # Negative lookahead for another definition
2262                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2263                                   )
2264                           )
2265                         )
2266                 '; // mx
2267
2268                 $text = preg_replace_callback('{
2269                                 (?:(?<=\n\n)|\A\n?)
2270                                 '.$whole_list.'
2271                         }mx',
2272                         array(&$this, '_doDefLists_callback'), $text);
2273
2274                 return $text;
2275         }
2276         function _doDefLists_callback($matches) {
2277                 # Re-usable patterns to match list item bullets and number markers:
2278                 $list = $matches[1];
2279
2280                 # Turn double returns into triple returns, so that we can make a
2281                 # paragraph for the last item in a list, if necessary:
2282                 $result = trim($this->processDefListItems($list));
2283                 $result = "<dl>\n" . $result . "\n</dl>";
2284                 return $this->hashBlock($result) . "\n\n";
2285         }
2286
2287
2288         function processDefListItems($list_str) {
2289         #
2290         #       Process the contents of a single definition list, splitting it
2291         #       into individual term and definition list items.
2292         #
2293                 $less_than_tab = $this->tab_width - 1;
2294
2295                 # trim trailing blank lines:
2296                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2297
2298                 # Process definition terms.
2299                 $list_str = preg_replace_callback('{
2300                         (?:\n\n+|\A\n?)                                 # leading line
2301                         (                                                               # definition terms = $1
2302                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
2303                                 (?![:][ ]|[ ])                          # negative lookahead for a definition
2304                                                                                         #   mark (colon) or more whitespace.
2305                                 (?: \S.* \n)+?                          # actual term (not whitespace).
2306                         )
2307                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed
2308                                                                                         #   with a definition mark.
2309                         }xm',
2310                         array(&$this, '_processDefListItems_callback_dt'), $list_str);
2311
2312                 # Process actual definitions.
2313                 $list_str = preg_replace_callback('{
2314                         \n(\n+)?                                                # leading line = $1
2315                         [ ]{0,'.$less_than_tab.'}               # whitespace before colon
2316                         [:][ ]+                                                 # definition mark (colon)
2317                         ((?s:.+?))                                              # definition text = $2
2318                         (?= \n+                                                 # stop at next definition mark,
2319                                 (?:                                                     # next term or end of text
2320                                         [ ]{0,'.$less_than_tab.'} [:][ ]        |
2321                                         <dt> | \z
2322                                 )
2323                         )
2324                         }xm',
2325                         array(&$this, '_processDefListItems_callback_dd'), $list_str);
2326
2327                 return $list_str;
2328         }
2329         function _processDefListItems_callback_dt($matches) {
2330                 $terms = explode("\n", trim($matches[1]));
2331                 $text = '';
2332                 foreach ($terms as $term) {
2333                         $term = $this->runSpanGamut(trim($term));
2334                         $text .= "\n<dt>" . $term . "</dt>";
2335                 }
2336                 return $text . "\n";
2337         }
2338         function _processDefListItems_callback_dd($matches) {
2339                 $leading_line   = $matches[1];
2340                 $def                    = $matches[2];
2341
2342                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
2343                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2344                         $def = "\n". $def ."\n";
2345                 }
2346                 else {
2347                         $def = rtrim($def);
2348                         $def = $this->runSpanGamut($this->outdent($def));
2349                 }
2350
2351                 return "\n<dd>" . $def . "</dd>\n";
2352         }
2353
2354
2355         function doItalicsAndBold($text) {
2356         #
2357         # Redefined to change emphasis by underscore behaviour so that it does not
2358         # work in the middle of a word.
2359         #
2360                 # <strong> must go first:
2361                 $text = preg_replace_callback(array(
2362                         '{
2363                                 (                                               # $1: Marker
2364                                         (?<![a-zA-Z0-9])        # Not preceded by alphanum
2365                                         (?<!__)                         #       or by two marker chars.
2366                                         __
2367                                 )
2368                                 (?=\S)                                  # Not followed by whitespace
2369                                 (?!__)                                  #   or two others marker chars.
2370                                 (                                               # $2: Content
2371                                         (?:
2372                                                 [^_]+?                  # Anthing not em markers.
2373                                         |
2374                                                                                 # Balence any regular _ emphasis inside.
2375                                                 (?<![a-zA-Z0-9]) _ (?=\S) (.+?)
2376                                                 (?<=\S) _ (?![a-zA-Z0-9])
2377                                         |
2378                                                 ___+
2379                                         )+?
2380                                 )
2381                                 (?<=\S) __                              # End mark not preceded by whitespace.
2382                                 (?![a-zA-Z0-9])                 # Not followed by alphanum
2383                                 (?!__)                                  #   or two others marker chars.
2384                         }sx',
2385                         '{
2386                                 ( (?<!\*\*) \*\* )              # $1: Marker (not preceded by two *)
2387                                 (?=\S)                                  # Not followed by whitespace
2388                                 (?!\1)                                  #   or two others marker chars.
2389                                 (                                               # $2: Content
2390                                         (?:
2391                                                 [^*]+?                  # Anthing not em markers.
2392                                         |
2393                                                                                 # Balence any regular * emphasis inside.
2394                                                 \* (?=\S) (.+?) (?<=\S) \*
2395                                         )+?
2396                                 )
2397                                 (?<=\S) \*\*                    # End mark not preceded by whitespace.
2398                         }sx',
2399                         ),
2400                         array(&$this, '_doItalicAndBold_strong_callback'), $text);
2401                 # Then <em>:
2402                 $text = preg_replace_callback(array(
2403                         '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
2404                         '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
2405                         ),
2406                         array(&$this, '_doItalicAndBold_em_callback'), $text);
2407
2408                 return $text;
2409         }
2410
2411
2412         function formParagraphs($text) {
2413         #
2414         #       Params:
2415         #               $text - string to process with html <p> tags
2416         #
2417                 # Strip leading and trailing lines:
2418                 $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
2419
2420                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2421
2422                 #
2423                 # Wrap <p> tags and unhashify HTML blocks
2424                 #
2425                 foreach ($grafs as $key => $value) {
2426                         $value = trim($this->runSpanGamut($value));
2427
2428                         # Check if this should be enclosed in a paragraph.
2429                         # Clean tag hashes & block tag hashes are left alone.
2430                         $clean_key = $value;
2431                         $block_key = substr($value, 0, 32);
2432
2433                         $is_p = (!isset($this->html_blocks[$block_key]) &&
2434                                          !isset($this->html_cleans[$clean_key]));
2435
2436                         if ($is_p) {
2437                                 $value = "<p>$value</p>";
2438                         }
2439                         $grafs[$key] = $value;
2440                 }
2441
2442                 # Join grafs in one text, then unhash HTML tags.
2443                 $text = implode("\n\n", $grafs);
2444
2445                 # Finish by removing any tag hashes still present in $text.
2446                 $text = $this->unhash($text);
2447
2448                 return $text;
2449         }
2450
2451
2452         ### Footnotes
2453
2454         function stripFootnotes($text) {
2455         #
2456         # Strips link definitions from text, stores the URLs and titles in
2457         # hash references.
2458         #
2459                 $less_than_tab = $this->tab_width - 1;
2460
2461                 # Link defs are in the form: [^id]: url "optional title"
2462                 $text = preg_replace_callback('{
2463                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
2464                           [ \t]*
2465                           \n?                                   # maybe *one* newline
2466                         (                                               # text = $2 (no blank lines allowed)
2467                                 (?:
2468                                         .+                              # actual text
2469                                 |
2470                                         \n                              # newlines but
2471                                         (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2472                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2473                                                                         # by non-indented content
2474                                 )*
2475                         )
2476                         }xm',
2477                         array(&$this, '_stripFootnotes_callback'),
2478                         $text);
2479                 return $text;
2480         }
2481         function _stripFootnotes_callback($matches) {
2482                 $note_id = $matches[1];
2483                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
2484                 return ''; # String that will replace the block
2485         }
2486
2487
2488         function doFootnotes($text) {
2489         #
2490         # Replace footnote references in $text [^id] with a special text-token
2491         # which will be can be
2492         #
2493                 $text = preg_replace('{\[\^(.+?)\]}', "a\0fn:\\1\0z", $text);
2494                 return $text;
2495         }
2496
2497
2498         function appendFootnotes($text) {
2499         #
2500         # Append footnote list to text.
2501         #
2502                 $text = preg_replace_callback('{a\0fn:(.*?)\0z}',
2503                         array(&$this, '_appendFootnotes_callback'), $text);
2504
2505                 if (!empty($this->footnotes_ordered)) {
2506                         $text .= "\n\n";
2507                         $text .= "<div class=\"footnotes\">\n";
2508                         $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
2509                         $text .= "<ol>\n\n";
2510
2511                         $attr = " rev=\"footnote\"";
2512                         if ($this->fn_backlink_class != "") {
2513                                 $class = $this->fn_backlink_class;
2514                                 $class = $this->encodeAmpsAndAngles($class);
2515                                 $class = str_replace('"', '&quot;', $class);
2516                                 $attr .= " class=\"$class\"";
2517                         }
2518                         if ($this->fn_backlink_title != "") {
2519                                 $title = $this->fn_backlink_title;
2520                                 $title = $this->encodeAmpsAndAngles($title);
2521                                 $title = str_replace('"', '&quot;', $title);
2522                                 $attr .= " title=\"$title\"";
2523                         }
2524                         $num = 0;
2525
2526                         foreach ($this->footnotes_ordered as $note_id => $footnote) {
2527                                 $footnote .= "\n"; # Need to append newline before parsing.
2528                                 $footnote = $this->runBlockGamut("$footnote\n");
2529
2530                                 $attr2 = str_replace("%%", ++$num, $attr);
2531
2532                                 # Add backlink to last paragraph; create new paragraph if needed.
2533                                 $backlink = "<a href=\"#fnref:$note_id\"$attr2>&#8617;</a>";
2534                                 if (preg_match('{</p>$}', $footnote)) {
2535                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
2536                                 } else {
2537                                         $footnote .= "\n\n<p>$backlink</p>";
2538                                 }
2539
2540                                 $text .= "<li id=\"fn:$note_id\">\n";
2541                                 $text .= $footnote . "\n";
2542                                 $text .= "</li>\n\n";
2543                         }
2544
2545                         $text .= "</ol>\n";
2546                         $text .= "</div>";
2547
2548                         $text = preg_replace('{a\{fn:(.*?)\}z}', '[^\\1]', $text);
2549                 }
2550                 return $text;
2551         }
2552         function _appendFootnotes_callback($matches) {
2553                 $node_id = $this->fn_id_prefix . $matches[1];
2554
2555                 # Create footnote marker only if it has a corresponding footnote *and*
2556                 # the footnote hasn't been used by another marker.
2557                 if (isset($this->footnotes[$node_id])) {
2558                         # Transfert footnote content to the ordered list.
2559                         $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
2560                         unset($this->footnotes[$node_id]);
2561
2562                         $num = count($this->footnotes_ordered);
2563                         $attr = " rel=\"footnote\"";
2564                         if ($this->fn_link_class != "") {
2565                                 $class = $this->fn_link_class;
2566                                 $class = $this->encodeAmpsAndAngles($class);
2567                                 $class = str_replace('"', '&quot;', $class);
2568                                 $attr .= " class=\"$class\"";
2569                         }
2570                         if ($this->fn_link_title != "") {
2571                                 $title = $this->fn_link_title;
2572                                 $title = $this->encodeAmpsAndAngles($title);
2573                                 $title = str_replace('"', '&quot;', $title);
2574                                 $attr .= " title=\"$title\"";
2575                         }
2576                         $attr = str_replace("%%", $num, $attr);
2577
2578                         return
2579                                 "<sup id=\"fnref:$node_id\">".
2580                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
2581                                 "</sup>";
2582                 }
2583
2584                 return "[^".$matches[1]."]";
2585         }
2586
2587
2588         ### Abbreviations ###
2589
2590         function stripAbbreviations($text) {
2591         #
2592         # Strips abbreviations from text, stores the URLs and titles in
2593         # hash references.
2594         #
2595                 $less_than_tab = $this->tab_width - 1;
2596
2597                 # Link defs are in the form: [id]*: url "optional title"
2598                 $text = preg_replace_callback('{
2599                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
2600                         (.*)                                    # text = $2 (no blank lines allowed)
2601                         }xm',
2602                         array(&$this, '_stripAbbreviations_callback'),
2603                         $text);
2604                 return $text;
2605         }
2606         function _stripAbbreviations_callback($matches) {
2607                 $abbr_word = $matches[1];
2608                 $abbr_desc = $matches[2];
2609                 $this->abbr_matches[] = preg_quote($abbr_word);
2610                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
2611                 return ''; # String that will replace the block
2612         }
2613
2614
2615         function doAbbreviations($text) {
2616         #
2617         # Replace footnote references in $text [^id] with a link to the footnote.
2618         #
2619                 if ($this->abbr_matches) {
2620                         $regex = '{(?<!\w)(?:'. implode('|', $this->abbr_matches) .')(?!\w)}';
2621
2622                         $text = preg_replace_callback($regex,
2623                                 array(&$this, '_doAbbreviations_callback'), $text);
2624                 }
2625                 return $text;
2626         }
2627         function _doAbbreviations_callback($matches) {
2628                 $abbr = $matches[0];
2629                 if (isset($this->abbr_desciptions[$abbr])) {
2630                         $desc = $this->abbr_desciptions[$abbr];
2631                         if (empty($desc)) {
2632                                 return $this->hashSpan("<abbr>$abbr</abbr>");
2633                         } else {
2634                                 $desc = $this->escapeSpecialCharsWithinTagAttributes($desc);
2635                                 return $this->hashSpan("<abbr title=\"$desc\">$abbr</abbr>");
2636                         }
2637                 } else {
2638                         return $matches[0];
2639                 }
2640         }
2641
2642 }
2643
2644
2645 /*
2646
2647 PHP Markdown Extra
2648 ==================
2649
2650 Description
2651 -----------
2652
2653 This is a PHP port of the original Markdown formatter written in Perl
2654 by John Gruber. This special "Extra" version of PHP Markdown features
2655 further enhancements to the syntax for making additional constructs
2656 such as tables and definition lists.
2657
2658 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2659 easy-to-write structured text format into HTML. Markdown's text format
2660 is most similar to that of plain text email, and supports features such
2661 as headers, *emphasis*, code blocks, blockquotes, and links.
2662
2663 Markdown's syntax is designed not as a generic markup language, but
2664 specifically to serve as a front-end to (X)HTML. You can use span-level
2665 HTML tags anywhere in a Markdown document, and you can use block level
2666 HTML tags (like <div> and <table>) as well.
2667
2668 For more information about Markdown's syntax, see:
2669
2670 <http://daringfireball.net/projects/markdown/>
2671
2672
2673 Bugs
2674 ----
2675
2676 To file bug reports please send email to:
2677
2678 <michel.fortin@michelf.com>
2679
2680 Please include with your report: (1) the example input; (2) the output you
2681 expected; (3) the output Markdown actually produced.
2682
2683
2684 Version History
2685 ---------------
2686
2687 See Readme file for details.
2688
2689 Extra 1.1.2 (7 Feb 2007)
2690
2691 Extra 1.1.1 (28 Dec 2006)
2692
2693 Extra 1.1 (1 Dec 2006)
2694
2695 Extra 1.0.1 (9 Dec 2005)
2696
2697 Extra 1.0 (5 Sep 2005)
2698
2699
2700 Copyright and License
2701 ---------------------
2702
2703 PHP Markdown & Extra
2704 Copyright (c) 2004-2007 Michel Fortin
2705 <http://www.michelf.com/>
2706 All rights reserved.
2707
2708 Based on Markdown
2709 Copyright (c) 2003-2006 John Gruber
2710 <http://daringfireball.net/>
2711 All rights reserved.
2712
2713 Redistribution and use in source and binary forms, with or without
2714 modification, are permitted provided that the following conditions are
2715 met:
2716
2717 *       Redistributions of source code must retain the above copyright notice,
2718         this list of conditions and the following disclaimer.
2719
2720 *       Redistributions in binary form must reproduce the above copyright
2721         notice, this list of conditions and the following disclaimer in the
2722         documentation and/or other materials provided with the distribution.
2723
2724 *       Neither the name "Markdown" nor the names of its contributors may
2725         be used to endorse or promote products derived from this software
2726         without specific prior written permission.
2727
2728 This software is provided by the copyright holders and contributors "as
2729 is" and any express or implied warranties, including, but not limited
2730 to, the implied warranties of merchantability and fitness for a
2731 particular purpose are disclaimed. In no event shall the copyright owner
2732 or contributors be liable for any direct, indirect, incidental, special,
2733 exemplary, or consequential damages (including, but not limited to,
2734 procurement of substitute goods or services; loss of use, data, or
2735 profits; or business interruption) however caused and on any theory of
2736 liability, whether in contract, strict liability, or tort (including
2737 negligence or otherwise) arising in any way out of the use of this
2738 software, even if advised of the possibility of such damage.
2739
2740 */
2741 ?>