vendor/TextParsers/markdown.php

   1 <?php
   2 #
   3 # Markdown  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown
   6 # Copyright (c) 2004-2006 Michel Fortin
   7 # <http://www.michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13
  14
  15 define( 'MARKDOWN_VERSION',  "1.0.1e" ); # Thu 28 Dec 2006
  16
  17
  18 #
  19 # Global default settings:
  20 #
  21
  22 # Change to ">" for HTML output
  23 define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  24
  25 # Define the width of a tab for code blocks.
  26 define( 'MARKDOWN_TAB_WIDTH',     4 );
  27
  28
  29 #
  30 # WordPress settings:
  31 #
  32
  33 # Change to false to remove Markdown from posts and/or comments.
  34 define( 'MARKDOWN_WP_POSTS',      true );
  35 define( 'MARKDOWN_WP_COMMENTS',   true );
  36
  37
  38
  39 ### Standard Function Interface ###
  40
  41 define( 'MARKDOWN_PARSER_CLASS',  'Markdown_Parser' );
  42
  43 function Markdown($text) {
  44 #
  45 # Initialize the parser and return the result of its transform method.
  46 #
  47         # Setup static parser variable.
  48         static $parser;
  49         if (!isset($parser)) {
  50                 $parser_class = MARKDOWN_PARSER_CLASS;
  51                 $parser = new $parser_class;
  52         }
  53
  54         # Transform text using parser.
  55         return $parser->transform($text);
  56 }
  57
  58
  59 #
  60 # Markdown Parser Class
  61 #
  62
  63 class Markdown_Parser {
  64
        # Regex to match balanced [brackets].
        # Needed to insert a maximum bracket depth while converting to PHP.
        # $nested_brackets holds the pattern fragment; the constructor builds
        # it by repeating a sub-pattern $nested_brackets_depth times.
        var $nested_brackets_depth = 6;
        var $nested_brackets;

        # Table of hash values for escaped characters:
        # $escape_chars lists the characters concerned. The constructor fills
        # $escape_table with  char => md5(char)  and $backslash_escape_table
        # with  backslash+char => the same md5 value.
        var $escape_chars = '\`*_{}[]()>#+-.!';
        var $escape_table = array();
        var $backslash_escape_table = array();

        # Change to ">" for HTML output.
        # The suffix is appended to the "<hr" and "<br" tags the parser emits.
        var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
        # Tab width; (tab_width - 1) is the largest number of leading spaces
        # the block-level patterns accept in front of a construct.
        var $tab_width = MARKDOWN_TAB_WIDTH;
  78
  79
  80         function Markdown_Parser() {
  81         #
  82         # Constructor function. Initialize appropriate member variables.
  83         #
  84                 $this->_initDetab();
  85
  86                 $this->nested_brackets =
  87                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
  88                         str_repeat('\])*', $this->nested_brackets_depth);
  89
  90                 # Create an identical table but for escaped characters.
  91                 foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) {
  92                         $hash = md5($char);
  93                         $this->escape_table[$char] = $hash;
  94                         $this->backslash_escape_table["\\$char"] = $hash;
  95                 }
  96
  97                 # Sort document, block, and span gamut in ascendent priority order.
  98                 asort($this->document_gamut);
  99                 asort($this->block_gamut);
 100                 asort($this->span_gamut);
 101         }
 102
 103
        # Internal hashes used during transformation.
        # All four are emptied at the start of every transform() call.
        #   $urls / $titles  are keyed by lower-cased link id.
        #   $html_blocks / $html_hashes  are keyed by the MD5 of the stored text.
        var $urls = array();
        var $titles = array();
        var $html_blocks = array();
        var $html_hashes = array(); # Contains both blocks and span hashes.
 109
 110
 111         function transform($text) {
 112         #
 113         # Main function. The order in which other subs are called here is
 114         # essential. Link and image substitutions need to happen before
 115         # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
 116         # and <img> tags get encoded.
 117         #
 118                 # Clear the global hashes. If we don't clear these, you get conflicts
 119                 # from other articles when generating a page which contains more than
 120                 # one article (e.g. an index page that shows the N most recent
 121                 # articles):
 122                 $this->urls = array();
 123                 $this->titles = array();
 124                 $this->html_blocks = array();
 125                 $this->html_hashes = array();
 126
 127                 # Standardize line endings:
 128                 #   DOS to Unix and Mac to Unix
 129                 $text = str_replace(array("\r\n", "\r"), "\n", $text);
 130
 131                 # Make sure $text ends with a couple of newlines:
 132                 $text .= "\n\n";
 133
 134                 # Convert all tabs to spaces.
 135                 $text = $this->detab($text);
 136
 137                 # Turn block-level HTML blocks into hash entries
 138                 $text = $this->hashHTMLBlocks($text);
 139
 140                 # Strip any lines consisting only of spaces and tabs.
 141                 # This makes subsequent regexen easier to write, because we can
 142                 # match consecutive blank lines with /\n+/ instead of something
 143                 # contorted like /[ \t]*\n+/ .
 144                 $text = preg_replace('/^[ \t]+$/m', '', $text);
 145
 146                 # Run document gamut methods.
 147                 foreach ($this->document_gamut as $method => $priority) {
 148                         $text = $this->$method($text);
 149                 }
 150
 151                 return $text . "\n";
 152         }
 153
        # Document-level transformations, as  method name => priority.
        # The constructor sorts this table with asort(), and transform() then
        # calls each method in that order, feeding it the whole document.
        var $document_gamut = array(
                # Strip link definitions, store in hashes.
                "stripLinkDefinitions" => 20,

                "runBasicBlockGamut"   => 30,
                "unescapeSpecialChars" => 90,
                );
 161
 162
        function stripLinkDefinitions($text) {
        #
        # Strips link definitions from text, stores the URLs and titles in
        # hash references.
        #
        # $text is the document source. Every definition matched by the pattern
        # below is handed to _stripLinkDefinitions_callback(), which records it
        # and replaces it with an empty string. Returns the remaining text.
        #
                # A definition may be indented by at most (tab_width - 1) spaces.
                $less_than_tab = $this->tab_width - 1;

                # Link defs are in the form: ^[id]: url "optional title"
                # The url may be wrapped in angle brackets, and the title may be
                # delimited by double quotes or by parentheses.
                $text = preg_replace_callback('{
                                                        ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
                                                          [ \t]*
                                                          \n?                           # maybe *one* newline
                                                          [ \t]*
                                                        <?(\S+?)>?                      # url = $2
                                                          [ \t]*
                                                          \n?                           # maybe one newline
                                                          [ \t]*
                                                        (?:
                                                                (?<=\s)                 # lookbehind for whitespace
                                                                ["(]
                                                                (.*?)                   # title = $3
                                                                [")]
                                                                [ \t]*
                                                        )?      # title is optional
                                                        (?:\n+|\Z)
                        }xm',
                        array(&$this, '_stripLinkDefinitions_callback'),
                        $text);
                return $text;
        }
 193         function _stripLinkDefinitions_callback($matches) {
 194                 $link_id = strtolower($matches[1]);
 195                 $this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
 196                 if (isset($matches[3]))
 197                         $this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
 198                 return ''; # String that will replace the block
 199         }
 200
 201
        function hashHTMLBlocks($text) {
        #
        # Replaces raw block-level HTML found in $text with hash keys and
        # returns the resulting text.
        #
        # Five passes run in sequence; each one hands its matches to
        # _hashHTMLBlocks_callback():
        #   1. block tags whose opening tag is directly followed by a newline
        #      (tag list A, which also contains ins and del),
        #   2. block tags in general (tag list B),
        #   3. a standalone <hr> tag,
        #   4. a standalone HTML comment,
        #   5. a standalone PHP or ASP style processing instruction.
        #
                # Passes 3 to 5 accept at most (tab_width - 1) leading spaces.
                $less_than_tab = $this->tab_width - 1;

                # Hashify HTML blocks:
                # We only want to do this for block-level HTML tags, such as headers,
                # lists, and tables. That's because we still want to wrap <p>s around
                # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
                # phrase emphasis, and spans. The list of tags we're looking for is
                # hard-coded:
                $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                                                'script|noscript|form|fieldset|iframe|math|ins|del';
                $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                                                'script|noscript|form|fieldset|iframe|math';

                # Regular expression for the content of a block tag.
                # $attr matches the optional attribute list of a tag; $content is
                # assembled by repeating an opening and a closing fragment
                # $nested_tags_level times, which bounds the supported nesting of
                # same-named tags (\2 refers to the tag name captured by the caller).
                $nested_tags_level = 4;
                $attr = '
                        (?>                             # optional tag attributes
                          \s                    # starts with whitespace
                          (?>
                                [^>"/]+         # text outside quotes
                          |
                                /+(?!>)         # slash not followed by ">"
                          |
                                "[^"]*"         # text inside double quotes (tolerate ">")
                          |
                                \'[^\']*\'      # text inside single quotes (tolerate ">")
                          )*
                        )?
                        ';
                $content =
                        str_repeat('
                                (?>
                                  [^<]+                 # content without tag
                                |
                                  <\2                   # nested opening tag
                                        '.$attr.'       # attributes
                                        (?:
                                          />
                                        |
                                          >', $nested_tags_level).      # end of opening tag
                                          '.*?'.                                        # last level nested tag content
                        str_repeat('
                                          </\2\s*>      # closing nested tag
                                        )
                                  |
                                        <(?!/\2\s*>     # other tags with a different name
                                  )
                                )*',
                                $nested_tags_level);

                # First, look for nested blocks, e.g.:
                #       <div>
                #               <div>
                #               tags for inner block must be indented.
                #               </div>
                #       </div>
                #
                # The outermost tags must start at the left margin for this to match, and
                # the inner nested divs must be indented.
                # We need to do this before the next, more liberal match, because the next
                # match will start at the first `<div>` and stop at the first `</div>`.
                $text = preg_replace_callback('{
                                        (                                               # save in $1
                                                ^                                       # start of line  (with /m)
                                                <('.$block_tags_a.')# start tag = $2
                                                '.$attr.'>\n            # attributes followed by > and \n
                                                '.$content.'            # content, support nesting
                                                </\2>                           # the matching end tag
                                                [ \t]*                          # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document
                                        )
                        }xm',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                #
                # Match from `\n<tag>` to `</tag>\n`, handling nested tags in between.
                #
                $text = preg_replace_callback('{
                                        (                                               # save in $1
                                                ^                                       # start of line  (with /m)
                                                <('.$block_tags_b.')# start tag = $2
                                                '.$attr.'>                      # attributes followed by >
                                                '.$content.'            # content, support nesting
                                                </\2>                           # the matching end tag
                                                [ \t]*                          # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document
                                        )
                        }xm',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                # Special case just for <hr />. It was easier to make a special case than
                # to make the other regex more complicated.
                $text = preg_replace_callback('{
                                        (?:
                                                (?<=\n\n)               # Starting after a blank line
                                                |                               # or
                                                \A\n?                   # the beginning of the doc
                                        )
                                        (                                               # save in $1
                                                [ ]{0,'.$less_than_tab.'}
                                                <(hr)                           # start tag = $2
                                                \b                                      # word break
                                                ([^<>])*?                       #
                                                /?>                                     # the matching end tag
                                                [ \t]*
                                                (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                        )
                        }x',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                # Special case for standalone HTML comments:
                $text = preg_replace_callback('{
                                (?:
                                        (?<=\n\n)               # Starting after a blank line
                                        |                               # or
                                        \A\n?                   # the beginning of the doc
                                )
                                (                                               # save in $1
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <!-- .*? -->
                                        )
                                        [ \t]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                )
                        }x',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                /* PHP and ASP-style processor instructions (<? and <%...%>)*/
                $text = preg_replace_callback('{
                                (?:
                                        (?<=\n\n)               # Starting after a blank line
                                        |                               # or
                                        \A\n?                   # the beginning of the doc
                                )
                                (                                               # save in $1
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <([?%])                 # $2
                                                .*?
                                                \2>
                                        )
                                        [ \t]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                )
                        }x',
                        array(&$this, '_hashHTMLBlocks_callback'),
                        $text);

                return $text;
        }
 358         function _hashHTMLBlocks_callback($matches) {
 359                 $text = $matches[1];
 360                 $key  = $this->hashBlock($text);
 361                 return "\n\n$key\n\n";
 362         }
 363
 364
 365         function hashBlock($text) {
 366         #
 367         # Called whenever a tag must be hashed when a function insert a block-level
 368         # tag in $text, it pass through this function and is automaticaly escaped,
 369         # which remove the need to call _HashHTMLBlocks at every step.
 370         #
 371                 # Swap back any tag hash found in $text so we do not have to `unhash`
 372                 # multiple times at the end.
 373                 $text = $this->unhash($text);
 374
 375                 # Then hash the block.
 376                 $key = md5($text);
 377                 $this->html_hashes[$key] = $text;
 378                 $this->html_blocks[$key] = $text;
 379                 return $key; # String that will replace the tag.
 380         }
 381
 382
 383         function hashSpan($text) {
 384         #
 385         # Called whenever a tag must be hashed when a function insert a span-level
 386         # element in $text, it pass through this function and is automaticaly
 387         # escaped, blocking invalid nested overlap.
 388         #
 389                 # Swap back any tag hash found in $text so we do not have to `unhash`
 390                 # multiple times at the end.
 391                 $text = $this->unhash($text);
 392
 393                 # Then hash the span.
 394                 $key = md5($text);
 395                 $this->html_hashes[$key] = $text;
 396                 return $key; # String that will replace the span tag.
 397         }
 398
 399
        # Table of  method name => priority.  The constructor sorts it with
        # asort() and runBasicBlockGamut() calls the methods in that order.
        var $block_gamut = array(
        #
        # These are all the transformations that form block-level
        # tags like paragraphs, headers, and list items.
        #
                "doHeaders"         => 10,
                "doHorizontalRules" => 20,

                "doLists"           => 40,
                "doCodeBlocks"      => 50,
                "doBlockQuotes"     => 60,
                );
 412
 413         function runBlockGamut($text) {
 414         #
 415         # Run block gamut tranformations.
 416         #
 417                 # We need to escape raw HTML in Markdown source before doing anything
 418                 # else. This need to be done for each block, and not only at the
 419                 # begining in the Markdown function since hashed blocks can be part of
 420                 # list items and could have been indented. Indented blocks would have
 421                 # been seen as a code block in a previous pass of hashHTMLBlocks.
 422                 $text = $this->hashHTMLBlocks($text);
 423
 424                 return $this->runBasicBlockGamut($text);
 425         }
 426
 427         function runBasicBlockGamut($text) {
 428         #
 429         # Run block gamut tranformations, without hashing HTML blocks. This is
 430         # useful when HTML blocks are known to be already hashed, like in the first
 431         # whole-document pass.
 432         #
 433                 foreach ($this->block_gamut as $method => $priority) {
 434                         $text = $this->$method($text);
 435                 }
 436
 437                 # Finally form paragraph and restore hashed blocks.
 438                 $text = $this->formParagraphs($text);
 439
 440                 return $text;
 441         }
 442
 443
 444         function doHorizontalRules($text) {
 445                 # Do Horizontal Rules:
 446                 return preg_replace(
 447                         array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
 448                                   '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
 449                                   '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
 450                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 451                         $text);
 452         }
 453
 454
        # Table of  method name => priority.  The constructor sorts it with
        # asort() and runSpanGamut() calls the methods in that order.
        var $span_gamut = array(
        #
        # These are all the transformations that occur *within* block-level
        # tags like paragraphs, headers, and list items.
        #
                "escapeSpecialCharsWithinTagAttributes" => -20,
                "doCodeSpans"                                                   => -10,
                "encodeBackslashEscapes"                                =>  -5,

                # Process anchor and image tags. Images must come first,
                # because ![foo][f] looks like an anchor.
                "doImages"            =>  10,
                "doAnchors"           =>  20,

                # Make links out of things like `<http://example.com/>`
                # Must come after doAnchors, because you can use < and >
                # delimiters in inline links like [this](<url>).
                "doAutoLinks"         =>  30,
                "encodeAmpsAndAngles" =>  40,

                "doItalicsAndBold"    =>  50,
                "doHardBreaks"        =>  60,
                );
 478
 479         function runSpanGamut($text) {
 480         #
 481         # Run span gamut tranformations.
 482         #
 483                 foreach ($this->span_gamut as $method => $priority) {
 484                         $text = $this->$method($text);
 485                 }
 486
 487                 return $text;
 488         }
 489
 490
 491         function doHardBreaks($text) {
 492                 # Do hard breaks:
 493                 $br_tag = $this->hashSpan("<br$this->empty_element_suffix\n");
 494                 return preg_replace('/ {2,}\n/', $br_tag, $text);
 495         }
 496
 497
 498         function escapeSpecialCharsWithinTagAttributes($text) {
 499         #
 500         # Within tags -- meaning between < and > -- encode [\ ` * _] so they
 501         # don't conflict with their use in Markdown for code, italics and strong.
 502         # We're replacing each such character with its corresponding MD5 checksum
 503         # value; this is likely overkill, but it should prevent us from colliding
 504         # with the escape values by accident.
 505         #
 506                 $tokens = $this->tokenizeHTML($text);
 507                 $text = '';   # rebuild $text from the tokens
 508
 509                 foreach ($tokens as $cur_token) {
 510                         if ($cur_token[0] == 'tag') {
 511                                 $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]);
 512                                 $cur_token[1] = str_replace(array('`'), $this->escape_table['`'], $cur_token[1]);
 513                                 $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]);
 514                                 $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]);
 515                         }
 516                         $text .= $cur_token[1];
 517                 }
 518                 return $text;
 519         }
 520
 521
        function doAnchors($text) {
        #
        # Turn Markdown link shortcuts into XHTML <a> tags.
        #
        # Two passes run over $text: reference-style links first, then
        # inline-style links. Each match is rewritten by the callback named in
        # the corresponding call. Returns the converted text.
        #
                #
                # First, handle reference-style links: [link text] [id]
                #
                $text = preg_replace_callback('{
                        (                                       # wrap whole match in $1
                          \[
                                ('.$this->nested_brackets.')    # link text = $2
                          \]

                          [ ]?                          # one optional space
                          (?:\n[ ]*)?           # one optional newline followed by spaces

                          \[
                                (.*?)           # id = $3
                          \]
                        )
                        }xs',
                        array(&$this, '_doAnchors_reference_callback'), $text);

                #
                # Next, inline-style links: [link text](url "optional title")
                #
                # NOTE: the callback name below is spelled with a capital D while
                # the method is declared as _doAnchors_inline_callback. PHP
                # resolves method names case-insensitively, so the call still
                # reaches that method.
                $text = preg_replace_callback('{
                        (                               # wrap whole match in $1
                          \[
                                ('.$this->nested_brackets.')    # link text = $2
                          \]
                          \(                    # literal paren
                                [ \t]*
                                <?(.*?)>?       # href = $3
                                [ \t]*
                                (                       # $4
                                  ([\'"])       # quote char = $5
                                  (.*?)         # Title = $6
                                  \5            # matching quote
                                  [ \t]*        # ignore any spaces/tabs between closing quote and )
                                )?                      # title is optional
                          \)
                        )
                        }xs',
                        array(&$this, '_DoAnchors_inline_callback'), $text);

                #
                # Last, handle reference-style shortcuts: [link text]
                # These must come last in case you've also got [link test][1]
                # or [link test](/foo)
                #
                # This third pass is disabled: the code is commented out, so a
                # bare [link text] is not converted here.
                #
//              $text = preg_replace_callback('{
//                      (                                       # wrap whole match in $1
//                        \[
//                              ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
//                        \]
//                      )
//                      }xs',
//                      array(&$this, '_doAnchors_reference_callback'), $text);

                return $text;
        }
 584         function _doAnchors_reference_callback($matches) {
 585                 $whole_match =  $matches[1];
 586                 $link_text   =  $matches[2];
 587                 $link_id     =& $matches[3];
 588
 589                 if ($link_id == "") {
 590                         # for shortcut links like [this][] or [this].
 591                         $link_id = $link_text;
 592                 }
 593
 594                 # lower-case and turn embedded newlines into spaces
 595                 $link_id = strtolower($link_id);
 596                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 597
 598                 if (isset($this->urls[$link_id])) {
 599                         $url = $this->urls[$link_id];
 600                         $url = $this->encodeAmpsAndAngles($url);
 601
 602                         $result = "<a href=\"$url\"";
 603                         if ( isset( $this->titles[$link_id] ) ) {
 604                                 $title = $this->titles[$link_id];
 605                                 $title = $this->encodeAmpsAndAngles($title);
 606                                 $result .=  " title=\"$title\"";
 607                         }
 608
 609                         $link_text = $this->runSpanGamut($link_text);
 610                         $result .= ">$link_text</a>";
 611                         $result = $this->hashSpan($result);
 612                 }
 613                 else {
 614                         $result = $whole_match;
 615                 }
 616                 return $result;
 617         }
 618         function _doAnchors_inline_callback($matches) {
 619                 $whole_match    =  $matches[1];
 620                 $link_text              =  $this->runSpanGamut($matches[2]);
 621                 $url                    =  $matches[3];
 622                 $title                  =& $matches[6];
 623
 624                 $url = $this->encodeAmpsAndAngles($url);
 625
 626                 $result = "<a href=\"$url\"";
 627                 if (isset($title)) {
 628                         $title = str_replace('"', '&quot;', $title);
 629                         $title = $this->encodeAmpsAndAngles($title);
 630                         $result .=  " title=\"$title\"";
 631                 }
 632
 633                 $link_text = $this->runSpanGamut($link_text);
 634                 $result .= ">$link_text</a>";
 635
 636                 return $this->hashSpan($result);
 637         }
 638
 639
        function doImages($text) {
        #
        # Turn Markdown image shortcuts into <img> tags.
        #
        # Two passes run over $text: reference-style images first, then inline
        # images. Each match is rewritten by the callback named in the
        # corresponding call. Returns the converted text.
        #
                #
                # First, handle reference-style labeled images: ![alt text][id]
                #
                $text = preg_replace_callback('{
                        (                               # wrap whole match in $1
                          !\[
                                ('.$this->nested_brackets.')            # alt text = $2
                          \]

                          [ ]?                          # one optional space
                          (?:\n[ ]*)?           # one optional newline followed by spaces

                          \[
                                (.*?)           # id = $3
                          \]

                        )
                        }xs',
                        array(&$this, '_doImages_reference_callback'), $text);

                #
                # Next, handle inline images:  ![alt text](url "optional title")
                # Don't forget: encode * and _
                #
                # Unlike the inline link pattern in doAnchors(), the url here
                # must be free of whitespace (\S+?) and one whitespace character
                # is allowed between the closing bracket and the parenthesis.
                $text = preg_replace_callback('{
                        (                               # wrap whole match in $1
                          !\[
                                ('.$this->nested_brackets.')            # alt text = $2
                          \]
                          \s?                   # One optional whitespace character
                          \(                    # literal paren
                                [ \t]*
                                <?(\S+?)>?      # src url = $3
                                [ \t]*
                                (                       # $4
                                  ([\'"])       # quote char = $5
                                  (.*?)         # title = $6
                                  \5            # matching quote
                                  [ \t]*
                                )?                      # title is optional
                          \)
                        )
                        }xs',
                        array(&$this, '_doImages_inline_callback'), $text);

                return $text;
        }
 691         function _doImages_reference_callback($matches) {
 692                 $whole_match = $matches[1];
 693                 $alt_text    = $matches[2];
 694                 $link_id     = strtolower($matches[3]);
 695
 696                 if ($link_id == "") {
 697                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 698                 }
 699
 700                 $alt_text = str_replace('"', '&quot;', $alt_text);
 701                 if (isset($this->urls[$link_id])) {
 702                         $url = $this->urls[$link_id];
 703                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 704                         if (isset($this->titles[$link_id])) {
 705                                 $title = $this->titles[$link_id];
 706                                 $result .=  " title=\"$title\"";
 707                         }
 708                         $result .= $this->empty_element_suffix;
 709                         $result = $this->hashSpan($result);
 710                 }
 711                 else {
 712                         # If there's no such link ID, leave intact:
 713                         $result = $whole_match;
 714                 }
 715
 716                 return $result;
 717         }
 718         function _doImages_inline_callback($matches) {
 719                 $whole_match    = $matches[1];
 720                 $alt_text               = $matches[2];
 721                 $url                    = $matches[3];
 722                 $title                  =& $matches[6];
 723
 724                 $alt_text = str_replace('"', '&quot;', $alt_text);
 725                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
 726                 if (isset($title)) {
 727                         $title = str_replace('"', '&quot;', $title);
 728                         $result .=  " title=\"$title\""; # $title already quoted
 729                 }
 730                 $result .= $this->empty_element_suffix;
 731
 732                 return $this->hashSpan($result);
 733         }
 734
 735
 736         function doHeaders($text) {
 737                 # Setext-style headers:
 738                 #         Header 1
 739                 #         ========
 740                 #
 741                 #         Header 2
 742                 #         --------
 743                 #
 744                 $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
 745                         array(&$this, '_doHeaders_callback_setext_h1'), $text);
 746                 $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
 747                         array(&$this, '_doHeaders_callback_setext_h2'), $text);
 748
 749                 # atx-style headers:
 750                 #       # Header 1
 751                 #       ## Header 2
 752                 #       ## Header 2 with closing hashes ##
 753                 #       ...
 754                 #       ###### Header 6
 755                 #
 756                 $text = preg_replace_callback('{
 757                                 ^(\#{1,6})      # $1 = string of #\'s
 758                                 [ \t]*
 759                                 (.+?)           # $2 = Header text
 760                                 [ \t]*
 761                                 \#*                     # optional closing #\'s (not counted)
 762                                 \n+
 763                         }xm',
 764                         array(&$this, '_doHeaders_callback_atx'), $text);
 765
 766                 return $text;
 767         }
 768         function _doHeaders_callback_setext_h1($matches) {
 769                 return $this->hashBlock("<h1>".$this->runSpanGamut($matches[1])."</h1>")."\n\n";
 770         }
 771         function _doHeaders_callback_setext_h2($matches) {
 772                 return $this->hashBlock("<h2>".$this->runSpanGamut($matches[1])."</h2>")."\n\n";
 773         }
 774         function _doHeaders_callback_atx($matches) {
 775                 $level = strlen($matches[1]);
 776                 return $this->hashBlock("<h$level>".$this->runSpanGamut($matches[2])."</h$level>")."\n\n";
 777         }
 778
 779
 780         function doLists($text) {
 781         #
 782         # Form HTML ordered (numbered) and unordered (bulleted) lists.
 783         #
 784                 $less_than_tab = $this->tab_width - 1;
 785
 786                 # Re-usable patterns to match list item bullets and number markers:
 787                 $marker_ul  = '[*+-]';
 788                 $marker_ol  = '\d+[.]';
 789                 $marker_any = "(?:$marker_ul|$marker_ol)";
 790
 791                 $markers = array($marker_ul, $marker_ol);
 792
 793                 foreach ($markers as $marker) {
 794                         # Re-usable pattern to match any entirel ul or ol list:
 795                         $whole_list = '
 796                                 (                                                               # $1 = whole list
 797                                   (                                                             # $2
 798                                         [ ]{0,'.$less_than_tab.'}
 799                                         ('.$marker.')                           # $3 = first list item marker
 800                                         [ \t]+
 801                                   )
 802                                   (?s:.+?)
 803                                   (                                                             # $4
 804                                           \z
 805                                         |
 806                                           \n{2,}
 807                                           (?=\S)
 808                                           (?!                                           # Negative lookahead for another list item marker
 809                                                 [ \t]*
 810                                                 '.$marker.'[ \t]+
 811                                           )
 812                                   )
 813                                 )
 814                         '; // mx
 815
 816                         # We use a different prefix before nested lists than top-level lists.
 817                         # See extended comment in _ProcessListItems().
 818
 819                         if ($this->list_level) {
 820                                 $text = preg_replace_callback('{
 821                                                 ^
 822                                                 '.$whole_list.'
 823                                         }mx',
 824                                         array(&$this, '_doLists_callback'), $text);
 825                         }
 826                         else {
 827                                 $text = preg_replace_callback('{
 828                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
 829                                                 '.$whole_list.'
 830                                         }mx',
 831                                         array(&$this, '_doLists_callback'), $text);
 832                         }
 833                 }
 834
 835                 return $text;
 836         }
 837         function _doLists_callback($matches) {
 838                 # Re-usable patterns to match list item bullets and number markers:
 839                 $marker_ul  = '[*+-]';
 840                 $marker_ol  = '\d+[.]';
 841                 $marker_any = "(?:$marker_ul|$marker_ol)";
 842
 843                 $list = $matches[1];
 844                 $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
 845
 846                 $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );
 847
 848                 # Turn double returns into triple returns, so that we can make a
 849                 # paragraph for the last item in a list, if necessary:
 850                 $list = preg_replace("/\n{2,}/", "\n\n\n", $list);
 851                 $result = $this->processListItems($list, $marker_any);
 852
 853                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 854                 return "\n". $result ."\n\n";
 855         }
 856
 857         var $list_level = 0;
 858
 859         function processListItems($list_str, $marker_any) {
 860         #
 861         #       Process the contents of a single ordered or unordered list, splitting it
 862         #       into individual list items.
 863         #
 864                 # The $this->list_level global keeps track of when we're inside a list.
 865                 # Each time we enter a list, we increment it; when we leave a list,
 866                 # we decrement. If it's zero, we're not in a list anymore.
 867                 #
 868                 # We do this because when we're not inside a list, we want to treat
 869                 # something like this:
 870                 #
 871                 #               I recommend upgrading to version
 872                 #               8. Oops, now this line is treated
 873                 #               as a sub-list.
 874                 #
 875                 # As a single paragraph, despite the fact that the second line starts
 876                 # with a digit-period-space sequence.
 877                 #
 878                 # Whereas when we're inside a list (or sub-list), that line will be
 879                 # treated as the start of a sub-list. What a kludge, huh? This is
 880                 # an aspect of Markdown's syntax that's hard to parse perfectly
 881                 # without resorting to mind-reading. Perhaps the solution is to
 882                 # change the syntax rules such that sub-lists must start with a
 883                 # starting cardinal number; e.g. "1." or "a.".
 884
 885                 $this->list_level++;
 886
 887                 # trim trailing blank lines:
 888                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 889
 890                 $list_str = preg_replace_callback('{
 891                         (\n)?                                                   # leading line = $1
 892                         (^[ \t]*)                                               # leading whitespace = $2
 893                         ('.$marker_any.') [ \t]+                # list marker = $3
 894                         ((?s:.+?)                                               # list item text   = $4
 895                         (\n{1,2}))
 896                         (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
 897                         }xm',
 898                         array(&$this, '_processListItems_callback'), $list_str);
 899
 900                 $this->list_level--;
 901                 return $list_str;
 902         }
 903         function _processListItems_callback($matches) {
 904                 $item = $matches[4];
 905                 $leading_line =& $matches[1];
 906                 $leading_space =& $matches[2];
 907
 908                 if ($leading_line || preg_match('/\n{2,}/', $item)) {
 909                         $item = $this->runBlockGamut($this->outdent($item));
 910                 }
 911                 else {
 912                         # Recursion for sub-lists:
 913                         $item = $this->doLists($this->outdent($item));
 914                         $item = preg_replace('/\n+$/', '', $item);
 915                         $item = $this->runSpanGamut($item);
 916                 }
 917
 918                 return "<li>" . $item . "</li>\n";
 919         }
 920
 921
 922         function doCodeBlocks($text) {
 923         #
 924         #       Process Markdown `<pre><code>` blocks.
 925         #
 926                 $text = preg_replace_callback('{
 927                                 (?:\n\n|\A)
 928                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
 929                                   (?:
 930                                         (?:[ ]{'.$this->tab_width.'} | \t)  # Lines must start with a tab or a tab-width of spaces
 931                                         .*\n+
 932                                   )+
 933                                 )
 934                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
 935                         }xm',
 936                         array(&$this, '_doCodeBlocks_callback'), $text);
 937
 938                 return $text;
 939         }
 940         function _doCodeBlocks_callback($matches) {
 941                 $codeblock = $matches[1];
 942
 943                 $codeblock = $this->encodeCode($this->outdent($codeblock));
 944         //      $codeblock = $this->detab($codeblock);
 945                 # trim leading newlines and trailing whitespace
 946                 $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock);
 947
 948                 $result = "\n\n".$this->hashBlock("<pre><code>" . $codeblock . "\n</code></pre>")."\n\n";
 949
 950                 return $result;
 951         }
 952
 953
 954         function doCodeSpans($text) {
 955         #
 956         #       *       Backtick quotes are used for <code></code> spans.
 957         #
 958         #       *       You can use multiple backticks as the delimiters if you want to
 959         #               include literal backticks in the code span. So, this input:
 960         #
 961         #                 Just type ``foo `bar` baz`` at the prompt.
 962         #
 963         #               Will translate to:
 964         #
 965         #                 <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
 966         #
 967         #               There's no arbitrary limit to the number of backticks you
 968         #               can use as delimters. If you need three consecutive backticks
 969         #               in your code, use four for delimiters, etc.
 970         #
 971         #       *       You can use spaces to get literal backticks at the edges:
 972         #
 973         #                 ... type `` `bar` `` ...
 974         #
 975         #               Turns to:
 976         #
 977         #                 ... type <code>`bar`</code> ...
 978         #
 979                 $text = preg_replace_callback('@
 980                                 (?<!\\\)        # Character before opening ` can\'t be a backslash
 981                                 (`+)            # $1 = Opening run of `
 982                                 (.+?)           # $2 = The code block
 983                                 (?<!`)
 984                                 \1                      # Matching closer
 985                                 (?!`)
 986                         @xs',
 987                         array(&$this, '_doCodeSpans_callback'), $text);
 988
 989                 return $text;
 990         }
 991         function _doCodeSpans_callback($matches) {
 992                 $c = $matches[2];
 993                 $c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
 994                 $c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
 995                 $c = $this->encodeCode($c);
 996                 return $this->hashSpan("<code>$c</code>");
 997         }
 998
 999
1000         function encodeCode($_) {
1001         #
1002         # Encode/escape certain characters inside Markdown code runs.
1003         # The point is that in code, these characters are literals,
1004         # and lose their special Markdown meanings.
1005         #
1006                 # Encode all ampersands; HTML entities are not
1007                 # entities within a Markdown code span.
1008                 $_ = str_replace('&', '&amp;', $_);
1009
1010                 # Do the angle bracket song and dance:
1011                 $_ = str_replace(array('<',    '>'),
1012                                                  array('&lt;', '&gt;'), $_);
1013
1014                 # Now, escape characters that are magic in Markdown:
1015 //              $_ = str_replace(array_keys($this->escape_table),
1016 //                                               array_values($this->escape_table), $_);
1017
1018                 return $_;
1019         }
1020
1021
1022         function doItalicsAndBold($text) {
1023                 # <strong> must go first:
1024                 $text = preg_replace_callback('{
1025                                 (                                               # $1: Marker
1026                                         (?<!\*\*) \* |          #     (not preceded by two chars of
1027                                         (?<!__)   _                     #      the same marker)
1028                                 )
1029                                 \1
1030                                 (?=\S)                                  # Not followed by whitespace
1031                                 (?!\1\1)                                #   or two others marker chars.
1032                                 (                                               # $2: Content
1033                                         (?:
1034                                                 [^*_]+?                 # Anthing not em markers.
1035                                         |
1036                                                                                 # Balence any regular emphasis inside.
1037                                                 \1 (?=\S) .+? (?<=\S) \1
1038                                         |
1039                                                 (?! \1 ) .              # Allow unbalenced * and _.
1040                                         )+?
1041                                 )
1042                                 (?<=\S) \1\1                    # End mark not preceded by whitespace.
1043                         }sx',
1044                         array(&$this, '_doItalicAndBold_strong_callback'), $text);
1045                 # Then <em>:
1046                 $text = preg_replace_callback(
1047                         '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx',
1048                         array(&$this, '_doItalicAndBold_em_callback'), $text);
1049
1050                 return $text;
1051         }
1052         function _doItalicAndBold_em_callback($matches) {
1053                 $text = $matches[2];
1054                 $text = $this->runSpanGamut($text);
1055                 return $this->hashSpan("<em>$text</em>");
1056         }
1057         function _doItalicAndBold_strong_callback($matches) {
1058                 $text = $matches[2];
1059                 $text = $this->runSpanGamut($text);
1060                 return $this->hashSpan("<strong>$text</strong>");
1061         }
1062
1063
1064         function doBlockQuotes($text) {
1065                 $text = preg_replace_callback('/
1066                           (                                                             # Wrap whole match in $1
1067                                 (
1068                                   ^[ \t]*>[ \t]?                        # ">" at the start of a line
1069                                         .+\n                                    # rest of the first line
1070                                   (.+\n)*                                       # subsequent consecutive lines
1071                                   \n*                                           # blanks
1072                                 )+
1073                           )
1074                         /xm',
1075                         array(&$this, '_doBlockQuotes_callback'), $text);
1076
1077                 return $text;
1078         }
1079         function _doBlockQuotes_callback($matches) {
1080                 $bq = $matches[1];
1081                 # trim one level of quoting - trim whitespace-only lines
1082                 $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
1083                 $bq = $this->runBlockGamut($bq);                # recurse
1084
1085                 $bq = preg_replace('/^/m', "  ", $bq);
1086                 # These leading spaces cause problem with <pre> content,
1087                 # so we need to fix that:
1088                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1089                         array(&$this, '_DoBlockQuotes_callback2'), $bq);
1090
1091                 return $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1092         }
1093         function _doBlockQuotes_callback2($matches) {
1094                 $pre = $matches[1];
1095                 $pre = preg_replace('/^  /m', '', $pre);
1096                 return $pre;
1097         }
1098
1099
1100         function formParagraphs($text) {
1101         #
1102         #       Params:
1103         #               $text - string to process with html <p> tags
1104         #
1105                 # Strip leading and trailing lines:
1106                 $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
1107
1108                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1109
1110                 #
1111                 # Wrap <p> tags.
1112                 #
1113                 foreach ($grafs as $key => $value) {
1114                         if (!isset( $this->html_blocks[$value] )) {
1115                                 $value = $this->runSpanGamut($value);
1116                                 $value = preg_replace('/^([ \t]*)/', "<p>", $value);
1117                                 $value .= "</p>";
1118                                 $grafs[$key] = $this->unhash($value);
1119                         }
1120                 }
1121
1122                 #
1123                 # Unhashify HTML blocks
1124                 #
1125                 foreach ($grafs as $key => $graf) {
1126                         # Modify elements of @grafs in-place...
1127                         if (isset($this->html_blocks[$graf])) {
1128                                 $block = $this->html_blocks[$graf];
1129                                 $graf = $block;
1130 //                              if (preg_match('{
1131 //                                      \A
1132 //                                      (                                                       # $1 = <div> tag
1133 //                                        <div  \s+
1134 //                                        [^>]*
1135 //                                        \b
1136 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1137 //                                        1
1138 //                                        \2
1139 //                                        [^>]*
1140 //                                        >
1141 //                                      )
1142 //                                      (                                                       # $3 = contents
1143 //                                      .*
1144 //                                      )
1145 //                                      (</div>)                                        # $4 = closing tag
1146 //                                      \z
1147 //                                      }xs', $block, $matches))
1148 //                              {
1149 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1150 //
1151 //                                      # We can't call Markdown(), because that resets the hash;
1152 //                                      # that initialization code should be pulled into its own sub, though.
1153 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1154 //
1155 //                                      # Run document gamut methods on the content.
1156 //                                      foreach ($this->document_gamut as $method => $priority) {
1157 //                                              $div_content = $this->$method($div_content);
1158 //                                      }
1159 //
1160 //                                      $div_open = preg_replace(
1161 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1162 //
1163 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1164 //                              }
1165                                 $grafs[$key] = $graf;
1166                         }
1167                 }
1168
1169                 return implode("\n\n", $grafs);
1170         }
1171
1172
1173         function encodeAmpsAndAngles($text) {
1174         # Smart processing for ampersands and angle brackets that need to be encoded.
1175
1176                 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1177                 #   http://bumppo.net/projects/amputator/
1178                 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1179                                                          '&amp;', $text);;
1180
1181                 # Encode naked <'s
1182                 $text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);
1183
1184                 return $text;
1185         }
1186
1187
1188         function encodeBackslashEscapes($text) {
1189         #
1190         #       Parameter:  String.
1191         #       Returns:    The string, with after processing the following backslash
1192         #                               escape sequences.
1193         #
1194                 # Must process escaped backslashes first.
1195                 return str_replace(array_keys($this->backslash_escape_table),
1196                                                    array_values($this->backslash_escape_table), $text);
1197         }
1198
1199
1200         function doAutoLinks($text) {
1201                 $text = preg_replace('{<((https?|ftp|dict):[^\'">\s]+)>}',
1202                                                          '<a href="\1">\1</a>', $text);
1203
1204                 # Email addresses: <address@domain.foo>
1205                 $text = preg_replace_callback('{
1206                         <
1207                         (?:mailto:)?
1208                         (
1209                                 [-.\w\x80-\xFF]+
1210                                 \@
1211                                 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1212                         )
1213                         >
1214                         }xi',
1215                         array(&$this, '_doAutoLinks_callback'), $text);
1216
1217                 return $text;
1218         }
1219         function _doAutoLinks_callback($matches) {
1220                 $address = $matches[1];
1221                 $address = $this->unescapeSpecialChars($address);
1222                 $address = $this->encodeEmailAddress($address);
1223                 return $this->hashSpan($address);
1224         }
1225
1226
1227         function encodeEmailAddress($addr) {
1228         #
1229         #       Input: an email address, e.g. "foo@example.com"
1230         #
1231         #       Output: the email address as a mailto link, with each character
1232         #               of the address encoded as either a decimal or hex entity, in
1233         #               the hopes of foiling most address harvesting spam bots. E.g.:
1234         #
1235         #         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1236         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1237         #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1238         #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1239         #
1240         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1241         #   With some optimizations by Milian Wolff.
1242         #
1243                 $addr = "mailto:" . $addr;
1244                 $chars = preg_split('/(?<!^)(?!$)/', $addr);
1245                 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1246
1247                 foreach ($chars as $key => $char) {
1248                         $ord = ord($char);
1249                         # Ignore non-ascii chars.
1250                         if ($ord < 128) {
1251                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1252                                 # roughly 10% raw, 45% hex, 45% dec
1253                                 # '@' *must* be encoded. I insist.
1254                                 if ($r > 90 && $char != '@') /* do nothing */;
1255                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1256                                 else              $chars[$key] = '&#'.$ord.';';
1257                         }
1258                 }
1259
1260                 $addr = implode('', $chars);
1261                 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1262                 $addr = "<a href=\"$addr\">$text</a>";
1263
1264                 return $addr;
1265         }
1266
1267
1268         function unescapeSpecialChars($text) {
1269         #
1270         # Swap back in all the special characters we've hidden.
1271         #
1272                 return str_replace(array_values($this->escape_table),
1273                                                    array_keys($this->escape_table), $text);
1274         }
1275
1276
1277         function tokenizeHTML($str) {
1278         #
1279         #   Parameter:  String containing HTML + Markdown markup.
1280         #   Returns:    An array of the tokens comprising the input
1281         #               string. Each token is either a tag or a run of text
1282         #               between tags. Each element of the array is a
1283         #               two-element array; the first is either 'tag' or 'text';
1284         #               the second is the actual value.
1285         #   Note:       Markdown code spans are taken into account: no tag token is
1286         #               generated within a code span.
1287         #
1288                 $tokens = array();
1289
1290                 while ($str != "") {
1291                         #
1292                         # Each loop iteration seach for either the next tag or the next
1293                         # openning code span marker. If a code span marker is found, the
1294                         # code span is extracted in entierty and will result in an extra
1295                         # text token.
1296                         #
1297                         $parts = preg_split('{
1298                                 (
1299                                         (?<![`\\\\])
1300                                         `+                                              # code span marker
1301                                 |
1302                                         <!--    .*?     -->             # comment
1303                                 |
1304                                         <\?.*?\?> | <%.*?%>             # processing instruction
1305                                 |
1306                                         <[/!$]?[-a-zA-Z0-9:]+   # regular tags
1307                                         (?:
1308                                                 \s
1309                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1310                                         )?
1311                                         >
1312                                 )
1313                                 }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1314
1315                         # Create token from text preceding tag.
1316                         if ($parts[0] != "") {
1317                                 $tokens[] = array('text', $parts[0]);
1318                         }
1319
1320                         # Check if we reach the end.
1321                         if (count($parts) < 3) {
1322                                 break;
1323                         }
1324
1325                         # Create token from tag or code span.
1326                         if ($parts[1]{0} == "`") {
1327                                 $tokens[] = array('text', $parts[1]);
1328                                 $str = $parts[2];
1329
1330                                 # Skip the whole code span, pass as text token.
1331                                 if (preg_match('/^(.*(?<!`\\\\)'.$parts[1].'(?!`))(.*)$/',
1332                                         $str, $matches))
1333                                 {
1334                                         $tokens[] = array('text', $matches[1]);
1335                                         $str = $matches[2];
1336                                 }
1337                         } else {
1338                                 $tokens[] = array('tag', $parts[1]);
1339                                 $str = $parts[2];
1340                         }
1341                 }
1342
1343                 return $tokens;
1344         }
1345
1346
1347         function outdent($text) {
1348         #
1349         # Remove one level of line-leading tabs or spaces
1350         #
1351                 return preg_replace("/^(\\t|[ ]{1,$this->tab_width})/m", "", $text);
1352         }
1353
1354
        # String length function for detab. `_initDetab` will create a function to
        # handle UTF-8 if the default function does not exist.
1357         var $utf8_strlen = 'mb_strlen';
1358
1359         function detab($text) {
1360         #
1361         # Replace tabs with the appropriate amount of space.
1362         #
1363                 # For each line we separate the line in blocks delemited by
1364                 # tab characters. Then we reconstruct every line by adding the
1365                 # appropriate number of space between each blocks.
1366
1367                 $strlen = $this->utf8_strlen; # best strlen function for UTF-8.
1368                 $lines = explode("\n", $text);
1369                 $text = "";
1370
1371                 foreach ($lines as $line) {
1372                         # Split in blocks.
1373                         $blocks = explode("\t", $line);
1374                         # Add each blocks to the line.
1375                         $line = $blocks[0];
1376                         unset($blocks[0]); # Do not add first block twice.
1377                         foreach ($blocks as $block) {
1378                                 # Calculate amount of space, insert spaces, insert block.
1379                                 $amount = $this->tab_width -
1380                                         $strlen($line, 'UTF-8') % $this->tab_width;
1381                                 $line .= str_repeat(" ", $amount) . $block;
1382                         }
1383                         $text .= "$line\n";
1384                 }
1385                 return $text;
1386         }
1387         function _initDetab() {
1388         #
1389         # Check for the availability of the function in the `utf8_strlen` property
1390         # (probably `mb_strlen`). If the function is not available, create a
1391         # function that will loosely count the number of UTF-8 characters with a
1392         # regular expression.
1393         #
1394                 if (function_exists($this->utf8_strlen)) return;
1395                 $this->utf8_strlen = 'Markdown_UTF8_strlen';
1396
1397                 if (function_exists($this->utf8_strlen)) return;
1398                 function Markdown_UTF8_strlen($text) {
1399                         return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
1400                                 $text, $m);
1401                 }
1402         }
1403
1404
1405         function unhash($text) {
1406         #
1407         # Swap back in all the tags hashed by _HashHTMLBlocks.
1408         #
1409                 return str_replace(array_keys($this->html_hashes),
1410                                                    array_values($this->html_hashes), $text);
1411         }
1412
1413 }
1414
1415
1416 /*
1417
1418 PHP Markdown
1419 ============
1420
1421 Description
1422 -----------
1423
1424 This is a PHP translation of the original Markdown formatter written in
1425 Perl by John Gruber.
1426
1427 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1428 easy-to-write structured text format into HTML. Markdown's text format
1429 is most similar to that of plain text email, and supports features such
1430 as headers, *emphasis*, code blocks, blockquotes, and links.
1431
1432 Markdown's syntax is designed not as a generic markup language, but
1433 specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block level
HTML tags (like <div> and <table>) as well.
1436
1437 For more information about Markdown's syntax, see:
1438
1439 <http://daringfireball.net/projects/markdown/>
1440
1441
1442 Bugs
1443 ----
1444
1445 To file bug reports please send email to:
1446
1447 <michel.fortin@michelf.com>
1448
1449 Please include with your report: (1) the example input; (2) the output you
1450 expected; (3) the output Markdown actually produced.
1451
1452
1453 Version History
1454 ---------------
1455
1456 See the readme file for detailed release notes for this version.
1457
1458 1.0.1e (28 Dec 2006)
1459
1460 1.0.1d (1 Dec 2006)
1461
1462 1.0.1c (9 Dec 2005)
1463
1464 1.0.1b (6 Jun 2005)
1465
1466 1.0.1a (15 Apr 2005)
1467
1468 1.0.1 (16 Dec 2004)
1469
1470 1.0 (21 Aug 2004)
1471
1472
1473 Author & Contributors
1474 ---------------------
1475
1476 Original Markdown by John Gruber
1477 <http://daringfireball.net/>
1478
1479 PHP port and extras by Michel Fortin
1480 <http://www.michelf.com/>
1481
1482
1483 Copyright and License
1484 ---------------------
1485
1486 Copyright (c) 2004-2006 Michel Fortin
1487 <http://www.michelf.com/>
1488 All rights reserved.
1489
1490 Copyright (c) 2003-2006 John Gruber
1491 <http://daringfireball.net/>
1492 All rights reserved.
1493
1494 Redistribution and use in source and binary forms, with or without
1495 modification, are permitted provided that the following conditions are
1496 met:
1497
1498 *       Redistributions of source code must retain the above copyright notice,
1499         this list of conditions and the following disclaimer.
1500
1501 *       Redistributions in binary form must reproduce the above copyright
1502         notice, this list of conditions and the following disclaimer in the
1503         documentation and/or other materials provided with the distribution.
1504
1505 *       Neither the name "Markdown" nor the names of its contributors may
1506         be used to endorse or promote products derived from this software
1507         without specific prior written permission.
1508
1509 This software is provided by the copyright holders and contributors "as
1510 is" and any express or implied warranties, including, but not limited
1511 to, the implied warranties of merchantability and fitness for a
1512 particular purpose are disclaimed. In no event shall the copyright owner
1513 or contributors be liable for any direct, indirect, incidental, special,
1514 exemplary, or consequential damages (including, but not limited to,
1515 procurement of substitute goods or services; loss of use, data, or
1516 profits; or business interruption) however caused and on any theory of
1517 liability, whether in contract, strict liability, or tort (including
1518 negligence or otherwise) arising in any way out of the use of this
1519 software, even if advised of the possibility of such damage.
1520
1521 */
1522 ?>