includes/Parser.php

   1 <?php
   2
   # Globals used:
   #    major:  $wgUser, $wgTitle,
   #    minor:  $wgUseTeX
   6
   7 class Parser
   8 {
   9         var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
  10
  # Constructor (PHP 4 style, named after the class).
  # A new parser starts out in the state produced by clearState().
  function Parser() {
      $this->clearState();
  }
  15
  # Reset the per-parse state: clear the mDTopen flag, forget the last
  # section, restart the autonumber counter at zero and attach a fresh
  # ParserOutput to collect the result of the next parse.
  function clearState() {
      $this->mDTopen = false;
      $this->mLastSection = "";
      $this->mAutonumber = 0;
      $this->mOutput = new ParserOutput;
  }
  23
  24         # First pass--just handle <nowiki> sections, pass the rest off
  25         # to doWikiPass2() which does all the real work.
  26         #
  27         # Returns a ParserOutput
  28         #
  # Entry point: turn wikitext into a ParserOutput.
  #
  # <nowiki>, <math> (only if $wgUseTeX is set) and <pre> sections are cut
  # out of the text and replaced by unique placeholders, so doWikiPass2()
  # never sees their contents; afterwards the protected sections are
  # substituted back into the rendered text.
  #
  #   $text        wikitext to parse
  #   $linestart   handed through unchanged to doWikiPass2()
  #   $clearState  if true, clearState() is called before parsing
  #
  # Returns $this->mOutput after storing the rendered text in it.
  #
  function parse( $text, $linestart = true, $clearState = true )
  {
      global $wgUseTeX;
      $fname = "Parser::parse";
      wfProfileIn( $fname );

      # Placeholder prefixes for nowiki, math and pre sections respectively
      $unique  = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
      $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
      $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";

      if ( $clearState ) {
          $this->clearState();
      }

      # Replace any instances of the placeholders
      $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
      $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
      $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );

      # NOTE: the original code carried a disabled (commented-out) parser
      # cache lookup at this point and a matching store before setText().

      # <nowiki> sections: stored after wfEscapeHTMLTagsOnly()
      $nwlist = array();
      $stripped = $this->extractTagSections( "nowiki", $unique, $text, $nwlist );
      foreach ( array_keys( $nwlist ) as $i ) {
          $nwlist[$i] = wfEscapeHTMLTagsOnly( $nwlist[$i] );
      }

      # <math> sections: stored as the output of renderMath()
      $mathlist = array();
      if ( $wgUseTeX ) {
          $stripped2 = $this->extractTagSections( "math", $unique2, $stripped, $mathlist );
          foreach ( array_keys( $mathlist ) as $i ) {
              $mathlist[$i] = renderMath( $mathlist[$i] );
          }
      } else {
          $stripped2 = $stripped;
      }

      # <pre> sections: escaped like nowiki and wrapped in <pre> again
      $prelist = array();
      $stripped3 = $this->extractTagSections( "pre", $unique3, $stripped2, $prelist );
      foreach ( array_keys( $prelist ) as $i ) {
          $prelist[$i] = "<pre>". wfEscapeHTMLTagsOnly( $prelist[$i] ). "</pre>\n";
      }

      $text = $this->doWikiPass2( $stripped3, $linestart );

      # Put the protected sections back. The tag order (pre, math, nowiki)
      # is the reverse of the stripping order, so a placeholder that ended
      # up inside a later-stripped section is still resolved. Placeholders
      # end in "s", so "...1s" can never match inside "...10s"; plain
      # str_replace() is enough and needs no escaping of "\" or "$".
      for ( $i = count( $prelist ); $i >= 1; --$i ) {
          $text = str_replace( "{$unique3}{$i}s", $prelist[$i], $text );
      }

      for ( $i = count( $mathlist ); $i >= 1; --$i ) {
          $text = str_replace( "{$unique2}{$i}s", $mathlist[$i], $text );
      }

      for ( $i = count( $nwlist ); $i >= 1; --$i ) {
          $text = str_replace( "{$unique}{$i}s", $nwlist[$i], $text );
      }

      $this->mOutput->setText( $text );
      wfProfileOut( $fname );
      return $this->mOutput;
  }

  # Helper for parse(): cut every <$tag>...</$tag> section out of $text.
  #
  #   $tag       tag name without angle brackets (matched case-insensitively,
  #              whitespace allowed inside the brackets)
  #   $marker    placeholder prefix; section N is replaced by $marker . N . "s"
  #   $text      text to scan
  #   $sections  (out) raw section contents, indexed from 1
  #
  # Returns the text with the sections replaced by placeholders. A section
  # whose closing tag is missing runs to the end of the text; an opening
  # tag with nothing after it is dropped.
  #
  /* private */ function extractTagSections( $tag, $marker, $text, &$sections )
  {
      $sections = array();
      $count = 0;
      $out = "";
      while ( "" != $text ) {
          $p = preg_split( "/<\\s*{$tag}\\s*>/i", $text, 2 );
          $out .= $p[0];
          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
              $text = "";
          } else {
              $q = preg_split( "/<\\/\\s*{$tag}\\s*>/i", $p[1], 2 );
              ++$count;
              $sections[$count] = $q[0];
              $out .= $marker . $count . "s";
              # $q[1] does not exist when the closing tag is missing
              $text = isset( $q[1] ) ? $q[1] : "";
          }
      }
      return $out;
  }
 145
 # Build the HTML block that doWikiPass2() appends to the page text: for a
 # page whose title starts with the localized "category" word, a list of
 # subcategories and a list of articles found by the query below.
 #
 # Returns "" when $wgUseCategoryMagic is unset/false or the current title
 # is not of the form "<Category>:<name>"; otherwise the generated HTML.
 #
 function categoryMagic ()
 {
     global $wgTitle, $wgUseCategoryMagic, $wgLang, $wgUser;
     if ( !isset( $wgUseCategoryMagic ) || !$wgUseCategoryMagic ) return "";
     $id = $wgTitle->getArticleID();
     $cat = ucfirst( wfMsg( "category" ) );
     $ti = explode( ":", $wgTitle->getText(), 2 );
     if ( $cat != $ti[0] ) return "";
     # Part after the first colon; absent if the title is the bare word
     $name = isset( $ti[1] ) ? $ti[1] : "";

     # "clear" is the BR attribute that ends floating; "break" does not exist
     $r = "<br clear=all>\n";

     $articles = array();
     $children = array();

     $sk = $wgUser->getSkin();

     # Developer toggle, hard-wired off: only the brokenlinks query is used.
     $doesexist = false;
     if ( $doesexist ) {
         $sql = "SELECT l_from FROM links WHERE l_to={$id}";
     } else {
         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id";
     }

     $res = wfQuery( $sql, DB_READ );
     while ( $x = wfFetchObject( $res ) ) {
         if ( $doesexist ) {
             $t = $x->l_from;
         } else {
             # Rebuild the prefixed title from namespace text and title
             $t = $wgLang->getNsText( $x->cur_namespace );
             if ( $t != "" ) $t .= ":";
             $t .= $x->cur_title;
         }

         # Pages that are themselves "<Category>:..." are listed as
         # subcategories (linked by their bare name), all others as articles
         $y = explode( ":", $t, 2 );
         if ( count( $y ) == 2 && $y[0] == $cat ) {
             array_push( $children, $sk->makeLink( $t, $y[1] ) );
         } else {
             array_push( $articles, $sk->makeLink( $t ) );
         }
     }
     wfFreeResult( $res );

     # Children
     if ( count( $children ) > 0 ) {
         asort( $children );
         $r .= "<h2>".wfMsg("subcategories")."</h2>\n";
         $r .= implode( ", ", $children );
     }

     # Articles
     if ( count( $articles ) > 0 ) {
         asort( $articles );
         $h = wfMsg( "category_header", $name );
         $r .= "<h2>{$h}</h2>\n";
         $r .= implode( ", ", $articles );
     }

     return $r;
 }
 215
 # Return the whitelist of HTML attribute names (lower case) that
 # fixTagAttributes() lets through. Scripting attributes are deliberately
 # absent. Each name is listed once.
 function getHTMLattrs ()
 {
     return array( # Allowed attributes--no scripting, etc.
         "title", "align", "lang", "dir", "width", "height",
         "bgcolor", "clear", /* BR */ "noshade", /* HR */
         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
         /* FONT */ "type", "start", "value", "compact",
         /* For various lists, mostly deprecated but safe */
         "summary", "border", "frame", "rules",
         "cellspacing", "cellpadding", "valign", "char",
         "charoff", "colgroup", "col", "span", "abbr", "axis",
         "headers", "scope", "rowspan", "colspan", /* Tables */
         "id", "class", "name", "style" /* For CSS */
     );
 }
 232
 # Filter the attribute part of an HTML tag down to whitelisted attributes.
 #
 # $t is the raw attribute text. Every name or name=value pair whose name
 # is not in getHTMLattrs() is removed; for the ones that stay, whitespace
 # around "=" is dropped. If the result still has a style attribute
 # followed by "expression", a "tp://" / "tps://" URL scheme or "url(",
 # ALL attributes of the tag are discarded.
 #
 # Returns the cleaned, trimmed attribute string ("" if nothing is left).
 #
 # The filtering uses preg_replace_callback() instead of the /e (eval)
 # modifier: with /e the matched user text was interpolated into PHP code,
 # which mangled quotes and backslashes and could be abused to run code.
 #
 function fixTagAttributes ( $t )
 {
     if ( trim( $t ) == "" ) return ""; # Saves runtime ;-)

     # Strip non-approved attributes from the tag
     $t = preg_replace_callback(
         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
         array( $this, "fixTagAttributesCallback" ),
         $t );

     # Strip javascript "expression" from stylesheets. Brute force approach:
     # If anything offensive is found, all attributes of the HTML tag are dropped
     if ( preg_match(
         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
         wfMungeToUtf8( $t ) ) )
     {
         $t = "";
     }

     return trim( $t );
 }

 # preg_replace_callback() handler for fixTagAttributes().
 # $m[1] is the attribute name, $m[3] the value (only set when present).
 # Returns "name" or "name=value" for whitelisted names, "" otherwise.
 /* private */ function fixTagAttributesCallback( $m )
 {
     if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
         return "";
     }
     $value = isset( $m[3] ) ? $m[3] : "";
     return ( $value !== "" ) ? "{$m[1]}={$value}" : $m[1];
 }
 255
 # Convert wiki table markup into HTML, working line by line.
 #
 #   {| attrs    open a table
 #   |+ text     caption
 #   |- attrs    start a new row (any number of dashes is accepted)
 #   | or !      data / header cells; "||" (also "!!" on header lines)
 #               separates several cells on one line, and "attrs|text"
 #               gives a cell attributes
 #   |}          close the table
 #
 # Lines outside a table are left untouched. Attributes are passed through
 # fixTagAttributes(). Tables still open at the end of the text are closed.
 #
 # $t is the text to convert; the converted text is returned.
 #
 function doTableStuff ( $t )
 {
     $t = explode( "\n", $t );
     # Parallel stacks, one entry per currently open (possibly nested) table
     $td = array();  # Is currently a td tag open?
     $ltd = array(); # Was it TD, TH or CAPTION?
     $tr = array();  # Is currently a tr tag open?
     $ltr = array(); # tr attributes waiting for the next <tr>

     foreach ( $t as $k => $x ) {
         $x = rtrim( $x );
         $fc = substr( $x, 0, 1 );
         $first2 = substr( $x, 0, 2 );

         if ( "{|" == $first2 ) {
             # Attributes start right after the two-character marker, so
             # "{|border=1" works as well as "{| border=1"
             $t[$k] = "<table " . $this->fixTagAttributes( substr( $x, 2 ) ) . ">";
             array_push( $td, false );
             array_push( $ltd, "" );
             array_push( $tr, false );
             array_push( $ltr, "" );
         } else if ( count( $td ) == 0 ) {
             # Not inside a table: don't do any of the following
         } else if ( "|}" == $first2 || "|-" == $first2 ) {
             # End of table or of row: close the open cell, then the row
             $z = "";
             $l = array_pop( $ltd );
             if ( array_pop( $tr ) ) $z = "</tr>" . $z;
             if ( array_pop( $td ) ) $z = "</{$l}>" . $z;
             array_pop( $ltr );

             if ( "|}" == $first2 ) {
                 $t[$k] = $z . "</table>\n";
             } else {
                 # Allows for |--------------- ; the rest is tr attributes,
                 # remembered until the first cell of the row opens the <tr>
                 $t[$k] = $z;
                 array_push( $tr, false );
                 array_push( $td, false );
                 array_push( $ltd, "" );
                 array_push( $ltr, $this->fixTagAttributes( ltrim( substr( $x, 1 ), "-" ) ) );
             }
         } else if ( "|" == $fc || "!" == $fc ) {
             if ( "|+" == $first2 ) { # Caption
                 $fc = "+";
                 $x = substr( $x, 1 );
             }
             $after = substr( $x, 1 );
             if ( $fc == "!" ) $after = str_replace( "!!", "||", $after );

             if ( $fc == "|" ) $tag = "TD";
             else if ( $fc == "!" ) $tag = "TH";
             else $tag = "CAPTION";

             $t[$k] = "";
             foreach ( explode( "||", $after ) as $theline ) {
                 $z = "";
                 if ( $fc != "+" ) {
                     # First cell of a row opens the <tr>; captions sit outside rows
                     $tra = array_pop( $ltr );
                     if ( !array_pop( $tr ) ) $z = "<tr {$tra}>\n";
                     array_push( $tr, true );
                     array_push( $ltr, "" );
                 }

                 # Close the previous cell, if any, before opening this one
                 $open = array_pop( $ltd );
                 if ( array_pop( $td ) ) $z = "</{$open}>" . $z;
                 array_push( $ltd, $tag );

                 $y = explode( "|", $theline, 2 );
                 if ( count( $y ) == 1 ) {
                     $t[$k] .= "{$z}<{$tag}>{$y[0]}";
                 } else {
                     $t[$k] .= "{$z}<{$tag} " . $this->fixTagAttributes( $y[0] ) . ">{$y[1]}";
                 }
                 array_push( $td, true );
             }
         }
     }

     # Closing open td, tr && table; the cell is closed with the tag that
     # actually opened it (TD, TH or CAPTION)
     while ( count( $td ) > 0 ) {
         $l = array_pop( $ltd );
         array_pop( $ltr );
         if ( array_pop( $td ) ) $t[] = "</{$l}>";
         if ( array_pop( $tr ) ) $t[] = "</tr>";
         $t[] = "</table>";
     }

     return implode( "\n", $t );
 }
 355
 356         # Well, OK, it's actually about 14 passes.  But since all the
 357         # hard lifting is done inside PHP's regex code, it probably
 358         # wouldn't speed things up much to add a real parser.
 359         #
 # Run the wikitext-to-HTML passes over $text in a fixed order and return
 # the result. The order matters (e.g. external links are replaced before
 # internal ones). $linestart is handed to doBlockLevels() unchanged.
 # The output of categoryMagic() is appended at the very end.
 #
 function doWikiPass2( $text, $linestart )
 {
     global $wgUser, $wgUseDynamicDates;
     $fname = "Parser::doWikiPass2";
     wfProfileIn( $fname );

     $text = $this->removeHTMLtags( $text );
     $text = $this->replaceVariables( $text );

     # Four or more dashes at the start of a line become a horizontal rule
     $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
     $text = str_replace( "<HR>", "<hr>", $text );

     $text = $this->doAllQuotes( $text );
     $text = $this->doHeadings( $text );
     $text = $this->doBlockLevels( $text, $linestart );

     if ( $wgUseDynamicDates ) {
         global $wgDateFormatter;
         $text = $wgDateFormatter->reformat( $wgUser->getOption("date"), $text );
     }

     $text = $this->replaceExternalLinks( $text );
     $text = $this->replaceInternalLinks( $text );
     $text = $this->doTableStuff( $text );

     $text = $this->magicISBN( $text );
     $text = $this->magicRFC( $text );
     $text = $this->formatHeadings( $text );

     $sk = $wgUser->getSkin();
     $text = $sk->transformContent( $text );
     $text .= $this->categoryMagic();

     wfProfileOut( $fname );
     return $text;
 }
 396
 # Apply doQuotes() to each "\r\n"-separated line of $text on its own.
 # Every converted line, the last one included, is followed by "\r\n"
 # in the returned text.
 /* private */ function doAllQuotes( $text )
 {
     $converted = array();
     foreach ( explode( "\r\n", $text ) as $line ) {
         $converted[] = $this->doQuotes( "", $line, "" );
     }
     return implode( "\r\n", $converted ) . "\r\n";
 }
 406
 # Convert runs of '' and ''' in $text into <em> and <strong> markup.
 #
 # The function consumes the text up to the next quote run, then calls
 # itself on the remainder. $mode records which markup is currently open
 # ("", "em", "strong", "emstrong", "strongem" or "both") and $pre holds
 # already-seen text that still has to be wrapped by the outer tag.
 # Callers start with doQuotes( "", $line, "" ).
 #
 /* private */ function doQuotes( $pre, $text, $mode )
 {
     if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
         # No further quote run: close whatever is still open
         $strongText = ( $text == "" ) ? "" : "<strong>{$text}</strong>";
         $emText = ( $text == "" ) ? "" : "<em>{$text}</em>";
         $nothing = ( $pre == "" ) && ( $text == "" );
         switch ( $mode ) {
             case "":
                 return $pre . $text;
             case "em":
                 return $pre . $emText;
             case "strong":
                 return $pre . $strongText;
             case "strongem":
                 return $nothing ? "" : "<strong>{$pre}{$emText}</strong>";
             default:
                 return $nothing ? "" : "<em>{$pre}{$strongText}</em>";
         }
     }

     $head = $m[1]; # text before the quote run
     $rest = $m[2]; # text after the first two quotes
     $strongHead = ( $head == "" ) ? "" : "<strong>{$head}</strong>";
     $emHead = ( $head == "" ) ? "" : "<em>{$head}</em>";

     if ( substr( $rest, 0, 1 ) == "'" ) {
         # A third quote follows: this run is '''
         $rest = substr( $rest, 1 );
         switch ( $mode ) {
             case "em":
                 return $this->doQuotes( $head, $rest, ( $head == "" ) ? "both" : "emstrong" );
             case "strong":
                 return $strongHead . $this->doQuotes( "", $rest, "" );
             case "emstrong":
             case "both":
                 return $this->doQuotes( "", $pre . $strongHead . $rest, "em" );
             case "strongem":
                 return "<strong>{$pre}{$emHead}</strong>" . $this->doQuotes( "", $rest, "em" );
             default:
                 return $head . $this->doQuotes( "", $rest, "strong" );
         }
     }

     # Plain '' run
     switch ( $mode ) {
         case "strong":
             return $this->doQuotes( $head, $rest, ( $head == "" ) ? "both" : "strongem" );
         case "em":
             return $emHead . $this->doQuotes( "", $rest, "" );
         case "emstrong":
             return "<em>{$pre}{$strongHead}</em>" . $this->doQuotes( "", $rest, "strong" );
         case "strongem":
         case "both":
             return $this->doQuotes( "", $pre . $emHead . $rest, "strong" );
         default:
             return $head . $this->doQuotes( "", $rest, "em" );
     }
 }
 454
 # Turn "== Heading ==" style lines into <h1>..<h6> elements.
 # Levels are tried from six equals signs down to one, so the longest
 # run of "=" wins. The heading text may not contain "=" itself, and the
 # closing run must be followed by whitespace or the end of the line.
 /* private */ function doHeadings( $text )
 {
     $level = 6;
     while ( $level >= 1 ) {
         $marks = str_repeat( "=", $level );
         $pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
         $replacement = "<h{$level}>\\1</h{$level}>\\2";
         $text = preg_replace( $pattern, $replacement, $text );
         --$level;
     }
     return $text;
 }
 464
 465         # Note: we have to do external links before the internal ones,
 466         # and otherwise take great care in the order of things here, so
 467         # that we don't end up interpreting some URLs twice.
 468
 # Replace external links in $text, one protocol at a time, by calling
 # subReplaceExternalLinks() for each supported protocol in a fixed order.
 # Only http and https links are autonumbered. Returns the converted text.
 /* private */ function replaceExternalLinks( $text )
 {
     $fname = "Parser::replaceExternalLinks";
     wfProfileIn( $fname );

     # protocol => autonumber flag; processed in this order
     $protocols = array(
         "http"   => true,
         "https"  => true,
         "ftp"    => false,
         "irc"    => false,
         "gopher" => false,
         "news"   => false,
         "mailto" => false
     );
     foreach ( $protocols as $protocol => $autonumber ) {
         $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
     }

     wfProfileOut( $fname );
     return $text;
 }
 483
 # Replace the external links of one protocol in $s.
 #
 #   $s           text to convert
 #   $protocol    protocol name without the colon, e.g. "http"
 #   $autonumber  if true, unlabelled bracketed links are shown as [1],
 #                [2], ... (counter: $this->mAutonumber) and, when
 #                $wgAllowExternalImages is set, bare image URLs are
 #                handed to the skin's makeImage()
 #
 # Bare URLs are handled first, then "[url]" and "[url label]" forms.
 # Returns the converted text.
 #
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
     global $wgUser, $printable, $wgAllowExternalImages;

     # Stands in for the protocol while bare URLs are converted and is
     # swapped back afterwards (presumably so converted URLs cannot be
     # matched again -- confirm before changing)
     $placeholder = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
     $urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

     # Separators that are ignored as the last character of a URL but
     # kept inside it: in "go to www.foo.com, where .." the comma is not
     # part of the URL, in "www.foo.com/123,2342,32.htm" the commas are.
     $separators = ",;\.:";
     $fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
     $imageExts = "gif|png|jpg|jpeg";

     # The curly braces below are PHP string interpolation, not regex syntax
     $imageRegex = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$separators}]+)\\/([{$fileChars}]+)\\.((?i){$imageExts})([^{$urlChars}]|$)/";
     $bareRegex = "/(^|[^\\[])({$protocol}:)(([{$urlChars}]|[{$separators}][{$urlChars}])+)([^{$urlChars}{$separators}]|[{$separators}]|$)/";

     $sk = $wgUser->getSkin();

     if ( $autonumber && $wgAllowExternalImages ) { # Use img tags only for HTTP urls
         $imageTag = $sk->makeImage( "{$placeholder}:\\3/\\4.\\5", "\\4.\\5" );
         $s = preg_replace( $imageRegex, "\\1" . $imageTag . "\\6", $s );
     }

     $urlTemplate = "{$placeholder}:\\3";
     $linkAttrs = $sk->getExternalLinkAttributes( $urlTemplate, wfEscapeHTML( $urlTemplate ) );
     $linkLabel = wfEscapeHTML( $urlTemplate );
     $anchor = "\\1<a href=\"{$urlTemplate}\"" . $linkAttrs . ">" . $linkLabel . "</a>\\5";
     $s = preg_replace( $bareRegex, $anchor, $s );
     $s = str_replace( $placeholder, $protocol, $s );

     # Bracketed links: split at every "[protocol:"; the leading blank
     # guarantees a first piece even if the text starts with a link
     $pieces = explode( "[{$protocol}:", " " . $s );
     $s = substr( array_shift( $pieces ), 1 );

     $plainRegex = "/^([{$urlChars}{$separators}]+)](.*)\$/sD";
     $labelledRegex = "/^([{$urlChars}{$separators}]+)\\s+([^\\]]+)](.*)\$/sD";

     foreach ( $pieces as $piece ) {
         if ( preg_match( $plainRegex, $piece, $m ) ) {
             # [url]
             $link = "{$protocol}:{$m[1]}";
             $trail = $m[2];
             if ( $autonumber ) {
                 ++$this->mAutonumber;
                 $text = "[" . $this->mAutonumber . "]";
             } else {
                 $text = wfEscapeHTML( $link );
             }
         } else if ( preg_match( $labelledRegex, $piece, $m ) ) {
             # [url label]
             $link = "{$protocol}:{$m[1]}";
             $text = $m[2];
             $trail = $m[3];
         } else {
             # Not a well-formed link: put the text back unchanged
             $s .= "[{$protocol}:" . $piece;
             continue;
         }

         # The printable version also spells out the URL after the link
         $paren = ( $printable == "yes" )
             ? " (<i>" . htmlspecialchars( $link ) . "</i>)"
             : "";
         $la = $sk->getExternalLinkAttributes( $link, $text );
         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
     }
     return $s;
 }
 550
 551         /* private */ function replaceInternalLinks( $s )
 552         {
 553                 global $wgTitle, $wgUser, $wgLang;
 554                 global $wgLinkCache, $wgInterwikiMagic, $wgUseCategoryMagic;
 555                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 556                 global $wgUseLinkPrefixCombination;
 557                 wfProfileIn( $fname = "OutputPage::replaceInternalLinks" );
 558
 559                 wfProfileIn( "$fname-setup" );
 560                 $tc = Title::legalChars() . "#";
 561                 $sk = $wgUser->getSkin();
 562
 563                 $a = explode( "[[", " " . $s );
 564                 $s = array_shift( $a );
 565                 $s = substr( $s, 1 );
 566
 567                 # Match a link having the form [[namespace:link|alternate]]trail
 568                 $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD";
 569                 # Match the end of a line for a word that's not followed by whitespace,
 570                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 571                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 572                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 573                 $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 574
 575
 576                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 577                 $image = Namespace::getImage();
 578                 $special = Namespace::getSpecial();
 579                 $media = Namespace::getMedia();
 580                 $category = wfMsg ( "category" ) ;
 581                 $nottalk = !Namespace::isTalk( $wgTitle->getNamespace() );
 582
 583                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 584                         $new_prefix = $m[2];
 585                         $s = $m[1];
 586                 } else {
 587                         $new_prefix="";
 588                 }
 589
 590                 wfProfileOut( "$fname-setup" );
 591
 592                 foreach ( $a as $line ) {
 593                         $prefix = $new_prefix;
 594                         if ( $wgUseLinkPrefixCombination && preg_match( $e2, $line, $m ) ) {
 595                                 $new_prefix = $m[2];
 596                                 $line = $m[1];
 597                         } else {
 598                                 $new_prefix = "";
 599                         }
 600                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 601                                 $text = $m[2];
 602                                 $trail = $m[3];
 603                         } else { # Invalid form; output directly
 604                                 $s .= $prefix . "[[" . $line ;
 605                                 continue;
 606                         }
 607
 608                         /* Valid link forms:
 609                         Foobar -- normal
 610                         :Foobar -- override special treatment of prefix (images, language links)
 611                         /Foobar -- convert to CurrentPage/Foobar
 612                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 613                         */
 614                         $c = substr($m[1],0,1);
 615                         $noforce = ($c != ":");
 616                         if( $c == "/" ) { # subpage
 617                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 618                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 619                                         $noslash=$m[1];
 620                                 } else {
 621                                         $noslash=substr($m[1],1);
 622                                 }
 623                                 if($wgNamespacesWithSubpages[$wgTitle->getNamespace()]) { # subpages allowed here
 624                                         $link = $wgTitle->getPrefixedText(). "/" . trim($noslash);
 625                                         if( "" == $text ) {
 626                                                 $text= $m[1];
 627                                         } # this might be changed for ugliness reasons
 628                                 } else {
 629                                         $link = $noslash; # no subpage allowed, use standard link
 630                                 }
 631                         } elseif( $noforce ) { # no subpage
 632                                 $link = $m[1];
 633                         } else {
 634                                 $link = substr( $m[1], 1 );
 635                         }
 636                         if( "" == $text )
 637                                 $text = $link;
 638
 639                         $nt = Title::newFromText( $link );
 640                         if( !$nt ) {
 641                                 $s .= $prefix . "[[" . $line;
 642                                 continue;
 643                         }
 644                         $ns = $nt->getNamespace();
 645                         $iw = $nt->getInterWiki();
 646                         if( $noforce ) {
 647                                 if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 648                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 649                                         $s .= $prefix . $trail;
 650                                         continue;
 651                                 }
 652                                 if( $ns == $image ) {
 653                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 654                                         $wgLinkCache->addImageLinkObj( $nt );
 655                                         continue;
 656                                 }
 657                         }
 658                         if( ( $nt->getPrefixedText() == $wgTitle->getPrefixedText() ) &&
 659                             ( strpos( $link, "#" ) == FALSE ) ) {
 660                                 $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 661                                 continue;
 662                         }
 663                         if ( $ns == $category && $wgUseCategoryMagic ) {
 664                           $t = explode ( ":" , $nt->getText() ) ;
 665                                 array_shift ( $t ) ;
 666                                 $t = implode ( ":" , $t ) ;
 667                                 $t = $wgLang->ucFirst ( $t ) ;
 668 #                               $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 669                                 $nnt = Title::newFromText ( $category.":".$t ) ;
 670                                 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 671                                 $this->mCategoryLinks[] = $t ;
 672                                 $s .= $prefix . $trail ;
 673                                 continue ;
 674                         }
 675                         if( $ns == $media ) {
 676                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 677                                 $wgLinkCache->addImageLinkObj( $nt );
 678                                 continue;
 679                         } elseif( $ns == $special ) {
 680                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 681                                 continue;
 682                         }
 683                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 684                 }
 685                 wfProfileOut( $fname );
 686                 return $s;
 687         }
 688
 689         # Some functions here used by doBlockLevels()
 690         #
 691         /* private */ function closeParagraph()
 692         {
 693                 $result = "";
 694                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 695                   0 != strcmp( "", $this->mLastSection ) ) {
 696                         $result = "</" . $this->mLastSection  . ">";
 697                 }
 698                 $this->mLastSection = "";
 699                 return $result."\n";
 700         }
 701         # getCommon() returns the length of the longest common substring
 702         # of both arguments, starting at the beginning of both.
 703         #
 704         /* private */ function getCommon( $st1, $st2 )
 705         {
 706                 $fl = strlen( $st1 );
 707                 $shorter = strlen( $st2 );
 708                 if ( $fl < $shorter ) { $shorter = $fl; }
 709
 710                 for ( $i = 0; $i < $shorter; ++$i ) {
 711                         if ( $st1{$i} != $st2{$i} ) { break; }
 712                 }
 713                 return $i;
 714         }
 715         # These next three functions open, continue, and close the list
 716         # element appropriate to the prefix character passed into them.
 717         #
 718         /* private */ function openList( $char )
 719     {
 720                 $result = $this->closeParagraph();
 721
 722                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 723                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 724                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 725                 else if ( ";" == $char ) {
 726                         $result .= "<dl><dt>";
 727                         $this->mDTopen = true;
 728                 }
 729                 else { $result = "<!-- ERR 1 -->"; }
 730
 731                 return $result;
 732         }
 733
 734         /* private */ function nextItem( $char )
 735         {
 736                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 737                 else if ( ":" == $char || ";" == $char ) {
 738                         $close = "</dd>";
 739                         if ( $this->mDTopen ) { $close = "</dt>"; }
 740                         if ( ";" == $char ) {
 741                                 $this->mDTopen = true;
 742                                 return $close . "<dt>";
 743                         } else {
 744                                 $this->mDTopen = false;
 745                                 return $close . "<dd>";
 746                         }
 747                 }
 748                 return "<!-- ERR 2 -->";
 749         }
 750
 751         /* private */function closeList( $char )
 752         {
 753                 if ( "*" == $char ) { $text = "</li></ul>"; }
 754                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 755                 else if ( ":" == $char ) {
 756                         if ( $this->mDTopen ) {
 757                                 $this->mDTopen = false;
 758                                 $text = "</dt></dl>";
 759                         } else {
 760                                 $text = "</dd></dl>";
 761                         }
 762                 }
 763                 else {  return "<!-- ERR 3 -->"; }
 764                 return $text."\n";
 765         }
 766
 767         /* private */ function doBlockLevels( $text, $linestart )
 768         {
 769                 $fname = "OutputPage::doBlockLevels";
 770                 wfProfileIn( $fname );
 771                 # Parsing through the text line by line.  The main thing
 772                 # happening here is handling of block-level elements p, pre,
 773                 # and making lists from lines starting with * # : etc.
 774                 #
 775                 $a = explode( "\n", $text );
 776                 $text = $lastPref = "";
 777                 $this->mDTopen = $inBlockElem = false;
 778
 779                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 780                 foreach ( $a as $t ) {
 781                         if ( "" != $text ) { $text .= "\n"; }
 782
 783                         $oLine = $t;
 784                         $opl = strlen( $lastPref );
 785                         $npl = strspn( $t, "*#:;" );
 786                         $pref = substr( $t, 0, $npl );
 787                         $pref2 = str_replace( ";", ":", $pref );
 788                         $t = substr( $t, $npl );
 789
 790                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 791                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 792
 793                                 if ( ";" == substr( $pref, -1 ) ) {
 794                                         $cpos = strpos( $t, ":" );
 795                                         if ( ! ( false === $cpos ) ) {
 796                                                 $term = substr( $t, 0, $cpos );
 797                                                 $text .= $term . $this->nextItem( ":" );
 798                                                 $t = substr( $t, $cpos + 1 );
 799                                         }
 800                                 }
 801                         } else if (0 != $npl || 0 != $opl) {
 802                                 $cpl = $this->getCommon( $pref, $lastPref );
 803
 804                                 while ( $cpl < $opl ) {
 805                                         $text .= $this->closeList( $lastPref{$opl-1} );
 806                                         --$opl;
 807                                 }
 808                                 if ( $npl <= $cpl && $cpl > 0 ) {
 809                                         $text .= $this->nextItem( $pref{$cpl-1} );
 810                                 }
 811                                 while ( $npl > $cpl ) {
 812                                         $char = substr( $pref, $cpl, 1 );
 813                                         $text .= $this->openList( $char );
 814
 815                                         if ( ";" == $char ) {
 816                                                 $cpos = strpos( $t, ":" );
 817                                                 if ( ! ( false === $cpos ) ) {
 818                                                         $term = substr( $t, 0, $cpos );
 819                                                         $text .= $term . $this->nextItem( ":" );
 820                                                         $t = substr( $t, $cpos + 1 );
 821                                                 }
 822                                         }
 823                                         ++$cpl;
 824                                 }
 825                                 $lastPref = $pref2;
 826                         }
 827                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 828                                 if ( preg_match(
 829                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 830                                         $text .= $this->closeParagraph();
 831                                         $inBlockElem = true;
 832                                 }
 833                                 if ( ! $inBlockElem ) {
 834                                         if ( " " == $t{0} ) {
 835                                                 $newSection = "pre";
 836                                                 # $t = wfEscapeHTML( $t );
 837                                         }
 838                                         else { $newSection = "p"; }
 839
 840                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 841                                                 $text .= $this->closeParagraph();
 842                                                 $text .= "<" . $newSection . ">";
 843                                         } else if ( 0 != strcmp( $this->mLastSection,
 844                                           $newSection ) ) {
 845                                                 $text .= $this->closeParagraph();
 846                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 847                                                         $text .= "<" . $newSection . ">";
 848                                                 }
 849                                         }
 850                                         $this->mLastSection = $newSection;
 851                                 }
 852                                 if ( $inBlockElem &&
 853                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 854                                         $inBlockElem = false;
 855                                 }
 856                         }
 857                         $text .= $t;
 858                 }
 859                 while ( $npl ) {
 860                         $text .= $this->closeList( $pref2{$npl-1} );
 861                         --$npl;
 862                 }
 863                 if ( "" != $this->mLastSection ) {
 864                         if ( "p" != $this->mLastSection ) {
 865                                 $text .= "</" . $this->mLastSection . ">";
 866                         }
 867                         $this->mLastSection = "";
 868                 }
 869                 wfProfileOut( $fname );
 870                 return $text;
 871         }
 872
 873         /* private */ function replaceVariables( $text )
 874         {
 875                 global $wgLang, $wgCurOut;
 876                 $fname = "OutputPage::replaceVariables";
 877                 wfProfileIn( $fname );
 878
 879                 $magic = array();
 880
 881                 # Basic variables
 882                 # See Language.php for the definition of each magic word
 883                 # As with sigs, this uses the server's local time -- ensure
 884                 # this is appropriate for your audience!
 885
 886                 $magic[MAG_CURRENTMONTH] = date( "m" );
 887                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
 888                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
 889                 $magic[MAG_CURRENTDAY] = date("j");
 890                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
 891                 $magic[MAG_CURRENTYEAR] = date( "Y" );
 892                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
 893
 894                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
 895
 896                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
 897                 if ( $mw->match( $text ) ) {
 898                         $v = wfNumberOfArticles();
 899                         $text = $mw->replace( $v, $text );
 900                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
 901                 }
 902
 903                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
 904                 # The callbacks are at the bottom of this file
 905                 $wgCurOut = $this;
 906                 $mw =& MagicWord::get( MAG_MSG );
 907                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
 908                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 909
 910                 $mw =& MagicWord::get( MAG_MSGNW );
 911                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
 912                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 913
 914                 wfProfileOut( $fname );
 915                 return $text;
 916         }
 917
 918         # Cleans up HTML, removes dangerous tags and attributes
 919         /* private */ function removeHTMLtags( $text )
 920         {
 921                 $fname = "OutputPage::removeHTMLtags";
 922                 wfProfileIn( $fname );
 923                 $htmlpairs = array( # Tags that must be closed
 924                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
 925                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
 926                         "strike", "strong", "tt", "var", "div", "center",
 927                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
 928                         "ruby", "rt" , "rb" , "rp"
 929                 );
 930                 $htmlsingle = array(
 931                         "br", "p", "hr", "li", "dt", "dd"
 932                 );
 933                 $htmlnest = array( # Tags that can be nested--??
 934                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
 935                         "dl", "font", "big", "small", "sub", "sup"
 936                 );
 937                 $tabletags = array( # Can only appear inside table
 938                         "td", "th", "tr"
 939                 );
 940
 941                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
 942                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
 943
 944                 $htmlattrs = $this->getHTMLattrs () ;
 945
 946                 # Remove HTML comments
 947                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
 948
 949                 $bits = explode( "<", $text );
 950                 $text = array_shift( $bits );
 951                 $tagstack = array(); $tablestack = array();
 952
 953                 foreach ( $bits as $x ) {
 954                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
 955                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
 956                           $x, $regs );
 957                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
 958                         error_reporting( $prev );
 959
 960                         $badtag = 0 ;
 961                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
 962                                 # Check our stack
 963                                 if ( $slash ) {
 964                                         # Closing a tag...
 965                                         if ( ! in_array( $t, $htmlsingle ) &&
 966                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
 967                                                 array_push( $tagstack, $ot );
 968                                                 $badtag = 1;
 969                                         } else {
 970                                                 if ( $t == "table" ) {
 971                                                         $tagstack = array_pop( $tablestack );
 972                                                 }
 973                                                 $newparams = "";
 974                                         }
 975                                 } else {
 976                                         # Keep track for later
 977                                         if ( in_array( $t, $tabletags ) &&
 978                                           ! in_array( "table", $tagstack ) ) {
 979                                                 $badtag = 1;
 980                                         } else if ( in_array( $t, $tagstack ) &&
 981                                           ! in_array ( $t , $htmlnest ) ) {
 982                                                 $badtag = 1 ;
 983                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
 984                                                 if ( $t == "table" ) {
 985                                                         array_push( $tablestack, $tagstack );
 986                                                         $tagstack = array();
 987                                                 }
 988                                                 array_push( $tagstack, $t );
 989                                         }
 990                                         # Strip non-approved attributes from the tag
 991                                         $newparams = $this->fixTagAttributes($params);
 992
 993                                 }
 994                                 if ( ! $badtag ) {
 995                                         $rest = str_replace( ">", "&gt;", $rest );
 996                                         $text .= "<$slash$t $newparams$brace$rest";
 997                                         continue;
 998                                 }
 999                         }
1000                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1001                 }
1002                 # Close off any remaining tags
1003                 while ( $t = array_pop( $tagstack ) ) {
1004                         $text .= "</$t>\n";
1005                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1006                 }
1007                 wfProfileOut( $fname );
1008                 return $text;
1009         }
1010
1011 /*
1012  *
1013  * This function accomplishes several tasks:
1014  * 1) Auto-number headings if that option is enabled
1015  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1016  * 3) Add a Table of contents on the top for users who have enabled the option
1017  * 4) Auto-anchor headings
1018  *
1019  * It loops through all headlines, collects the necessary data, then splits up the
1020  * string and re-inserts the newly formatted headlines.
1021  *
1022  * */
1023         /* private */ function formatHeadings( $text )
1024         {
1025                 global $wgUser,$wgArticle,$wgTitle,$wpPreview;
1026                 $nh=$wgUser->getOption( "numberheadings" );
1027                 $st=$wgUser->getOption( "showtoc" );
1028                 if(!$wgTitle->userCanEdit()) {
1029                         $es=0;
1030                         $esr=0;
1031                 } else {
1032                         $es=$wgUser->getID() && $wgUser->getOption( "editsection" );
1033                         $esr=$wgUser->getID() && $wgUser->getOption( "editsectiononrightclick" );
1034                 }
1035
1036                 # Inhibit editsection links if requested in the page
1037                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1038                 if ($esw->matchAndRemove( $text )) {
1039                         $es=0;
1040                 }
1041                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1042                 # do not add TOC
1043                 $mw =& MagicWord::get( MAG_NOTOC );
1044                 if ($mw->matchAndRemove( $text ))
1045                 {
1046                         $st = 0;
1047                 }
1048
1049                 # never add the TOC to the Main Page. This is an entry page that should not
1050                 # be more than 1-2 screens large anyway
1051                 if($wgTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1052
1053                 # We need this to perform operations on the HTML
1054                 $sk=$wgUser->getSkin();
1055
1056                 # Get all headlines for numbering them and adding funky stuff like [edit]
1057                 # links
1058                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1059
1060                 # headline counter
1061                 $c=0;
1062
1063                 # Ugh .. the TOC should have neat indentation levels which can be
1064                 # passed to the skin functions. These are determined here
1065                 foreach($matches[3] as $headline) {
1066                         if($level) { $prevlevel=$level;}
1067                         $level=$matches[1][$c];
1068                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1069
1070                                 $h[$level]=0; // reset when we enter a new level
1071                                 $toc.=$sk->tocIndent($level-$prevlevel);
1072                                 $toclevel+=$level-$prevlevel;
1073
1074                         }
1075                         if(($nh||$st) && $level<$prevlevel) {
1076                                 $h[$level+1]=0; // reset when we step back a level
1077                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1078                                 $toclevel-=$prevlevel-$level;
1079
1080                         }
1081                         $h[$level]++; // count number of headlines for each level
1082
1083                         if($nh||$st) {
1084                                 for($i=1;$i<=$level;$i++) {
1085                                         if($h[$i]) {
1086                                                 if($dot) {$numbering.=".";}
1087                                                 $numbering.=$h[$i];
1088                                                 $dot=1;
1089                                         }
1090                                 }
1091                         }
1092
1093                         // The canonized header is a version of the header text safe to use for links
1094
1095                         $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1096                         $tocline = trim( $canonized_headline );
1097                         $canonized_headline=str_replace('"',"",$canonized_headline);
1098                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1099                         $refer[$c]=$canonized_headline;
1100                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1101                         $refcount[$c]=$refers[$canonized_headline];
1102
1103             // Prepend the number to the heading text
1104
1105                         if($nh||$st) {
1106                                 $tocline=$numbering ." ". $tocline;
1107
1108                                 // Don't number the heading if it is the only one (looks silly)
1109                                 if($nh && count($matches[3]) > 1) {
1110                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1111                                 }
1112                         }
1113
1114                         // Create the anchor for linking from the TOC to the section
1115
1116                         $anchor=$canonized_headline;
1117                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1118                         if($st) {
1119                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1120                         }
1121                         if($es && !isset($wpPreview)) {
1122                                 $head[$c].=$sk->editSectionLink($c+1);
1123                         }
1124
1125                         // Put it all together
1126
1127                         $head[$c].="<h".$level.$matches[2][$c]
1128                          ."<a name=\"".$anchor."\">"
1129                          .$headline
1130                          ."</a>"
1131                          ."</h".$level.">";
1132
1133                         // Add the edit section link
1134
1135                         if($esr && !isset($wpPreview)) {
1136                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1137                         }
1138
1139                         $numbering="";
1140                         $c++;
1141                         $dot=0;
1142                 }
1143
1144                 if($st) {
1145                         $toclines=$c;
1146                         $toc.=$sk->tocUnindent($toclevel);
1147                         $toc=$sk->tocTable($toc);
1148                 }
1149
1150                 // split up and insert constructed headlines
1151
1152                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1153                 $i=0;
1154
1155                 foreach($blocks as $block) {
1156                         if(($es) && !isset($wpPreview) && $c>0 && $i==0) {
1157                             # This is the [edit] link that appears for the top block of text when
1158                                 # section editing is enabled
1159                                 $full.=$sk->editSectionLink(0);
1160                         }
1161                         $full.=$block;
1162                         if($st && $toclines>3 && !$i) {
1163                                 # Let's add a top anchor just in case we want to link to the top of the page
1164                                 $full="<a name=\"top\"></a>".$full.$toc;
1165                         }
1166
1167                         $full.=$head[$i];
1168                         $i++;
1169                 }
1170
1171                 return $full;
1172         }
1173
1174         /* private */ function magicISBN( $text )
1175         {
1176                 global $wgLang;
1177
1178                 $a = split( "ISBN ", " $text" );
1179                 if ( count ( $a ) < 2 ) return $text;
1180                 $text = substr( array_shift( $a ), 1);
1181                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1182
1183                 foreach ( $a as $x ) {
1184                         $isbn = $blank = "" ;
1185                         while ( " " == $x{0} ) {
1186                                 $blank .= " ";
1187                                 $x = substr( $x, 1 );
1188                         }
1189                         while ( strstr( $valid, $x{0} ) != false ) {
1190                                 $isbn .= $x{0};
1191                                 $x = substr( $x, 1 );
1192                         }
1193                         $num = str_replace( "-", "", $isbn );
1194                         $num = str_replace( " ", "", $num );
1195
1196                         if ( "" == $num ) {
1197                                 $text .= "ISBN $blank$x";
1198                         } else {
1199                                 $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
1200                                   "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
1201                                 $text .= $x;
1202                         }
1203                 }
1204                 return $text;
1205         }
1206
        # Placeholder for RFC handling: currently performs no replacement
        # and returns $text unchanged.
        /* private */ function magicRFC( $text )
        {
                return $text;
        }
1211
1212
1213 }
1214
# Holds the result of a parse: the output text, the language and
# category links collected, and the old-magic flag.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: value for the old-magic flag
        function __construct( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Class-named constructor, kept for PHP versions that do not know
        # __construct(). Newer PHP versions no longer treat a class-named
        # method as the constructor, so the real work lives in __construct().
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->__construct( $text, $languageLinks, $categoryLinks, $containsOldMagic );
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns (defined elsewhere).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
}
1237
# Regex callbacks, used in Parser::replaceVariables
1239
1240 # Just get rid of the dangerous stuff
1241 # Necessary because replaceVariables is called after removeHTMLtags,
1242 # and message text can come from any user
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        # $matches[1] is the message name captured by the magic word
        $msgName = $matches[1];

        # Sanitise the message text first
        $html = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

        # Internal links are expanded while the link cache is suspended
        $wgLinkCache->suspend();
        $html = $wgCurOut->replaceInternalLinks( $html );
        $wgLinkCache->resume();

        # Register the message page itself with the link cache
        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $html;
}
1252
1253 # Effective <nowiki></nowiki>
1254 # Not real <nowiki> because this is called after nowiki sections are processed
function wfReplaceMsgnwVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        # $matches[1] is the message name captured by the magic word
        $msgName = $matches[1];

        # The message text is escaped so it is shown literally
        $escaped = wfEscapeWikiText( wfMsg( $msgName ) );

        # Register the message page itself with the link cache
        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $escaped;
}
1261
1262
1263
1264 ?>