includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  13 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*
  14 #
  15 #      * only within ParserOptions
  16
  17 class Parser
  18 {
  19         # Cleared with clearState():
  20         var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
  21
  22         # Temporary:
  23         var $mOptions, $mTitle;
  24
  # Constructor: every new parser starts from a freshly cleared state.
  #
  # PHP 8 no longer treats a method named after its class as a constructor,
  # so the work lives in __construct(); PHP 5-7 prefer __construct() when
  # both are present.
  function __construct()
  {
          $this->clearState();
  }

  # PHP 4 style constructor. Kept so that PHP 4 itself, and any code that
  # calls Parser() or parent::Parser() explicitly, keeps working.
  function Parser()
  {
          $this->__construct();
  }
  29
  # Reset the members that are documented as "cleared with clearState()":
  # mDTopen goes back to false, mLastSection to the empty string, the
  # mAutonumber counter to zero, and mOutput becomes a new ParserOutput.
  function clearState()
  {
          $this->mDTopen = false;
          $this->mLastSection = "";
          $this->mAutonumber = 0;
          $this->mOutput = new ParserOutput();
  }
  37
  38         # First pass--just handle <nowiki> sections, pass the rest off
  39         # to doWikiPass2() which does all the real work.
  40         #
  41         # Returns a ParserOutput
  42         #
  # Parameters:
  #   $text        wikitext to convert
  #   $title       title of the page being parsed; kept by reference in
  #                $this->mTitle
  #   $options     parser options object; kept in $this->mOptions
  #   $linestart   handed through unchanged to doWikiPass2()
  #   $clearState  when true (the default), clearState() runs first
  #
  # <nowiki>, <math> (only when the options enable TeX) and <pre> sections
  # are cut out and swapped for placeholders before doWikiPass2() sees the
  # text, and are put back afterwards.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
          $fname = "Parser::parse";
          wfProfileIn( $fname );
          $unique  = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
          $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
          $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";

          if ( $clearState ) {
                  $this->clearState();
          }

          $this->mOptions = $options;
          $this->mTitle =& $title;

          # Replace any instances of the placeholders
          $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
          $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
          $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );

          # <nowiki>: content is kept as written, only HTML tags are escaped
          $nwlist = array();
          $stripped = $this->stripTagSections( $text, "nowiki", $unique, $raw );
          foreach ( $raw as $i => $content ) {
                  $nwlist[$i] = wfEscapeHTMLTagsOnly( $content );
          }

          # <math>: rendered through renderMath(), but only when TeX is enabled
          $mathlist = array();
          if ( $this->mOptions->getUseTeX() ) {
                  $stripped2 = $this->stripTagSections( $stripped, "math", $unique2, $raw );
                  foreach ( $raw as $i => $content ) {
                          $mathlist[$i] = renderMath( $content );
                  }
          } else {
                  $stripped2 = $stripped;
          }

          # <pre>: content is escaped like <nowiki> and wrapped in <pre> again
          $prelist = array();
          $stripped3 = $this->stripTagSections( $stripped2, "pre", $unique3, $raw );
          foreach ( $raw as $i => $content ) {
                  $prelist[$i] = "<pre>". wfEscapeHTMLTagsOnly( $content ). "</pre>\n";
          }

          $text = $this->doWikiPass2( $stripped3, $linestart );

          # Put the sections back in the order pre, math, nowiki: <nowiki> was
          # cut out first, so a <pre> or <math> section may itself contain
          # nowiki placeholders that are only resolved by the last loop.
          # Plain str_replace() is used because both the placeholder and the
          # replacement are literal text; no regex escaping is needed.
          # Each placeholder ends in "s", so "...1s" cannot match inside
          # "...10s"; the descending order is kept from the original code.
          for ( $i = count( $prelist ); $i >= 1; --$i ) {
                  $text = str_replace( "{$unique3}{$i}s", $prelist[$i], $text );
          }

          for ( $i = count( $mathlist ); $i >= 1; --$i ) {
                  $text = str_replace( "{$unique2}{$i}s", $mathlist[$i], $text );
          }

          for ( $i = count( $nwlist ); $i >= 1; --$i ) {
                  $text = str_replace( "{$unique}{$i}s", $nwlist[$i], $text );
          }

          $this->mOutput->setText( $text );
          wfProfileOut( $fname );
          return $this->mOutput;
  }

  # Helper for parse(): cut every <$tag>...</$tag> section out of $text.
  #
  #   $text      text to scan
  #   $tag       tag name, matched case-insensitively, whitespace allowed
  #              inside the angle brackets
  #   $marker    placeholder prefix; section N is replaced by
  #              $marker . N . "s"
  #   $sections  (out) raw content of each section, indexed from 1
  #
  # Returns $text with the sections replaced by their placeholders.
  # An opening tag without a closing tag takes the rest of the text as its
  # content. An opening tag with nothing after it is dropped.
  function stripTagSections( $text, $tag, $marker, &$sections )
  {
          $sections = array();
          $count = 0;
          $out = "";
          $open = "/<\\s*{$tag}\\s*>/i";
          $close = "/<\\/\\s*{$tag}\\s*>/i";

          while ( "" != $text ) {
                  $p = preg_split( $open, $text, 2 );
                  $out .= $p[0];
                  if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                          break;
                  }
                  $q = preg_split( $close, $p[1], 2 );
                  ++$count;
                  $sections[$count] = $q[0];
                  $out .= $marker . $count . "s";
                  # $q[1] does not exist when the closing tag is missing
                  $text = isset( $q[1] ) ? $q[1] : "";
          }
          return $out;
  }
 140
 # Build the HTML block that doWikiPass2() appends to a category page:
 # a "subcategories" list and a list of the other pages found by the
 # query below, each as comma-separated skin links.
 #
 # Returns "" when category magic is switched off in the options, or when
 # the part of the title before the first ":" is not the localized
 # category word (ucfirst of the "category" message).
 function categoryMagic ()
 {
         global $wgLang , $wgUser ;
         # Always hand back a string: the caller concatenates the result
         if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
         $id = $this->mTitle->getArticleID() ;
         $cat = ucfirst ( wfMsg ( "category" ) ) ;
         $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
         if ( $cat != $ti[0] ) return "" ;
         # Part after the first ":"; empty when the title has no colon at all
         $catname = isset ( $ti[1] ) ? $ti[1] : "" ;
         # "clear" is the BR attribute that ends floating; "break" is not one
         $r = "<br clear=all>\n" ;

         $articles = array() ;
         $children = array() ;

         $sk =& $wgUser->getSkin() ;

         # Hard-wired to the brokenlinks query; the links-table variant is
         # kept but currently unreachable.
         $doesexist = false ;
         if ( $doesexist ) {
                 $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
         } else {
                 $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
         }

         $res = wfQuery ( $sql, DB_READ ) ;
         while ( $x = wfFetchObject ( $res ) )
         {
                 if ( $doesexist ) {
                         $t = $x->l_from ;
                 } else {
                         # Rebuild "Namespace:Title" from the two columns
                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                         if ( $t != "" ) $t .= ":" ;
                         $t .= $x->cur_title ;
                 }

                 # Pages that are themselves categories go into the
                 # subcategory list, labelled without the prefix
                 $y = explode ( ":" , $t , 2 ) ;
                 if ( count ( $y ) == 2 && $y[0] == $cat ) {
                         array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
                 } else {
                         array_push ( $articles , $sk->makeLink ( $t ) ) ;
                 }
         }
         wfFreeResult ( $res ) ;

         # Children
         if ( count ( $children ) > 0 )
         {
                 asort ( $children ) ;
                 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                 $r .= implode ( ", " , $children ) ;
         }

         # Articles
         if ( count ( $articles ) > 0 )
         {
                 asort ( $articles ) ;
                 $h =  wfMsg( "category_header", $catname );
                 $r .= "<h2>{$h}</h2>\n" ;
                 $r .= implode ( ", " , $articles ) ;
         }

         return $r ;
 }
 210
 # Return the list of attribute names that fixTagAttributes() lets through.
 # Nothing that allows scripting is listed. "width" appears twice; the
 # list is returned exactly as it always has been.
 function getHTMLattrs ()
 {
         return array(
                 # General
                 "title", "align", "lang", "dir", "width", "height", "bgcolor",
                 # BR
                 "clear",
                 # HR
                 "noshade",
                 # BLOCKQUOTE, Q
                 "cite",
                 # FONT
                 "size", "face", "color",
                 # For various lists, mostly deprecated but safe
                 "type", "start", "value", "compact",
                 # Tables
                 "summary", "width", "border", "frame", "rules",
                 "cellspacing", "cellpadding", "valign", "char",
                 "charoff", "colgroup", "col", "span", "abbr", "axis",
                 "headers", "scope", "rowspan", "colspan",
                 # For CSS
                 "id", "class", "name", "style"
         );
 }
 227
 # Sanitise the attribute text of a tag.
 #
 #   $t  raw attribute text, e.g. ' border=1 onclick="x()"'
 #
 # Returns the trimmed text with every attribute whose name is not in
 # getHTMLattrs() removed; whitespace around "=" is dropped. Returns ""
 # when $t is blank, or when a style attribute looks dangerous.
 function fixTagAttributes ( $t )
 {
         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

         # Strip non-approved attributes from the tag.
         # This was a preg_replace() with the /e modifier, which evaluates
         # the replacement as PHP code with the user-supplied value inside a
         # double-quoted string (so "{${...}}" in a value would be executed)
         # and which no longer exists in PHP 7. A callback does the same
         # filtering without evaluating anything.
         $t = preg_replace_callback(
                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
                 array( $this, "filterTagAttribute" ),
                 $t );

         # Strip javascript "expression" from stylesheets. Brute force approach:
         # If anything offensive is found, all attributes of the HTML tag are dropped
         if( preg_match(
                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                 wfMungeToUtf8( $t ) ) )
         {
                 $t="";
         }

         return trim ( $t ) ;
 }

 # preg_replace_callback() handler for fixTagAttributes().
 #
 #   $m[1]  attribute name
 #   $m[3]  attribute value including any double quotes; absent when the
 #          attribute has no "=value" part
 #
 # Returns name or name=value for whitelisted names, "" for all others.
 function filterTagAttribute ( $m )
 {
         if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() , true ) ) {
                 return "" ;
         }
         if ( isset ( $m[3] ) && $m[3] !== "" ) {
                 return $m[1] . "=" . $m[3] ;
         }
         return $m[1] ;
 }
 250
 # Convert wiki table markup into HTML table tags, one input line at a time.
 #
 # Line prefixes (everything except "{|" is ignored outside a table):
 #   {|  open a table; the rest of the line is its attribute list
 #   |}  close the innermost open table
 #   |-  start a new row (extra dashes allowed); the rest is row attributes
 #   |+  caption
 #   |   data cell(s); "||" separates several cells on one line
 #   !   header cell(s); "!!" or "||" separates several cells
 # Inside a cell, text before the first single "|" is used as attributes.
 # All attribute text goes through fixTagAttributes(). Tables still open
 # at the end of the text are closed automatically.
 #
 #   $t  text, lines separated by "\n"
 # Returns the converted text.
 function doTableStuff ( $t )
 {
         $t = explode ( "\n" , $t ) ;
         # Four parallel stacks with one entry per currently open table
         $td = array () ; # Is currently a td tag open?
         $ltd = array () ; # Was it TD or TH?
         $tr = array () ; # Is currently a tr tag open?
         $ltr = array () ; # tr attributes
         foreach ( $t AS $k => $x )
         {
                 $x = rtrim ( $x ) ;
                 $fc = substr ( $x , 0 , 1 ) ;
                 $fc2 = substr ( $x , 0 , 2 ) ;
                 if ( "{|" == $fc2 )
                 {
                         # Attributes start right after the two-character marker;
                         # skipping a third character lost the first letter of
                         # "{|border=1". Leading blanks are trimmed by
                         # fixTagAttributes() anyway.
                         $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $tr , false ) ;
                         array_push ( $ltr , "" ) ;
                 }
                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
                 else if ( "|}" == $fc2 )
                 {
                         $z = "</table>\n" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                 }
                 else if ( "|-" == $fc2 ) # Allows for |---------------
                 {
                         # Drop the "|" and every dash after it; what is left
                         # is the attribute text for the next row
                         $x = ltrim ( substr ( $x , 1 ) , "-" ) ;
                         $z = "" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                         array_push ( $tr , false ) ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         # The <tr> itself is only written with the first cell
                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
                 }
                 else if ( "|" == $fc || "!" == $fc )
                 {
                         if ( "|+" == $fc2 ) # Caption
                         {
                                 $fc = "+" ;
                                 $x = substr ( $x , 1 ) ;
                         }
                         $after = substr ( $x , 1 ) ;
                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
                         $after = explode ( "||" , $after ) ;
                         $t[$k] = "" ;
                         foreach ( $after AS $theline )
                         {
                                 $z = "" ;
                                 if ( $fc != "+" )
                                 {
                                         # Open the row if this is its first cell
                                         $tra = array_pop ( $ltr ) ;
                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                         array_push ( $tr , true ) ;
                                         array_push ( $ltr , "" ) ;
                                 }

                                 # Close the previous cell, if any
                                 $l = array_pop ( $ltd ) ;
                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                                 if ( $fc == "|" ) $l = "TD" ;
                                 else if ( $fc == "!" ) $l = "TH" ;
                                 else if ( $fc == "+" ) $l = "CAPTION" ;
                                 else $l = "" ;
                                 array_push ( $ltd , $l ) ;
                                 $y = explode ( "|" , $theline , 2 ) ;
                                 if ( count ( $y ) == 1 ) $cell = "{$z}<{$l}>{$y[0]}" ;
                                 else $cell = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                                 $t[$k] .= $cell ;
                                 array_push ( $td , true ) ;
                         }
                 }
         }

         # Closing open td, tr && table
         while ( count ( $td ) > 0 )
         {
                 # Close whatever cell element is really open (td, th or
                 # caption), not always a td
                 $l = strtolower ( array_pop ( $ltd ) ) ;
                 if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
                 array_pop ( $ltr ) ;
                 $t[] = "</table>" ;
         }

         $t = implode ( "\n" , $t ) ;
         return $t ;
 }
 350
 351         # Well, OK, it's actually about 14 passes.  But since all the
 352         # hard lifting is done inside PHP's regex code, it probably
 353         # wouldn't speed things up much to add a real parser.
 354         #
 # Second pass: run the wikitext-to-HTML steps below in this fixed order
 # and return the result.
 #
 #   $text       text with the <nowiki>/<math>/<pre> sections already
 #               replaced by placeholders (see parse())
 #   $linestart  passed on to doBlockLevels()
 function doWikiPass2( $text, $linestart )
 {
         # Profiled under this class's name, like parse()
         $fname = "Parser::doWikiPass2";
         wfProfileIn( $fname );

         $text = $this->removeHTMLtags( $text );
         $text = $this->replaceVariables( $text );

         # Only <HR> is normalised here; "----" rules are produced by
         # replaceInternalLinks()
         $text = str_replace ( "<HR>", "<hr>", $text );

         $text = $this->doHeadings( $text );
         $text = $this->doBlockLevels( $text, $linestart );

         if($this->mOptions->getUseDynamicDates()) {
                 global $wgDateFormatter;
                 $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
         }

         # External links have to be done before internal ones
         $text = $this->replaceExternalLinks( $text );
         $text = $this->replaceInternalLinks ( $text );
         $text = $this->doTableStuff ( $text ) ;

         $text = $this->magicISBN( $text );
         $text = $this->magicRFC( $text );
         $text = $this->formatHeadings( $text );

         $sk =& $this->mOptions->getSkin();
         $text = $sk->transformContent( $text );
         # Appends the category listing, or "" for non-category pages
         $text .= $this->categoryMagic () ;

         wfProfileOut( $fname );
         return $text;
 }
 389
 390
 # Turn "= x =" up to "====== x ======" at the start of a line into
 # <h1>x</h1> up to <h6>x</h6>. The closing marker must be followed by
 # whitespace or the end of a line, and the heading text may not contain
 # "=". Levels are tried from 6 down to 1.
 /* private */ function doHeadings( $text )
 {
         $level = 6;
         while ( $level >= 1 ) {
                 $marks = str_repeat( "=", $level );
                 $pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
                 $replacement = "<h{$level}>\\1</h{$level}>\\2";
                 $text = preg_replace( $pattern, $replacement, $text );
                 --$level;
         }
         return $text;
 }
 400
 401         # Note: we have to do external links before the internal ones,
 402         # and otherwise take great care in the order of things here, so
 403         # that we don't end up interpreting some URLs twice.
 404
 # Run subReplaceExternalLinks() once per supported URL scheme, in the
 # order listed below. Only http and https links are autonumbered.
 /* private */ function replaceExternalLinks( $text )
 {
         # Profiled under this class's name, like parse()
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         # scheme => autonumber flag handed to subReplaceExternalLinks()
         $protocols = array(
                 "http" => true,
                 "https" => true,
                 "ftp" => false,
                 "irc" => false,
                 "gopher" => false,
                 "news" => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 419
 # Turn the external URLs of one scheme into HTML links.
 #
 #   $s           text to process
 #   $protocol    URL scheme without the colon, e.g. "http"
 #   $autonumber  when true, "[url]" links without a label are shown as
 #                [1], [2], ... using $this->mAutonumber, and (if the
 #                options allow external images) image URLs are handed to
 #                the skin's makeImage()
 #
 # Bare URLs are converted first; while that happens the scheme is swapped
 # for a placeholder, which is restored right afterwards. The bracketed
 # forms "[url]" and "[url label]" are handled after that.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
         $placeholder = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
         # Characters allowed anywhere in a URL
         $urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

         # Separators that belong to a URL only when another URL character
         # follows: in "go to www.foo.com, where .." the comma is not part
         # of the URL, in "www.foo.com/123,2342,32.htm" the commas are.
         $trailSep = ",;\.:";
         # Characters allowed in the file name of an image URL
         $fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
         $imageExts = "gif|png|jpg|jpeg";

         # PLEASE NOTE: the curly braces below are PHP string interpolation,
         # not part of the regular expressions.
         $imagePattern = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$trailSep}]+)\\/([{$fileChars}]+)\\.((?i){$imageExts})([^{$urlChars}]|$)/";
         $barePattern = "/(^|[^\\[])({$protocol}:)(([{$urlChars}]|[{$trailSep}][{$urlChars}])+)([^{$urlChars}{$trailSep}]|[{$trailSep}]|$)/";
         $sk =& $this->mOptions->getSkin();

         # Use img tags only for HTTP urls
         if ( $autonumber and $this->mOptions->getAllowExternalImages() ) {
                 $imageHtml = $sk->makeImage( "{$placeholder}:\\3/\\4.\\5", "\\4.\\5" );
                 $s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
         }

         # Bare URLs: the link text is the escaped URL itself
         $bareUrl = "{$placeholder}:\\3";
         $bareAttribs = $sk->getExternalLinkAttributes( $bareUrl, wfEscapeHTML( $bareUrl ) );
         $bareLabel = wfEscapeHTML( $bareUrl );
         $s = preg_replace( $barePattern,
           "\\1<a href=\"{$bareUrl}\"{$bareAttribs}>{$bareLabel}</a>\\5", $s );
         $s = str_replace( $placeholder, $protocol, $s );

         # Bracketed links: split at every "[protocol:". The leading blank
         # guarantees a non-empty first piece and is removed again.
         $chunks = explode( "[{$protocol}:", " " . $s );
         $s = substr( array_shift( $chunks ), 1 );

         $plainBracket = "/^([{$urlChars}{$trailSep}]+)](.*)\$/sD";
         $labelledBracket = "/^([{$urlChars}{$trailSep}]+)\\s+([^\\]]+)](.*)\$/sD";

         foreach ( $chunks as $chunk ) {
                 if ( preg_match( $plainBracket, $chunk, $hit ) ) {
                         # [url] - numbered, or labelled with the URL itself
                         $link = "{$protocol}:{$hit[1]}";
                         $trail = $hit[2];
                         if ( $autonumber ) {
                                 $label = "[" . ++$this->mAutonumber . "]";
                         } else {
                                 $label = wfEscapeHTML( $link );
                         }
                 } else if ( preg_match( $labelledBracket, $chunk, $hit ) ) {
                         # [url label]
                         $link = "{$protocol}:{$hit[1]}";
                         $label = $hit[2];
                         $trail = $hit[3];
                 } else {
                         # Not a link after all: put the text back untouched
                         $s .= "[{$protocol}:" . $chunk;
                         continue;
                 }

                 # Printable pages also show the target after the link
                 $paren = $this->mOptions->getPrintable()
                         ? " (<i>" . htmlspecialchars ( $link ) . "</i>)"
                         : "";
                 $attribs = $sk->getExternalLinkAttributes( $link, $label );
                 $s .= "<a href='{$link}'{$attribs}>{$label}</a>{$paren}{$trail}";
         }
         return $s;
 }
 482
 # Handle a ''' token: open <strong> if it is closed, otherwise close it.
 #
 #   $state  array with keys "em" and "strong"; each holds FALSE, or the
 #           "pos" of the token that opened the element. Updated in place.
 #   $token  the current token; only $token["pos"] is read
 #
 # Returns the HTML to emit.
 /* private */ function handle3Quotes( &$state, $token )
 {
         if ( !$state["strong"] ) {
                 $state["strong"] = $token["pos"];
                 return "<strong>";
         }

         # <em> was opened inside the <strong> that is being closed, as in
         # ''' lala ''lala ''' - close both and reopen <em>
         $emInside = ( $state["em"] && $state["em"] > $state["strong"] );
         $state["strong"] = FALSE;
         return $emInside ? "</em></strong><em>" : "</strong>";
 }
 500
 # Handle a '' token: open <em> if it is closed, otherwise close it.
 #
 #   $state  array with keys "em" and "strong"; each holds FALSE, or the
 #           "pos" of the token that opened the element. Updated in place.
 #   $token  the current token; only $token["pos"] is read
 #
 # Returns the HTML to emit.
 /* private */ function handle2Quotes( &$state, $token )
 {
         if ( !$state["em"] ) {
                 $state["em"] = $token["pos"];
                 return "<em>";
         }

         # <strong> was opened inside the <em> that is being closed, as in
         # ''lala'''lala'' ....''' - close both and reopen <strong>
         $strongInside = ( $state["strong"] && $state["strong"] > $state["em"] );
         $state["em"] = FALSE;
         return $strongInside ? "</strong></em><strong>" : "</em>";
 }
 518
 # Handle a ''''' token, which toggles <strong> and <em> together.
 #
 #   $state  array with keys "em" and "strong"; each holds FALSE, or the
 #           "pos" of the token that opened the element. Updated in place.
 #   $token  the current token; only $token["pos"] is read
 #
 # Returns the HTML to emit:
 #   both open    -> close both, the one opened later first
 #   only em      -> close em, open strong
 #   only strong  -> close strong, open em
 #   neither      -> open strong, then em
 /* private */ function handle5Quotes( &$state, $token )
 {
         # $s is assigned, not appended to: it does not exist before this point
         if ( $state["em"] && $state["strong"] ) {
                 if ( $state["em"] < $state["strong"] ) {
                         $s = "</strong></em>";
                 } else {
                         $s = "</em></strong>";
                 }
                 $state["strong"] = $state["em"] = FALSE;
         } elseif ( $state["em"] ) {
                 $s = "</em><strong>";
                 $state["em"] = FALSE;
                 $state["strong"] = $token["pos"];
         } elseif ( $state["strong"] ) {
                 $s = "</strong><em>";
                 $state["strong"] = FALSE;
                 $state["em"] = $token["pos"];
         } else { # not $em and not $strong
                 $s = "<strong><em>";
                 $state["strong"] = $state["em"] = $token["pos"];
         }
         return $s;
 }
 542
 543         /* private */ function replaceInternalLinks( $str )
 544         {
 545                 global $wgLang; # for language specific parser hook
 546
 547                 $tokenizer=Tokenizer::newFromString( $str );
 548                 $tokenStack = array();
 549
 550                 $s="";
 551                 $state["em"]      = FALSE;
 552                 $state["strong"]  = FALSE;
 553                 $tagIsOpen = FALSE;
 554
 555                 # The tokenizer splits the text into tokens and returns them one by one.
 556                 # Every call to the tokenizer returns a new token.
 557                 while ( $token = $tokenizer->nextToken() )
 558                 {
 559                         switch ( $token["type"] )
 560                         {
 561                                 case "text":
 562                                         # simple text with no further markup
 563                                         $txt = $token["text"];
 564                                         break;
 565                                 case "[[":
 566                                         # link opening tag.
 567                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 568                                         $tagIsOpen = TRUE;
 569                                         array_push( $tokenStack, $token );
 570                                         $txt="";
 571                                         break;
 572                                 case "]]":
 573                                         # link close tag.
 574                                         # get text from stack, glue it together, and call the code to handle a
 575                                         # link
 576                                         if ( count( $tokenStack ) == 0 )
 577                                         {
 578                                                 # stack empty. Found a ]] without an opening [[
 579                                                 $txt = "]]";
 580                                         } else {
 581                                                 $linkText = "";
 582                                                 $lastToken = array_pop( $tokenStack );
 583                                                 while ( $lastToken["type"] != "[[" )
 584                                                 {
 585                                                         $linkText = $lastToken["text"] . $linkText;
 586                                                         $lastToken = array_pop( $tokenStack );
 587                                                 }
 588                                                 $txt = $linkText ."]]";
 589                                                 $prefix = $lastToken["text"];
 590                                                 $nextToken = $tokenizer->previewToken();
 591                                                 if ( $nextToken["type"] == "text" )
 592                                                 {
 593                                                         # Preview just looks at it. Now we have to fetch it.
 594                                                         $nextToken = $tokenizer->nextToken();
 595                                                         $txt .= $nextToken["text"];
 596                                                 }
 597                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 598                                         }
 599                                         $tagIsOpen = (count( $tokenStack ) != 0);
 600                                         break;
 601                                 case "----":
 602                                         $txt = "\n<hr>\n";
 603                                         break;
 604                                 case "'''":
 605                                         # This and the three next ones handle quotes
 606                                         $txt = $this->handle3Quotes( $state, $token );
 607                                         break;
 608                                 case "''":
 609                                         $txt = $this->handle2Quotes( $state, $token );
 610                                         break;
 611                                 case "'''''":
 612                                         $txt = $this->handle5Quotes( $state, $token );
 613                                         break;
 614                                 case "":
 615                                         # empty token
 616                                         $txt="";
 617                                         break;
 618                                 default:
 619                                         # Call language specific Hook.
 620                                         $txt = $wgLang->processToken( $token, $tokenStack );
 621                                         if ( NULL == $txt ) {
 622                                                 # An unknown token. Highlight.
 623                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 624                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 625                                         }
 626                                         break;
 627                         }
 628                         # If we're parsing the interior of a link, don't append the interior to $s,
 629                         # but push it to the stack so it can be processed when a ]] token is found.
 630                         if ( $tagIsOpen  && $txt != "" ) {
 631                                 $token["type"] = "text";
 632                                 $token["text"] = $txt;
 633                                 array_push( $tokenStack, $token );
 634                         } else {
 635                                 $s .= $txt;
 636                         }
 637                 } #end while
 638                 if ( count( $tokenStack ) != 0 )
 639                 {
 640                         # still objects on stack. opened [[ tag without closing ]] tag.
 641                         $txt = "";
 642                         while ( $lastToken = array_pop( $tokenStack ) )
 643                         {
 644                                 if ( $lastToken["type"] == "text" )
 645                                 {
 646                                         $txt = $lastToken["text"] . $txt;
 647                                 } else {
 648                                         $txt = $lastToken["type"] . $txt;
 649                                 }
 650                         }
 651                         $s .= $txt;
 652                 }
 653                 return $s;
 654         }
 655
 656         /* private */ function handleInternalLink( $line, $prefix )
 657         {
 658                 global $wgLang, $wgLinkCache;
 659                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 660                 static $fname = "OutputPage::replaceInternalLinks" ;
 661                 wfProfileIn( $fname );
 662
 663                 wfProfileIn( "$fname-setup" );
 664                 static $tc = FALSE;
 665                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 666                 $sk =& $this->mOptions->getSkin();
 667
 668                 # Match a link having the form [[namespace:link|alternate]]trail
 669                 static $e1 = FALSE;
 670                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 671                 # Match the end of a line for a word that's not followed by whitespace,
 672                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 673                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 674                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 675                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 676
 677
 678                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 679                 static $image = FALSE;
 680                 static $special = FALSE;
 681                 static $media = FALSE;
 682                 static $category = FALSE;
 683                 if ( !$image ) { $image = Namespace::getImage(); }
 684                 if ( !$special ) { $special = Namespace::getSpecial(); }
 685                 if ( !$media ) { $media = Namespace::getMedia(); }
 686                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 687
 688                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 689
 690                 wfProfileOut( "$fname-setup" );
 691
 692                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 693                         $text = $m[2];
 694                         $trail = $m[3];
 695                 } else { # Invalid form; output directly
 696                         $s .= $prefix . "[[" . $line ;
 697                         return $s;
 698                 }
 699
 700                 /* Valid link forms:
 701                 Foobar -- normal
 702                 :Foobar -- override special treatment of prefix (images, language links)
 703                 /Foobar -- convert to CurrentPage/Foobar
 704                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 705                 */
 706                 $c = substr($m[1],0,1);
 707                 $noforce = ($c != ":");
 708                 if( $c == "/" ) { # subpage
 709                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 710                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 711                                 $noslash=$m[1];
 712                         } else {
 713                                 $noslash=substr($m[1],1);
 714                         }
 715                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 716                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 717                                 if( "" == $text ) {
 718                                         $text= $m[1];
 719                                 } # this might be changed for ugliness reasons
 720                         } else {
 721                                 $link = $noslash; # no subpage allowed, use standard link
 722                         }
 723                 } elseif( $noforce ) { # no subpage
 724                         $link = $m[1];
 725                 } else {
 726                         $link = substr( $m[1], 1 );
 727                 }
 728                 if( "" == $text )
 729                         $text = $link;
 730
 731                 $nt = Title::newFromText( $link );
 732                 if( !$nt ) {
 733                         $s .= $prefix . "[[" . $line;
 734                         return $s;
 735                 }
 736                 $ns = $nt->getNamespace();
 737                 $iw = $nt->getInterWiki();
 738                 if( $noforce ) {
 739                         if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 740                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 741                                 $s .= $prefix . $trail;
 742                                 return $s;
 743                         }
 744                         if( $ns == $image ) {
 745                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 746                                 $wgLinkCache->addImageLinkObj( $nt );
 747                                 return $s;
 748                         }
 749                 }
 750                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 751                     ( strpos( $link, "#" ) == FALSE ) ) {
 752                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 753                         return $s;
 754                 }
 755                 if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
 756                         $t = explode ( ":" , $nt->getText() ) ;
 757                         array_shift ( $t ) ;
 758                         $t = implode ( ":" , $t ) ;
 759                         $t = $wgLang->ucFirst ( $t ) ;
 760 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 761                         $nnt = Title::newFromText ( $category.":".$t ) ;
 762                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 763                         $this->mCategoryLinks[] = $t ;
 764                         $s .= $prefix . $trail ;
 765                         return $s ;
 766                 }
 767                 if( $ns == $media ) {
 768                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 769                         $wgLinkCache->addImageLinkObj( $nt );
 770                         return $s;
 771                 } elseif( $ns == $special ) {
 772                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 773                         return $s;
 774                 }
 775                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 776
 777                 wfProfileOut( $fname );
 778                 return $s;
 779         }
 780
 781         # Some functions here used by doBlockLevels()
 782         #
 783         /* private */ function closeParagraph()
 784         {
 785                 $result = "";
 786                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 787                   0 != strcmp( "", $this->mLastSection ) ) {
 788                         $result = "</" . $this->mLastSection  . ">";
 789                 }
 790                 $this->mLastSection = "";
 791                 return $result."\n";
 792         }
 793         # getCommon() returns the length of the longest common substring
 794         # of both arguments, starting at the beginning of both.
 795         #
 796         /* private */ function getCommon( $st1, $st2 )
 797         {
 798                 $fl = strlen( $st1 );
 799                 $shorter = strlen( $st2 );
 800                 if ( $fl < $shorter ) { $shorter = $fl; }
 801
 802                 for ( $i = 0; $i < $shorter; ++$i ) {
 803                         if ( $st1{$i} != $st2{$i} ) { break; }
 804                 }
 805                 return $i;
 806         }
 807         # These next three functions open, continue, and close the list
 808         # element appropriate to the prefix character passed into them.
 809         #
 810         /* private */ function openList( $char )
 811     {
 812                 $result = $this->closeParagraph();
 813
 814                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 815                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 816                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 817                 else if ( ";" == $char ) {
 818                         $result .= "<dl><dt>";
 819                         $this->mDTopen = true;
 820                 }
 821                 else { $result = "<!-- ERR 1 -->"; }
 822
 823                 return $result;
 824         }
 825
 826         /* private */ function nextItem( $char )
 827         {
 828                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 829                 else if ( ":" == $char || ";" == $char ) {
 830                         $close = "</dd>";
 831                         if ( $this->mDTopen ) { $close = "</dt>"; }
 832                         if ( ";" == $char ) {
 833                                 $this->mDTopen = true;
 834                                 return $close . "<dt>";
 835                         } else {
 836                                 $this->mDTopen = false;
 837                                 return $close . "<dd>";
 838                         }
 839                 }
 840                 return "<!-- ERR 2 -->";
 841         }
 842
 843         /* private */function closeList( $char )
 844         {
 845                 if ( "*" == $char ) { $text = "</li></ul>"; }
 846                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 847                 else if ( ":" == $char ) {
 848                         if ( $this->mDTopen ) {
 849                                 $this->mDTopen = false;
 850                                 $text = "</dt></dl>";
 851                         } else {
 852                                 $text = "</dd></dl>";
 853                         }
 854                 }
 855                 else {  return "<!-- ERR 3 -->"; }
 856                 return $text."\n";
 857         }
 858
 859         /* private */ function doBlockLevels( $text, $linestart )
 860         {
 861                 $fname = "OutputPage::doBlockLevels";
 862                 wfProfileIn( $fname );
 863                 # Parsing through the text line by line.  The main thing
 864                 # happening here is handling of block-level elements p, pre,
 865                 # and making lists from lines starting with * # : etc.
 866                 #
 867                 $a = explode( "\n", $text );
 868                 $text = $lastPref = "";
 869                 $this->mDTopen = $inBlockElem = false;
 870
 871                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 872                 foreach ( $a as $t ) {
 873                         if ( "" != $text ) { $text .= "\n"; }
 874
 875                         $oLine = $t;
 876                         $opl = strlen( $lastPref );
 877                         $npl = strspn( $t, "*#:;" );
 878                         $pref = substr( $t, 0, $npl );
 879                         $pref2 = str_replace( ";", ":", $pref );
 880                         $t = substr( $t, $npl );
 881
 882                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 883                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 884
 885                                 if ( ";" == substr( $pref, -1 ) ) {
 886                                         $cpos = strpos( $t, ":" );
 887                                         if ( ! ( false === $cpos ) ) {
 888                                                 $term = substr( $t, 0, $cpos );
 889                                                 $text .= $term . $this->nextItem( ":" );
 890                                                 $t = substr( $t, $cpos + 1 );
 891                                         }
 892                                 }
 893                         } else if (0 != $npl || 0 != $opl) {
 894                                 $cpl = $this->getCommon( $pref, $lastPref );
 895
 896                                 while ( $cpl < $opl ) {
 897                                         $text .= $this->closeList( $lastPref{$opl-1} );
 898                                         --$opl;
 899                                 }
 900                                 if ( $npl <= $cpl && $cpl > 0 ) {
 901                                         $text .= $this->nextItem( $pref{$cpl-1} );
 902                                 }
 903                                 while ( $npl > $cpl ) {
 904                                         $char = substr( $pref, $cpl, 1 );
 905                                         $text .= $this->openList( $char );
 906
 907                                         if ( ";" == $char ) {
 908                                                 $cpos = strpos( $t, ":" );
 909                                                 if ( ! ( false === $cpos ) ) {
 910                                                         $term = substr( $t, 0, $cpos );
 911                                                         $text .= $term . $this->nextItem( ":" );
 912                                                         $t = substr( $t, $cpos + 1 );
 913                                                 }
 914                                         }
 915                                         ++$cpl;
 916                                 }
 917                                 $lastPref = $pref2;
 918                         }
 919                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 920                                 if ( preg_match(
 921                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 922                                         $text .= $this->closeParagraph();
 923                                         $inBlockElem = true;
 924                                 }
 925                                 if ( ! $inBlockElem ) {
 926                                         if ( " " == $t{0} ) {
 927                                                 $newSection = "pre";
 928                                                 # $t = wfEscapeHTML( $t );
 929                                         }
 930                                         else { $newSection = "p"; }
 931
 932                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 933                                                 $text .= $this->closeParagraph();
 934                                                 $text .= "<" . $newSection . ">";
 935                                         } else if ( 0 != strcmp( $this->mLastSection,
 936                                           $newSection ) ) {
 937                                                 $text .= $this->closeParagraph();
 938                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 939                                                         $text .= "<" . $newSection . ">";
 940                                                 }
 941                                         }
 942                                         $this->mLastSection = $newSection;
 943                                 }
 944                                 if ( $inBlockElem &&
 945                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 946                                         $inBlockElem = false;
 947                                 }
 948                         }
 949                         $text .= $t;
 950                 }
 951                 while ( $npl ) {
 952                         $text .= $this->closeList( $pref2{$npl-1} );
 953                         --$npl;
 954                 }
 955                 if ( "" != $this->mLastSection ) {
 956                         if ( "p" != $this->mLastSection ) {
 957                                 $text .= "</" . $this->mLastSection . ">";
 958                         }
 959                         $this->mLastSection = "";
 960                 }
 961                 wfProfileOut( $fname );
 962                 return $text;
 963         }
 964
 965         /* private */ function replaceVariables( $text )
 966         {
 967                 global $wgLang, $wgCurOut;
 968                 $fname = "OutputPage::replaceVariables";
 969                 wfProfileIn( $fname );
 970
 971                 $magic = array();
 972
 973                 # Basic variables
 974                 # See Language.php for the definition of each magic word
 975                 # As with sigs, this uses the server's local time -- ensure
 976                 # this is appropriate for your audience!
 977
 978                 $magic[MAG_CURRENTMONTH] = date( "m" );
 979                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
 980                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
 981                 $magic[MAG_CURRENTDAY] = date("j");
 982                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
 983                 $magic[MAG_CURRENTYEAR] = date( "Y" );
 984                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
 985
 986                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
 987
 988                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
 989                 if ( $mw->match( $text ) ) {
 990                         $v = wfNumberOfArticles();
 991                         $text = $mw->replace( $v, $text );
 992                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
 993                 }
 994
 995                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
 996                 # The callbacks are at the bottom of this file
 997                 $wgCurOut = $this;
 998                 $mw =& MagicWord::get( MAG_MSG );
 999                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1000                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1001
1002                 $mw =& MagicWord::get( MAG_MSGNW );
1003                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1004                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1005
1006                 wfProfileOut( $fname );
1007                 return $text;
1008         }
1009
1010         # Cleans up HTML, removes dangerous tags and attributes
1011         /* private */ function removeHTMLtags( $text )
1012         {
1013                 $fname = "OutputPage::removeHTMLtags";
1014                 wfProfileIn( $fname );
1015                 $htmlpairs = array( # Tags that must be closed
1016                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1017                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1018                         "strike", "strong", "tt", "var", "div", "center",
1019                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1020                         "ruby", "rt" , "rb" , "rp"
1021                 );
1022                 $htmlsingle = array(
1023                         "br", "p", "hr", "li", "dt", "dd"
1024                 );
1025                 $htmlnest = array( # Tags that can be nested--??
1026                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1027                         "dl", "font", "big", "small", "sub", "sup"
1028                 );
1029                 $tabletags = array( # Can only appear inside table
1030                         "td", "th", "tr"
1031                 );
1032
1033                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1034                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1035
1036                 $htmlattrs = $this->getHTMLattrs () ;
1037
1038                 # Remove HTML comments
1039                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1040
1041                 $bits = explode( "<", $text );
1042                 $text = array_shift( $bits );
1043                 $tagstack = array(); $tablestack = array();
1044
1045                 foreach ( $bits as $x ) {
1046                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1047                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1048                           $x, $regs );
1049                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1050                         error_reporting( $prev );
1051
1052                         $badtag = 0 ;
1053                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1054                                 # Check our stack
1055                                 if ( $slash ) {
1056                                         # Closing a tag...
1057                                         if ( ! in_array( $t, $htmlsingle ) &&
1058                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1059                                                 array_push( $tagstack, $ot );
1060                                                 $badtag = 1;
1061                                         } else {
1062                                                 if ( $t == "table" ) {
1063                                                         $tagstack = array_pop( $tablestack );
1064                                                 }
1065                                                 $newparams = "";
1066                                         }
1067                                 } else {
1068                                         # Keep track for later
1069                                         if ( in_array( $t, $tabletags ) &&
1070                                           ! in_array( "table", $tagstack ) ) {
1071                                                 $badtag = 1;
1072                                         } else if ( in_array( $t, $tagstack ) &&
1073                                           ! in_array ( $t , $htmlnest ) ) {
1074                                                 $badtag = 1 ;
1075                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1076                                                 if ( $t == "table" ) {
1077                                                         array_push( $tablestack, $tagstack );
1078                                                         $tagstack = array();
1079                                                 }
1080                                                 array_push( $tagstack, $t );
1081                                         }
1082                                         # Strip non-approved attributes from the tag
1083                                         $newparams = $this->fixTagAttributes($params);
1084
1085                                 }
1086                                 if ( ! $badtag ) {
1087                                         $rest = str_replace( ">", "&gt;", $rest );
1088                                         $text .= "<$slash$t $newparams$brace$rest";
1089                                         continue;
1090                                 }
1091                         }
1092                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1093                 }
1094                 # Close off any remaining tags
1095                 while ( $t = array_pop( $tagstack ) ) {
1096                         $text .= "</$t>\n";
1097                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1098                 }
1099                 wfProfileOut( $fname );
1100                 return $text;
1101         }
1102
1103 /*
1104  *
1105  * This function accomplishes several tasks:
1106  * 1) Auto-number headings if that option is enabled
1107  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1108  * 3) Add a Table of contents on the top for users who have enabled the option
1109  * 4) Auto-anchor headings
1110  *
1111  * It loops through all headlines, collects the necessary data, then splits up the
1112  * string and re-inserts the newly formatted headlines.
1113  *
1114  * */
1115         /* private */ function formatHeadings( $text )
1116         {
1117                 $nh=$this->mOptions->getNumberHeadings();
1118                 $st=$this->mOptions->getShowToc();
1119                 if(!$this->mTitle->userCanEdit()) {
1120                         $es=0;
1121                         $esr=0;
1122                 } else {
1123                         $es=$this->mOptions->getEditSection();
1124                         $esr=$this->mOptions->getEditSectionOnRightClick();
1125                 }
1126
1127                 # Inhibit editsection links if requested in the page
1128                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1129                 if ($esw->matchAndRemove( $text )) {
1130                         $es=0;
1131                 }
1132                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1133                 # do not add TOC
1134                 $mw =& MagicWord::get( MAG_NOTOC );
1135                 if ($mw->matchAndRemove( $text ))
1136                 {
1137                         $st = 0;
1138                 }
1139
1140                 # never add the TOC to the Main Page. This is an entry page that should not
1141                 # be more than 1-2 screens large anyway
1142                 if($this->mTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1143
1144                 # We need this to perform operations on the HTML
1145                 $sk =& $this->mOptions->getSkin();
1146
1147                 # Get all headlines for numbering them and adding funky stuff like [edit]
1148                 # links
1149                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1150
1151                 # headline counter
1152                 $c=0;
1153
1154                 # Ugh .. the TOC should have neat indentation levels which can be
1155                 # passed to the skin functions. These are determined here
1156                 foreach($matches[3] as $headline) {
1157                         if($level) { $prevlevel=$level;}
1158                         $level=$matches[1][$c];
1159                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1160
1161                                 $h[$level]=0; // reset when we enter a new level
1162                                 $toc.=$sk->tocIndent($level-$prevlevel);
1163                                 $toclevel+=$level-$prevlevel;
1164
1165                         }
1166                         if(($nh||$st) && $level<$prevlevel) {
1167                                 $h[$level+1]=0; // reset when we step back a level
1168                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1169                                 $toclevel-=$prevlevel-$level;
1170
1171                         }
1172                         $h[$level]++; // count number of headlines for each level
1173
1174                         if($nh||$st) {
1175                                 for($i=1;$i<=$level;$i++) {
1176                                         if($h[$i]) {
1177                                                 if($dot) {$numbering.=".";}
1178                                                 $numbering.=$h[$i];
1179                                                 $dot=1;
1180                                         }
1181                                 }
1182                         }
1183
1184                         // The canonized header is a version of the header text safe to use for links
1185
1186                         $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1187                         $tocline = trim( $canonized_headline );
1188                         $canonized_headline=str_replace('"',"",$canonized_headline);
1189                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1190                         $refer[$c]=$canonized_headline;
1191                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1192                         $refcount[$c]=$refers[$canonized_headline];
1193
1194             // Prepend the number to the heading text
1195
1196                         if($nh||$st) {
1197                                 $tocline=$numbering ." ". $tocline;
1198
1199                                 // Don't number the heading if it is the only one (looks silly)
1200                                 if($nh && count($matches[3]) > 1) {
1201                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1202                                 }
1203                         }
1204
1205                         // Create the anchor for linking from the TOC to the section
1206
1207                         $anchor=$canonized_headline;
1208                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1209                         if($st) {
1210                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1211                         }
1212                         if($es) {
1213                                 $head[$c].=$sk->editSectionLink($c+1);
1214                         }
1215
1216                         // Put it all together
1217
1218                         $head[$c].="<h".$level.$matches[2][$c]
1219                          ."<a name=\"".$anchor."\">"
1220                          .$headline
1221                          ."</a>"
1222                          ."</h".$level.">";
1223
1224                         // Add the edit section link
1225
1226                         if($esr) {
1227                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1228                         }
1229
1230                         $numbering="";
1231                         $c++;
1232                         $dot=0;
1233                 }
1234
1235                 if($st) {
1236                         $toclines=$c;
1237                         $toc.=$sk->tocUnindent($toclevel);
1238                         $toc=$sk->tocTable($toc);
1239                 }
1240
1241                 // split up and insert constructed headlines
1242
1243                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1244                 $i=0;
1245
1246                 foreach($blocks as $block) {
1247                         if(($es) && $c>0 && $i==0) {
1248                             # This is the [edit] link that appears for the top block of text when
1249                                 # section editing is enabled
1250                                 $full.=$sk->editSectionLink(0);
1251                         }
1252                         $full.=$block;
1253                         if($st && $toclines>3 && !$i) {
1254                                 # Let's add a top anchor just in case we want to link to the top of the page
1255                                 $full="<a name=\"top\"></a>".$full.$toc;
1256                         }
1257
1258                         $full.=$head[$i];
1259                         $i++;
1260                 }
1261
1262                 return $full;
1263         }
1264
# Turn each "ISBN <number>" occurrence in $text into a link to the
# Booksources special page.
#
# The number is the longest run of digits, dashes and upper-case letters
# that follows "ISBN " (extra blanks between the two are tolerated).  Dashes
# are removed from the value passed in the "isbn=" query; the link label
# keeps the number as written.  An "ISBN " that is not followed by such a
# run is copied through unchanged.
#
# @param string $text  text to scan
# @return string       text with the ISBN links inserted
/* private */ function magicISBN( $text )
{
        global $wgLang;

        # The delimiter is a literal string, so explode() does the job of the
        # POSIX-regex split(), which is no longer available in current PHP.
        $a = explode( "ISBN ", $text );
        if ( count( $a ) < 2 ) return $text;
        $text = array_shift( $a );
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        foreach ( $a as $x ) {
                # Blanks between "ISBN " and the number.  strspn() copes with an
                # empty piece (text ending right after "ISBN "), where indexing
                # the first character would fail.
                $blankLen = strspn( $x, " " );
                $blank = str_repeat( " ", $blankLen );
                $x = (string)substr( $x, $blankLen );

                # Leading run of characters allowed in an ISBN
                $isbnLen = strspn( $x, $valid );
                $isbn = (string)substr( $x, 0, $isbnLen );
                $x = (string)substr( $x, $isbnLen );

                # $isbn cannot contain blanks, so only the dashes need removing
                $num = str_replace( "-", "", $isbn );

                if ( "" == $num ) {
                        # Nothing usable after "ISBN ": restore the original text
                        $text .= "ISBN $blank$x";
                } else {
                        $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
                          "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
                        $text .= $x;
                }
        }
        return $text;
}
1297
# Currently a pass-through: the text is handed back exactly as received.
# NOTE(review): judging by the name this is presumably a stub for RFC link
# handling -- confirm before relying on it.
#
# @param string $text  text to process
# @return string       the same text, unmodified
/* private */ function magicRFC( $text )
{
        $unchanged = $text;
        return $unchanged;
}
1302
1303
1304 }
1305
# Plain container for four values: a text, a list of language links, a list
# of category links and a "contains old magic" flag.  Each value can be given
# to the constructor, read through a getter and replaced through a setter.
class ParserOutput
{
        var $mText;
        var $mLanguageLinks;
        var $mCategoryLinks;
        var $mContainsOldMagic;

        # Constructor; every argument is optional and is stored as given.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCategoryLinks = $categoryLinks;
                $this->mLanguageLinks = $languageLinks;
                $this->mText = $text;
        }

        # --- Getters: return the stored value untouched ---

        function getText()
        {
                return $this->mText;
        }

        function getLanguageLinks()
        {
                return $this->mLanguageLinks;
        }

        function getCategoryLinks()
        {
                return $this->mCategoryLinks;
        }

        function containsOldMagic()
        {
                return $this->mContainsOldMagic;
        }

        # --- Setters: delegate to wfSetVar() and return whatever it returns ---

        function setText( $text )
        {
                $result = wfSetVar( $this->mText, $text );
                return $result;
        }

        function setLanguageLinks( $ll )
        {
                $result = wfSetVar( $this->mLanguageLinks, $ll );
                return $result;
        }

        function setCategoryLinks( $cl )
        {
                $result = wfSetVar( $this->mCategoryLinks, $cl );
                return $result;
        }

        function setContainsOldMagic( $com )
        {
                $result = wfSetVar( $this->mContainsOldMagic, $com );
                return $result;
        }
}
1328
# Bundle of settings for one parse.  Five values are copied from site-wide
# globals and the rest come from a user object; see initialiseFromUser().
# Every setting has a getter and a setter.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Getters: each returns the stored setting as is
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Setters: each delegates to wfSetVar() (wfSetRef() for the skin) and
        # returns that helper's result
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a new ParserOptions and fill it in with initialiseFromUser().
        #
        # @param User $user  user whose preferences are read (taken by reference)
        # @return ParserOptions
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # No "&" at the call site: initialiseFromUser() already declares
                # its parameter by reference, and call-time pass-by-reference is
                # a fatal error from PHP 5.4 on.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every setting.
        #
        # The TeX, category magic, dynamic dates, interwiki magic and external
        # image settings are copied from the globals of the same name; the
        # skin and the remaining options are read from the user; mPrintable
        # always starts out false.
        #
        # @param User $userInput  user to read from; when it evaluates to
        #                         false a freshly constructed User is used
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }
}
1402
1403 # Regex callbacks, used in OutputPage::replaceVariables
1404
1405 # Just get rid of the dangerous stuff
1406 # Necessary because replaceVariables is called after removeHTMLtags,
1407 # and message text can come from any user
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        # $matches[1] is the message key captured by the calling regex
        $msgKey = $matches[1];

        # Fetch the message and run it through the HTML tag filter
        $msgText = wfMsg( $msgKey );
        $cleaned = $wgCurOut->removeHTMLtags( $msgText );

        # Expand internal links while the link cache is suspended
        $wgLinkCache->suspend();
        $cleaned = $wgCurOut->replaceInternalLinks( $cleaned );
        $wgLinkCache->resume();

        # Register the MediaWiki-namespace page of the message itself
        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $cleaned;
}
1417
1418 # Effective <nowiki></nowiki>
1419 # Not real <nowiki> because this is called after nowiki sections are processed
function wfReplaceMsgnwVar( $matches ) {
        # Only the link cache is needed here; the unused $wgCurOut import
        # has been dropped.
        global $wgLinkCache;

        # $matches[1] is the message key; the message text is escaped with
        # wfEscapeWikiText() before being returned
        $text = wfEscapeWikiText( wfMsg( $matches[1] ) );

        # Register the MediaWiki-namespace page of the message itself
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
        return $text;
}
1426
1427
1428
1429 ?>