includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  13 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*
  14 #
  15 #      * only within ParserOptions
  16
  17 class Parser
  18 {
  19         # Cleared with clearState():
  20         var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
  21
  22         # Temporary:
  23         var $mOptions, $mTitle;
  24
  25         function Parser()
  26         {
  27                 $this->clearState();
  28         }
  29
  30         function clearState()
  31         {
  32                 $this->mOutput = new ParserOutput;
  33                 $this->mAutonumber = 0;
  34                 $this->mLastSection = "";
  35                 $this->mDTopen = false;
  36         }
  37
   38         # First pass--strip out <nowiki>, <math> (if TeX is enabled) and <pre>
   39         # sections, then pass the rest off to doWikiPass2() which does all the real work.
  40         #
  41         # Returns a ParserOutput
  42         #
  43         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  44         {
  45                 $fname = "Parser::parse";
  46                 wfProfileIn( $fname );
  47                 $unique  = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
  48                 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
  49                 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
  50                 $nwlist = array();
  51                 $nwsecs = 0;
  52                 $mathlist = array();
  53                 $mathsecs = 0;
  54                 $prelist = array ();
  55                 $presecs = 0;
  56                 $stripped = "";
  57                 $stripped2 = "";
  58                 $stripped3 = "";
  59
  60                 if ( $clearState ) {
  61                         $this->clearState();
  62                 }
  63
  64                 $this->mOptions = $options;
  65                 $this->mTitle =& $title;
  66
  67                 # Replace any instances of the placeholders
  68                 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
  69                 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
  70                 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
  71
  72                 while ( "" != $text ) {
  73                         $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
  74                         $stripped .= $p[0];
  75                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $text = ""; }
  76                         else {
  77                                 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
  78                                 ++$nwsecs;
  79                                 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
  80                                 $stripped .= $unique . $nwsecs . "s";
  81                                 $text = $q[1];
  82                         }
  83                 }
  84
  85                 if( $this->mOptions->getUseTeX() ) {
  86                         while ( "" != $stripped ) {
  87                                 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
  88                                 $stripped2 .= $p[0];
  89                                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped = ""; }
  90                                 else {
  91                                         $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
  92                                         ++$mathsecs;
  93                                         $mathlist[$mathsecs] = renderMath($q[0]);
  94                                         $stripped2 .= $unique2 . $mathsecs . "s";
  95                                         $stripped = $q[1];
  96                                 }
  97                         }
  98                 } else {
  99                         $stripped2 = $stripped;
 100                 }
 101
 102                 while ( "" != $stripped2 ) {
 103                         $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
 104                         $stripped3 .= $p[0];
 105                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) { $stripped2 = ""; }
 106                         else {
 107                                 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
 108                                 ++$presecs;
 109                                 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
 110                                 $stripped3 .= $unique3 . $presecs . "s";
 111                                 $stripped2 = $q[1];
 112                         }
 113                 }
 114
 115                 $text = $this->doWikiPass2( $stripped3, $linestart );
 116
 117                 $specialChars = array("\\", "$");
 118                 $escapedChars = array("\\\\", "\\$");
 119
 120                 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
 121                 for ( $i = $presecs; $i >= 1; --$i ) {
 122                         $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
 123                                 $escapedChars, $prelist[$i] ), $text );
 124                 }
 125
 126                 for ( $i = $mathsecs; $i >= 1; --$i ) {
 127                         $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
 128                                 $escapedChars, $mathlist[$i] ), $text );
 129                 }
 130
 131                 for ( $i = $nwsecs; $i >= 1; --$i ) {
 132                         $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
 133                                 $escapedChars, $nwlist[$i] ), $text );
 134                 }
 135
 136                 $this->mOutput->setText( $text );
 137                 wfProfileOut( $fname );
 138                 return $this->mOutput;
 139         }
 140
 141         function categoryMagic ()
 142         {
 143                 global $wgLang ;
 144                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 145                 $id = $this->mTitle->getArticleID() ;
 146                 $cat = ucfirst ( wfMsg ( "category" ) ) ;
 147                 $ti = $this->mTitle->getText() ;
 148                 $ti = explode ( ":" , $ti , 2 ) ;
 149                 if ( $cat != $ti[0] ) return "" ;
 150                 $r = "<br break=all>\n" ;
 151
 152                 $articles = array() ;
 153                 $parents = array () ;
 154                 $children = array() ;
 155
 156
 157                 $sk =& $this->mGetSkin();
 158
 159                 $doesexist = false ;
 160                 if ( $doesexist ) {
 161                         $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
 162                 } else {
 163                         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 164                 }
 165
 166                 $res = wfQuery ( $sql, DB_READ ) ;
 167                 while ( $x = wfFetchObject ( $res ) )
 168                 {
 169                 #  $t = new Title ;
 170                 #  $t->newFromDBkey ( $x->l_from ) ;
 171                 #  $t = $t->getText() ;
 172                         if ( $doesexist ) {
 173                                 $t = $x->l_from ;
 174                         } else {
 175                                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 176                                 if ( $t != "" ) $t .= ":" ;
 177                                 $t .= $x->cur_title ;
 178                         }
 179
 180                         $y = explode ( ":" , $t , 2 ) ;
 181                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 182                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 183                         } else {
 184                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 185                         }
 186                 }
 187                 wfFreeResult ( $res ) ;
 188
 189                 # Children
 190                 if ( count ( $children ) > 0 )
 191                 {
 192                         asort ( $children ) ;
 193                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 194                         $r .= implode ( ", " , $children ) ;
 195                 }
 196
 197                 # Articles
 198                 if ( count ( $articles ) > 0 )
 199                 {
 200                         asort ( $articles ) ;
 201                         $h =  wfMsg( "category_header", $ti[1] );
 202                         $r .= "<h2>{$h}</h2>\n" ;
 203                         $r .= implode ( ", " , $articles ) ;
 204                 }
 205
 206
 207                 return $r ;
 208         }
 209
 210         function getHTMLattrs ()
 211         {
 212                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 213                                 "title", "align", "lang", "dir", "width", "height",
 214                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 215                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 216                                 /* FONT */ "type", "start", "value", "compact",
 217                                 /* For various lists, mostly deprecated but safe */
 218                                 "summary", "width", "border", "frame", "rules",
 219                                 "cellspacing", "cellpadding", "valign", "char",
 220                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 221                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 222                                 "id", "class", "name", "style" /* For CSS */
 223                                 );
 224                 return $htmlattrs ;
 225         }
 226
 227         function fixTagAttributes ( $t )
 228         {
 229                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 230                 $htmlattrs = $this->getHTMLattrs() ;
 231
 232                 # Strip non-approved attributes from the tag
 233                 $t = preg_replace(
 234                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 235                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 236                         $t);
 237                 # Strip javascript "expression" from stylesheets. Brute force approach:
 238                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 239
 240                 if( preg_match(
 241                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 242                         wfMungeToUtf8( $t ) ) )
 243                 {
 244                         $t="";
 245                 }
 246
 247                 return trim ( $t ) ;
 248         }
 249
 250         function doTableStuff ( $t )
 251         {
 252                 $t = explode ( "\n" , $t ) ;
 253                 $td = array () ; # Is currently a td tag open?
 254                         $ltd = array () ; # Was it TD or TH?
 255                         $tr = array () ; # Is currently a tr tag open?
 256                         $ltr = array () ; # tr attributes
 257                         foreach ( $t AS $k => $x )
 258                         {
 259                                 $x = rtrim ( $x ) ;
 260                                 $fc = substr ( $x , 0 , 1 ) ;
 261                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 262                                 {
 263                                         $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 264                                         array_push ( $td , false ) ;
 265                                         array_push ( $ltd , "" ) ;
 266                                         array_push ( $tr , false ) ;
 267                                         array_push ( $ltr , "" ) ;
 268                                 }
 269                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 270                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 271                                 {
 272                                         $z = "</table>\n" ;
 273                                         $l = array_pop ( $ltd ) ;
 274                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 275                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 276                                         array_pop ( $ltr ) ;
 277                                         $t[$k] = $z ;
 278                                 }
 279                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 280                                                 {
 281                                                 $z = trim ( substr ( $x , 2 ) ) ;
 282                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 283                                                 }*/
 284                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 285                                 {
 286                                         $x = substr ( $x , 1 ) ;
 287                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 288                                         $z = "" ;
 289                                         $l = array_pop ( $ltd ) ;
 290                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 291                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 292                                         array_pop ( $ltr ) ;
 293                                         $t[$k] = $z ;
 294                                         array_push ( $tr , false ) ;
 295                                         array_push ( $td , false ) ;
 296                                         array_push ( $ltd , "" ) ;
 297                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 298                                 }
 299                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 300                                 {
 301                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 302                                         {
 303                                                 $fc = "+" ;
 304                                                 $x = substr ( $x , 1 ) ;
 305                                         }
 306                                         $after = substr ( $x , 1 ) ;
 307                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 308                                         $after = explode ( "||" , $after ) ;
 309                                         $t[$k] = "" ;
 310                                         foreach ( $after AS $theline )
 311                                         {
 312                                                 $z = "" ;
 313                                                 if ( $fc != "+" )
 314                                                 {
 315                                                         $tra = array_pop ( $ltr ) ;
 316                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 317                                                         array_push ( $tr , true ) ;
 318                                                         array_push ( $ltr , "" ) ;
 319                                                 }
 320
 321                                                 $l = array_pop ( $ltd ) ;
 322                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 323                                                 if ( $fc == "|" ) $l = "TD" ;
 324                                                 else if ( $fc == "!" ) $l = "TH" ;
 325                                                 else if ( $fc == "+" ) $l = "CAPTION" ;
 326                                                 else $l = "" ;
 327                                                 array_push ( $ltd , $l ) ;
 328                                                 $y = explode ( "|" , $theline , 2 ) ;
 329                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 330                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 331                                                 $t[$k] .= $y ;
 332                                                 array_push ( $td , true ) ;
 333                                         }
 334                                 }
 335                         }
 336
 337                 # Closing open td, tr && table
 338                 while ( count ( $td ) > 0 )
 339                 {
 340                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 341                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 342                         $t[] = "</table>" ;
 343                 }
 344
 345                 $t = implode ( "\n" , $t ) ;
 346                 #               $t = $this->removeHTMLtags( $t );
 347                 return $t ;
 348         }
 349
 350         # Well, OK, it's actually about 14 passes.  But since all the
 351         # hard lifting is done inside PHP's regex code, it probably
 352         # wouldn't speed things up much to add a real parser.
 353         #
 354         function doWikiPass2( $text, $linestart )
 355         {
 356                 $fname = "OutputPage::doWikiPass2";
 357                 wfProfileIn( $fname );
 358
 359                 $text = $this->removeHTMLtags( $text );
 360                 $text = $this->replaceVariables( $text );
 361
 362                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 363                 $text = str_replace ( "<HR>", "<hr>", $text );
 364
 365                 $text = $this->doHeadings( $text );
 366                 $text = $this->doBlockLevels( $text, $linestart );
 367
 368                 if($this->mOptions->getUseDynamicDates()) {
 369                         global $wgDateFormatter;
 370                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 371                 }
 372
 373                 $text = $this->replaceExternalLinks( $text );
 374                 $text = $this->replaceInternalLinks ( $text );
 375                 $text = $this->doTableStuff ( $text ) ;
 376
 377                 $text = $this->magicISBN( $text );
 378                 $text = $this->magicRFC( $text );
 379                 $text = $this->formatHeadings( $text );
 380
 381                 $sk =& $this->mOptions->getSkin();
 382                 $text = $sk->transformContent( $text );
 383                 $text .= $this->categoryMagic () ;
 384
 385                 wfProfileOut( $fname );
 386                 return $text;
 387         }
 388
 389
 390         /* private */ function doHeadings( $text )
 391         {
 392                 for ( $i = 6; $i >= 1; --$i ) {
 393                         $h = substr( "======", 0, $i );
 394                         $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
 395                           "<h{$i}>\\1</h{$i}>\\2", $text );
 396                 }
 397                 return $text;
 398         }
 399
 400         # Note: we have to do external links before the internal ones,
 401         # and otherwise take great care in the order of things here, so
 402         # that we don't end up interpreting some URLs twice.
 403
 404         /* private */ function replaceExternalLinks( $text )
 405         {
 406                 $fname = "OutputPage::replaceExternalLinks";
 407                 wfProfileIn( $fname );
 408                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 409                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 410                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 411                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 412                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 413                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 414                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 415                 wfProfileOut( $fname );
 416                 return $text;
 417         }
 418
 419         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 420         {
 421                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 422                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 423
 424                 # this is  the list of separators that should be ignored if they
 425                 # are the last character of an URL but that should be included
 426                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 427                 # in this case, the last comma should not become part of the URL,
 428                 # but in "www.foo.com/123,2342,32.htm" it should.
 429                 $sep = ",;\.:";
 430                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 431                 $images = "gif|png|jpg|jpeg";
 432
 433                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 434                 # they are interpreted as part of the string (used to tell PHP
 435                 # that the content of the string should be inserted there).
 436                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 437                   "((?i){$images})([^{$uc}]|$)/";
 438
 439                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 440                 $sk =& $this->mOptions->getSkin();
 441
 442                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 443                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 444                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 445                 }
 446                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 447                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 448                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 449                   "</a>\\5", $s );
 450                 $s = str_replace( $unique, $protocol, $s );
 451
 452                 $a = explode( "[{$protocol}:", " " . $s );
 453                 $s = array_shift( $a );
 454                 $s = substr( $s, 1 );
 455
 456                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 457                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 458
 459                 foreach ( $a as $line ) {
 460                         if ( preg_match( $e1, $line, $m ) ) {
 461                                 $link = "{$protocol}:{$m[1]}";
 462                                 $trail = $m[2];
 463                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 464                                 else { $text = wfEscapeHTML( $link ); }
 465                         } else if ( preg_match( $e2, $line, $m ) ) {
 466                                 $link = "{$protocol}:{$m[1]}";
 467                                 $text = $m[2];
 468                                 $trail = $m[3];
 469                         } else {
 470                                 $s .= "[{$protocol}:" . $line;
 471                                 continue;
 472                         }
 473                         if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
 474                         else $paren = "";
 475                         $la = $sk->getExternalLinkAttributes( $link, $text );
 476                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 477
 478                 }
 479                 return $s;
 480         }
 481
 482         /* private */ function handle3Quotes( &$state, $token )
 483         {
 484                 if ( $state["strong"] ) {
 485                         if ( $state["em"] && $state["em"] > $state["strong"] )
 486                         {
 487                                 # ''' lala ''lala '''
 488                                 $s = "</em></strong><em>";
 489                         } else {
 490                                 $s = "</strong>";
 491                         }
 492                         $state["strong"] = FALSE;
 493                 } else {
 494                         $s = "<strong>";
 495                         $state["strong"] = $token["pos"];
 496                 }
 497                 return $s;
 498         }
 499
 500         /* private */ function handle2Quotes( &$state, $token )
 501         {
 502                 if ( $state["em"] ) {
 503                         if ( $state["strong"] && $state["strong"] > $state["em"] )
 504                         {
 505                                 # ''lala'''lala'' ....'''
 506                                 $s = "</strong></em><strong>";
 507                         } else {
 508                                 $s = "</em>";
 509                         }
 510                         $state["em"] = FALSE;
 511                 } else {
 512                         $s = "<em>";
 513                         $state["em"] = $token["pos"];
 514                 }
 515                 return $s;
 516         }
 517
 518         /* private */ function handle5Quotes( &$state, $token )
 519         {
 520                 if ( $state["em"] && $state["strong"] ) {
 521                         if ( $state["em"] < $state["strong"] ) {
 522                                 $s .= "</strong></em>";
 523                         } else {
 524                                 $s .= "</em></strong>";
 525                         }
 526                         $state["strong"] = $state["em"] = FALSE;
 527                 } elseif ( $state["em"] ) {
 528                         $s .= "</em><strong>";
 529                         $state["em"] = FALSE;
 530                         $state["strong"] = $token["pos"];
 531                 } elseif ( $state["strong"] ) {
 532                         $s .= "</strong><em>";
 533                         $state["strong"] = FALSE;
 534                         $state["em"] = $token["pos"];
 535                 } else { # not $em and not $strong
 536                         $s .= "<strong><em>";
 537                         $state["strong"] = $state["em"] = $token["pos"];
 538                 }
 539                 return $s;
 540         }
 541
 542         /* private */ function replaceInternalLinks( $str )
 543         {
 544                 $tokenizer=Tokenizer::newFromString( $str );
 545                 $tokenStack = array();
 546
 547                 $s="";
 548                 $state["em"]      = FALSE;
 549                 $state["strong"]  = FALSE;
 550                 $tagIsOpen = FALSE;
 551
 552                 # The tokenizer splits the text into tokens and returns them one by one.
 553                 # Every call to the tokenizer returns a new token.
 554                 while ( $token = $tokenizer->nextToken() )
 555                 {
 556                         switch ( $token["type"] )
 557                         {
 558                                 case "text":
 559                                         # simple text with no further markup
 560                                         $txt = $token["text"];
 561                                         break;
 562                                 case "[[":
 563                                         # link opening tag.
 564                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 565                                         $tagIsOpen = TRUE;
 566                                         array_push( $tokenStack, $token );
 567                                         $txt="";
 568                                         break;
 569                                 case "]]":
 570                                         # link close tag.
 571                                         # get text from stack, glue it together, and call the code to handle a
 572                                         # link
 573                                         if ( count( $tokenStack ) == 0 )
 574                                         {
 575                                                 # stack empty. Found a ]] without an opening [[
 576                                                 $txt = "]]";
 577                                         } else {
 578                                                 $linkText = "";
 579                                                 $lastToken = array_pop( $tokenStack );
 580                                                 while ( $lastToken["type"] != "[[" )
 581                                                 {
 582                                                         $linkText = $lastToken["text"] . $linkText;
 583                                                         $lastToken = array_pop( $tokenStack );
 584                                                 }
 585                                                 $txt = $linkText ."]]";
 586                                                 $prefix = $lastToken["text"];
 587                                                 $nextToken = $tokenizer->previewToken();
 588                                                 if ( $nextToken["type"] == "text" )
 589                                                 {
 590                                                         # Preview just looks at it. Now we have to fetch it.
 591                                                         $nextToken = $tokenizer->nextToken();
 592                                                         $txt .= $nextToken["text"];
 593                                                 }
 594                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 595                                         }
 596                                         $tagIsOpen = (count( $tokenStack ) != 0);
 597                                         break;
 598                                 case "'''":
 599                                         # This and the three next ones handle quotes
 600                                         $txt = $this->handle3Quotes( $state, $token );
 601                                         break;
 602                                 case "''":
 603                                         $txt = $this->handle2Quotes( $state, $token );
 604                                         break;
 605                                 case "'''''":
 606                                         $txt = $this->handle5Quotes( $state, $token );
 607                                         break;
 608                                 case "":
 609                                         # empty token
 610                                         $txt="";
 611                                         break;
 612                                 default:
 613                                         # An unkown token. Highlight.
 614                                         $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 615                                         $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 616                                         break;
 617                         }
 618                         # If we're parsing the interior of a link, don't append the interior to $s,
 619                         # but push it to the stack so it can be processed when a ]] token is found.
 620                         if ( $tagIsOpen  && $txt != "" ) {
 621                                 $token["type"] = "text";
 622                                 $token["text"] = $txt;
 623                                 array_push( $tokenStack, $token );
 624                         } else {
 625                                 $s .= $txt;
 626                         }
 627                 } #end while
 628                 if ( count( $tokenStack ) != 0 )
 629                 {
 630                         # still objects on stack. opened [[ tag without closing ]] tag.
 631                         $txt = "";
 632                         while ( $lastToken = array_pop( $tokenStack ) )
 633                         {
 634                                 if ( $lastToken["type"] == "text" )
 635                                 {
 636                                         $txt = $lastToken["text"] . $txt;
 637                                 } else {
 638                                         $txt = $lastToken["type"] . $txt;
 639                                 }
 640                         }
 641                         $s .= $txt;
 642                 }
 643                 return $s;
 644         }
 645
 646         /* private */ function handleInternalLink( $line, $prefix )
 647         {
 648                 global $wgLang, $wgLinkCache;
 649                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 650                 static $fname = "OutputPage::replaceInternalLinks" ;
 651                 wfProfileIn( $fname );
 652
 653                 wfProfileIn( "$fname-setup" );
 654                 static $tc = FALSE;
 655                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 656                 $sk =& $this->mOptions->getSkin();
 657
 658                 # Match a link having the form [[namespace:link|alternate]]trail
 659                 static $e1 = FALSE;
 660                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 661                 # Match the end of a line for a word that's not followed by whitespace,
 662                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 663                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 664                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 665                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 666
 667
 668                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 669                 static $image = FALSE;
 670                 static $special = FALSE;
 671                 static $media = FALSE;
 672                 static $category = FALSE;
 673                 if ( !$image ) { $image = Namespace::getImage(); }
 674                 if ( !$special ) { $special = Namespace::getSpecial(); }
 675                 if ( !$media ) { $media = Namespace::getMedia(); }
 676                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 677
 678                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 679
 680                 wfProfileOut( "$fname-setup" );
 681
 682                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 683                         $text = $m[2];
 684                         $trail = $m[3];
 685                 } else { # Invalid form; output directly
 686                         $s .= $prefix . "[[" . $line ;
 687                         return $s;
 688                 }
 689
 690                 /* Valid link forms:
 691                 Foobar -- normal
 692                 :Foobar -- override special treatment of prefix (images, language links)
 693                 /Foobar -- convert to CurrentPage/Foobar
 694                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 695                 */
 696                 $c = substr($m[1],0,1);
 697                 $noforce = ($c != ":");
 698                 if( $c == "/" ) { # subpage
 699                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 700                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 701                                 $noslash=$m[1];
 702                         } else {
 703                                 $noslash=substr($m[1],1);
 704                         }
 705                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 706                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 707                                 if( "" == $text ) {
 708                                         $text= $m[1];
 709                                 } # this might be changed for ugliness reasons
 710                         } else {
 711                                 $link = $noslash; # no subpage allowed, use standard link
 712                         }
 713                 } elseif( $noforce ) { # no subpage
 714                         $link = $m[1];
 715                 } else {
 716                         $link = substr( $m[1], 1 );
 717                 }
 718                 if( "" == $text )
 719                         $text = $link;
 720
 721                 $nt = Title::newFromText( $link );
 722                 if( !$nt ) {
 723                         $s .= $prefix . "[[" . $line;
 724                         return $s;
 725                 }
 726                 $ns = $nt->getNamespace();
 727                 $iw = $nt->getInterWiki();
 728                 if( $noforce ) {
 729                         if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 730                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 731                                 $s .= $prefix . $trail;
 732                                 return $s;
 733                         }
 734                         if( $ns == $image ) {
 735                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 736                                 $wgLinkCache->addImageLinkObj( $nt );
 737                                 return $s;
 738                         }
 739                 }
 740                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 741                     ( strpos( $link, "#" ) == FALSE ) ) {
 742                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 743                         return $s;
 744                 }
 745                 if ( $ns == $category && $wgUseCategoryMagic ) {
 746                         $t = explode ( ":" , $nt->getText() ) ;
 747                         array_shift ( $t ) ;
 748                         $t = implode ( ":" , $t ) ;
 749                         $t = $wgLang->ucFirst ( $t ) ;
 750 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 751                         $nnt = Title::newFromText ( $category.":".$t ) ;
 752                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 753                         $this->mCategoryLinks[] = $t ;
 754                         $s .= $prefix . $trail ;
 755                         return $s ;
 756                 }
 757                 if( $ns == $media ) {
 758                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 759                         $wgLinkCache->addImageLinkObj( $nt );
 760                         return $s;
 761                 } elseif( $ns == $special ) {
 762                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 763                         return $s;
 764                 }
 765                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 766
 767                 wfProfileOut( $fname );
 768                 return $s;
 769         }
 770
 771         # Some functions here used by doBlockLevels()
 772         #
 773         /* private */ function closeParagraph()
 774         {
 775                 $result = "";
 776                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 777                   0 != strcmp( "", $this->mLastSection ) ) {
 778                         $result = "</" . $this->mLastSection  . ">";
 779                 }
 780                 $this->mLastSection = "";
 781                 return $result."\n";
 782         }
 783         # getCommon() returns the length of the longest common substring
 784         # of both arguments, starting at the beginning of both.
 785         #
 786         /* private */ function getCommon( $st1, $st2 )
 787         {
 788                 $fl = strlen( $st1 );
 789                 $shorter = strlen( $st2 );
 790                 if ( $fl < $shorter ) { $shorter = $fl; }
 791
 792                 for ( $i = 0; $i < $shorter; ++$i ) {
 793                         if ( $st1{$i} != $st2{$i} ) { break; }
 794                 }
 795                 return $i;
 796         }
 797         # These next three functions open, continue, and close the list
 798         # element appropriate to the prefix character passed into them.
 799         #
 800         /* private */ function openList( $char )
 801     {
 802                 $result = $this->closeParagraph();
 803
 804                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 805                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 806                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 807                 else if ( ";" == $char ) {
 808                         $result .= "<dl><dt>";
 809                         $this->mDTopen = true;
 810                 }
 811                 else { $result = "<!-- ERR 1 -->"; }
 812
 813                 return $result;
 814         }
 815
 816         /* private */ function nextItem( $char )
 817         {
 818                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 819                 else if ( ":" == $char || ";" == $char ) {
 820                         $close = "</dd>";
 821                         if ( $this->mDTopen ) { $close = "</dt>"; }
 822                         if ( ";" == $char ) {
 823                                 $this->mDTopen = true;
 824                                 return $close . "<dt>";
 825                         } else {
 826                                 $this->mDTopen = false;
 827                                 return $close . "<dd>";
 828                         }
 829                 }
 830                 return "<!-- ERR 2 -->";
 831         }
 832
 833         /* private */function closeList( $char )
 834         {
 835                 if ( "*" == $char ) { $text = "</li></ul>"; }
 836                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 837                 else if ( ":" == $char ) {
 838                         if ( $this->mDTopen ) {
 839                                 $this->mDTopen = false;
 840                                 $text = "</dt></dl>";
 841                         } else {
 842                                 $text = "</dd></dl>";
 843                         }
 844                 }
 845                 else {  return "<!-- ERR 3 -->"; }
 846                 return $text."\n";
 847         }
 848
 849         /* private */ function doBlockLevels( $text, $linestart )
 850         {
 851                 $fname = "OutputPage::doBlockLevels";
 852                 wfProfileIn( $fname );
 853                 # Parsing through the text line by line.  The main thing
 854                 # happening here is handling of block-level elements p, pre,
 855                 # and making lists from lines starting with * # : etc.
 856                 #
 857                 $a = explode( "\n", $text );
 858                 $text = $lastPref = "";
 859                 $this->mDTopen = $inBlockElem = false;
 860
 861                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 862                 foreach ( $a as $t ) {
 863                         if ( "" != $text ) { $text .= "\n"; }
 864
 865                         $oLine = $t;
 866                         $opl = strlen( $lastPref );
 867                         $npl = strspn( $t, "*#:;" );
 868                         $pref = substr( $t, 0, $npl );
 869                         $pref2 = str_replace( ";", ":", $pref );
 870                         $t = substr( $t, $npl );
 871
 872                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 873                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 874
 875                                 if ( ";" == substr( $pref, -1 ) ) {
 876                                         $cpos = strpos( $t, ":" );
 877                                         if ( ! ( false === $cpos ) ) {
 878                                                 $term = substr( $t, 0, $cpos );
 879                                                 $text .= $term . $this->nextItem( ":" );
 880                                                 $t = substr( $t, $cpos + 1 );
 881                                         }
 882                                 }
 883                         } else if (0 != $npl || 0 != $opl) {
 884                                 $cpl = $this->getCommon( $pref, $lastPref );
 885
 886                                 while ( $cpl < $opl ) {
 887                                         $text .= $this->closeList( $lastPref{$opl-1} );
 888                                         --$opl;
 889                                 }
 890                                 if ( $npl <= $cpl && $cpl > 0 ) {
 891                                         $text .= $this->nextItem( $pref{$cpl-1} );
 892                                 }
 893                                 while ( $npl > $cpl ) {
 894                                         $char = substr( $pref, $cpl, 1 );
 895                                         $text .= $this->openList( $char );
 896
 897                                         if ( ";" == $char ) {
 898                                                 $cpos = strpos( $t, ":" );
 899                                                 if ( ! ( false === $cpos ) ) {
 900                                                         $term = substr( $t, 0, $cpos );
 901                                                         $text .= $term . $this->nextItem( ":" );
 902                                                         $t = substr( $t, $cpos + 1 );
 903                                                 }
 904                                         }
 905                                         ++$cpl;
 906                                 }
 907                                 $lastPref = $pref2;
 908                         }
 909                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 910                                 if ( preg_match(
 911                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 912                                         $text .= $this->closeParagraph();
 913                                         $inBlockElem = true;
 914                                 }
 915                                 if ( ! $inBlockElem ) {
 916                                         if ( " " == $t{0} ) {
 917                                                 $newSection = "pre";
 918                                                 # $t = wfEscapeHTML( $t );
 919                                         }
 920                                         else { $newSection = "p"; }
 921
 922                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 923                                                 $text .= $this->closeParagraph();
 924                                                 $text .= "<" . $newSection . ">";
 925                                         } else if ( 0 != strcmp( $this->mLastSection,
 926                                           $newSection ) ) {
 927                                                 $text .= $this->closeParagraph();
 928                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 929                                                         $text .= "<" . $newSection . ">";
 930                                                 }
 931                                         }
 932                                         $this->mLastSection = $newSection;
 933                                 }
 934                                 if ( $inBlockElem &&
 935                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
 936                                         $inBlockElem = false;
 937                                 }
 938                         }
 939                         $text .= $t;
 940                 }
 941                 while ( $npl ) {
 942                         $text .= $this->closeList( $pref2{$npl-1} );
 943                         --$npl;
 944                 }
 945                 if ( "" != $this->mLastSection ) {
 946                         if ( "p" != $this->mLastSection ) {
 947                                 $text .= "</" . $this->mLastSection . ">";
 948                         }
 949                         $this->mLastSection = "";
 950                 }
 951                 wfProfileOut( $fname );
 952                 return $text;
 953         }
 954
 955         /* private */ function replaceVariables( $text )
 956         {
 957                 global $wgLang, $wgCurOut;
 958                 $fname = "OutputPage::replaceVariables";
 959                 wfProfileIn( $fname );
 960
 961                 $magic = array();
 962
 963                 # Basic variables
 964                 # See Language.php for the definition of each magic word
 965                 # As with sigs, this uses the server's local time -- ensure
 966                 # this is appropriate for your audience!
 967
 968                 $magic[MAG_CURRENTMONTH] = date( "m" );
 969                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
 970                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
 971                 $magic[MAG_CURRENTDAY] = date("j");
 972                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
 973                 $magic[MAG_CURRENTYEAR] = date( "Y" );
 974                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
 975
 976                 $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
 977
 978                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
 979                 if ( $mw->match( $text ) ) {
 980                         $v = wfNumberOfArticles();
 981                         $text = $mw->replace( $v, $text );
 982                         if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
 983                 }
 984
 985                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
 986                 # The callbacks are at the bottom of this file
 987                 $wgCurOut = $this;
 988                 $mw =& MagicWord::get( MAG_MSG );
 989                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
 990                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 991
 992                 $mw =& MagicWord::get( MAG_MSGNW );
 993                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
 994                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
 995
 996                 wfProfileOut( $fname );
 997                 return $text;
 998         }
 999
1000         # Cleans up HTML, removes dangerous tags and attributes
1001         /* private */ function removeHTMLtags( $text )
1002         {
1003                 $fname = "OutputPage::removeHTMLtags";
1004                 wfProfileIn( $fname );
1005                 $htmlpairs = array( # Tags that must be closed
1006                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1007                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1008                         "strike", "strong", "tt", "var", "div", "center",
1009                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1010                         "ruby", "rt" , "rb" , "rp"
1011                 );
1012                 $htmlsingle = array(
1013                         "br", "p", "hr", "li", "dt", "dd"
1014                 );
1015                 $htmlnest = array( # Tags that can be nested--??
1016                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1017                         "dl", "font", "big", "small", "sub", "sup"
1018                 );
1019                 $tabletags = array( # Can only appear inside table
1020                         "td", "th", "tr"
1021                 );
1022
1023                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1024                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1025
1026                 $htmlattrs = $this->getHTMLattrs () ;
1027
1028                 # Remove HTML comments
1029                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1030
1031                 $bits = explode( "<", $text );
1032                 $text = array_shift( $bits );
1033                 $tagstack = array(); $tablestack = array();
1034
1035                 foreach ( $bits as $x ) {
1036                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1037                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1038                           $x, $regs );
1039                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1040                         error_reporting( $prev );
1041
1042                         $badtag = 0 ;
1043                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1044                                 # Check our stack
1045                                 if ( $slash ) {
1046                                         # Closing a tag...
1047                                         if ( ! in_array( $t, $htmlsingle ) &&
1048                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1049                                                 array_push( $tagstack, $ot );
1050                                                 $badtag = 1;
1051                                         } else {
1052                                                 if ( $t == "table" ) {
1053                                                         $tagstack = array_pop( $tablestack );
1054                                                 }
1055                                                 $newparams = "";
1056                                         }
1057                                 } else {
1058                                         # Keep track for later
1059                                         if ( in_array( $t, $tabletags ) &&
1060                                           ! in_array( "table", $tagstack ) ) {
1061                                                 $badtag = 1;
1062                                         } else if ( in_array( $t, $tagstack ) &&
1063                                           ! in_array ( $t , $htmlnest ) ) {
1064                                                 $badtag = 1 ;
1065                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1066                                                 if ( $t == "table" ) {
1067                                                         array_push( $tablestack, $tagstack );
1068                                                         $tagstack = array();
1069                                                 }
1070                                                 array_push( $tagstack, $t );
1071                                         }
1072                                         # Strip non-approved attributes from the tag
1073                                         $newparams = $this->fixTagAttributes($params);
1074
1075                                 }
1076                                 if ( ! $badtag ) {
1077                                         $rest = str_replace( ">", "&gt;", $rest );
1078                                         $text .= "<$slash$t $newparams$brace$rest";
1079                                         continue;
1080                                 }
1081                         }
1082                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1083                 }
1084                 # Close off any remaining tags
1085                 while ( $t = array_pop( $tagstack ) ) {
1086                         $text .= "</$t>\n";
1087                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1088                 }
1089                 wfProfileOut( $fname );
1090                 return $text;
1091         }
1092
1093 /*
1094  *
1095  * This function accomplishes several tasks:
1096  * 1) Auto-number headings if that option is enabled
1097  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1098  * 3) Add a Table of contents on the top for users who have enabled the option
1099  * 4) Auto-anchor headings
1100  *
1101  * It loops through all headlines, collects the necessary data, then splits up the
1102  * string and re-inserts the newly formatted headlines.
1103  *
1104  * */
1105         /* private */ function formatHeadings( $text )
1106         {
1107                 $nh=$this->mOptions->getNumberHeadings();
1108                 $st=$this->mOptions->getShowToc();
1109                 if(!$this->mTitle->userCanEdit()) {
1110                         $es=0;
1111                         $esr=0;
1112                 } else {
1113                         $es=$this->mOptions->getEditSection();
1114                         $esr=$this->mOptions->getEditSectionOnRightClick();
1115                 }
1116
1117                 # Inhibit editsection links if requested in the page
1118                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1119                 if ($esw->matchAndRemove( $text )) {
1120                         $es=0;
1121                 }
1122                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1123                 # do not add TOC
1124                 $mw =& MagicWord::get( MAG_NOTOC );
1125                 if ($mw->matchAndRemove( $text ))
1126                 {
1127                         $st = 0;
1128                 }
1129
1130                 # never add the TOC to the Main Page. This is an entry page that should not
1131                 # be more than 1-2 screens large anyway
1132                 if($this->mTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1133
1134                 # We need this to perform operations on the HTML
1135                 $sk =& $this->mOptions->getSkin();
1136
1137                 # Get all headlines for numbering them and adding funky stuff like [edit]
1138                 # links
1139                 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1140
1141                 # headline counter
1142                 $c=0;
1143
1144                 # Ugh .. the TOC should have neat indentation levels which can be
1145                 # passed to the skin functions. These are determined here
1146                 foreach($matches[3] as $headline) {
1147                         if($level) { $prevlevel=$level;}
1148                         $level=$matches[1][$c];
1149                         if(($nh||$st) && $prevlevel && $level>$prevlevel) {
1150
1151                                 $h[$level]=0; // reset when we enter a new level
1152                                 $toc.=$sk->tocIndent($level-$prevlevel);
1153                                 $toclevel+=$level-$prevlevel;
1154
1155                         }
1156                         if(($nh||$st) && $level<$prevlevel) {
1157                                 $h[$level+1]=0; // reset when we step back a level
1158                                 $toc.=$sk->tocUnindent($prevlevel-$level);
1159                                 $toclevel-=$prevlevel-$level;
1160
1161                         }
1162                         $h[$level]++; // count number of headlines for each level
1163
1164                         if($nh||$st) {
1165                                 for($i=1;$i<=$level;$i++) {
1166                                         if($h[$i]) {
1167                                                 if($dot) {$numbering.=".";}
1168                                                 $numbering.=$h[$i];
1169                                                 $dot=1;
1170                                         }
1171                                 }
1172                         }
1173
1174                         // The canonized header is a version of the header text safe to use for links
1175
1176                         $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1177                         $tocline = trim( $canonized_headline );
1178                         $canonized_headline=str_replace('"',"",$canonized_headline);
1179                         $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1180                         $refer[$c]=$canonized_headline;
1181                         $refers[$canonized_headline]++;  // count how many in assoc. array so we can track dupes in anchors
1182                         $refcount[$c]=$refers[$canonized_headline];
1183
1184             // Prepend the number to the heading text
1185
1186                         if($nh||$st) {
1187                                 $tocline=$numbering ." ". $tocline;
1188
1189                                 // Don't number the heading if it is the only one (looks silly)
1190                                 if($nh && count($matches[3]) > 1) {
1191                                         $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1192                                 }
1193                         }
1194
1195                         // Create the anchor for linking from the TOC to the section
1196
1197                         $anchor=$canonized_headline;
1198                         if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1199                         if($st) {
1200                                 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1201                         }
1202                         if($es) {
1203                                 $head[$c].=$sk->editSectionLink($c+1);
1204                         }
1205
1206                         // Put it all together
1207
1208                         $head[$c].="<h".$level.$matches[2][$c]
1209                          ."<a name=\"".$anchor."\">"
1210                          .$headline
1211                          ."</a>"
1212                          ."</h".$level.">";
1213
1214                         // Add the edit section link
1215
1216                         if($esr) {
1217                                 $head[$c]=$sk->editSectionScript($c+1,$head[$c]);
1218                         }
1219
1220                         $numbering="";
1221                         $c++;
1222                         $dot=0;
1223                 }
1224
1225                 if($st) {
1226                         $toclines=$c;
1227                         $toc.=$sk->tocUnindent($toclevel);
1228                         $toc=$sk->tocTable($toc);
1229                 }
1230
1231                 // split up and insert constructed headlines
1232
1233                 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1234                 $i=0;
1235
1236                 foreach($blocks as $block) {
1237                         if(($es) && $c>0 && $i==0) {
1238                             # This is the [edit] link that appears for the top block of text when
1239                                 # section editing is enabled
1240                                 $full.=$sk->editSectionLink(0);
1241                         }
1242                         $full.=$block;
1243                         if($st && $toclines>3 && !$i) {
1244                                 # Let's add a top anchor just in case we want to link to the top of the page
1245                                 $full="<a name=\"top\"></a>".$full.$toc;
1246                         }
1247
1248                         $full.=$head[$i];
1249                         $i++;
1250                 }
1251
1252                 return $full;
1253         }
1254
1255         /* private */ function magicISBN( $text )
1256         {
1257                 global $wgLang;
1258
1259                 $a = split( "ISBN ", " $text" );
1260                 if ( count ( $a ) < 2 ) return $text;
1261                 $text = substr( array_shift( $a ), 1);
1262                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1263
1264                 foreach ( $a as $x ) {
1265                         $isbn = $blank = "" ;
1266                         while ( " " == $x{0} ) {
1267                                 $blank .= " ";
1268                                 $x = substr( $x, 1 );
1269                         }
1270                         while ( strstr( $valid, $x{0} ) != false ) {
1271                                 $isbn .= $x{0};
1272                                 $x = substr( $x, 1 );
1273                         }
1274                         $num = str_replace( "-", "", $isbn );
1275                         $num = str_replace( " ", "", $num );
1276
1277                         if ( "" == $num ) {
1278                                 $text .= "ISBN $blank$x";
1279                         } else {
1280                                 $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
1281                                   "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
1282                                 $text .= $x;
1283                         }
1284                 }
1285                 return $text;
1286         }
1287
1288         /* private */ function magicRFC( $text )
1289         {
1290                 return $text;
1291         }
1292
1293
1294 }
1295
# Holds the result of a parse: the output text together with the language
# links, category links and "contains old magic" flag that go with it.
#
# Every setter hands the member and the new value to wfSetVar() and returns
# whatever wfSetVar() returns.
class ParserOutput
{
        var $mText;
        var $mLanguageLinks;
        var $mCategoryLinks;
        var $mContainsOldMagic;

        # Constructor. All arguments are optional; the defaults give an empty
        # text, empty link arrays and a false "contains old magic" flag.
        function ParserOutput(
                $text = "",
                $languageLinks = array(),
                $categoryLinks = array(),
                $containsOldMagic = false
        ) {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Output text
        function getText()
        {
                return $this->mText;
        }

        function setText( $text )
        {
                return wfSetVar( $this->mText, $text );
        }

        # Language links
        function getLanguageLinks()
        {
                return $this->mLanguageLinks;
        }

        function setLanguageLinks( $ll )
        {
                return wfSetVar( $this->mLanguageLinks, $ll );
        }

        # Category links
        function getCategoryLinks()
        {
                return $this->mCategoryLinks;
        }

        function setCategoryLinks( $cl )
        {
                return wfSetVar( $this->mCategoryLinks, $cl );
        }

        # "Contains old magic" flag
        function containsOldMagic()
        {
                return $this->mContainsOldMagic;
        }

        function setContainsOldMagic( $com )
        {
                return wfSetVar( $this->mContainsOldMagic, $com );
        }
}
1318
# Bundle of settings that control a parse. Site-wide values are copied from
# globals and per-user values from the user's options; see
# initialiseFromUser().
#
# Each setter hands the member and the new value to wfSetVar() (wfSetRef()
# for the skin) and returns whatever that helper returns.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Create a ParserOptions object and initialise it from $user.
        #
        # $user: user object, taken by reference; a false-ish value makes
        #        initialiseFromUser() fall back to a new User
        # Returns the new ParserOptions object.
        #
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                // No "&" at the call site: initialiseFromUser() already declares
                // its parameter by reference, and call-time pass-by-reference is
                // rejected by later PHP versions.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option.
        #
        # Site-wide switches are copied from the $wgUseTeX, $wgUseCategoryMagic,
        # $wgUseDynamicDates, $wgInterwikiMagic and $wgAllowExternalImages
        # globals. The skin and the "date", "editsection",
        # "editsectiononrightclick", "numberheadings" and "showtoc" options are
        # read from the user. Printable output is always switched off here.
        #
        # $userInput: user object, taken by reference; when it evaluates to
        #             false a new User is created and used instead
        #
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }
}
1392
1393 # Regex callbacks, used in OutputPage::replaceVariables
1394
# Expand a message variable: look up the message named by $matches[1],
# strip the dangerous HTML from it and render its internal links.
# The stripping is necessary because replaceVariables is called after
# removeHTMLtags, and message text can come from any user.
#
# $matches: regex match array; element 1 is used as the message key
# Returns the processed message text.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgKey = $matches[1];
        $cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

        // The link cache is suspended for the duration of the link
        // replacement and resumed straight afterwards.
        $wgLinkCache->suspend();
        $rendered = $wgCurOut->replaceInternalLinks( $cleaned );
        $wgLinkCache->resume();

        // Register the message's own page in the MediaWiki namespace.
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $msgKey ) );

        return $rendered;
}
1407
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed.
# The message named by $matches[1] is passed through wfEscapeWikiText()
# instead, and the message's page in the MediaWiki namespace is registered
# with the link cache.
#
# $matches: regex match array; element 1 is used as the message key
# Returns the escaped message text.
function wfReplaceMsgnwVar( $matches ) {
        global $wgLinkCache;
        $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
        $wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
        return $text;
}
1416
1417
1418
1419 ?>