includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Converts wikitext to HTML.
   8 #
   9 # Globals used:
  10 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
  11 #
  12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  13 #
  14 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  15 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  16 #               $wgLocaltimezone
  17 #
  18 #      * only within ParserOptions
  19
  20 class Parser
  21 {
  22         # Cleared with clearState():
  23         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
  24
  25         # Temporary:
  26         var $mOptions, $mTitle;
  27
  # Constructor (PHP 4 style: a method named after the class).
  # Leaves the new parser in the same condition clearState() produces.
  function Parser()
  {
          $this->clearState();
  }
  32
  # Resets the members listed under "Cleared with clearState()":
  # a fresh ParserOutput, autonumber counter back at 0, no last section,
  # no open DT, and no strip state.
  function clearState()
  {
          $this->mStripState = false;
          $this->mDTopen = false;
          $this->mLastSection = "";
          $this->mAutonumber = 0;
          $this->mOutput = new ParserOutput;
  }
  41
  42         # First pass--just handle <nowiki> sections, pass the rest off
  43         # to doWikiPass2() which does all the real work.
  44         #
  45         # Returns a ParserOutput
  46         #
  # Parameters:
  #   $text       - wikitext to convert
  #   $title      - title object; kept by reference in $this->mTitle
  #   $options    - options object; kept in $this->mOptions
  #   $linestart  - handed unchanged to doWikiPass2()
  #   $clearState - when true, clearState() runs before anything else
  #
  # Returns $this->mOutput after its setText() has received the result.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
          $fname = "Parser::parse";
          wfProfileIn( $fname );

          if ( $clearState ) {
                  $this->clearState();
          }

          $this->mOptions = $options;
          $this->mTitle =& $title;

          # strip() swaps protected sections for placeholders and records them in
          # $this->mStripState; unstrip() puts them back once the passes are done.
          $text = $this->strip( $text, $this->mStripState, true );
          $text = $this->doWikiPass2( $text, $linestart );
          $text = $this->unstrip( $text, $this->mStripState );

          $this->mOutput->setText( $text );
          wfProfileOut( $fname );
          return $this->mOutput;
  }
  68
  # Returns two mt_rand(0, 0x7fffffff) draws, each written in lower-case
  # hexadecimal and joined together (2 to 16 hex digits in total).
  /* static */ function getRandomString()
  {
          $first = mt_rand(0, 0x7fffffff);
          $second = mt_rand(0, 0x7fffffff);
          return dechex($first) . dechex($second);
  }
  73
  74         # Strips <nowiki>, <pre> and <math>
  75         # Returns the text, and fills an array with data needed in unstrip()
  76         #
  # Parameters:
  #   $text   - wikitext to scan
  #   $state  - (by reference) overwritten with a fresh array holding, for each
  #             of nowiki ('nw'), math and pre: the saved sections ('*list',
  #             numbered from 1), their count ('*secs') and a random
  #             placeholder prefix ('*unq')
  #   $render - true: store display-ready output for every section;
  #             false: store the section re-wrapped in its original tag
  #
  # Every stripped section is replaced in the returned text by its placeholder
  # prefix followed by the section number as 8 upper-case hex digits.
  # <math> is only stripped when $this->mOptions->getUseTeX() is true.
  # An opening tag with nothing after it is dropped; an opening tag that is
  # never closed takes the whole remainder of the text as its section.
  function strip( $text, &$state, $render = true )
  {
          $state = array(
                  'nwlist' => array(),
                  'nwsecs' => 0,
                  'nwunq' => Parser::getRandomString(),
                  'mathlist' => array(),
                  'mathsecs' => 0,
                  'mathunq' => Parser::getRandomString(),
                  'prelist' => array(),
                  'presecs' => 0,
                  'preunq' => Parser::getRandomString()
          );

          $stripped = "";
          $stripped2 = "";
          $stripped3 = "";

          # Replace any instances of the placeholders already present in the input
          # NOTE(review): presumably wfHtmlEscapeFirst() alters the marker so it can
          # no longer be mistaken for a real placeholder -- confirm in its definition.
          $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
          $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
          $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

          # Pass 1: <nowiki>
          while ( "" != $text ) {
                  $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
                  $stripped .= $p[0];
                  if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                          $text = "";
                  } else {
                          $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
                          ++$state['nwsecs'];

                          if ( $render ) {
                                  $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
                          } else {
                                  $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
                          }

                          $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
                          # $q[1] does not exist when the closing tag is missing
                          $text = isset( $q[1] ) ? $q[1] : "";
                  }
          }

          # Pass 2: <math>, only when TeX support is switched on
          if( $this->mOptions->getUseTeX() ) {
                  while ( "" != $stripped ) {
                          $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
                          $stripped2 .= $p[0];
                          if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                                  $stripped = "";
                          } else {
                                  $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
                                  ++$state['mathsecs'];

                                  if ( $render ) {
                                          $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
                                  } else {
                                          $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
                                  }

                                  $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
                                  $stripped = isset( $q[1] ) ? $q[1] : "";
                          }
                  }
          } else {
                  $stripped2 = $stripped;
          }

          # Pass 3: <pre>
          while ( "" != $stripped2 ) {
                  $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
                  $stripped3 .= $p[0];
                  if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                          $stripped2 = "";
                  } else {
                          $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
                          ++$state['presecs'];

                          if ( $render ) {
                                  $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
                          } else {
                                  $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
                          }

                          $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
                          $stripped2 = isset( $q[1] ) ? $q[1] : "";
                  }
          }
          return $stripped3;
  }
 165
 # Puts back the sections recorded in a state array built by strip().
 #   $text  - text containing placeholders
 #   $state - (by reference, only read here) the strip state
 # Placeholders are resolved in the order pre, math, nowiki, so a nowiki
 # placeholder sitting inside restored pre/math content is still replaced.
 function unstrip( $text, &$state )
 {
         foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
                 $total = $state[$kind . 'secs'];
                 for ( $n = 1; $n <= $total; ++$n ) {
                         $placeholder = $state[$kind . 'unq'] . sprintf("%08X", $n);
                         $text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
                 }
         }
         return $text;
 }
 181
 # Builds the HTML listing appended to a category page.
 #
 # Returns "" when category magic is disabled in the parser options or when
 # the part of the current title before the first ":" is not the localized
 # "category" message. Otherwise returns a line break followed by up to two
 # sections: sub-categories (linked titles that themselves start with the
 # category prefix) and articles, each as a comma-separated list of links.
 #
 # The skin comes from the parser options rather than from $wgUser, in line
 # with the rule at the top of this file and with the other methods here.
 function categoryMagic ()
 {
         global $wgLang ;
         if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
         $id = $this->mTitle->getArticleID() ;
         $cat = ucfirst ( wfMsg ( "category" ) ) ;
         $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
         if ( $cat != $ti[0] ) return "" ;
         # A title with no ":" has no second element
         $catname = isset ( $ti[1] ) ? $ti[1] : "" ;

         # "clear" is the BR attribute that ends floats
         $r = "<br clear=all>\n" ;

         $articles = array() ;
         $children = array() ;

         $sk =& $this->mOptions->getSkin() ;

         # Pages linking to this category are looked up through brokenlinks
         $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;

         $res = wfQuery ( $sql, DB_READ ) ;
         while ( $x = wfFetchObject ( $res ) )
         {
                 # Rebuild the full title text: "Namespace:Title", or just "Title"
                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                 if ( $t != "" ) $t .= ":" ;
                 $t .= $x->cur_title ;

                 $y = explode ( ":" , $t , 2 ) ;
                 if ( count ( $y ) == 2 && $y[0] == $cat ) {
                         # A category page: list it under its bare name
                         array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
                 } else {
                         array_push ( $articles , $sk->makeLink ( $t ) ) ;
                 }
         }
         wfFreeResult ( $res ) ;

         # Children
         if ( count ( $children ) > 0 )
         {
                 asort ( $children ) ;
                 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                 $r .= implode ( ", " , $children ) ;
         }

         # Articles
         if ( count ( $articles ) > 0 )
         {
                 asort ( $articles ) ;
                 $h =  wfMsg( "category_header", $catname );
                 $r .= "<h2>{$h}</h2>\n" ;
                 $r .= implode ( ", " , $articles ) ;
         }

         return $r ;
 }
 251
 # Returns the list of attribute names that may stay on an HTML tag
 # (no scripting attributes). The order of the original list is kept,
 # including the second "width" in the table group.
 function getHTMLattrs ()
 {
         $groups = array(
                 # General presentation
                 array( "title", "align", "lang", "dir", "width", "height", "bgcolor" ),
                 # BR, HR
                 array( "clear", "noshade" ),
                 # BLOCKQUOTE, Q, FONT
                 array( "cite", "size", "face", "color" ),
                 # For various lists, mostly deprecated but safe
                 array( "type", "start", "value", "compact" ),
                 # Tables
                 array( "summary", "width", "border", "frame", "rules",
                        "cellspacing", "cellpadding", "valign", "char",
                        "charoff", "colgroup", "col", "span", "abbr", "axis",
                        "headers", "scope", "rowspan", "colspan" ),
                 # For CSS
                 array( "id", "class", "name", "style" )
         );

         $htmlattrs = array() ;
         foreach ( $groups as $group ) {
                 foreach ( $group as $attr ) {
                         $htmlattrs[] = $attr ;
                 }
         }
         return $htmlattrs ;
 }
 268
 # Filters the attribute part of an HTML tag.
 #   $t - the raw attribute text (everything after the tag name)
 # Returns the trimmed text with every attribute whose name is not in
 # getHTMLattrs() removed, or "" when the remaining text looks like a
 # style attribute carrying something risky (see below).
 #
 # The filtering goes through preg_replace_callback() instead of the /e
 # modifier: /e evaluated the attribute value as part of a double-quoted PHP
 # string (so "{${...}}" in a value was executed), added backslashes before
 # single quotes, and is not available at all from PHP 7 on.
 function fixTagAttributes ( $t )
 {
         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

         # Strip non-approved attributes from the tag
         $t = preg_replace_callback(
                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
                 array( $this, 'fixTagAttributesCallback' ),
                 $t);

         # Strip javascript "expression" from stylesheets. Brute force approach:
         # If anything offensive is found, all attributes of the HTML tag are dropped.
         # "Offensive" means: after "style=", the word "expression", the text "tp"
         # followed by any number of "s" and "://", or "url(".
         if( preg_match(
                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                 wfMungeToUtf8( $t ) ) )
         {
                 $t="";
         }

         return trim ( $t ) ;
 }

 # Replacement callback for fixTagAttributes().
 #   $m - match array: [1] attribute name, [3] value (absent for bare names)
 # Returns name, or name=value, for approved names; "" for everything else.
 /* private */ function fixTagAttributesCallback ( $m )
 {
         if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) return "" ;
         $value = isset ( $m[3] ) ? $m[3] : "" ;
         if ( $value !== "" ) return "{$m[1]}={$value}" ;
         return $m[1] ;
 }
 291
 # Converts wiki table markup to HTML, one input line at a time.
 #   $t - text, lines separated by "\n"
 # Returns the text with table lines replaced by HTML.
 #
 # Line prefixes handled while at least one table is open:
 #   {|   open a table (the rest of the line is its attributes; this one is
 #        recognised anywhere, so tables nest)
 #   |}   close the current table, plus any open cell and row
 #   |-   end the current row; the rest of the line (after any further
 #        dashes) becomes the attributes of the next row
 #   |+   caption
 #   |    data cells, "||" separates cells on one line
 #   !    header cells, "!!" or "||" separates cells on one line
 # Inside a cell, text before the first "|" is taken as the cell attributes.
 # Tables still open at the end of the text are closed automatically.
 function doTableStuff ( $t )
 {
         $t = explode ( "\n" , $t ) ;

         # Four parallel stacks, one entry per currently open table:
         $td = array () ;  # Is currently a cell tag open?
         $ltd = array () ; # Name of the last cell tag (TD, TH or CAPTION)
         $tr = array () ;  # Is currently a tr tag open?
         $ltr = array () ; # Attributes waiting for the next tr tag

         foreach ( $t AS $k => $x )
         {
                 $x = rtrim ( $x ) ;
                 $fc = substr ( $x , 0 , 1 ) ;
                 if ( "{|" == substr ( $x , 0 , 2 ) )
                 {
                         # Attributes start right after "{|"; fixTagAttributes() trims
                         # them, so a separating space is optional.
                         $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $tr , false ) ;
                         array_push ( $ltr , "" ) ;
                 }
                 else if ( count ( $td ) == 0 ) { } # Not inside a table: leave the line alone
                 else if ( "|}" == substr ( $x , 0 , 2 ) )
                 {
                         $z = "</table>\n" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                 }
                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
                 {
                         $x = substr ( $x , 1 ) ;
                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
                         $z = "" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                         # The new row is only opened when its first cell arrives
                         array_push ( $tr , false ) ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
                 }
                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Cells and caption
                 {
                         if ( "|+" == substr ( $x , 0 , 2 ) )
                         {
                                 $fc = "+" ;
                                 $x = substr ( $x , 1 ) ;
                         }
                         $after = substr ( $x , 1 ) ;
                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
                         $after = explode ( "||" , $after ) ;
                         $t[$k] = "" ;
                         foreach ( $after AS $theline )
                         {
                                 $z = "" ;
                                 if ( $fc != "+" )
                                 {
                                         # Open a row first unless one is open already
                                         $tra = array_pop ( $ltr ) ;
                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                         array_push ( $tr , true ) ;
                                         array_push ( $ltr , "" ) ;
                                 }

                                 # Close the previous cell; its end tag goes before a new <tr>
                                 $l = array_pop ( $ltd ) ;
                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                                 if ( $fc == "|" ) $l = "TD" ;
                                 else if ( $fc == "!" ) $l = "TH" ;
                                 else if ( $fc == "+" ) $l = "CAPTION" ;
                                 else $l = "" ;
                                 array_push ( $ltd , $l ) ;
                                 $y = explode ( "|" , $theline , 2 ) ;
                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                                 $t[$k] .= $y ;
                                 array_push ( $td , true ) ;
                         }
                 }
         }

         # Closing open cell, tr && table. The cell is closed with the tag that
         # opened it (TD, TH or CAPTION), not always with </td>.
         while ( count ( $td ) > 0 )
         {
                 $l = array_pop ( $ltd ) ;
                 array_pop ( $ltr ) ;
                 if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
                 $t[] = "</table>" ;
         }

         $t = implode ( "\n" , $t ) ;
         return $t ;
 }
 391
 392         # Well, OK, it's actually about 14 passes.  But since all the
 393         # hard lifting is done inside PHP's regex code, it probably
 394         # wouldn't speed things up much to add a real parser.
 395         #
 # Runs the conversion passes over text that strip() has already processed.
 #   $text      - wikitext with placeholders in place of stripped sections
 #   $linestart - handed unchanged to doBlockLevels()
 # Returns the converted text, with the categoryMagic() listing appended.
 #
 # The order of the calls below is significant; in particular external links
 # are replaced before internal ones.
 function doWikiPass2( $text, $linestart )
 {
         # Profiling label names this class, like the one in parse()
         $fname = "Parser::doWikiPass2";
         wfProfileIn( $fname );

         $text = $this->removeHTMLtags( $text );
         $text = $this->replaceVariables( $text );

         $text = str_replace ( "<HR>", "<hr>", $text );

         $text = $this->doHeadings( $text );
         $text = $this->doBlockLevels( $text, $linestart );

         if($this->mOptions->getUseDynamicDates()) {
                 global $wgDateFormatter;
                 $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
         }

         $text = $this->replaceExternalLinks( $text );
         $text = $this->replaceInternalLinks ( $text );
         $text = $this->doTableStuff ( $text ) ;

         $text = $this->formatHeadings( $text );

         $sk =& $this->mOptions->getSkin();
         $text = $sk->transformContent( $text );
         $text .= $this->categoryMagic () ;

         wfProfileOut( $fname );
         return $text;
 }
 428
 429
 # Turns "== Heading ==" style lines into <h1>..<h6> elements.
 # A heading must start at the beginning of a line, contain no "=" in its
 # text, and be followed by whitespace or the end of the line. Levels are
 # tried from 6 down to 1 so the longest run of "=" is matched first.
 /* private */ function doHeadings( $text )
 {
         $level = 6;
         while ( $level >= 1 ) {
                 $marker = substr( "======", 0, $level );
                 $pattern = "/^{$marker}([^=]+){$marker}(\\s|$)/m";
                 $replacement = "<h{$level}>\\1</h{$level}>\\2";
                 $text = preg_replace( $pattern, $replacement, $text );
                 --$level;
         }
         return $text;
 }
 439
 440         # Note: we have to do external links before the internal ones,
 441         # and otherwise take great care in the order of things here, so
 442         # that we don't end up interpreting some URLs twice.
 443
 # Replaces external links for every supported protocol, one protocol at a
 # time and in a fixed order, by calling subReplaceExternalLinks().
 #   $text - text to process
 # Returns the text with the links replaced.
 /* private */ function replaceExternalLinks( $text )
 {
         # Profiling label names this class, like the one in parse()
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         # protocol => value of the $autonumber argument (true only for HTTP(S))
         $protocols = array(
                 "http" => true,
                 "https" => true,
                 "ftp" => false,
                 "irc" => false,
                 "gopher" => false,
                 "news" => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 458
 # Replaces the external links of a single protocol.
 #   $s          - text to process
 #   $protocol   - protocol name without the colon, e.g. "http"
 #   $autonumber - true: bracketed links without a label are shown as "[n]"
 #                 using $this->mAutonumber, and (if the options allow external
 #                 images) bare image URLs become images; false: unlabelled
 #                 bracketed links show their escaped URL
 # Returns the text with bare URLs and [url] / [url label] forms replaced.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
         # Stands in for the protocol inside generated markup until the bare-URL
         # pass is over, so a URL that was already handled is not matched again.
         $placeholder = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
         $urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

         # this is  the list of separators that should be ignored if they
         # are the last character of an URL but that should be included
         # if they occur within the URL, e.g. "go to www.foo.com, where .."
         # in this case, the last comma should not become part of the URL,
         # but in "www.foo.com/123,2342,32.htm" it should.
         $separators = ",;\.:";
         $fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
         $imageExts = "gif|png|jpg|jpeg";

         # PLEASE NOTE: The curly braces { } are not part of the regex,
         # they are interpreted as part of the string (used to tell PHP
         # that the content of the string should be inserted there).
         $imagePattern = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$separators}]+)\\/([{$fileChars}]+)\\." .
           "((?i){$imageExts})([^{$urlChars}]|$)/";

         $barePattern = "/(^|[^\\[])({$protocol}:)(([".$urlChars."]|[".$separators."][".$urlChars."])+)([^". $urlChars . $separators. "]|[".$separators."]|$)/";
         $skin =& $this->mOptions->getSkin();

         if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
                 $imageTag = $skin->makeImage( "{$placeholder}:\\3/\\4.\\5", "\\4.\\5" );
                 $s = preg_replace( $imagePattern, "\\1" . $imageTag . "\\6", $s );
         }

         # Bare URLs: the replacement template is built once, with \3 standing
         # for the matched URL remainder.
         $target = "{$placeholder}:\\3";
         $attribs = $skin->getExternalLinkAttributes( $target, wfEscapeHTML( $target ) );
         $anchor = "<a href=\"{$target}\"" . $attribs . ">" . wfEscapeHTML( $target ) . "</a>";
         $s = preg_replace( $barePattern, "\\1" . $anchor . "\\5", $s );
         $s = str_replace( $placeholder, $protocol, $s );

         # Bracketed links: cut the text at every "[protocol:". The leading
         # space (removed again right away) keeps the first piece non-empty.
         $pieces = explode( "[{$protocol}:", " " . $s );
         $s = substr( array_shift( $pieces ), 1 );

         $urlOnly = "/^([{$urlChars}"."{$separators}]+)](.*)\$/sD";
         $urlAndLabel = "/^([{$urlChars}"."{$separators}]+)\\s+([^\\]]+)](.*)\$/sD";

         foreach ( $pieces as $piece ) {
                 if ( preg_match( $urlOnly, $piece, $hit ) ) {
                         # [url]
                         $link = "{$protocol}:{$hit[1]}";
                         $trail = $hit[2];
                         if ( $autonumber ) {
                                 $label = "[" . ++$this->mAutonumber . "]";
                         } else {
                                 $label = wfEscapeHTML( $link );
                         }
                 } else if ( preg_match( $urlAndLabel, $piece, $hit ) ) {
                         # [url label]
                         $link = "{$protocol}:{$hit[1]}";
                         $label = $hit[2];
                         $trail = $hit[3];
                 } else {
                         # Not a well-formed link: put the cut-off prefix back
                         $s .= "[{$protocol}:" . $piece;
                         continue;
                 }

                 # Printable output repeats the URL in parentheses after the link
                 $paren = $this->mOptions->getPrintable()
                         ? " (<i>" . htmlspecialchars ( $link ) . "</i>)"
                         : "";
                 $attribs = $skin->getExternalLinkAttributes( $link, $label );
                 $s .= "<a href='{$link}'{$attribs}>{$label}</a>{$paren}{$trail}";
         }
         return $s;
 }
 521
 # Handles a ''' token by toggling bold.
 #   $state - (by reference) "em" and "strong" each hold FALSE or the token
 #            position at which that element was opened
 #   $token - the current token; only its "pos" entry is read
 # Returns the HTML to emit.
 /* private */ function handle3Quotes( &$state, $token )
 {
         if ( !$state["strong"] ) {
                 $state["strong"] = $token["pos"];
                 return "<strong>";
         }

         # ''' lala ''lala ''' : <em> was opened inside <strong>, so it has to
         # be closed first and re-opened after </strong>.
         $emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
         $state["strong"] = FALSE;

         return $emOpenedInside ? "</em></strong><em>" : "</strong>";
 }
 539
 # Handles a '' token by toggling italics.
 #   $state - (by reference) "em" and "strong" each hold FALSE or the token
 #            position at which that element was opened
 #   $token - the current token; only its "pos" entry is read
 # Returns the HTML to emit.
 /* private */ function handle2Quotes( &$state, $token )
 {
         if ( !$state["em"] ) {
                 $state["em"] = $token["pos"];
                 return "<em>";
         }

         # ''lala'''lala'' ....''' : <strong> was opened inside <em>, so it has
         # to be closed first and re-opened after </em>.
         $strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
         $state["em"] = FALSE;

         return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
 }
 557
 # Handles a ''''' token, which toggles bold and italics together.
 #   $state - (by reference) "em" and "strong" each hold FALSE or the token
 #            position at which that element was opened
 #   $token - the current token; only its "pos" entry is read
 # Returns the HTML to emit:
 #   both open   -> close both, the more recently opened one first
 #   only em     -> close em, open strong
 #   only strong -> close strong, open em
 #   none open   -> open strong, then em
 /* private */ function handle5Quotes( &$state, $token )
 {
         if ( $state["em"] && $state["strong"] ) {
                 # The smaller position was opened earlier, so it is closed last
                 if ( $state["em"] < $state["strong"] ) {
                         $s = "</strong></em>";
                 } else {
                         $s = "</em></strong>";
                 }
                 $state["strong"] = $state["em"] = FALSE;
         } elseif ( $state["em"] ) {
                 $s = "</em><strong>";
                 $state["em"] = FALSE;
                 $state["strong"] = $token["pos"];
         } elseif ( $state["strong"] ) {
                 $s = "</strong><em>";
                 $state["strong"] = FALSE;
                 $state["em"] = $token["pos"];
         } else { # not $em and not $strong
                 $s = "<strong><em>";
                 $state["strong"] = $state["em"] = $token["pos"];
         }
         return $s;
 }
 581
 582         /* private */ function replaceInternalLinks( $str )
 583         {
 584                 global $wgLang; # for language specific parser hook
 585
 586                 $tokenizer=Tokenizer::newFromString( $str );
 587                 $tokenStack = array();
 588
 589                 $s="";
 590                 $state["em"]      = FALSE;
 591                 $state["strong"]  = FALSE;
 592                 $tagIsOpen = FALSE;
 593
 594                 # The tokenizer splits the text into tokens and returns them one by one.
 595                 # Every call to the tokenizer returns a new token.
 596                 while ( $token = $tokenizer->nextToken() )
 597                 {
 598                         switch ( $token["type"] )
 599                         {
 600                                 case "text":
 601                                         # simple text with no further markup
 602                                         $txt = $token["text"];
 603                                         break;
 604                                 case "[[":
 605                                         # link opening tag.
 606                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 607                                         $tagIsOpen = TRUE;
 608                                         array_push( $tokenStack, $token );
 609                                         $txt="";
 610                                         break;
 611                                 case "]]":
 612                                         # link close tag.
 613                                         # get text from stack, glue it together, and call the code to handle a
 614                                         # link
 615                                         if ( count( $tokenStack ) == 0 )
 616                                         {
 617                                                 # stack empty. Found a ]] without an opening [[
 618                                                 $txt = "]]";
 619                                         } else {
 620                                                 $linkText = "";
 621                                                 $lastToken = array_pop( $tokenStack );
 622                                                 while ( $lastToken["type"] != "[[" )
 623                                                 {
 624                                                         if( !empty( $lastToken["text"] ) ) {
 625                                                                 $linkText = $lastToken["text"] . $linkText;
 626                                                         }
 627                                                         $lastToken = array_pop( $tokenStack );
 628                                                 }
 629                                                 $txt = $linkText ."]]";
 630                                                 if( isset( $lastToken["text"] ) ) {
 631                                                         $prefix = $lastToken["text"];
 632                                                 } else {
 633                                                         $prefix = "";
 634                                                 }
 635                                                 $nextToken = $tokenizer->previewToken();
 636                                                 if ( $nextToken["type"] == "text" )
 637                                                 {
 638                                                         # Preview just looks at it. Now we have to fetch it.
 639                                                         $nextToken = $tokenizer->nextToken();
 640                                                         $txt .= $nextToken["text"];
 641                                                 }
 642                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 643                                         }
 644                                         $tagIsOpen = (count( $tokenStack ) != 0);
 645                                         break;
 646                                 case "----":
 647                                         $txt = "\n<hr>\n";
 648                                         break;
 649                                 case "'''":
 650                                         # This and the three next ones handle quotes
 651                                         $txt = $this->handle3Quotes( $state, $token );
 652                                         break;
 653                                 case "''":
 654                                         $txt = $this->handle2Quotes( $state, $token );
 655                                         break;
 656                                 case "'''''":
 657                                         $txt = $this->handle5Quotes( $state, $token );
 658                                         break;
 659                                 case "":
 660                                         # empty token
 661                                         $txt="";
 662                                         break;
 663                                 case "RFC ":
 664                                         if ( $tagIsOpen ) {
 665                                                 $txt = "RFC ";
 666                                         } else {
 667                                                 $txt = $this->doMagicRFC( $tokenizer );
 668                                         }
 669                                         break;
 670                                 case "ISBN ":
 671                                         if ( $tagIsOpen ) {
 672                                                 $txt = "ISBN ";
 673                                         } else {
 674                                                 $txt = $this->doMagicISBN( $tokenizer );
 675                                         }
 676                                         break;
 677                                 default:
 678                                         # Call language specific Hook.
 679                                         $txt = $wgLang->processToken( $token, $tokenStack );
 680                                         if ( NULL == $txt ) {
 681                                                 # An unkown token. Highlight.
 682                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 683                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 684                                         }
 685                                         break;
 686                         }
 687                         # If we're parsing the interior of a link, don't append the interior to $s,
 688                         # but push it to the stack so it can be processed when a ]] token is found.
 689                         if ( $tagIsOpen  && $txt != "" ) {
 690                                 $token["type"] = "text";
 691                                 $token["text"] = $txt;
 692                                 array_push( $tokenStack, $token );
 693                         } else {
 694                                 $s .= $txt;
 695                         }
 696                 } #end while
 697                 if ( count( $tokenStack ) != 0 )
 698                 {
 699                         # still objects on stack. opened [[ tag without closing ]] tag.
 700                         $txt = "";
 701                         while ( $lastToken = array_pop( $tokenStack ) )
 702                         {
 703                                 if ( $lastToken["type"] == "text" )
 704                                 {
 705                                         $txt = $lastToken["text"] . $txt;
 706                                 } else {
 707                                         $txt = $lastToken["type"] . $txt;
 708                                 }
 709                         }
 710                         $s .= $txt;
 711                 }
 712                 return $s;
 713         }
 714
 715         /* private */ function handleInternalLink( $line, $prefix )
 716         {
 717                 global $wgLang, $wgLinkCache;
 718                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 719                 static $fname = "OutputPage::replaceInternalLinks" ;
 720                 wfProfileIn( $fname );
 721
 722                 wfProfileIn( "$fname-setup" );
 723                 static $tc = FALSE;
 724                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 725                 $sk =& $this->mOptions->getSkin();
 726
 727                 # Match a link having the form [[namespace:link|alternate]]trail
 728                 static $e1 = FALSE;
 729                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 730                 # Match the end of a line for a word that's not followed by whitespace,
 731                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 732                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 733                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 734                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 735
 736
 737                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 738                 static $image = FALSE;
 739                 static $special = FALSE;
 740                 static $media = FALSE;
 741                 static $category = FALSE;
 742                 if ( !$image ) { $image = Namespace::getImage(); }
 743                 if ( !$special ) { $special = Namespace::getSpecial(); }
 744                 if ( !$media ) { $media = Namespace::getMedia(); }
 745                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 746
 747                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 748
 749                 wfProfileOut( "$fname-setup" );
 750                 $s = "";
 751
 752                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 753                         $text = $m[2];
 754                         $trail = $m[3];
 755                 } else { # Invalid form; output directly
 756                         $s .= $prefix . "[[" . $line ;
 757                         return $s;
 758                 }
 759
 760                 /* Valid link forms:
 761                 Foobar -- normal
 762                 :Foobar -- override special treatment of prefix (images, language links)
 763                 /Foobar -- convert to CurrentPage/Foobar
 764                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 765                 */
 766                 $c = substr($m[1],0,1);
 767                 $noforce = ($c != ":");
 768                 if( $c == "/" ) { # subpage
 769                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 770                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 771                                 $noslash=$m[1];
 772                         } else {
 773                                 $noslash=substr($m[1],1);
 774                         }
 775                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 776                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 777                                 if( "" == $text ) {
 778                                         $text= $m[1];
 779                                 } # this might be changed for ugliness reasons
 780                         } else {
 781                                 $link = $noslash; # no subpage allowed, use standard link
 782                         }
 783                 } elseif( $noforce ) { # no subpage
 784                         $link = $m[1];
 785                 } else {
 786                         $link = substr( $m[1], 1 );
 787                 }
 788                 if( "" == $text )
 789                         $text = $link;
 790
 791                 $nt = Title::newFromText( $link );
 792                 if( !$nt ) {
 793                         $s .= $prefix . "[[" . $line;
 794                         return $s;
 795                 }
 796                 $ns = $nt->getNamespace();
 797                 $iw = $nt->getInterWiki();
 798                 if( $noforce ) {
 799                         if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 800                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 801                                 $s .= $prefix . $trail;
 802                                 return $s;
 803                         }
 804                         if( $ns == $image ) {
 805                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 806                                 $wgLinkCache->addImageLinkObj( $nt );
 807                                 return $s;
 808                         }
 809                 }
 810                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 811                     ( strpos( $link, "#" ) == FALSE ) ) {
 812                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 813                         return $s;
 814                 }
 815                 if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
 816                         $t = explode ( ":" , $nt->getText() ) ;
 817                         array_shift ( $t ) ;
 818                         $t = implode ( ":" , $t ) ;
 819                         $t = $wgLang->ucFirst ( $t ) ;
 820 #                       $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
 821                         $nnt = Title::newFromText ( $category.":".$t ) ;
 822                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 823                         $this->mCategoryLinks[] = $t ;
 824                         $s .= $prefix . $trail ;
 825                         return $s ;
 826                 }
 827                 if( $ns == $media ) {
 828                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 829                         $wgLinkCache->addImageLinkObj( $nt );
 830                         return $s;
 831                 } elseif( $ns == $special ) {
 832                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 833                         return $s;
 834                 }
 835                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 836
 837                 wfProfileOut( $fname );
 838                 return $s;
 839         }
 840
 841         # Some functions here used by doBlockLevels()
 842         #
 843         /* private */ function closeParagraph()
 844         {
 845                 $result = "";
 846                 if ( 0 != strcmp( "p", $this->mLastSection ) &&
 847                   0 != strcmp( "", $this->mLastSection ) ) {
 848                         $result = "</" . $this->mLastSection  . ">";
 849                 }
 850                 $this->mLastSection = "";
 851                 return $result."\n";
 852         }
 853         # getCommon() returns the length of the longest common substring
 854         # of both arguments, starting at the beginning of both.
 855         #
 856         /* private */ function getCommon( $st1, $st2 )
 857         {
 858                 $fl = strlen( $st1 );
 859                 $shorter = strlen( $st2 );
 860                 if ( $fl < $shorter ) { $shorter = $fl; }
 861
 862                 for ( $i = 0; $i < $shorter; ++$i ) {
 863                         if ( $st1{$i} != $st2{$i} ) { break; }
 864                 }
 865                 return $i;
 866         }
 867         # These next three functions open, continue, and close the list
 868         # element appropriate to the prefix character passed into them.
 869         #
 870         /* private */ function openList( $char )
 871     {
 872                 $result = $this->closeParagraph();
 873
 874                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 875                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 876                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 877                 else if ( ";" == $char ) {
 878                         $result .= "<dl><dt>";
 879                         $this->mDTopen = true;
 880                 }
 881                 else { $result = "<!-- ERR 1 -->"; }
 882
 883                 return $result;
 884         }
 885
 886         /* private */ function nextItem( $char )
 887         {
 888                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 889                 else if ( ":" == $char || ";" == $char ) {
 890                         $close = "</dd>";
 891                         if ( $this->mDTopen ) { $close = "</dt>"; }
 892                         if ( ";" == $char ) {
 893                                 $this->mDTopen = true;
 894                                 return $close . "<dt>";
 895                         } else {
 896                                 $this->mDTopen = false;
 897                                 return $close . "<dd>";
 898                         }
 899                 }
 900                 return "<!-- ERR 2 -->";
 901         }
 902
 903         /* private */function closeList( $char )
 904         {
 905                 if ( "*" == $char ) { $text = "</li></ul>"; }
 906                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 907                 else if ( ":" == $char ) {
 908                         if ( $this->mDTopen ) {
 909                                 $this->mDTopen = false;
 910                                 $text = "</dt></dl>";
 911                         } else {
 912                                 $text = "</dd></dl>";
 913                         }
 914                 }
 915                 else {  return "<!-- ERR 3 -->"; }
 916                 return $text."\n";
 917         }
 918
 919         /* private */ function doBlockLevels( $text, $linestart )
 920         {
 921                 $fname = "OutputPage::doBlockLevels";
 922                 wfProfileIn( $fname );
 923                 # Parsing through the text line by line.  The main thing
 924                 # happening here is handling of block-level elements p, pre,
 925                 # and making lists from lines starting with * # : etc.
 926                 #
 927                 $a = explode( "\n", $text );
 928                 $text = $lastPref = "";
 929                 $this->mDTopen = $inBlockElem = false;
 930
 931                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 932                 foreach ( $a as $t ) {
 933                         if ( "" != $text ) { $text .= "\n"; }
 934
 935                         $oLine = $t;
 936                         $opl = strlen( $lastPref );
 937                         $npl = strspn( $t, "*#:;" );
 938                         $pref = substr( $t, 0, $npl );
 939                         $pref2 = str_replace( ";", ":", $pref );
 940                         $t = substr( $t, $npl );
 941
 942                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 943                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 944
 945                                 if ( ";" == substr( $pref, -1 ) ) {
 946                                         $cpos = strpos( $t, ":" );
 947                                         if ( ! ( false === $cpos ) ) {
 948                                                 $term = substr( $t, 0, $cpos );
 949                                                 $text .= $term . $this->nextItem( ":" );
 950                                                 $t = substr( $t, $cpos + 1 );
 951                                         }
 952                                 }
 953                         } else if (0 != $npl || 0 != $opl) {
 954                                 $cpl = $this->getCommon( $pref, $lastPref );
 955
 956                                 while ( $cpl < $opl ) {
 957                                         $text .= $this->closeList( $lastPref{$opl-1} );
 958                                         --$opl;
 959                                 }
 960                                 if ( $npl <= $cpl && $cpl > 0 ) {
 961                                         $text .= $this->nextItem( $pref{$cpl-1} );
 962                                 }
 963                                 while ( $npl > $cpl ) {
 964                                         $char = substr( $pref, $cpl, 1 );
 965                                         $text .= $this->openList( $char );
 966
 967                                         if ( ";" == $char ) {
 968                                                 $cpos = strpos( $t, ":" );
 969                                                 if ( ! ( false === $cpos ) ) {
 970                                                         $term = substr( $t, 0, $cpos );
 971                                                         $text .= $term . $this->nextItem( ":" );
 972                                                         $t = substr( $t, $cpos + 1 );
 973                                                 }
 974                                         }
 975                                         ++$cpl;
 976                                 }
 977                                 $lastPref = $pref2;
 978                         }
 979                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
 980                                 if ( preg_match(
 981                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
 982                                         $text .= $this->closeParagraph();
 983                                         $inBlockElem = true;
 984                                 }
 985                                 if ( ! $inBlockElem ) {
 986                                         if ( " " == $t{0} ) {
 987                                                 $newSection = "pre";
 988                                                 # $t = wfEscapeHTML( $t );
 989                                         }
 990                                         else { $newSection = "p"; }
 991
 992                                         if ( 0 == strcmp( "", trim( $oLine ) ) ) {
 993                                                 $text .= $this->closeParagraph();
 994                                                 $text .= "<" . $newSection . ">";
 995                                         } else if ( 0 != strcmp( $this->mLastSection,
 996                                           $newSection ) ) {
 997                                                 $text .= $this->closeParagraph();
 998                                                 if ( 0 != strcmp( "p", $newSection ) ) {
 999                                                         $text .= "<" . $newSection . ">";
1000                                                 }
1001                                         }
1002                                         $this->mLastSection = $newSection;
1003                                 }
1004                                 if ( $inBlockElem &&
1005                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
1006                                         $inBlockElem = false;
1007                                 }
1008                         }
1009                         $text .= $t;
1010                 }
1011                 while ( $npl ) {
1012                         $text .= $this->closeList( $pref2{$npl-1} );
1013                         --$npl;
1014                 }
1015                 if ( "" != $this->mLastSection ) {
1016                         if ( "p" != $this->mLastSection ) {
1017                                 $text .= "</" . $this->mLastSection . ">";
1018                         }
1019                         $this->mLastSection = "";
1020                 }
1021                 wfProfileOut( $fname );
1022                 return $text;
1023         }
1024
1025         /* private */ function replaceVariables( $text )
1026         {
1027                 global $wgLang, $wgCurOut;
1028                 $fname = "OutputPage::replaceVariables";
1029                 wfProfileIn( $fname );
1030
1031                 $magic = array();
1032
1033                 # Basic variables
1034                 # See Language.php for the definition of each magic word
1035                 # As with sigs, this uses the server's local time -- ensure
1036                 # this is appropriate for your audience!
1037
1038                 $magic[MAG_CURRENTMONTH] = date( "m" );
1039                 $magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
1040                 $magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
1041                 $magic[MAG_CURRENTDAY] = date("j");
1042                 $magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
1043                 $magic[MAG_CURRENTYEAR] = date( "Y" );
1044                 $magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );
1045
1046                 $this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);
1047
1048                 $mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
1049                 if ( $mw->match( $text ) ) {
1050                         $v = wfNumberOfArticles();
1051                         $text = $mw->replace( $v, $text );
1052                         if( $mw->getWasModified() ) { $this->mOutput->mContainsOldMagic++; }
1053                 }
1054
1055                 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
1056                 # The callbacks are at the bottom of this file
1057                 $wgCurOut = $this;
1058                 $mw =& MagicWord::get( MAG_MSG );
1059                 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1060                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1061
1062                 $mw =& MagicWord::get( MAG_MSGNW );
1063                 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1064                 if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
1065
1066                 wfProfileOut( $fname );
1067                 return $text;
1068         }
1069
1070         # Cleans up HTML, removes dangerous tags and attributes
1071         /* private */ function removeHTMLtags( $text )
1072         {
1073                 $fname = "OutputPage::removeHTMLtags";
1074                 wfProfileIn( $fname );
1075                 $htmlpairs = array( # Tags that must be closed
1076                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1077                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1078                         "strike", "strong", "tt", "var", "div", "center",
1079                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1080                         "ruby", "rt" , "rb" , "rp"
1081                 );
1082                 $htmlsingle = array(
1083                         "br", "p", "hr", "li", "dt", "dd"
1084                 );
1085                 $htmlnest = array( # Tags that can be nested--??
1086                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1087                         "dl", "font", "big", "small", "sub", "sup"
1088                 );
1089                 $tabletags = array( # Can only appear inside table
1090                         "td", "th", "tr"
1091                 );
1092
1093                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1094                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1095
1096                 $htmlattrs = $this->getHTMLattrs () ;
1097
1098                 # Remove HTML comments
1099                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1100
1101                 $bits = explode( "<", $text );
1102                 $text = array_shift( $bits );
1103                 $tagstack = array(); $tablestack = array();
1104
1105                 foreach ( $bits as $x ) {
1106                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1107                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1108                           $x, $regs );
1109                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1110                         error_reporting( $prev );
1111
1112                         $badtag = 0 ;
1113                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1114                                 # Check our stack
1115                                 if ( $slash ) {
1116                                         # Closing a tag...
1117                                         if ( ! in_array( $t, $htmlsingle ) &&
1118                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1119                                                 array_push( $tagstack, $ot );
1120                                                 $badtag = 1;
1121                                         } else {
1122                                                 if ( $t == "table" ) {
1123                                                         $tagstack = array_pop( $tablestack );
1124                                                 }
1125                                                 $newparams = "";
1126                                         }
1127                                 } else {
1128                                         # Keep track for later
1129                                         if ( in_array( $t, $tabletags ) &&
1130                                           ! in_array( "table", $tagstack ) ) {
1131                                                 $badtag = 1;
1132                                         } else if ( in_array( $t, $tagstack ) &&
1133                                           ! in_array ( $t , $htmlnest ) ) {
1134                                                 $badtag = 1 ;
1135                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1136                                                 if ( $t == "table" ) {
1137                                                         array_push( $tablestack, $tagstack );
1138                                                         $tagstack = array();
1139                                                 }
1140                                                 array_push( $tagstack, $t );
1141                                         }
1142                                         # Strip non-approved attributes from the tag
1143                                         $newparams = $this->fixTagAttributes($params);
1144
1145                                 }
1146                                 if ( ! $badtag ) {
1147                                         $rest = str_replace( ">", "&gt;", $rest );
1148                                         $text .= "<$slash$t $newparams$brace$rest";
1149                                         continue;
1150                                 }
1151                         }
1152                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1153                 }
1154                 # Close off any remaining tags
1155                 while ( $t = array_pop( $tagstack ) ) {
1156                         $text .= "</$t>\n";
1157                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1158                 }
1159                 wfProfileOut( $fname );
1160                 return $text;
1161         }
1162
1163 /*
1164  *
1165  * This function accomplishes several tasks:
1166  * 1) Auto-number headings if that option is enabled
1167  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1168  * 3) Add a Table of contents on the top for users who have enabled the option
1169  * 4) Auto-anchor headings
1170  *
1171  * It loops through all headlines, collects the necessary data, then splits up the
1172  * string and re-inserts the newly formatted headlines.
1173  *
1174  * */
/* private */ function formatHeadings( $text )
{
        # Rebuilds every <h1>..<h6> element found in $text. Each heading is
        # wrapped in a named anchor and, depending on the parser options, gets
        # an automatic number, a section edit link and/or a right-click edit
        # script. When the TOC option is on and there are more than three
        # headings, the table of contents built by the skin is inserted after
        # the first block of text.
        #
        # $text: HTML in which the headings are already <hN> elements.
        # Returns the text with the rebuilt headings (and TOC) spliced back in.

        $nh = $this->mOptions->getNumberHeadings();
        $st = $this->mOptions->getShowToc();
        if ( !$this->mTitle->userCanEdit() ) {
                # No editing aids on pages the user may not edit
                $es = 0;
                $esr = 0;
        } else {
                $es = $this->mOptions->getEditSection();
                $esr = $this->mOptions->getEditSectionOnRightClick();
        }

        # Inhibit editsection links if requested in the page
        $esw =& MagicWord::get( MAG_NOEDITSECTION );
        if ( $esw->matchAndRemove( $text ) ) {
                $es = 0;
        }
        # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
        # do not add TOC
        $mw =& MagicWord::get( MAG_NOTOC );
        if ( $mw->matchAndRemove( $text ) ) {
                $st = 0;
        }

        # never add the TOC to the Main Page. This is an entry page that should not
        # be more than 1-2 screens large anyway
        if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
                $st = 0;
        }

        # We need this to perform operations on the HTML
        $sk =& $this->mOptions->getSkin();

        # Get all headlines for numbering them and adding funky stuff like [edit]
        # links. $matches[1] = level digit, $matches[2] = rest of the opening
        # tag including ">", $matches[3] = heading content.
        preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );
        $severalHeadings = count( $matches[3] ) > 1;

        # All working state is initialised up front; the loop below used to
        # read these before they were ever assigned.
        $c = 0;                 # headline counter
        $toclevel = 0;          # current TOC indentation depth
        $toc = "";
        $full = "";
        $head = array();        # rebuilt headings, indexed like $matches
        $level = 0;
        $prevlevel = 0;
        $h = array();           # per-level headline counters
        $refers = array();      # anchor text => number of times seen so far
        $refcount = array();
        $toclines = 0;

        foreach ( $matches[3] as $headline ) {
                $numbering = "";
                $dot = 0;
                $head[$c] = "";

                if ( $level ) {
                        $prevlevel = $level;
                }
                $level = $matches[1][$c];

                if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
                        $h[$level] = 0; // reset when we enter a new level
                        $toc .= $sk->tocIndent( $level - $prevlevel );
                        $toclevel += $level - $prevlevel;
                }
                if ( ( $nh || $st ) && $level < $prevlevel ) {
                        $h[$level+1] = 0; // reset when we step back a level
                        $toc .= $sk->tocUnindent( $prevlevel - $level );
                        $toclevel -= $prevlevel - $level;
                }

                // count number of headlines for each level
                if ( !isset( $h[$level] ) ) {
                        $h[$level] = 0;
                }
                $h[$level]++;

                if ( $nh || $st ) {
                        // Dotted number made of the non-zero counters of levels 1..$level
                        for ( $i = 1; $i <= $level; $i++ ) {
                                if ( !empty( $h[$i] ) ) {
                                        if ( $dot ) {
                                                $numbering .= ".";
                                        }
                                        $numbering .= $h[$i];
                                        $dot = 1;
                                }
                        }
                }

                // The canonized header is a version of the header text safe to use for links
                // Avoid insertion of weird stuff like <math> by expanding the relevant sections
                $canonized_headline = $this->unstrip( $headline, $this->mStripState );
                $canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
                $tocline = trim( $canonized_headline );
                $canonized_headline = str_replace( '"', "", $canonized_headline );
                $canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );

                // count how many in assoc. array so we can track dupes in anchors
                if ( !isset( $refers[$canonized_headline] ) ) {
                        $refers[$canonized_headline] = 0;
                }
                $refers[$canonized_headline]++;
                $refcount[$c] = $refers[$canonized_headline];

                // Prepend the number to the heading text
                if ( $nh || $st ) {
                        $tocline = $numbering . " " . $tocline;

                        // Don't number the heading if it is the only one (looks silly)
                        if ( $nh && $severalHeadings ) {
                                $headline = $numbering . " " . $headline; // the two are different if the line contains a link
                        }
                }

                // Create the anchor for linking from the TOC to the section;
                // repeated headings get a "_N" suffix
                $anchor = $canonized_headline;
                if ( $refcount[$c] > 1 ) {
                        $anchor .= "_" . $refcount[$c];
                }
                if ( $st ) {
                        $toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
                }
                if ( $es ) {
                        $head[$c] .= $sk->editSectionLink( $c + 1 );
                }

                // Put it all together
                $head[$c] .= "<h" . $level . $matches[2][$c]
                 . "<a name=\"" . $anchor . "\">"
                 . $headline
                 . "</a>"
                 . "</h" . $level . ">";

                // Add the edit section link
                if ( $esr ) {
                        $head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
                }

                $c++;
        }

        if ( $st ) {
                $toclines = $c;
                $toc .= $sk->tocUnindent( $toclevel );
                $toc = $sk->tocTable( $toc );
        }

        // split up and insert constructed headlines
        $blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
        $i = 0;

        foreach ( $blocks as $block ) {
                if ( $es && $c > 0 && $i == 0 ) {
                        # This is the [edit] link that appears for the top block of text when
                        # section editing is enabled
                        $full .= $sk->editSectionLink( 0 );
                }
                $full .= $block;
                if ( $st && $toclines > 3 && !$i ) {
                        # Let's add a top anchor just in case we want to link to the top of the page
                        $full = "<a name=\"top\"></a>" . $full . $toc;
                }

                if ( !empty( $head[$i] ) ) {
                        $full .= $head[$i];
                }
                $i++;
        }

        return $full;
}
1330
# Handles the text following an "ISBN" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading blanks are dropped, the following run of digits,
# dashes and capital letters is taken as the ISBN and turned into a link to
# Special:Booksources, and the rest of the token text is appended unchanged.
# If no ISBN characters follow, or the next token is not text, plain "ISBN "
# text is returned instead.
#
# $tokenizer: object providing previewToken() and nextToken(); tokens are
#             arrays with at least a "type" key ("text" tokens also "text").
# Returns the HTML/text that replaces the magic word.
/* private */ function doMagicISBN( &$tokenizer )
{
        # NOTE(review): this loop relies on the tokenizer eventually yielding a
        # token with a non-empty type -- confirm against Tokenizer.
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" )
        {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # strspn() measures the leading run in one call and copes with an
        # empty or exhausted string, unlike reading offset 0 in a loop.
        $len = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        $len = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        # The link target uses the number without dashes
        $num = str_replace( "-", "", $isbn );

        if ( "" == $num ) {
                return "ISBN $blank$x";
        }

        $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
        return "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>" .
                $x;
}
# Handles the text following an "RFC" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed: leading blanks are dropped and the following run of digits is
# taken as the RFC number. The number is substituted for $1 in the "rfcurl"
# message and rendered as an external link using the skin's link attributes;
# the rest of the token text is appended unchanged. If no digits follow, or
# the next token is not text, plain "RFC " text is returned instead.
#
# $tokenizer: object providing previewToken() and nextToken(); tokens are
#             arrays with at least a "type" key ("text" tokens also "text").
# Returns the HTML/text that replaces the magic word.
/* private */ function doMagicRFC( &$tokenizer )
{
        # NOTE(review): this loop relies on the tokenizer eventually yielding a
        # token with a non-empty type -- confirm against Tokenizer.
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" )
        {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" ) {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789";

        # strspn() measures the leading run in one call and copes with an
        # empty or exhausted string, unlike reading offset 0 in a loop.
        $len = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        $len = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        if ( "" == $rfc ) {
                # Plain assignment: there is nothing to append to at this point
                return "RFC $blank$x";
        }

        $url = wfMsg( "rfcurl" );
        $url = str_replace( "$1", $rfc, $url );
        $sk =& $this->mOptions->getSkin();
        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
        return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1419
# Transforms wikitext before it is saved.
#
# The text is run through strip(), then pstPass2() (signatures, context
# links, {{SUBST:}} variables, trailing whitespace), and finally the stripped
# sections are put back with unstrip().
#
# $text:       wikitext to transform
# $title:      title of the page being saved, stored in $this->mTitle
# $user:       user performing the save, handed on to pstPass2()
# $options:    parser options, stored in $this->mOptions
# $clearState: when true, the parser state is reset first
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        if ( $clearState ) {
                $this->clearState();
        }
        $this->mTitle = $title;
        $this->mOptions = $options;

        # The strip state is kept in a local variable here rather than in
        # $this->mStripState
        $state = false;
        $stripped = $this->strip( $text, $state, false );
        $transformed = $this->pstPass2( $stripped, $user );

        return $this->unstrip( $transformed, $state );
}
1434
# Second pass of the pre-save transform.
#
# In order, this:
#  - expands ~~~~ to a user-page link plus timestamp, and ~~~ to the link alone
#  - completes "context" links such as [[page (context)|]] and [[|page]]
#  - substitutes {{SUBST:xxx}} variables through wfReplaceSubstVar
#  - trims trailing whitespace and removes the MAG_END marker
#
# $text: wikitext
# $user: user whose name and "nickname" option form the signature
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
        global $wgLang, $wgLocaltimezone;

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( "nickname" );
        if ( "" == $k ) { $k = $n; }

        /* Note: this is an ugly timezone hack for the European wikis */
        # NOTE(review): if TZ was not set beforehand, it is restored as an empty
        # value rather than unset -- confirm that is acceptable.
        if ( isset( $wgLocaltimezone ) ) {
                $oldtz = getenv( "TZ" );
                putenv( "TZ=$wgLocaltimezone" );
        }
        $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
          " (" . date( "T" ) . ")";
        if ( isset( $wgLocaltimezone ) ) {
                putenv( "TZ=$oldtz" );
        }

        # The tilde markers are literal strings, so str_replace() is used: a
        # regex replacement would read "$1" or "\1" inside a user name or
        # nickname as a backreference and corrupt the signature.
        # ~~~~ must be handled before ~~~.
        $sig = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
        $text = str_replace( "~~~~", $sig . " " . $d, $text );
        $text = str_replace( "~~~", $sig, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current page title, if any
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
        $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
        $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

        if ( "" == $context ) {
                $text = preg_replace( $p2, "[[\\1]]", $text );
        } else {
                # $context can only contain characters from $tc, none of which
                # is special in a replacement string
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # {{SUBST:xxx}} variables
        #
        $mw =& MagicWord::get( MAG_SUBST );
        $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
1498
1499
1500 }
1501
# Value holder for what the parser produces: the output text, the language
# and category links, and the "contains old magic" flag.
class ParserOutput
{
        var $mText;              # Text given to the constructor or setText()
        var $mLanguageLinks;     # Language links, an array by default
        var $mCategoryLinks;     # Category links, an array by default
        var $mContainsOldMagic;  # Flag, false by default

        # Every argument is optional; the defaults describe an empty result.
        function ParserOutput(
                $text = "",
                $languageLinks = array(),
                $categoryLinks = array(),
                $containsOldMagic = false
        ) {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Read accessors
        function getText()
        {
                return $this->mText;
        }

        function getLanguageLinks()
        {
                return $this->mLanguageLinks;
        }

        function getCategoryLinks()
        {
                return $this->mCategoryLinks;
        }

        function containsOldMagic()
        {
                return $this->mContainsOldMagic;
        }

        # Write accessors: each hands the member and the new value to
        # wfSetVar() and returns that function's result.
        function setText( $text )
        {
                return wfSetVar( $this->mText, $text );
        }

        function setLanguageLinks( $ll )
        {
                return wfSetVar( $this->mLanguageLinks, $ll );
        }

        function setCategoryLinks( $cl )
        {
                return wfSetVar( $this->mCategoryLinks, $cl );
        }

        function setContainsOldMagic( $com )
        {
                return wfSetVar( $this->mContainsOldMagic, $com );
        }
}
1524
# Bundle of the settings the parser reads while rendering. The values come
# partly from site-wide globals and partly from a user's preferences; see
# initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mPrintable;                 # Generate printable output
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Read accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getPrintable() { return $this->mPrintable; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Write accessors: each hands the member and the new value to
        # wfSetVar() (wfSetRef() for the skin) and returns that function's result.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Creates a new ParserOptions object initialised for $user.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter by
                # reference, so no "&" is needed (or allowed) at the call site.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fills in every option.
        #
        # TeX, category magic, dynamic dates, interwiki magic and external
        # images are copied from the $wg* globals. Skin, date format, section
        # editing, heading numbering and TOC display come from the user.
        # "Printable" always starts out false.
        #
        # $userInput: the user to read preferences from; when it evaluates to
        #             false a fresh User object is used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mPrintable = false;
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }
}
1600
1601 # Regex callbacks, used in OutputPage::replaceVariables
1602
# Regex callback that expands a message into page text.
# $matches[1] is the message name. Message text can come from any user and
# this runs after removeHTMLtags has been applied to the page, so the
# dangerous stuff is removed from the message here before its internal links
# are replaced. The message's page in the MediaWiki namespace is added to the
# link cache.
function wfReplaceMsgVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgName = $matches[1];
        $cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

        # The link cache is suspended while the message's own links are replaced
        $wgLinkCache->suspend();
        $expanded = $wgCurOut->replaceInternalLinks( $cleaned );
        $wgLinkCache->resume();

        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $expanded;
}
1615
# Regex callback giving the effect of <nowiki></nowiki> around a message.
# Not real <nowiki> because this is called after nowiki sections are
# processed: the message text named by $matches[1] is passed through
# wfEscapeWikiText() instead. The message's page in the MediaWiki namespace
# is added to the link cache.
function wfReplaceMsgnwVar( $matches ) {
        global $wgCurOut, $wgLinkCache;

        $msgName = $matches[1];
        $escaped = wfEscapeWikiText( wfMsg( $msgName ) );

        $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
        $wgLinkCache->addLinkObj( $msgTitle );

        return $escaped;
}
1624
1625
1626
1627 ?>