includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  # Cap on how many times any one page may be included, which keeps the
  # size of the generated page linear in the size of the input.
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Maximum recursion depth of variable/inclusion evaluation.
  define( 'MAX_INCLUDE_PASSES', 3 );

  # Output modes; Parser::$mOutputType holds one of these.
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );
  45
  46 class Parser
  47 {
  48         # Cleared with clearState():
  var $mOutput;                # ParserOutput object created by clearState()
  var $mAutonumber;            # counter behind the "[n]" labels of numbered external links
  var $mLastSection;
  var $mDTopen;
  var $mStripState = array();  # marker => content maps produced by strip()
  var $mVariables;
  var $mIncludeCount;          # array, emptied by clearState()

  # Temporary: assigned by parse() at the start of every run.
  var $mOptions;
  var $mTitle;
  var $mOutputType;            # one of the OT_* constants
  54
  # Constructor (PHP 4 style: a method named after its class).
  # A fresh parser starts out in the state produced by clearState().
  function Parser()
  {
          $this->clearState();
  }
  59
  # Puts every field listed under "Cleared with clearState()" back to its
  # initial value, so that the next parse starts from scratch.
  function clearState()
  {
          # Plain values
          $this->mAutonumber = 0;
          $this->mLastSection = "";
          $this->mDTopen = false;
          $this->mVariables = false;

          # Containers
          $this->mIncludeCount = array();
          $this->mStripState = array();

          # Fresh output object for the upcoming run
          $this->mOutput = new ParserOutput();
  }
  70
  71         # First pass--just handle <nowiki> sections, pass the rest off
  72         # to doWikiPass2() which does all the real work.
  73         #
  74         # Returns a ParserOutput
  75         #
  # Parameters:
  #   $text       - wiki markup to render
  #   $title      - title object of the page; taken by reference and kept in $this->mTitle
  #   $options    - options object kept in $this->mOptions
  #   $linestart  - handed through to doWikiPass2()
  #   $clearState - when true (the default) clearState() is called before parsing
  #
  # Returns $this->mOutput after the rendered text has been stored in it.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
          $fname = "Parser::parse";
          wfProfileIn( $fname );

          if ( $clearState ) {
                  $this->clearState();
          }

          $this->mOptions = $options;
          $this->mTitle =& $title;
          $this->mOutputType = OT_HTML;

          # strip() swaps the protected sections for markers and records them in
          # $this->mStripState; unstrip() puts the content back once the
          # remaining markup has been processed.
          $text = $this->strip( $text, $this->mStripState );
          $text = $this->doWikiPass2( $text, $linestart );
          $text = $this->unstrip( $text, $this->mStripState );

          $this->mOutput->setText( $text );
          wfProfileOut( $fname );
          return $this->mOutput;
  }
  98
  # Returns two mt_rand() values in the range 0..0x7fffffff, each written
  # in lower-case hexadecimal and joined into a single string.
  /* static */ function getRandomString()
  {
          $first = dechex( mt_rand( 0, 0x7fffffff ) );
          $second = dechex( mt_rand( 0, 0x7fffffff ) );
          return $first . $second;
  }
 103
 104         # Replaces all occurrences of <$tag>content</$tag> in the text
 105         # with a random marker and returns the new text. The output parameter
 106         # $content will be an associative array filled with data in the form
 107         # $unique_marker => content.
 108
 # Parameters:
 #   $tag         - tag name without angle brackets; it is inserted into the
 #                  regular expressions as-is and matched case-insensitively
 #   $text        - text to scan
 #   $content     - output: marker => text found between the opening and closing tag
 #   $uniq_prefix - string put in front of every generated marker
 #
 # Returns $text with each tagged section replaced by its marker. A section
 # whose closing tag is missing extends to the end of the text.
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
         $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
         $content = array( );
         $n = 1;
         $stripped = "";

         while ( "" != $text ) {
                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
                 $stripped .= $p[0];
                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                         # No further opening tag, or nothing follows it
                         $text = "";
                 } else {
                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
                         $marker = $rnd . sprintf("%08X", $n++);
                         $content[$marker] = $q[0];
                         $stripped .= $marker;
                         # Without a closing tag preg_split() yields a single element;
                         # nothing is left to scan in that case.
                         $text = isset( $q[1] ) ? $q[1] : "";
                 }
         }
         return $stripped;
 }
 131
 132         # Strips <nowiki>, <pre> and <math>
 133         # Returns the text, and fills an array with data needed in unstrip()
 134         #
 # Parameters:
 #   $text  - wiki markup
 #   $state - output: array( pre map, math map, hiero map, nowiki map ), each
 #            map being marker => replacement text for unstrip()
 #
 # When $this->mOutputType is OT_HTML the stored replacement is the rendered
 # form of the section; otherwise the section is stored wrapped in its
 # original tags.
 function strip( $text, &$state )
 {
         $toHtml = ( $this->mOutputType == OT_HTML );
         $nowikiMap = array();
         $hieroMap = array();
         $mathMap = array();
         $preMap = array();

         # Rewrite any occurrence of the marker prefix that is already present
         # in the input before markers are generated.
         $uniq_prefix = "NaodW29";
         $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

         # <nowiki>
         $text = Parser::extractTags( "nowiki", $text, $nowikiMap, $uniq_prefix );
         foreach ( array_keys( $nowikiMap ) as $key ) {
                 $content = $nowikiMap[$key];
                 $nowikiMap[$key] = $toHtml
                         ? wfEscapeHTMLTagsOnly( $content )
                         : "<nowiki>$content</nowiki>";
         }

         # <hiero>, only when the WikiHiero extension is switched on
         if ( $GLOBALS['wgUseWikiHiero'] ) {
                 $text = Parser::extractTags( "hiero", $text, $hieroMap, $uniq_prefix );
                 foreach ( array_keys( $hieroMap ) as $key ) {
                         $content = $hieroMap[$key];
                         $hieroMap[$key] = $toHtml
                                 ? WikiHiero( $content, WH_MODE_HTML)
                                 : "<hiero>$content</hiero>";
                 }
         }

         # <math>, only when TeX support is enabled in the options
         if ( $this->mOptions->getUseTeX() ) {
                 $text = Parser::extractTags( "math", $text, $mathMap, $uniq_prefix );
                 foreach ( array_keys( $mathMap ) as $key ) {
                         $content = $mathMap[$key];
                         $mathMap[$key] = $toHtml
                                 ? renderMath( $content )
                                 : "<math>$content</math>";
                 }
         }

         # <pre>
         $text = Parser::extractTags( "pre", $text, $preMap, $uniq_prefix );
         foreach ( array_keys( $preMap ) as $key ) {
                 $content = $preMap[$key];
                 $preMap[$key] = $toHtml
                         ? "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>"
                         : "<pre>$content</pre>";
         }

         # Stored in reverse extraction order: unstrip() has to expand in that
         # order, otherwise nested tags will be corrupted.
         $state = array( $preMap, $mathMap, $hieroMap, $nowikiMap );
         return $text;
 }
 191
 # Puts stripped content back into $text.
 #
 #   $text  - text containing markers
 #   $state - list of marker => content maps; the maps are applied in list
 #            order and the entries of a map in their stored order
 #
 # Returns the text with every marker replaced by its content.
 function unstrip( $text, &$state )
 {
         foreach ( $state as $markerMap ) {
                 # With array arguments str_replace() performs the replacements one
                 # after the other, in array order.
                 $text = str_replace( array_keys( $markerMap ), array_values( $markerMap ), $text );
         }
         return $text;
 }
 201
 # Builds the listing that is appended to a category page: the
 # subcategories followed by the member articles, both as link lists.
 #
 # Returns "" when category magic is disabled in the options or when the
 # part of the title before the first ":" is not the localised "category"
 # message; otherwise returns the HTML fragment.
 function categoryMagic ()
 {
         global $wgLang ;
         if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
         # The id is interpolated into SQL below, so force it to an integer
         $id = (int)$this->mTitle->getArticleID() ;
         $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
         $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
         if ( $cat != $ti[0] ) return "" ;
         # A title without ":" has no second part
         $catName = isset ( $ti[1] ) ? $ti[1] : "" ;
         $r = "<br clear='all' />\n" ;

         $articles = array() ;
         $children = array() ;

         # Skin taken from the parser options, as everywhere else in this
         # class, rather than from the $wgUser global.
         $sk =& $this->mOptions->getSkin() ;

         # Pages linking here, both through existing and through broken links
         $queries = array (
                 "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id",
                 "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id"
         ) ;

         $data = array () ;
         foreach ( $queries AS $sql )
         {
                 $res = wfQuery ( $sql, DB_READ ) ;
                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
                 # Free every result set, not only the last one
                 wfFreeResult ( $res ) ;
         }

         foreach ( $data AS $x )
         {
                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                 if ( $t != "" ) $t .= ":" ;
                 $t .= $x->cur_title ;

                 # A linking page whose own title starts with "<category>:" is
                 # listed as a subcategory, anything else as an article.
                 $y = explode ( ":" , $t , 2 ) ;
                 if ( count ( $y ) == 2 && $y[0] == $cat ) {
                         array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
                 } else {
                         array_push ( $articles , $sk->makeLink ( $t ) ) ;
                 }
         }

         # Children
         if ( count ( $children ) > 0 )
         {
                 asort ( $children ) ;
                 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                 $r .= implode ( ", " , $children ) ;
         }

         # Articles
         if ( count ( $articles ) > 0 )
         {
                 asort ( $articles ) ;
                 $h =  wfMsg( "category_header", $catName );
                 $r .= "<h2>{$h}</h2>\n" ;
                 $r .= implode ( ", " , $articles ) ;
         }

         return $r ;
 }
 267
 # Returns the list of attribute names that fixTagAttributes() lets through.
 # Names are lower case and every name appears exactly once.
 function getHTMLattrs ()
 {
         $htmlattrs = array( # Allowed attributes--no scripting, etc.
                         "title", "align", "lang", "dir", "width", "height",
                         "bgcolor", "clear", /* BR */ "noshade", /* HR */
                         "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
                         /* FONT */ "type", "start", "value", "compact",
                         /* For various lists, mostly deprecated but safe */
                         "summary", "border", "frame", "rules",
                         "cellspacing", "cellpadding", "valign", "char",
                         "charoff", "colgroup", "col", "span", "abbr", "axis",
                         "headers", "scope", "rowspan", "colspan", /* Tables */
                         "id", "class", "name", "style" /* For CSS */
                         );
         return $htmlattrs ;
 }
 284
 # Filters the attribute part of an HTML tag.
 #
 #   $t - text that follows the tag name, e.g. ' border="1" onclick=x'
 #
 # Attributes whose name is not returned by getHTMLattrs() are removed.
 # If what remains contains a style attribute with something that looks
 # like a script expression or a URL, all attributes are dropped.
 # Returns the trimmed attribute string, possibly "".
 function fixTagAttributes ( $t )
 {
         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

         # Strip non-approved attributes from the tag. A callback is used
         # instead of the /e modifier so that attribute text is never
         # evaluated as PHP code and reaches the output unmodified.
         $t = preg_replace_callback(
                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
                 array( $this, 'fixTagAttributesCallback' ),
                 $t);

         # Strip javascript "expression" from stylesheets. Brute force approach:
         # If anything offensive is found, all attributes of the HTML tag are dropped
         if( preg_match(
                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                 wfMungeToUtf8( $t ) ) )
         {
                 $t="";
         }

         return trim ( $t ) ;
 }

 # preg_replace_callback() helper for fixTagAttributes().
 #
 #   $m - match array: $m[1] is the attribute name, $m[3] (when present)
 #        the attribute value including any surrounding double quotes
 #
 # Returns 'name', 'name=value', or "" for a name that is not allowed.
 /* private */ function fixTagAttributesCallback ( $m )
 {
         if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) return "" ;
         # Trailing groups that did not take part in the match are not set
         $value = isset ( $m[3] ) ? $m[3] : "" ;
         if ( $value !== "" ) return $m[1] . "=" . $value ;
         return $m[1] ;
 }
 307
 # Translates wiki table markup into HTML, one input line at a time.
 #
 # Recognised line starts:
 #   {|   opens a table (the rest of the line holds the table attributes)
 #   |}   closes the innermost table
 #   |-   starts a new row (further dashes are ignored, the rest are row attributes)
 #   |+   caption
 #   |    data cells, separated by ||
 #   !    header cells, separated by !! or ||
 # Lines outside of any table are passed through untouched. Tables still
 # open at the end of the text are closed.
 #
 #   $t - text, lines separated by "\n"
 #
 # Returns the converted text.
 function doTableStuff ( $t )
 {
         $t = explode ( "\n" , $t ) ;

         # Four parallel stacks with one entry per currently open table
         $td = array () ;  # Is a cell (td, th or caption) currently open?
         $ltd = array () ; # Tag name of the cell opened last
         $tr = array () ;  # Is currently a tr tag open?
         $ltr = array () ; # Attributes waiting for the next <tr>

         foreach ( $t AS $k => $x )
         {
                 $x = rtrim ( $x ) ;
                 $fc = substr ( $x , 0 , 1 ) ;
                 $fc2 = substr ( $x , 0 , 2 ) ;
                 if ( "{|" == $fc2 )
                 {
                         # Attributes begin right after "{|"; fixTagAttributes() trims
                         # its result, so a separating blank is optional.
                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $tr , false ) ;
                         array_push ( $ltr , "" ) ;
                 }
                 else if ( count ( $td ) == 0 ) { } # Not inside a table: leave the line alone
                 else if ( "|}" == $fc2 )
                 {
                         # Close cell, row and table, innermost first
                         $z = "</table>\n" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                 }
                 else if ( "|-" == $fc2 ) # Allows for |---------------
                 {
                         $x = substr ( $x , 1 ) ;
                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
                         # Close whatever is open in the current row ...
                         $z = "" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                         # ... and remember the attributes; the <tr> itself is written
                         # together with the first cell of the row.
                         array_push ( $tr , false ) ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
                 }
                 else if ( "|" == $fc || "!" == $fc || "|+" == $fc2 ) # Cells and caption
                 {
                         if ( "|+" == $fc2 )
                         {
                                 $fc = "+" ;
                                 $x = substr ( $x , 1 ) ;
                         }
                         $after = substr ( $x , 1 ) ;
                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
                         $after = explode ( "||" , $after ) ;
                         $t[$k] = "" ;
                         foreach ( $after AS $theline )
                         {
                                 $z = "" ;
                                 if ( $fc != "+" )
                                 {
                                         # A caption lives outside of rows; any other cell
                                         # opens a row if none is open yet.
                                         $tra = array_pop ( $ltr ) ;
                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                         array_push ( $tr , true ) ;
                                         array_push ( $ltr , "" ) ;
                                 }

                                 # Close the previous cell before opening the new one
                                 $l = array_pop ( $ltd ) ;
                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                                 if ( $fc == "|" ) $l = "td" ;
                                 else if ( $fc == "!" ) $l = "th" ;
                                 else if ( $fc == "+" ) $l = "caption" ;
                                 else $l = "" ;
                                 array_push ( $ltd , $l ) ;

                                 # "attributes|content" or just "content"
                                 $y = explode ( "|" , $theline , 2 ) ;
                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                                 $t[$k] .= $y ;
                                 array_push ( $td , true ) ;
                         }
                 }
         }

         # Closing open td, tr && table. The cell is closed with the tag it was
         # opened with, which may be th or caption rather than td.
         while ( count ( $td ) > 0 )
         {
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
                 array_pop ( $ltr ) ;
                 $t[] = "</table>" ;
         }

         $t = implode ( "\n" , $t ) ;
         return $t ;
 }
 407
 408         # Well, OK, it's actually about 14 passes.  But since all the
 409         # hard lifting is done inside PHP's regex code, it probably
 410         # wouldn't speed things up much to add a real parser.
 411         #
 # Runs the individual conversion steps over $text, in a fixed order.
 #
 #   $text      - markup with the strip() sections already replaced by markers
 #   $linestart - handed on to doBlockLevels()
 #
 # Returns the converted text with the categoryMagic() output appended.
 function doWikiPass2( $text, $linestart )
 {
         $fname = "Parser::doWikiPass2";
         wfProfileIn( $fname );

         $text = $this->removeHTMLtags( $text );
         $text = $this->replaceVariables( $text );
         $text = $this->doHeadings( $text );

         # Date reformatting only when the options ask for it
         if ( $this->mOptions->getUseDynamicDates() ) {
                 global $wgDateFormatter;
                 $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
         }

         # External links first, then the tokenized pass, then tables
         $text = $this->replaceExternalLinks( $text );
         $text = $this->doTokenizedParser ( $text );
         $text = $this->doTableStuff ( $text ) ;
         $text = $this->formatHeadings( $text );

         $sk =& $this->mOptions->getSkin();
         $text = $sk->transformContent( $text );

         # Rewrite a few tags: patterns and replacements pair up by position
         $tagPatterns = array(
                 "/<hr *>/i",
                 "/<br *>/i",
                 "/<center *>/i",
                 "/<\\/center *>/i"
         );
         $tagReplacements = array(
                 '<hr/>',
                 '<br/>',
                 '<span style="text-align:center;">',
                 '</span>'
         );
         $text = preg_replace( $tagPatterns, $tagReplacements, $text );

         $text = $this->doBlockLevels( $text, $linestart );
         $text .= $this->categoryMagic () ;

         wfProfileOut( $fname );
         return $text;
 }
 450
 451
 # Turns "== Heading ==" style lines into <h1>..<h6> elements.
 # The longest run of "=" is handled first, so that "===" is never
 # mistaken for "==" with an extra character.
 #
 #   $text - wiki markup
 #
 # Returns the text with the heading lines replaced.
 /* private */ function doHeadings( $text )
 {
         $level = 6;
         while ( $level >= 1 ) {
                 $bars = substr( "======", 0, $level );
                 $pattern = "/^{$bars}(.+){$bars}(\\s|$)/m";
                 $replacement = "<h{$level}>\\1</h{$level}>\\2";
                 $text = preg_replace( $pattern, $replacement, $text );
                 --$level;
         }
         return $text;
 }
 461
 462         # Note: we have to do external links before the internal ones,
 463         # and otherwise take great care in the order of things here, so
 464         # that we don't end up interpreting some URLs twice.
 465
 # Applies subReplaceExternalLinks() once per supported protocol.
 # Only http and https are processed with the autonumber flag set.
 #
 #   $text - wiki markup
 #
 # Returns the text with external links converted.
 /* private */ function replaceExternalLinks( $text )
 {
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         # protocol => autonumber flag, in processing order
         $protocols = array(
                 "http" => true,
                 "https" => true,
                 "ftp" => false,
                 "irc" => false,
                 "gopher" => false,
                 "news" => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 480
 # Converts the external links of one protocol.
 #
 #   $s          - text to process
 #   $protocol   - protocol name without the colon, e.g. "http"
 #   $autonumber - true: a bracketed link without label text is shown as
 #                 "[n]", and image URLs may be turned into images if the
 #                 options allow it; false: such a link shows its escaped URL
 #
 # Bare URLs are handled first, then the "[url]" and "[url label]" forms.
 # Returns the converted text.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
         # Stands in for the protocol inside generated markup until the bare
         # URL pass is over, so that generated URLs are not matched again.
         $placeholder = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
         $urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

         # Separators that belong to a URL when they occur inside it but not
         # when they are its last character: in "go to www.foo.com, where .."
         # the comma is not part of the URL, in "www.foo.com/123,2342,32.htm"
         # the commas are.
         $trailSeps = ",;\.:";
         $fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
         $imageExts = "gif|png|jpg|jpeg";

         # The curly braces in the patterns below are PHP string
         # interpolation, not regex syntax.
         $imagePattern = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$trailSeps}]+)\\/([{$fileChars}]+)\\." .
           "((?i){$imageExts})([^{$urlChars}]|$)/";
         $barePattern = "/(^|[^\\[])({$protocol}:)(([{$urlChars}]|[{$trailSeps}][{$urlChars}])+)" .
           "([^{$urlChars}{$trailSeps}]|[{$trailSeps}]|$)/";

         $skin =& $this->mOptions->getSkin();

         # Image URLs become images, for autonumbered protocols only
         if ( $autonumber && $this->mOptions->getAllowExternalImages() ) {
                 $imageTag = $skin->makeImage( "{$placeholder}:\\3/\\4.\\5", "\\4.\\5" );
                 $s = preg_replace( $imagePattern, "\\1" . $imageTag . "\\6", $s );
         }

         # Bare URLs become links
         $target = "{$placeholder}:\\3";
         $linkAttribs = $skin->getExternalLinkAttributes( $target, wfEscapeHTML( $target ) );
         $bareReplacement = "\\1<a href=\"{$target}\"" . $linkAttribs . ">" .
           wfEscapeHTML( $target ) . "</a>\\5";
         $s = preg_replace( $barePattern, $bareReplacement, $s );
         $s = str_replace( $placeholder, $protocol, $s );

         # Bracketed links: split at every "[protocol:". The blank put in
         # front guarantees a non-empty first piece and is removed again.
         $pieces = explode( "[{$protocol}:", " " . $s );
         $s = array_shift( $pieces );
         $s = substr( $s, 1 );

         $plainPattern = "/^([{$urlChars}{$trailSeps}]+)](.*)\$/sD";
         $labelPattern = "/^([{$urlChars}{$trailSeps}]+)\\s+([^\\]]+)](.*)\$/sD";

         foreach ( $pieces as $piece ) {
                 if ( preg_match( $plainPattern, $piece, $hit ) ) {
                         # [url]
                         $link = "{$protocol}:{$hit[1]}";
                         $trail = $hit[2];
                         if ( $autonumber ) {
                                 $text = "[" . ++$this->mAutonumber . "]";
                         } else {
                                 $text = wfEscapeHTML( $link );
                         }
                 } elseif ( preg_match( $labelPattern, $piece, $hit ) ) {
                         # [url label]
                         $link = "{$protocol}:{$hit[1]}";
                         $text = $hit[2];
                         $trail = $hit[3];
                 } else {
                         # Not a well-formed link: put the piece back unchanged
                         $s .= "[{$protocol}:" . $piece;
                         continue;
                 }

                 $labelShowsUrl = ( $link == $text )
                         || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link );
                 if ( $labelShowsUrl ) {
                         $expansion = "";
                 } else {
                         # Expand the URL for printable version
                         $expansion = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
                 }

                 $attribs = $skin->getExternalLinkAttributes( $link, $text );
                 $s .= "<a href='{$link}'{$attribs}>{$text}</a>{$expansion}{$trail}";
         }
         return $s;
 }
 547
 # Handles a ''' (bold) token.
 #
 #   $state - "em" and "strong" each hold the position of the token that
 #            opened the span, or false when the span is closed; updated here
 #   $token - token array; "pos" is stored as the opening position
 #
 # Returns the markup to emit.
 /* private */ function handle3Quotes( &$state, $token )
 {
         if ( $state["strong"] === false ) {
                 # Nothing open yet: this token opens the bold span
                 $state["strong"] = $token["pos"];
                 return "<strong>";
         }

         # An <em> opened after the <strong> (''' lala ''lala ''') has to be
         # closed first and re-opened behind the </strong>.
         $emNested = ( $state["em"] !== false ) && ( $state["strong"] < $state["em"] );
         $state["strong"] = FALSE;
         if ( $emNested ) {
                 return "</em></strong><em>";
         }
         return "</strong>";
 }
 565
 # Handles a '' (italic) token.
 #
 #   $state - "em" and "strong" each hold the position of the token that
 #            opened the span, or false when the span is closed; updated here
 #   $token - token array; "pos" is stored as the opening position
 #
 # Returns the markup to emit.
 /* private */ function handle2Quotes( &$state, $token )
 {
         if ( $state["em"] === false ) {
                 # Nothing open yet: this token opens the italic span
                 $state["em"] = $token["pos"];
                 return "<em>";
         }

         # A <strong> opened after the <em> (''lala'''lala'' ....''') has to be
         # closed first and re-opened behind the </em>.
         $strongNested = ( $state["strong"] !== false ) && ( $state["em"] < $state["strong"] );
         $state["em"] = FALSE;
         if ( $strongNested ) {
                 return "</strong></em><strong>";
         }
         return "</em>";
 }
 583
 # Handles a ''''' (bold + italic) token.
 #
 #   $state - "em" and "strong" each hold the position of the token that
 #            opened the span, or false when the span is closed; updated here
 #   $token - token array; "pos" is stored as the opening position
 #
 # Both spans open:  both are closed, the one opened last first.
 # One span open:    it is closed and the other one is opened.
 # No span open:     both are opened.
 # Returns the markup to emit.
 /* private */ function handle5Quotes( &$state, $token )
 {
         $s = "";
         # Compare against false explicitly: position 0 is a valid opening
         # position and must not be taken for "closed".
         if ( $state["em"] !== false && $state["strong"] !== false ) {
                 if ( $state["em"] < $state["strong"] ) {
                         $s .= "</strong></em>";
                 } else {
                         $s .= "</em></strong>";
                 }
                 $state["strong"] = $state["em"] = FALSE;
         } elseif ( $state["em"] !== false ) {
                 $s .= "</em><strong>";
                 $state["em"] = FALSE;
                 $state["strong"] = $token["pos"];
         } elseif ( $state["strong"] !== false ) {
                 $s .= "</strong><em>";
                 $state["strong"] = FALSE;
                 $state["em"] = $token["pos"];
         } else { # not $em and not $strong
                 $s .= "<strong><em>";
                 $state["strong"] = $state["em"] = $token["pos"];
         }
         return $s;
 }
 608
 609         /* private */ function doTokenizedParser( $str )
 610         {
 611                 global $wgLang; # for language specific parser hook
 612
 613                 $tokenizer=Tokenizer::newFromString( $str );
 614                 $tokenStack = array();
 615
 616                 $s="";
 617                 $state["em"]      = FALSE;
 618                 $state["strong"]  = FALSE;
 619                 $tagIsOpen = FALSE;
 620                 $threeopen = false;
 621
 622                 # The tokenizer splits the text into tokens and returns them one by one.
 623                 # Every call to the tokenizer returns a new token.
 624                 while ( $token = $tokenizer->nextToken() )
 625                 {
 626                         switch ( $token["type"] )
 627                         {
 628                                 case "text":
 629                                         # simple text with no further markup
 630                                         $txt = $token["text"];
 631                                         break;
 632                                 case "[[[":
 633                                         # remember the tag opened with 3 [
 634                                         $threeopen = true;
 635                                 case "[[":
 636                                         # link opening tag.
 637                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 638                                         $tagIsOpen = TRUE;
 639                                         array_push( $tokenStack, $token );
 640                                         $txt="";
 641                                         break;
 642
 643                                 case "]]]":
 644                                 case "]]":
 645                                         # link close tag.
 646                                         # get text from stack, glue it together, and call the code to handle a
 647                                         # link
 648
 649                                         if ( count( $tokenStack ) == 0 )
 650                                         {
 651                                                 # stack empty. Found a ]] without an opening [[
 652                                                 $txt = "]]";
 653                                         } else {
 654                                                 $linkText = "";
 655                                                 $lastToken = array_pop( $tokenStack );
 656                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 657                                                 {
 658                                                         if( !empty( $lastToken["text"] ) ) {
 659                                                                 $linkText = $lastToken["text"] . $linkText;
 660                                                         }
 661                                                         $lastToken = array_pop( $tokenStack );
 662                                                 }
 663
 664                                                 $txt = $linkText ."]]";
 665
 666                                                 if( isset( $lastToken["text"] ) ) {
 667                                                         $prefix = $lastToken["text"];
 668                                                 } else {
 669                                                         $prefix = "";
 670                                                 }
 671                                                 $nextToken = $tokenizer->previewToken();
 672                                                 if ( $nextToken["type"] == "text" )
 673                                                 {
 674                                                         # Preview just looks at it. Now we have to fetch it.
 675                                                         $nextToken = $tokenizer->nextToken();
 676                                                         $txt .= $nextToken["text"];
 677                                                 }
 678                                                 $fakestate = $this->mStripState;
 679                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix );
 680
 681                                                 # did the tag start with 3 [ ?
 682                                                 if($threeopen) {
 683                                                         # show the first as text
 684                                                         $txt = "[".$txt;
 685                                                         $threeopen=false;
 686                                                 }
 687
 688                                         }
 689                                         $tagIsOpen = (count( $tokenStack ) != 0);
 690                                         break;
 691                                 case "----":
 692                                         $txt = "\n<hr />\n";
 693                                         break;
 694                                 case "'''":
 695                                         # This and the three next ones handle quotes
 696                                         $txt = $this->handle3Quotes( $state, $token );
 697                                         break;
 698                                 case "''":
 699                                         $txt = $this->handle2Quotes( $state, $token );
 700                                         break;
 701                                 case "'''''":
 702                                         $txt = $this->handle5Quotes( $state, $token );
 703                                         break;
 704                                 case "":
 705                                         # empty token
 706                                         $txt="";
 707                                         break;
 708                                 case "RFC ":
 709                                         if ( $tagIsOpen ) {
 710                                                 $txt = "RFC ";
 711                                         } else {
 712                                                 $txt = $this->doMagicRFC( $tokenizer );
 713                                         }
 714                                         break;
 715                                 case "ISBN ":
 716                                         if ( $tagIsOpen ) {
 717                                                 $txt = "ISBN ";
 718                                         } else {
 719                                                 $txt = $this->doMagicISBN( $tokenizer );
 720                                         }
 721                                         break;
 722                                 default:
 723                                         # Call language specific Hook.
 724                                         $txt = $wgLang->processToken( $token, $tokenStack );
 725                                         if ( NULL == $txt ) {
 726                                                 # An unkown token. Highlight.
 727                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 728                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 729                                         }
 730                                         break;
 731                         }
 732                         # If we're parsing the interior of a link, don't append the interior to $s,
 733                         # but push it to the stack so it can be processed when a ]] token is found.
 734                         if ( $tagIsOpen  && $txt != "" ) {
 735                                 $token["type"] = "text";
 736                                 $token["text"] = $txt;
 737                                 array_push( $tokenStack, $token );
 738                         } else {
 739                                 $s .= $txt;
 740                         }
 741                 } #end while
 742                 if ( count( $tokenStack ) != 0 )
 743                 {
 744                         # still objects on stack. opened [[ tag without closing ]] tag.
 745                         $txt = "";
 746                         while ( $lastToken = array_pop( $tokenStack ) )
 747                         {
 748                                 if ( $lastToken["type"] == "text" )
 749                                 {
 750                                         $txt = $lastToken["text"] . $txt;
 751                                 } else {
 752                                         $txt = $lastToken["type"] . $txt;
 753                                 }
 754                         }
 755                         $s .= $txt;
 756                 }
 757                 return $s;
 758         }
 759
 760         /* private */ function handleInternalLink( $line, $prefix )
 761         {
 762                 global $wgLang, $wgLinkCache;
 763                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 764                 static $fname = "Parser::handleInternalLink" ;
 765                 wfProfileIn( $fname );
 766
 767                 wfProfileIn( "$fname-setup" );
 768                 static $tc = FALSE;
 769                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 770                 $sk =& $this->mOptions->getSkin();
 771
 772                 # Match a link having the form [[namespace:link|alternate]]trail
 773                 static $e1 = FALSE;
 774                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 775                 # Match the end of a line for a word that's not followed by whitespace,
 776                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 777                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 778                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 779                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 780
 781
 782                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 783                 static $image = FALSE;
 784                 static $special = FALSE;
 785                 static $media = FALSE;
 786                 static $category = FALSE;
 787                 if ( !$image ) { $image = Namespace::getImage(); }
 788                 if ( !$special ) { $special = Namespace::getSpecial(); }
 789                 if ( !$media ) { $media = Namespace::getMedia(); }
 790                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 791
 792                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 793
 794                 wfProfileOut( "$fname-setup" );
 795                 $s = "";
 796
 797                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 798                         $text = $m[2];
 799                         $trail = $m[3];
 800                 } else { # Invalid form; output directly
 801                         $s .= $prefix . "[[" . $line ;
 802                         return $s;
 803                 }
 804
 805                 /* Valid link forms:
 806                 Foobar -- normal
 807                 :Foobar -- override special treatment of prefix (images, language links)
 808                 /Foobar -- convert to CurrentPage/Foobar
 809                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 810                 */
 811                 $c = substr($m[1],0,1);
 812                 $noforce = ($c != ":");
 813                 if( $c == "/" ) { # subpage
 814                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 815                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 816                                 $noslash=$m[1];
 817                         } else {
 818                                 $noslash=substr($m[1],1);
 819                         }
 820                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 821                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 822                                 if( "" == $text ) {
 823                                         $text= $m[1];
 824                                 } # this might be changed for ugliness reasons
 825                         } else {
 826                                 $link = $noslash; # no subpage allowed, use standard link
 827                         }
 828                 } elseif( $noforce ) { # no subpage
 829                         $link = $m[1];
 830                 } else {
 831                         $link = substr( $m[1], 1 );
 832                 }
 833                 if( "" == $text )
 834                         $text = $link;
 835
 836                 $nt = Title::newFromText( $link );
 837                 if( !$nt ) {
 838                         $s .= $prefix . "[[" . $line;
 839                         return $s;
 840                 }
 841                 $ns = $nt->getNamespace();
 842                 $iw = $nt->getInterWiki();
 843                 if( $noforce ) {
 844                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 845                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 846                                 return (trim($s) == '')? '': $s;
 847                         }
 848                         if( $ns == $image ) {
 849                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 850                                 $wgLinkCache->addImageLinkObj( $nt );
 851                                 return $s;
 852                         }
 853                 }
 854                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 855                     ( strpos( $link, "#" ) == FALSE ) ) {
 856                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 857                         return $s;
 858                 }
 859
 860                 # Category feature
 861                 $catns = strtoupper ( $nt->getDBkey () ) ;
 862                 $catns = explode ( ":" , $catns ) ;
 863                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 864                 else $catns = "" ;
 865                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 866                         $t = explode ( ":" , $nt->getText() ) ;
 867                         array_shift ( $t ) ;
 868                         $t = implode ( ":" , $t ) ;
 869                         $t = $wgLang->ucFirst ( $t ) ;
 870                         $nnt = Title::newFromText ( $category.":".$t ) ;
 871                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 872                         $this->mOutput->mCategoryLinks[] = $t ;
 873                         $s .= $prefix . $trail ;
 874                         return $s ;
 875                 }
 876
 877                 if( $ns == $media ) {
 878                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 879                         $wgLinkCache->addImageLinkObj( $nt );
 880                         return $s;
 881                 } elseif( $ns == $special ) {
 882                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 883                         return $s;
 884                 }
 885                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 886
 887                 wfProfileOut( $fname );
 888                 return $s;
 889         }
 890
 891         # Some functions here used by doBlockLevels()
 892         #
 893         /* private */ function closeParagraph()
 894         {
 895                 $result = "";
 896                 if ( '' != $this->mLastSection ) {
 897                         $result = "</" . $this->mLastSection  . ">";
 898                 }
 899                 $this->mLastSection = "";
 900                 return $result."\n";
 901         }
 902         # getCommon() returns the length of the longest common substring
 903         # of both arguments, starting at the beginning of both.
 904         #
 905         /* private */ function getCommon( $st1, $st2 )
 906         {
 907                 $fl = strlen( $st1 );
 908                 $shorter = strlen( $st2 );
 909                 if ( $fl < $shorter ) { $shorter = $fl; }
 910
 911                 for ( $i = 0; $i < $shorter; ++$i ) {
 912                         if ( $st1{$i} != $st2{$i} ) { break; }
 913                 }
 914                 return $i;
 915         }
 916         # These next three functions open, continue, and close the list
 917         # element appropriate to the prefix character passed into them.
 918         #
 919         /* private */ function openList( $char )
 920     {
 921                 $result = $this->closeParagraph();
 922
 923                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 924                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 925                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 926                 else if ( ";" == $char ) {
 927                         $result .= "<dl><dt>";
 928                         $this->mDTopen = true;
 929                 }
 930                 else { $result = "<!-- ERR 1 -->"; }
 931
 932                 return $result;
 933         }
 934
 935         /* private */ function nextItem( $char )
 936         {
 937                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 938                 else if ( ":" == $char || ";" == $char ) {
 939                         $close = "</dd>";
 940                         if ( $this->mDTopen ) { $close = "</dt>"; }
 941                         if ( ";" == $char ) {
 942                                 $this->mDTopen = true;
 943                                 return $close . "<dt>";
 944                         } else {
 945                                 $this->mDTopen = false;
 946                                 return $close . "<dd>";
 947                         }
 948                 }
 949                 return "<!-- ERR 2 -->";
 950         }
 951
 952         /* private */function closeList( $char )
 953         {
 954                 if ( "*" == $char ) { $text = "</li></ul>"; }
 955                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 956                 else if ( ":" == $char ) {
 957                         if ( $this->mDTopen ) {
 958                                 $this->mDTopen = false;
 959                                 $text = "</dt></dl>";
 960                         } else {
 961                                 $text = "</dd></dl>";
 962                         }
 963                 }
 964                 else {  return "<!-- ERR 3 -->"; }
 965                 return $text."\n";
 966         }
 967
 968         /* private */ function doBlockLevels( $text, $linestart )
 969         {
 970                 $fname = "Parser::doBlockLevels";
 971                 wfProfileIn( $fname );
 972                 # Parsing through the text line by line.  The main thing
 973                 # happening here is handling of block-level elements p, pre,
 974                 # and making lists from lines starting with * # : etc.
 975                 #
 976                 $a = explode( "\n", $text );
 977                 $lastPref = $text = '';
 978                 $this->mDTopen = $inBlockElem = false;
 979
 980                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 981                 foreach ( $a as $t ) {
 982                         if ( "" != $text ) { $text .= "\n"; }
 983
 984                         $oLine = $t;
 985                         $opl = strlen( $lastPref );
 986                         $npl = strspn( $t, "*#:;" );
 987                         $pref = substr( $t, 0, $npl );
 988                         $pref2 = str_replace( ";", ":", $pref );
 989                         $t = substr( $t, $npl );
 990
 991                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 992                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 993
 994                                 if ( ";" == substr( $pref, -1 ) ) {
 995                                         $cpos = strpos( $t, ":" );
 996                                         if ( ! ( false === $cpos ) ) {
 997                                                 $term = substr( $t, 0, $cpos );
 998                                                 $text .= $term . $this->nextItem( ":" );
 999                                                 $t = substr( $t, $cpos + 1 );
1000                                         }
1001                                 }
1002                         } else if (0 != $npl || 0 != $opl) {
1003                                 $cpl = $this->getCommon( $pref, $lastPref );
1004
1005                                 while ( $cpl < $opl ) {
1006                                         $text .= $this->closeList( $lastPref{$opl-1} );
1007                                         --$opl;
1008                                 }
1009                                 if ( $npl <= $cpl && $cpl > 0 ) {
1010                                         $text .= $this->nextItem( $pref{$cpl-1} );
1011                                 }
1012                                 while ( $npl > $cpl ) {
1013                                         $char = substr( $pref, $cpl, 1 );
1014                                         $text .= $this->openList( $char );
1015
1016                                         if ( ";" == $char ) {
1017                                                 $cpos = strpos( $t, ":" );
1018                                                 if ( ! ( false === $cpos ) ) {
1019                                                         $term = substr( $t, 0, $cpos );
1020                                                         $text .= $term . $this->nextItem( ":" );
1021                                                         $t = substr( $t, $cpos + 1 );
1022                                                 }
1023                                         }
1024                                         ++$cpl;
1025                                 }
1026                                 $lastPref = $pref2;
1027                         }
1028                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
1029                                 if ( preg_match(
1030                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div)/i", $t ) ) {
1031                                         $text .= $this->closeParagraph();
1032                                         $inBlockElem = true;
1033                                 } else if ( preg_match("/(<hr|<\\/td|".$uniq_prefix."-pre)/i", $t ) ) {
1034                                         $text .= $this->closeParagraph();
1035                                         $inBlockElem = false;
1036                                 }
1037                                 if ( ! $inBlockElem ) {
1038                                         if ( " " == $t{0} ) {
1039                                                 $newSection = "pre";
1040                                                 $text .= $this->closeParagraph();
1041                                                 # $t = wfEscapeHTML( $t );
1042                                         }
1043                                         else { $newSection = "p"; }
1044
1045                                         if ( '' == trim( $oLine ) ) {
1046                                                 if ( $this->mLastSection != 'p') {
1047                                                         $text .= $this->closeParagraph();
1048                                                         $text .= "<" . $newSection . ">";
1049                                                         $this->mLastSection = $newSection;
1050                                                 } else if ( $this->mLastSection == 'p' and '' == $oLine) {
1051                                                         $text .= '<br />';
1052                                                 }
1053                                         } else if ( $this->mLastSection == $newSection and $newSection != 'p' ) {
1054                                                 $text .= $this->closeParagraph();
1055                                                 $text .= "<" . $newSection . ">";
1056                                                 $this->mLastSection = $newSection;
1057                                         }
1058                                 }
1059                                 if ( $inBlockElem &&
1060                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|<\\/p<\\/div)/i", $t ) ) {
1061                                         $inBlockElem = false;
1062                                 }
1063                         }
1064                         $text .= $t;
1065                 }
1066                 while ( $npl ) {
1067                         $text .= $this->closeList( $pref2{$npl-1} );
1068                         --$npl;
1069                 }
1070                 if ( "" != $this->mLastSection ) {
1071                         $text .= "</" . $this->mLastSection . ">";
1072                         $this->mLastSection = "";
1073                 }
1074                 wfProfileOut( $fname );
1075                 return $text;
1076         }
1077
1078         function getVariableValue( $index ) {
1079                 global $wgLang, $wgSitename, $wgServer;
1080
1081                 switch ( $index ) {
1082                         case MAG_CURRENTMONTH:
1083                                 return date( "m" );
1084                         case MAG_CURRENTMONTHNAME:
1085                                 return $wgLang->getMonthName( date("n") );
1086                         case MAG_CURRENTMONTHNAMEGEN:
1087                                 return $wgLang->getMonthNameGen( date("n") );
1088                         case MAG_CURRENTDAY:
1089                                 return date("j");
1090                         case MAG_CURRENTDAYNAME:
1091                                 return $wgLang->getWeekdayName( date("w")+1 );
1092                         case MAG_CURRENTYEAR:
1093                                 return date( "Y" );
1094                         case MAG_CURRENTTIME:
1095                                 return $wgLang->time( wfTimestampNow(), false );
1096                         case MAG_NUMBEROFARTICLES:
1097                                 return wfNumberOfArticles();
1098                         case MAG_SITENAME:
1099                                 return $wgSitename;
1100                         case MAG_SERVER:
1101                                 return $wgServer;
1102                         default:
1103                                 return NULL;
1104                 }
1105         }
1106
1107         function initialiseVariables()
1108         {
1109                 global $wgVariableIDs;
1110                 $this->mVariables = array();
1111                 foreach ( $wgVariableIDs as $id ) {
1112                         $mw =& MagicWord::get( $id );
1113                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1114                 }
1115         }
1116
1117         /* private */ function replaceVariables( $text )
1118         {
1119                 global $wgLang, $wgCurParser;
1120                 global $wgScript, $wgArticlePath;
1121
1122                 $fname = "Parser::replaceVariables";
1123                 wfProfileIn( $fname );
1124
1125                 $bail = false;
1126                 if ( !$this->mVariables ) {
1127                         $this->initialiseVariables();
1128                 }
1129                 $titleChars = Title::legalChars();
1130                 $regex = "/{{([$titleChars\\|]*?)}}/s";
1131
1132                 # "Recursive" variable expansion: run it through a couple of passes
1133                 for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
1134                         $oldText = $text;
1135
1136                         # It's impossible to rebind a global in PHP
1137                         # Instead, we run the substitution on a copy, then merge the changed fields back in
1138                         $wgCurParser = $this->fork();
1139
1140                         $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1141                         if ( $oldText == $text ) {
1142                                 $bail = true;
1143                         }
1144                         $this->merge( $wgCurParser );
1145                 }
1146
1147                 return $text;
1148         }
1149
1150         # Returns a copy of this object except with various variables cleared
1151         # This copy can be re-merged with the parent after operations on the copy
1152         function fork()
1153         {
1154                 $copy = $this;
1155                 $copy->mOutput = new ParserOutput;
1156                 return $copy;
1157         }
1158
1159         # Merges a copy split off with fork()
1160         function merge( &$copy )
1161         {
1162                 $this->mOutput->merge( $copy->mOutput );
1163
1164                 # Merge include throttling arrays
1165                 foreach( $copy->mIncludeCount as $dbk => $count ) {
1166                         if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1167                                 $this->mIncludeCount[$dbk] += $count;
1168                         } else {
1169                                 $this->mIncludeCount[$dbk] = $count;
1170                         }
1171                 }
1172         }
1173
1174         function braceSubstitution( $matches )
1175         {
1176                 global $wgLinkCache, $wgLang;
1177                 $fname = "Parser::braceSubstitution";
1178                 $found = false;
1179                 $nowiki = false;
1180
1181                 $text = $matches[1];
1182
1183                 # SUBST
1184                 $mwSubst =& MagicWord::get( MAG_SUBST );
1185                 if ( $mwSubst->matchStartAndRemove( $text ) ) {
1186                         if ( $this->mOutputType != OT_WIKI ) {
1187                                 # Invalid SUBST not replaced at PST time
1188                                 # Return without further processing
1189                                 $text = $matches[0];
1190                                 $found = true;
1191                         }
1192                 } elseif ( $this->mOutputType == OT_WIKI ) {
1193                         # SUBST not found in PST pass, do nothing
1194                         $text = $matches[0];
1195                         $found = true;
1196                 }
1197
1198                 # MSG, MSGNW and INT
1199                 if ( !$found ) {
1200                         # Check for MSGNW:
1201                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1202                         if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
1203                                 $nowiki = true;
1204                         } else {
1205                                 # Remove obsolete MSG:
1206                                 $mwMsg =& MagicWord::get( MAG_MSG );
1207                                 $mwMsg->matchStartAndRemove( $text );
1208                         }
1209
1210                         # Check if it is an internal message
1211                         $mwInt =& MagicWord::get( MAG_INT );
1212                         if ( $mwInt->matchStartAndRemove( $text ) ) {
1213                                 $text = wfMsg( $text );
1214                                 $found = true;
1215                         }
1216                 }
1217
1218                 # NS
1219                 if ( !$found ) {
1220                         # Check for NS: (namespace expansion)
1221                         $mwNs = MagicWord::get( MAG_NS );
1222                         if ( $mwNs->matchStartAndRemove( $text ) ) {
1223                                 if ( intval( $text ) ) {
1224                                         $text = $wgLang->getNsText( intval( $text ) );
1225                                         $found = true;
1226                                 } else {
1227                                         $index = Namespace::getCanonicalIndex( strtolower( $text ) );
1228                                         if ( !is_null( $index ) ) {
1229                                                 $text = $wgLang->getNsText( $index );
1230                                                 $found = true;
1231                                         }
1232                                 }
1233                         }
1234                 }
1235
1236                 # LOCALURL and LOCALURLE
1237                 if ( !$found ) {
1238                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1239                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1240
1241                         if ( $mwLocal->matchStartAndRemove( $text ) ) {
1242                                 $func = 'getLocalURL';
1243                         } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
1244                                 $func = 'escapeLocalURL';
1245                         } else {
1246                                 $func = '';
1247                         }
1248
1249                         if ( $func !== '' ) {
1250                                 $args = explode( "|", $text );
1251                                 $n = count( $args );
1252                                 if ( $n > 0 ) {
1253                                         $title = Title::newFromText( $args[0] );
1254                                         if ( !is_null( $title ) ) {
1255                                                 if ( $n > 1 ) {
1256                                                         $text = $title->$func( $args[1] );
1257                                                 } else {
1258                                                         $text = $title->$func();
1259                                                 }
1260                                                 $found = true;
1261                                         }
1262                                 }
1263                         }
1264                 }
1265
1266                 # Check for a match against internal variables
1267                 if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
1268                         $text = $this->mVariables[$text];
1269                         $found = true;
1270                         $this->mOutput->mContainsOldMagic = true;
1271                 }
1272
1273                 # Load from database
1274                 if ( !$found ) {
1275                         $title = Title::newFromText( $text, NS_TEMPLATE );
1276                         if ( is_object( $title ) && !$title->isExternal() ) {
1277                                 # Check for excessive inclusion
1278                                 $dbk = $title->getPrefixedDBkey();
1279                                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1280                                         $this->mIncludeCount[$dbk] = 0;
1281                                 }
1282                                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1283                                         $article = new Article( $title );
1284                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1285                                         if ( $articleContent !== false ) {
1286                                                 $found = true;
1287                                                 $text = $articleContent;
1288
1289                                                 # Escaping and link table handling
1290                                                 # Not required for preSaveTransform()
1291                                                 if ( $this->mOutputType == OT_HTML ) {
1292                                                         if ( $nowiki ) {
1293                                                                 $text = wfEscapeWikiText( $text );
1294                                                         } else {
1295                                                                 $text = $this->removeHTMLtags( $text );
1296                                                         }
1297                                                         $wgLinkCache->suspend();
1298                                                         $text = $this->doTokenizedParser( $text );
1299                                                         $wgLinkCache->resume();
1300                                                         $wgLinkCache->addLinkObj( $title );
1301
1302                                                 }
1303                                         }
1304                                 }
1305
1306                                 # If the title is valid but undisplayable, make a link to it
1307                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1308                                         $text = "[[" . $title->getPrefixedText() . "]]";
1309                                         $found = true;
1310                                 }
1311                         }
1312                 }
1313
1314                 if ( !$found ) {
1315                         return $matches[0];
1316                 } else {
1317                         return $text;
1318                 }
1319         }
1320
1321         # Cleans up HTML, removes dangerous tags and attributes
1322         /* private */ function removeHTMLtags( $text )
1323         {
1324                 $fname = "Parser::removeHTMLtags";
1325                 wfProfileIn( $fname );
1326                 $htmlpairs = array( # Tags that must be closed
1327                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1328                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1329                         "strike", "strong", "tt", "var", "div", "center",
1330                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1331                         "ruby", "rt" , "rb" , "rp", "p"
1332                 );
1333                 $htmlsingle = array(
1334                         "br", "hr", "li", "dt", "dd"
1335                 );
1336                 $htmlnest = array( # Tags that can be nested--??
1337                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1338                         "dl", "font", "big", "small", "sub", "sup"
1339                 );
1340                 $tabletags = array( # Can only appear inside table
1341                         "td", "th", "tr"
1342                 );
1343
1344                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1345                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1346
1347                 $htmlattrs = $this->getHTMLattrs () ;
1348
1349                 # Remove HTML comments
1350                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1351
1352                 $bits = explode( "<", $text );
1353                 $text = array_shift( $bits );
1354                 $tagstack = array(); $tablestack = array();
1355
1356                 foreach ( $bits as $x ) {
1357                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1358                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1359                           $x, $regs );
1360                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1361                         error_reporting( $prev );
1362
1363                         $badtag = 0 ;
1364                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1365                                 # Check our stack
1366                                 if ( $slash ) {
1367                                         # Closing a tag...
1368                                         if ( ! in_array( $t, $htmlsingle ) &&
1369                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1370                                                 array_push( $tagstack, $ot );
1371                                                 $badtag = 1;
1372                                         } else {
1373                                                 if ( $t == "table" ) {
1374                                                         $tagstack = array_pop( $tablestack );
1375                                                 }
1376                                                 $newparams = "";
1377                                         }
1378                                 } else {
1379                                         # Keep track for later
1380                                         if ( in_array( $t, $tabletags ) &&
1381                                           ! in_array( "table", $tagstack ) ) {
1382                                                 $badtag = 1;
1383                                         } else if ( in_array( $t, $tagstack ) &&
1384                                           ! in_array ( $t , $htmlnest ) ) {
1385                                                 $badtag = 1 ;
1386                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1387                                                 if ( $t == "table" ) {
1388                                                         array_push( $tablestack, $tagstack );
1389                                                         $tagstack = array();
1390                                                 }
1391                                                 array_push( $tagstack, $t );
1392                                         }
1393                                         # Strip non-approved attributes from the tag
1394                                         $newparams = $this->fixTagAttributes($params);
1395
1396                                 }
1397                                 if ( ! $badtag ) {
1398                                         $rest = str_replace( ">", "&gt;", $rest );
1399                                         $text .= "<$slash$t $newparams$brace$rest";
1400                                         continue;
1401                                 }
1402                         }
1403                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1404                 }
1405                 # Close off any remaining tags
1406                 while ( $t = array_pop( $tagstack ) ) {
1407                         $text .= "</$t>\n";
1408                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1409                 }
1410                 wfProfileOut( $fname );
1411                 return $text;
1412         }
1413
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text: HTML in which the headings are already <h1>..<h6> elements.
 * Returns the HTML with the rebuilt headings and, if enabled, the TOC inserted
 * after the text that precedes the first heading.
 *
 */

        /* private */ function formatHeadings( $text )
        {
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1] = level digit, [2] = rest of the opening tag, [3] = heading content
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }


                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # headline counter
                $headlineCount = 0;
                $numHeadlines = count( $matches[3] );

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();          # rebuilt heading HTML, indexed by headline number
                $sublevelCount = array(); # headings seen so far, per heading level
                $refers = array();        # occurrences of each canonized heading text
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $doNumberHeadings || $doShowToc ) {
                                # Build "1.2.3" from the counters of all levels up to this one,
                                # skipping levels that have not been used
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = Parser::unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                        # count how many in assoc. array so we can track dupes in anchors
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refcount = ++$refers[$canonized_headline];

                        # Prepend the number to the heading text

                        if( $doNumberHeadings || $doShowToc ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && $numHeadlines > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section
                        # (repeated headings get "_2", "_3", ... appended)
                        $anchor = $canonized_headline;
                        if( $refcount > 1 ) {
                                $anchor .= "_" . $refcount;
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        # Each headline number is visited exactly once, so start from empty
                        $head[$headlineCount] = "";
                        if( $showEditLink ) {
                                $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # give headline the correct <h#> tag
                        $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        # Close whatever indentation is still open, then wrap the lines
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
                                # This is the [edit] link that appears for the top block of text when
                                # section editing is enabled
                                $full .= $sk->editSectionLink(0);
                        }
                        $full .= $block;
                        if( $doShowToc && !$i) {
                                # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        # Block $i is followed by rebuilt heading $i
                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1605
1606         /* private */ function doMagicISBN( &$tokenizer )
1607         {
1608                 global $wgLang;
1609
1610                 # Check whether next token is a text token
1611                 # If yes, fetch it and convert the text into a
1612                 # Special::BookSources link
1613                 $token = $tokenizer->previewToken();
1614                 while ( $token["type"] == "" )
1615                 {
1616                         $tokenizer->nextToken();
1617                         $token = $tokenizer->previewToken();
1618                 }
1619                 if ( $token["type"] == "text" )
1620                 {
1621                         $token = $tokenizer->nextToken();
1622                         $x = $token["text"];
1623                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1624
1625                         $isbn = $blank = "" ;
1626                         while ( " " == $x{0} ) {
1627                                 $blank .= " ";
1628                                 $x = substr( $x, 1 );
1629                         }
1630                         while ( strstr( $valid, $x{0} ) != false ) {
1631                                 $isbn .= $x{0};
1632                                 $x = substr( $x, 1 );
1633                         }
1634                         $num = str_replace( "-", "", $isbn );
1635                         $num = str_replace( " ", "", $num );
1636
1637                         if ( "" == $num ) {
1638                                 $text = "ISBN $blank$x";
1639                         } else {
1640                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1641                                 $text = "<a href=\"" .
1642                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1643                                         "\" class=\"internal\">ISBN $isbn</a>";
1644                                 $text .= $x;
1645                         }
1646                 } else {
1647                         $text = "ISBN ";
1648                 }
1649                 return $text;
1650         }
1651         /* private */ function doMagicRFC( &$tokenizer )
1652         {
1653                 global $wgLang;
1654
1655                 # Check whether next token is a text token
1656                 # If yes, fetch it and convert the text into a
1657                 # link to an RFC source
1658                 $token = $tokenizer->previewToken();
1659                 while ( $token["type"] == "" )
1660                 {
1661                         $tokenizer->nextToken();
1662                         $token = $tokenizer->previewToken();
1663                 }
1664                 if ( $token["type"] == "text" )
1665                 {
1666                         $token = $tokenizer->nextToken();
1667                         $x = $token["text"];
1668                         $valid = "0123456789";
1669
1670                         $rfc = $blank = "" ;
1671                         while ( " " == $x{0} ) {
1672                                 $blank .= " ";
1673                                 $x = substr( $x, 1 );
1674                         }
1675                         while ( strstr( $valid, $x{0} ) != false ) {
1676                                 $rfc .= $x{0};
1677                                 $x = substr( $x, 1 );
1678                         }
1679
1680                         if ( "" == $rfc ) {
1681                                 $text .= "RFC $blank$x";
1682                         } else {
1683                                 $url = wfmsg( "rfcurl" );
1684                                 $url = str_replace( "$1", $rfc, $url);
1685                                 $sk =& $this->mOptions->getSkin();
1686                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1687                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1688                         }
1689                 } else {
1690                         $text = "RFC ";
1691                 }
1692                 return $text;
1693         }
1694
1695         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1696         {
1697                 $this->mOptions = $options;
1698                 $this->mTitle =& $title;
1699                 $this->mOutputType = OT_WIKI;
1700
1701                 if ( $clearState ) {
1702                         $this->clearState();
1703                 }
1704
1705                 $stripState = false;
1706                 $text = str_replace("\r\n", "\n", $text);
1707                 $text = $this->strip( $text, $stripState, false );
1708                 $text = $this->pstPass2( $text, $user );
1709                 $text = $this->unstrip( $text, $stripState );
1710                 return $text;
1711         }
1712
1713         /* private */ function pstPass2( $text, &$user )
1714         {
1715                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1716
1717                 # Variable replacement
1718                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1719                 $text = $this->replaceVariables( $text );
1720
1721                 # Signatures
1722                 #
1723                 $n = $user->getName();
1724                 $k = $user->getOption( "nickname" );
1725                 if ( "" == $k ) { $k = $n; }
1726                 if(isset($wgLocaltimezone)) {
1727                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1728                 }
1729                 /* Note: this is an ugly timezone hack for the European wikis */
1730                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1731                   " (" . date( "T" ) . ")";
1732                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1733
1734                 $text = preg_replace( "/~~~~~/", $d, $text );
1735                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1736                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1737                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1738                   Namespace::getUser() ) . ":$n|$k]]", $text );
1739
1740                 # Context links: [[|name]] and [[name (context)|]]
1741                 #
1742                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1743                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1744                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1745                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1746
1747                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1748                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1749                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1750                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1751                                                                                                                 # [[ns:page (cont)|]]
1752                 $context = "";
1753                 $t = $this->mTitle->getText();
1754                 if ( preg_match( $conpat, $t, $m ) ) {
1755                         $context = $m[2];
1756                 }
1757                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1758                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1759                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1760
1761                 if ( "" == $context ) {
1762                         $text = preg_replace( $p2, "[[\\1]]", $text );
1763                 } else {
1764                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1765                 }
1766
1767                 /*
1768                 $mw =& MagicWord::get( MAG_SUBST );
1769                 $wgCurParser = $this->fork();
1770                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1771                 $this->merge( $wgCurParser );
1772                 */
1773
1774                 # Trim trailing whitespace
1775                 # MAG_END (__END__) tag allows for trailing
1776                 # whitespace to be deliberately included
1777                 $text = rtrim( $text );
1778                 $mw =& MagicWord::get( MAG_END );
1779                 $mw->matchAndRemove( $text );
1780
1781                 return $text;
1782         }
1783
1784         # Set up some variables which are usually set up in parse()
1785         # so that an external function can call some class members with confidence
1786         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1787         {
1788                 $this->mTitle =& $title;
1789                 $this->mOptions = $options;
1790                 $this->mOutputType = $outputType;
1791                 if ( $clearState ) {
1792                         $this->clearState();
1793                 }
1794         }
1795
1796         function transformMsg( $text, $options ) {
1797                 global $wgTitle;
1798                 static $executing = false;
1799
1800                 # Guard against infinite recursion
1801                 if ( $executing ) {
1802                         return $text;
1803                 }
1804                 $executing = true;
1805
1806                 $this->mTitle = $wgTitle;
1807                 $this->mOptions = $options;
1808                 $this->mOutputType = OT_MSG;
1809                 $this->clearState();
1810                 $text = $this->replaceVariables( $text );
1811
1812                 $executing = false;
1813                 return $text;
1814         }
1815 }
1816
# Result of a parse: the output text together with the language links and
# category links belonging to it, and a flag for the use of old-style magic
# variables.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: initial value of the old-magic flag
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns the result of wfSetVar()
        # (presumably the previous value -- confirm against wfSetVar)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Folds the links and the old-magic flag of another ParserOutput into
        # this one. The text is not touched.
        #
        # $other: the ParserOutput to take the links and the flag from
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1846
# ParserOptions
#
# Bundle of settings consulted while parsing. Values are copied from
# site-wide globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Read accessors: each returns the stored member unchanged.
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Write accessors: each delegates to wfSetVar() (wfSetRef() for the skin)
        # and returns that helper's result -- presumably the previous value;
        # confirm against the helpers' definitions.
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: create a ParserOptions and fill it in from $user.
        #   $user - user object (taken by reference); a false-ish value makes
        #           initialiseFromUser() fall back to a fresh User object
        # Returns the new ParserOptions.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # No '&' at the call site: initialiseFromUser() already declares its
                # parameter by reference, and call-time pass-by-reference is a fatal
                # error in newer PHP versions.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every option of this object.
        # Site-wide switches are copied from the $wg* globals; per-user settings
        # come from $user->getOption() and the skin from $user->getSkin().
        #   $userInput - user object (by reference); when it evaluates to false,
        #                a new User marked as loaded via setLoaded( true ) is used
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # User preferences; the skin is bound by reference rather than copied
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1919
# Regex callback used by Parser::replaceVariables.
# A plain function is needed as the callback, so this forwards the match
# array to the braceSubstitution() method of the parser currently held in
# the global $wgCurParser and hands back whatever that method returns.
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
1926
1927 ?>