includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36 define( "MAX_INCLUDE_REPEAT", 5 );
  37
  38 # Recursion depth of variable/inclusion evaluation
  39 define( "MAX_INCLUDE_PASSES", 3 );
  40
  41 # Allowed values for $mOutputType
  42 define( "OT_HTML", 1 );
  43 define( "OT_WIKI", 2 );
  44 define( "OT_MSG", 3 );
  45
  46 class Parser
  47 {
  48         # Cleared with clearState():
  49         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  50         var $mVariables, $mIncludeCount;
  51
  52         # Temporary:
  53         var $mOptions, $mTitle, $mOutputType;
  54
  55         function Parser()
  56         {
  57                 $this->clearState();
  58         }
  59
  60         function clearState()
  61         {
  62                 $this->mOutput = new ParserOutput;
  63                 $this->mAutonumber = 0;
  64                 $this->mLastSection = "";
  65                 $this->mDTopen = false;
  66                 $this->mVariables = false;
  67                 $this->mIncludeCount = array();
  68                 $this->mStripState = array();
  69         }
  70
  71         # First pass--just handle <nowiki> sections, pass the rest off
  72         # to doWikiPass2() which does all the real work.
  73         #
  74         # Returns a ParserOutput
  75         #
  76         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  77         {
  78                 $fname = "Parser::parse";
  79                 wfProfileIn( $fname );
  80
  81                 if ( $clearState ) {
  82                         $this->clearState();
  83                 }
  84
  85                 $this->mOptions = $options;
  86                 $this->mTitle =& $title;
  87                 $this->mOutputType = OT_HTML;
  88
  89                 $stripState = NULL;
  90                 $text = $this->strip( $text, $this->mStripState );
  91                 $text = $this->doWikiPass2( $text, $linestart );
  92                 $text = $this->unstrip( $text, $this->mStripState );
  93
  94                 $this->mOutput->setText( $text );
  95                 wfProfileOut( $fname );
  96                 return $this->mOutput;
  97         }
  98
  99         /* static */ function getRandomString()
 100         {
 101                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 102         }
 103
        # Replaces all occurrences of <$tag>content</$tag> in the text
        # with a random marker and returns the new text. The output parameter
        # $content will be an associative array filled with data of the form
        # $unique_marker => content.
 108
 109         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 110                 $result = array();
 111                 $rnd = $uniq_prefix . Parser::getRandomString();
 112                 $content = array( );
 113                 $n = 1;
 114                 $stripped = "";
 115
 116                 while ( "" != $text ) {
 117                         $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 118                         $stripped .= $p[0];
 119                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 120                                 $text = "";
 121                         } else {
 122                                 $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 123                                 $marker = $rnd . sprintf("%08X", $n++);
 124                                 $content[$marker] = $q[0];
 125                                 $stripped .= $marker;
 126                                 $text = $q[1];
 127                         }
 128                 }
 129                 return $stripped;
 130         }
 131
        # Strips <nowiki>, <hiero> (if enabled), <math> (if enabled) and <pre>
        # Returns the text, and fills an array with data needed in unstrip()
        #
 135         function strip( $text, &$state )
 136         {
 137                 $render = ($this->mOutputType == OT_HTML);
 138                 $nowiki_content = array();
 139                 $hiero_content = array();
 140                 $math_content = array();
 141                 $pre_content = array();
 142
 143                 # Replace any instances of the placeholders
 144                 $uniq_prefix = "NaodW29";
 145                 $text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 146
 147                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 148                 foreach( $nowiki_content as $marker => $content ){
 149                         if( $render ){
 150                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 151                         } else {
 152                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 153                         }
 154                 }
 155
 156                 if( $GLOBALS['wgUseWikiHiero'] ){
 157                         $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 158                         foreach( $hiero_content as $marker => $content ){
 159                                 if( $render ){
 160                                         $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 161                                 } else {
 162                                         $hiero_content[$marker] = "<hiero>$content</hiero>";
 163                                 }
 164                         }
 165                 }
 166
 167                 if( $this->mOptions->getUseTeX() ){
 168                         $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 169                         foreach( $math_content as $marker => $content ){
 170                                 if( $render ){
 171                                         $math_content[$marker] = renderMath( $content );
 172                                 } else {
 173                                         $math_content[$marker] = "<math>$content</math>";
 174                                 }
 175                         }
 176                 }
 177
 178                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 179                 foreach( $pre_content as $marker => $content ){
 180                         if( $render ){
 181                                 $pre_content[$marker] = "\n<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 182                         } else {
 183                                 $pre_content[$marker] = "\n<pre>$content</pre>";
 184                         }
 185                 }
 186
 187                 # Must expand in reverse order, otherwise nested tags will be corrupted
 188                 $state = array( $pre_content, $math_content, $hiero_content, $nowiki_content );
 189                 return $text;
 190         }
 191
 192         function unstrip( $text, &$state )
 193         {
 194                 foreach( $state as $content_dict ){
 195                         foreach( $content_dict as $marker => $content ){
 196                                 $text = str_replace( $marker, $content, $text );
 197                         }
 198                 }
 199                 return $text;
 200         }
 201
 202         function categoryMagic ()
 203         {
 204                 global $wgLang , $wgUser ;
 205                 if ( !$this->mOptions->getUseCategoryMagic() ) return ;
 206                 $id = $this->mTitle->getArticleID() ;
 207                 $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
 208                 $ti = $this->mTitle->getText() ;
 209                 $ti = explode ( ":" , $ti , 2 ) ;
 210                 if ( $cat != $ti[0] ) return "" ;
 211                 $r = "<br break='all' />\n" ;
 212
 213                 $articles = array() ;
 214                 $parents = array () ;
 215                 $children = array() ;
 216
 217
 218 #               $sk =& $this->mGetSkin();
 219                 $sk =& $wgUser->getSkin() ;
 220
 221                 $data = array () ;
 222                 $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
 223                 $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
 224
 225                 $res = wfQuery ( $sql1, DB_READ ) ;
 226                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 227
 228                 $res = wfQuery ( $sql2, DB_READ ) ;
 229                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 230
 231
 232                 foreach ( $data AS $x )
 233                 {
 234                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 235                         if ( $t != "" ) $t .= ":" ;
 236                         $t .= $x->cur_title ;
 237
 238                         $y = explode ( ":" , $t , 2 ) ;
 239                         if ( count ( $y ) == 2 && $y[0] == $cat ) {
 240                                 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
 241                         } else {
 242                                 array_push ( $articles , $sk->makeLink ( $t ) ) ;
 243                         }
 244                 }
 245                 wfFreeResult ( $res ) ;
 246
 247                 # Children
 248                 if ( count ( $children ) > 0 )
 249                 {
 250                         asort ( $children ) ;
 251                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 252                         $r .= implode ( ", " , $children ) ;
 253                 }
 254
 255                 # Articles
 256                 if ( count ( $articles ) > 0 )
 257                 {
 258                         asort ( $articles ) ;
 259                         $h =  wfMsg( "category_header", $ti[1] );
 260                         $r .= "<h2>{$h}</h2>\n" ;
 261                         $r .= implode ( ", " , $articles ) ;
 262                 }
 263
 264
 265                 return $r ;
 266         }
 267
 268         function getHTMLattrs ()
 269         {
 270                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 271                                 "title", "align", "lang", "dir", "width", "height",
 272                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 273                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 274                                 /* FONT */ "type", "start", "value", "compact",
 275                                 /* For various lists, mostly deprecated but safe */
 276                                 "summary", "width", "border", "frame", "rules",
 277                                 "cellspacing", "cellpadding", "valign", "char",
 278                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 279                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 280                                 "id", "class", "name", "style" /* For CSS */
 281                                 );
 282                 return $htmlattrs ;
 283         }
 284
 285         function fixTagAttributes ( $t )
 286         {
 287                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 288                 $htmlattrs = $this->getHTMLattrs() ;
 289
 290                 # Strip non-approved attributes from the tag
 291                 $t = preg_replace(
 292                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 293                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 294                         $t);
 295                 # Strip javascript "expression" from stylesheets. Brute force approach:
 296                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 297
 298                 if( preg_match(
 299                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 300                         wfMungeToUtf8( $t ) ) )
 301                 {
 302                         $t="";
 303                 }
 304
 305                 return trim ( $t ) ;
 306         }
 307
 308         function doTableStuff ( $t )
 309         {
 310                 $t = explode ( "\n" , $t ) ;
 311                 $td = array () ; # Is currently a td tag open?
 312                         $ltd = array () ; # Was it TD or TH?
 313                         $tr = array () ; # Is currently a tr tag open?
 314                         $ltr = array () ; # tr attributes
 315                         foreach ( $t AS $k => $x )
 316                         {
 317                                 $x = rtrim ( $x ) ;
 318                                 $fc = substr ( $x , 0 , 1 ) ;
 319                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 320                                 {
 321                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 322                                         array_push ( $td , false ) ;
 323                                         array_push ( $ltd , "" ) ;
 324                                         array_push ( $tr , false ) ;
 325                                         array_push ( $ltr , "" ) ;
 326                                 }
 327                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 328                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 329                                 {
 330                                         $z = "</table>\n" ;
 331                                         $l = array_pop ( $ltd ) ;
 332                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 333                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 334                                         array_pop ( $ltr ) ;
 335                                         $t[$k] = $z ;
 336                                 }
 337                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 338                                                 {
 339                                                 $z = trim ( substr ( $x , 2 ) ) ;
 340                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 341                                                 }*/
 342                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 343                                 {
 344                                         $x = substr ( $x , 1 ) ;
 345                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 346                                         $z = "" ;
 347                                         $l = array_pop ( $ltd ) ;
 348                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 349                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 350                                         array_pop ( $ltr ) ;
 351                                         $t[$k] = $z ;
 352                                         array_push ( $tr , false ) ;
 353                                         array_push ( $td , false ) ;
 354                                         array_push ( $ltd , "" ) ;
 355                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 356                                 }
 357                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 358                                 {
 359                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 360                                         {
 361                                                 $fc = "+" ;
 362                                                 $x = substr ( $x , 1 ) ;
 363                                         }
 364                                         $after = substr ( $x , 1 ) ;
 365                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 366                                         $after = explode ( "||" , $after ) ;
 367                                         $t[$k] = "" ;
 368                                         foreach ( $after AS $theline )
 369                                         {
 370                                                 $z = "" ;
 371                                                 if ( $fc != "+" )
 372                                                 {
 373                                                         $tra = array_pop ( $ltr ) ;
 374                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 375                                                         array_push ( $tr , true ) ;
 376                                                         array_push ( $ltr , "" ) ;
 377                                                 }
 378
 379                                                 $l = array_pop ( $ltd ) ;
 380                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 381                                                 if ( $fc == "|" ) $l = "td" ;
 382                                                 else if ( $fc == "!" ) $l = "th" ;
 383                                                 else if ( $fc == "+" ) $l = "caption" ;
 384                                                 else $l = "" ;
 385                                                 array_push ( $ltd , $l ) ;
 386                                                 $y = explode ( "|" , $theline , 2 ) ;
 387                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 388                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 389                                                 $t[$k] .= $y ;
 390                                                 array_push ( $td , true ) ;
 391                                         }
 392                                 }
 393                         }
 394
 395                 # Closing open td, tr && table
 396                 while ( count ( $td ) > 0 )
 397                 {
 398                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 399                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 400                         $t[] = "</table>" ;
 401                 }
 402
 403                 $t = implode ( "\n" , $t ) ;
 404                 #               $t = $this->removeHTMLtags( $t );
 405                 return $t ;
 406         }
 407
 408         # Well, OK, it's actually about 14 passes.  But since all the
 409         # hard lifting is done inside PHP's regex code, it probably
 410         # wouldn't speed things up much to add a real parser.
 411         #
 412         function doWikiPass2( $text, $linestart )
 413         {
 414                 $fname = "Parser::doWikiPass2";
 415                 wfProfileIn( $fname );
 416
 417                 $text = $this->removeHTMLtags( $text );
 418                 $text = $this->replaceVariables( $text );
 419
 420                 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
 421
 422                 $text = $this->doHeadings( $text );
 423
 424                 if($this->mOptions->getUseDynamicDates()) {
 425                         global $wgDateFormatter;
 426                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 427                 }
 428
 429                 $text = ' '.$this->replaceExternalLinks( $text );
 430                 $text = $this->doTokenizedParser ( $text );
 431                 $text = $this->doTableStuff ( $text ) ;
 432
 433                 $text = $this->formatHeadings( $text );
 434
 435                 $sk =& $this->mOptions->getSkin();
 436                 $text = $sk->transformContent( $text );
 437                 $fixtags = array(
 438                         "/<hr *>/i" => '<hr/>',
 439                         "/<br *>/i" => '<br/>',
 440                         "/<center *>/i"=>'<span style="text-align:center;">',
 441                         "/<\\/center *>/i" => '</span>'
 442                 );
 443                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 444                 $text = $this->doBlockLevels( $text, $linestart );
 445                 $text .= $this->categoryMagic () ;
 446
 447                 wfProfileOut( $fname );
 448                 return $text;
 449         }
 450
 451
 452         /* private */ function doHeadings( $text )
 453         {
 454                 for ( $i = 6; $i >= 1; --$i ) {
 455                         $h = substr( "======", 0, $i );
 456                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 457                           "<h{$i}>\\1</h{$i}>\\2", $text );
 458                 }
 459                 return $text;
 460         }
 461
 462         # Note: we have to do external links before the internal ones,
 463         # and otherwise take great care in the order of things here, so
 464         # that we don't end up interpreting some URLs twice.
 465
 466         /* private */ function replaceExternalLinks( $text )
 467         {
 468                 $fname = "Parser::replaceExternalLinks";
 469                 wfProfileIn( $fname );
 470                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 471                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 472                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 473                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 474                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 475                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 476                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 477                 wfProfileOut( $fname );
 478                 return $text;
 479         }
 480
 481         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 482         {
 483                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 484                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 485
 486                 # this is  the list of separators that should be ignored if they
 487                 # are the last character of an URL but that should be included
 488                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 489                 # in this case, the last comma should not become part of the URL,
 490                 # but in "www.foo.com/123,2342,32.htm" it should.
 491                 $sep = ",;\.:";
 492                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 493                 $images = "gif|png|jpg|jpeg";
 494
 495                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 496                 # they are interpreted as part of the string (used to tell PHP
 497                 # that the content of the string should be inserted there).
 498                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 499                   "((?i){$images})([^{$uc}]|$)/";
 500
 501                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 502                 $sk =& $this->mOptions->getSkin();
 503
 504                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 505                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 506                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 507                 }
 508                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 509                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 510                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 511                   "</a>\\5", $s );
 512                 $s = str_replace( $unique, $protocol, $s );
 513
 514                 $a = explode( "[{$protocol}:", " " . $s );
 515                 $s = array_shift( $a );
 516                 $s = substr( $s, 1 );
 517
 518                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 519                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 520
 521                 foreach ( $a as $line ) {
 522                         if ( preg_match( $e1, $line, $m ) ) {
 523                                 $link = "{$protocol}:{$m[1]}";
 524                                 $trail = $m[2];
 525                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 526                                 else { $text = wfEscapeHTML( $link ); }
 527                         } else if ( preg_match( $e2, $line, $m ) ) {
 528                                 $link = "{$protocol}:{$m[1]}";
 529                                 $text = $m[2];
 530                                 $trail = $m[3];
 531                         } else {
 532                                 $s .= "[{$protocol}:" . $line;
 533                                 continue;
 534                         }
 535                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 536                                 $paren = "";
 537                         } else {
 538                                 # Expand the URL for printable version
 539                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 540                         }
 541                         $la = $sk->getExternalLinkAttributes( $link, $text );
 542                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 543
 544                 }
 545                 return $s;
 546         }
 547
 548         /* private */ function handle3Quotes( &$state, $token )
 549         {
 550                 if ( $state["strong"] ) {
 551                         if ( $state["em"] && $state["em"] > $state["strong"] )
 552                         {
 553                                 # ''' lala ''lala '''
 554                                 $s = "</em></strong><em>";
 555                         } else {
 556                                 $s = "</strong>";
 557                         }
 558                         $state["strong"] = FALSE;
 559                 } else {
 560                         $s = "<strong>";
 561                         $state["strong"] = $token["pos"];
 562                 }
 563                 return $s;
 564         }
 565
 566         /* private */ function handle2Quotes( &$state, $token )
 567         {
 568                 if ( $state["em"] ) {
 569                         if ( $state["strong"] && $state["strong"] > $state["em"] )
 570                         {
 571                                 # ''lala'''lala'' ....'''
 572                                 $s = "</strong></em><strong>";
 573                         } else {
 574                                 $s = "</em>";
 575                         }
 576                         $state["em"] = FALSE;
 577                 } else {
 578                         $s = "<em>";
 579                         $state["em"] = $token["pos"];
 580                 }
 581                 return $s;
 582         }
 583
 584         /* private */ function handle5Quotes( &$state, $token )
 585         {
 586                 $s = "";
 587                 if ( $state["em"] && $state["strong"] ) {
 588                         if ( $state["em"] < $state["strong"] ) {
 589                                 $s .= "</strong></em>";
 590                         } else {
 591                                 $s .= "</em></strong>";
 592                         }
 593                         $state["strong"] = $state["em"] = FALSE;
 594                 } elseif ( $state["em"] ) {
 595                         $s .= "</em><strong>";
 596                         $state["em"] = FALSE;
 597                         $state["strong"] = $token["pos"];
 598                 } elseif ( $state["strong"] ) {
 599                         $s .= "</strong><em>";
 600                         $state["strong"] = FALSE;
 601                         $state["em"] = $token["pos"];
 602                 } else { # not $em and not $strong
 603                         $s .= "<strong><em>";
 604                         $state["strong"] = $state["em"] = $token["pos"];
 605                 }
 606                 return $s;
 607         }
 608
 609         /* private */ function doTokenizedParser( $str )
 610         {
 611                 global $wgLang; # for language specific parser hook
 612
 613                 $tokenizer=Tokenizer::newFromString( $str );
 614                 $tokenStack = array();
 615
 616                 $s="";
 617                 $state["em"]      = FALSE;
 618                 $state["strong"]  = FALSE;
 619                 $tagIsOpen = FALSE;
 620                 $threeopen = false;
 621
 622                 # The tokenizer splits the text into tokens and returns them one by one.
 623                 # Every call to the tokenizer returns a new token.
 624                 while ( $token = $tokenizer->nextToken() )
 625                 {
 626                         $threeopen = false;
 627                         switch ( $token["type"] )
 628                         {
 629                                 case "text":
 630                                         # simple text with no further markup
 631                                         $txt = $token["text"];
 632                                         break;
 633                                 case "[[[":
 634                                         # remember the tag opened with 3 [
 635                                         $threeopen = true;
 636                                 case "[[":
 637                                         # link opening tag.
 638                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 639                                         $tagIsOpen = TRUE;
 640                                         array_push( $tokenStack, $token );
 641                                         $txt="";
 642                                         break;
 643
 644                                 case "]]]":
 645                                 case "]]":
 646                                         # link close tag.
 647                                         # get text from stack, glue it together, and call the code to handle a
 648                                         # link
 649
 650                                         if ( count( $tokenStack ) == 0 )
 651                                         {
 652                                                 # stack empty. Found a ]] without an opening [[
 653                                                 $txt = "]]";
 654                                         } else {
 655                                                 $linkText = "";
 656                                                 $lastToken = array_pop( $tokenStack );
 657                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 658                                                 {
 659                                                         if( !empty( $lastToken["text"] ) ) {
 660                                                                 $linkText = $lastToken["text"] . $linkText;
 661                                                         }
 662                                                         $lastToken = array_pop( $tokenStack );
 663                                                 }
 664
 665                                                 $txt = $linkText ."]]";
 666
 667                                                 if( isset( $lastToken["text"] ) ) {
 668                                                         $prefix = $lastToken["text"];
 669                                                 } else {
 670                                                         $prefix = "";
 671                                                 }
 672                                                 $nextToken = $tokenizer->previewToken();
 673                                                 if ( $nextToken["type"] == "text" )
 674                                                 {
 675                                                         # Preview just looks at it. Now we have to fetch it.
 676                                                         $nextToken = $tokenizer->nextToken();
 677                                                         $txt .= $nextToken["text"];
 678                                                 }
 679                                                 $txt = $this->handleInternalLink( $txt, $prefix );
 680
 681                                                 # did the tag start with 3 [ ?
 682                                                 if($threeopen) {
 683                                                         # show the first as text
 684                                                         $txt = "[".$txt;
 685                                                         $threeopen=false;
 686                                                 }
 687
 688                                         }
 689                                         $tagIsOpen = (count( $tokenStack ) != 0);
 690                                         break;
 691                                 case "----":
 692                                         $txt = "\n<hr />\n";
 693                                         break;
 694                                 case "'''":
 695                                         # This and the three next ones handle quotes
 696                                         $txt = $this->handle3Quotes( $state, $token );
 697                                         break;
 698                                 case "''":
 699                                         $txt = $this->handle2Quotes( $state, $token );
 700                                         break;
 701                                 case "'''''":
 702                                         $txt = $this->handle5Quotes( $state, $token );
 703                                         break;
 704                                 case "":
 705                                         # empty token
 706                                         $txt="";
 707                                         break;
 708                                 case "RFC ":
 709                                         if ( $tagIsOpen ) {
 710                                                 $txt = "RFC ";
 711                                         } else {
 712                                                 $txt = $this->doMagicRFC( $tokenizer );
 713                                         }
 714                                         break;
 715                                 case "ISBN ":
 716                                         if ( $tagIsOpen ) {
 717                                                 $txt = "ISBN ";
 718                                         } else {
 719                                                 $txt = $this->doMagicISBN( $tokenizer );
 720                                         }
 721                                         break;
 722                                 default:
 723                                         # Call language specific Hook.
 724                                         $txt = $wgLang->processToken( $token, $tokenStack );
 725                                         if ( NULL == $txt ) {
 726                                                 # An unkown token. Highlight.
 727                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 728                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 729                                         }
 730                                         break;
 731                         }
 732                         # If we're parsing the interior of a link, don't append the interior to $s,
 733                         # but push it to the stack so it can be processed when a ]] token is found.
 734                         if ( $tagIsOpen  && $txt != "" ) {
 735                                 $token["type"] = "text";
 736                                 $token["text"] = $txt;
 737                                 array_push( $tokenStack, $token );
 738                         } else {
 739                                 $s .= $txt;
 740                         }
 741                 } #end while
 742                 if ( count( $tokenStack ) != 0 )
 743                 {
 744                         # still objects on stack. opened [[ tag without closing ]] tag.
 745                         $txt = "";
 746                         while ( $lastToken = array_pop( $tokenStack ) )
 747                         {
 748                                 if ( $lastToken["type"] == "text" )
 749                                 {
 750                                         $txt = $lastToken["text"] . $txt;
 751                                 } else {
 752                                         $txt = $lastToken["type"] . $txt;
 753                                 }
 754                         }
 755                         $s .= $txt;
 756                 }
 757                 return $s;
 758         }
 759
 760         /* private */ function handleInternalLink( $line, $prefix )
 761         {
 762                 global $wgLang, $wgLinkCache;
 763                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 764                 static $fname = "Parser::handleInternalLink" ;
 765                 wfProfileIn( $fname );
 766
 767                 wfProfileIn( "$fname-setup" );
 768                 static $tc = FALSE;
 769                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 770                 $sk =& $this->mOptions->getSkin();
 771
 772                 # Match a link having the form [[namespace:link|alternate]]trail
 773                 static $e1 = FALSE;
 774                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 775                 # Match the end of a line for a word that's not followed by whitespace,
 776                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 777                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 778                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 779                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 780
 781
 782                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 783                 static $image = FALSE;
 784                 static $special = FALSE;
 785                 static $media = FALSE;
 786                 static $category = FALSE;
 787                 if ( !$image ) { $image = Namespace::getImage(); }
 788                 if ( !$special ) { $special = Namespace::getSpecial(); }
 789                 if ( !$media ) { $media = Namespace::getMedia(); }
 790                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 791
 792                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 793
 794                 wfProfileOut( "$fname-setup" );
 795                 $s = "";
 796
 797                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 798                         $text = $m[2];
 799                         $trail = $m[3];
 800                 } else { # Invalid form; output directly
 801                         $s .= $prefix . "[[" . $line ;
 802                         return $s;
 803                 }
 804
 805                 /* Valid link forms:
 806                 Foobar -- normal
 807                 :Foobar -- override special treatment of prefix (images, language links)
 808                 /Foobar -- convert to CurrentPage/Foobar
 809                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 810                 */
 811                 $c = substr($m[1],0,1);
 812                 $noforce = ($c != ":");
 813                 if( $c == "/" ) { # subpage
 814                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 815                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 816                                 $noslash=$m[1];
 817                         } else {
 818                                 $noslash=substr($m[1],1);
 819                         }
 820                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 821                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 822                                 if( "" == $text ) {
 823                                         $text= $m[1];
 824                                 } # this might be changed for ugliness reasons
 825                         } else {
 826                                 $link = $noslash; # no subpage allowed, use standard link
 827                         }
 828                 } elseif( $noforce ) { # no subpage
 829                         $link = $m[1];
 830                 } else {
 831                         $link = substr( $m[1], 1 );
 832                 }
 833                 if( "" == $text )
 834                         $text = $link;
 835
 836                 $nt = Title::newFromText( $link );
 837                 if( !$nt ) {
 838                         $s .= $prefix . "[[" . $line;
 839                         return $s;
 840                 }
 841                 $ns = $nt->getNamespace();
 842                 $iw = $nt->getInterWiki();
 843                 if( $noforce ) {
 844                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 845                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 846                                 return (trim($s) == '')? '': $s;
 847                         }
 848                         if( $ns == $image ) {
 849                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 850                                 $wgLinkCache->addImageLinkObj( $nt );
 851                                 return $s;
 852                         }
 853                 }
 854                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 855                     ( strpos( $link, "#" ) == FALSE ) ) {
 856                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 857                         return $s;
 858                 }
 859
 860                 # Category feature
 861                 $catns = strtoupper ( $nt->getDBkey () ) ;
 862                 $catns = explode ( ":" , $catns ) ;
 863                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 864                 else $catns = "" ;
 865                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 866                         $t = explode ( ":" , $nt->getText() ) ;
 867                         array_shift ( $t ) ;
 868                         $t = implode ( ":" , $t ) ;
 869                         $t = $wgLang->ucFirst ( $t ) ;
 870                         $nnt = Title::newFromText ( $category.":".$t ) ;
 871                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 872                         $this->mOutput->mCategoryLinks[] = $t ;
 873                         $s .= $prefix . $trail ;
 874                         return $s ;
 875                 }
 876
 877                 if( $ns == $media ) {
 878                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 879                         $wgLinkCache->addImageLinkObj( $nt );
 880                         return $s;
 881                 } elseif( $ns == $special ) {
 882                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 883                         return $s;
 884                 }
 885                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 886
 887                 wfProfileOut( $fname );
 888                 return $s;
 889         }
 890
 891         # Some functions here used by doBlockLevels()
 892         #
 893         /* private */ function closeParagraph()
 894         {
 895                 $result = "";
 896                 if ( '' != $this->mLastSection ) {
 897                         $result = "</" . $this->mLastSection  . ">";
 898                 }
 899                 $this->mLastSection = "";
 900                 return $result."\n";
 901         }
 902         # getCommon() returns the length of the longest common substring
 903         # of both arguments, starting at the beginning of both.
 904         #
 905         /* private */ function getCommon( $st1, $st2 )
 906         {
 907                 $fl = strlen( $st1 );
 908                 $shorter = strlen( $st2 );
 909                 if ( $fl < $shorter ) { $shorter = $fl; }
 910
 911                 for ( $i = 0; $i < $shorter; ++$i ) {
 912                         if ( $st1{$i} != $st2{$i} ) { break; }
 913                 }
 914                 return $i;
 915         }
 916         # These next three functions open, continue, and close the list
 917         # element appropriate to the prefix character passed into them.
 918         #
 919         /* private */ function openList( $char )
 920     {
 921                 $result = $this->closeParagraph();
 922
 923                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 924                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 925                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 926                 else if ( ";" == $char ) {
 927                         $result .= "<dl><dt>";
 928                         $this->mDTopen = true;
 929                 }
 930                 else { $result = "<!-- ERR 1 -->"; }
 931
 932                 return $result;
 933         }
 934
 935         /* private */ function nextItem( $char )
 936         {
 937                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 938                 else if ( ":" == $char || ";" == $char ) {
 939                         $close = "</dd>";
 940                         if ( $this->mDTopen ) { $close = "</dt>"; }
 941                         if ( ";" == $char ) {
 942                                 $this->mDTopen = true;
 943                                 return $close . "<dt>";
 944                         } else {
 945                                 $this->mDTopen = false;
 946                                 return $close . "<dd>";
 947                         }
 948                 }
 949                 return "<!-- ERR 2 -->";
 950         }
 951
 952         /* private */function closeList( $char )
 953         {
 954                 if ( "*" == $char ) { $text = "</li></ul>"; }
 955                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 956                 else if ( ":" == $char ) {
 957                         if ( $this->mDTopen ) {
 958                                 $this->mDTopen = false;
 959                                 $text = "</dt></dl>";
 960                         } else {
 961                                 $text = "</dd></dl>";
 962                         }
 963                 }
 964                 else {  return "<!-- ERR 3 -->"; }
 965                 return $text."\n";
 966         }
 967
 968         /* private */ function doBlockLevels( $text, $linestart )
 969         {
 970                 $fname = "Parser::doBlockLevels";
 971                 wfProfileIn( $fname );
 972                 # Parsing through the text line by line.  The main thing
 973                 # happening here is handling of block-level elements p, pre,
 974                 # and making lists from lines starting with * # : etc.
 975                 #
 976                 $a = explode( "\n", $text );
 977                 $a[0] = "\n".$a[0];
 978                 $lastPref = $text = '';
 979                 $this->mDTopen = $inBlockElem = false;
 980
 981                 if ( ! $linestart ) { $text .= array_shift( $a ); }
 982                 foreach ( $a as $t ) {
 983                         if ( "" != $text ) { $text .= "\n"; }
 984
 985                         $oLine = $t;
 986                         $opl = strlen( $lastPref );
 987                         $npl = strspn( $t, "*#:;" );
 988                         $pref = substr( $t, 0, $npl );
 989                         $pref2 = str_replace( ";", ":", $pref );
 990                         $t = substr( $t, $npl );
 991
 992                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
 993                                 $text .= $this->nextItem( substr( $pref, -1 ) );
 994
 995                                 if ( ";" == substr( $pref, -1 ) ) {
 996                                         $cpos = strpos( $t, ":" );
 997                                         if ( ! ( false === $cpos ) ) {
 998                                                 $term = substr( $t, 0, $cpos );
 999                                                 $text .= $term . $this->nextItem( ":" );
1000                                                 $t = substr( $t, $cpos + 1 );
1001                                         }
1002                                 }
1003                         } else if (0 != $npl || 0 != $opl) {
1004                                 $cpl = $this->getCommon( $pref, $lastPref );
1005
1006                                 while ( $cpl < $opl ) {
1007                                         $text .= $this->closeList( $lastPref{$opl-1} );
1008                                         --$opl;
1009                                 }
1010                                 if ( $npl <= $cpl && $cpl > 0 ) {
1011                                         $text .= $this->nextItem( $pref{$cpl-1} );
1012                                 }
1013                                 while ( $npl > $cpl ) {
1014                                         $char = substr( $pref, $cpl, 1 );
1015                                         $text .= $this->openList( $char );
1016
1017                                         if ( ";" == $char ) {
1018                                                 $cpos = strpos( $t, ":" );
1019                                                 if ( ! ( false === $cpos ) ) {
1020                                                         $term = substr( $t, 0, $cpos );
1021                                                         $text .= $term . $this->nextItem( ":" );
1022                                                         $t = substr( $t, $cpos + 1 );
1023                                                 }
1024                                         }
1025                                         ++$cpl;
1026                                 }
1027                                 $lastPref = $pref2;
1028                         }
1029                         if ( 0 == $npl ) { # No prefix--go to paragraph mode
1030                                 if ( preg_match(
1031                                   "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre)/i", $t ) ) {
1032                                         $text .= $this->closeParagraph();
1033                                         $inBlockElem = true;
1034                                 } else if ( preg_match("/(<hr|<\\/td)/i", $t ) ) {
1035                                         $text .= $this->closeParagraph();
1036                                         $inBlockElem = false;
1037                                 }
1038                                 if ( ! $inBlockElem ) {
1039                                         if ( " " == $t{0} ) {
1040                                                 $newSection = "pre";
1041                                                 $text .= $this->closeParagraph();
1042                                                 # $t = wfEscapeHTML( $t );
1043                                         }
1044                                         else { $newSection = "p"; }
1045
1046                                         if ( '' == trim( $oLine ) ) {
1047                                                 if ( $this->mLastSection != 'p') {
1048                                                         $text .= $this->closeParagraph();
1049                                                         $text .= "<" . $newSection . ">";
1050                                                         $this->mLastSection = $newSection;
1051                                                 } else if ( $this->mLastSection == 'p' and '' == $oLine) {
1052                                                         $text .= '<br />';
1053                                                 }
1054                                         } else if ( $this->mLastSection == $newSection and $newSection != 'p' ) {
1055                                                 $text .= $this->closeParagraph();
1056                                                 $text .= "<" . $newSection . ">";
1057                                                 $this->mLastSection = $newSection;
1058                                         }
1059                                 }
1060                                 if ( $inBlockElem &&
1061                                   preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|<\\/p<\\/div|<\\/pre)/i", $t ) ) {
1062                                         $inBlockElem = false;
1063                                 }
1064                         }
1065                         $text .= $t;
1066                 }
1067                 while ( $npl ) {
1068                         $text .= $this->closeList( $pref2{$npl-1} );
1069                         --$npl;
1070                 }
1071                 if ( "" != $this->mLastSection ) {
1072                         $text .= "</" . $this->mLastSection . ">";
1073                         $this->mLastSection = "";
1074                 }
1075                 wfProfileOut( $fname );
1076                 return $text;
1077         }
1078
1079         function getVariableValue( $index ) {
1080                 global $wgLang, $wgSitename, $wgServer;
1081
1082                 switch ( $index ) {
1083                         case MAG_CURRENTMONTH:
1084                                 return date( "m" );
1085                         case MAG_CURRENTMONTHNAME:
1086                                 return $wgLang->getMonthName( date("n") );
1087                         case MAG_CURRENTMONTHNAMEGEN:
1088                                 return $wgLang->getMonthNameGen( date("n") );
1089                         case MAG_CURRENTDAY:
1090                                 return date("j");
1091                         case MAG_CURRENTDAYNAME:
1092                                 return $wgLang->getWeekdayName( date("w")+1 );
1093                         case MAG_CURRENTYEAR:
1094                                 return date( "Y" );
1095                         case MAG_CURRENTTIME:
1096                                 return $wgLang->time( wfTimestampNow(), false );
1097                         case MAG_NUMBEROFARTICLES:
1098                                 return wfNumberOfArticles();
1099                         case MAG_SITENAME:
1100                                 return $wgSitename;
1101                         case MAG_SERVER:
1102                                 return $wgServer;
1103                         default:
1104                                 return NULL;
1105                 }
1106         }
1107
1108         function initialiseVariables()
1109         {
1110                 global $wgVariableIDs;
1111                 $this->mVariables = array();
1112                 foreach ( $wgVariableIDs as $id ) {
1113                         $mw =& MagicWord::get( $id );
1114                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1115                 }
1116         }
1117
1118         /* private */ function replaceVariables( $text )
1119         {
1120                 global $wgLang, $wgCurParser;
1121                 global $wgScript, $wgArticlePath;
1122
1123                 $fname = "Parser::replaceVariables";
1124                 wfProfileIn( $fname );
1125
1126                 $bail = false;
1127                 if ( !$this->mVariables ) {
1128                         $this->initialiseVariables();
1129                 }
1130                 $titleChars = Title::legalChars();
1131                 $regex = "/{{([$titleChars\\|]*?)}}/s";
1132
1133                 # "Recursive" variable expansion: run it through a couple of passes
1134                 for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
1135                         $oldText = $text;
1136
1137                         # It's impossible to rebind a global in PHP
1138                         # Instead, we run the substitution on a copy, then merge the changed fields back in
1139                         $wgCurParser = $this->fork();
1140
1141                         $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1142                         if ( $oldText == $text ) {
1143                                 $bail = true;
1144                         }
1145                         $this->merge( $wgCurParser );
1146                 }
1147
1148                 return $text;
1149         }
1150
1151         # Returns a copy of this object except with various variables cleared
1152         # This copy can be re-merged with the parent after operations on the copy
1153         function fork()
1154         {
1155                 $copy = $this;
1156                 $copy->mOutput = new ParserOutput;
1157                 return $copy;
1158         }
1159
1160         # Merges a copy split off with fork()
1161         function merge( &$copy )
1162         {
1163                 $this->mOutput->merge( $copy->mOutput );
1164
1165                 # Merge include throttling arrays
1166                 foreach( $copy->mIncludeCount as $dbk => $count ) {
1167                         if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
1168                                 $this->mIncludeCount[$dbk] += $count;
1169                         } else {
1170                                 $this->mIncludeCount[$dbk] = $count;
1171                         }
1172                 }
1173         }
1174
1175         function braceSubstitution( $matches )
1176         {
1177                 global $wgLinkCache, $wgLang;
1178                 $fname = "Parser::braceSubstitution";
1179                 $found = false;
1180                 $nowiki = false;
1181
1182                 $text = $matches[1];
1183
1184                 # SUBST
1185                 $mwSubst =& MagicWord::get( MAG_SUBST );
1186                 if ( $mwSubst->matchStartAndRemove( $text ) ) {
1187                         if ( $this->mOutputType != OT_WIKI ) {
1188                                 # Invalid SUBST not replaced at PST time
1189                                 # Return without further processing
1190                                 $text = $matches[0];
1191                                 $found = true;
1192                         }
1193                 } elseif ( $this->mOutputType == OT_WIKI ) {
1194                         # SUBST not found in PST pass, do nothing
1195                         $text = $matches[0];
1196                         $found = true;
1197                 }
1198
1199                 # MSG, MSGNW and INT
1200                 if ( !$found ) {
1201                         # Check for MSGNW:
1202                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1203                         if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
1204                                 $nowiki = true;
1205                         } else {
1206                                 # Remove obsolete MSG:
1207                                 $mwMsg =& MagicWord::get( MAG_MSG );
1208                                 $mwMsg->matchStartAndRemove( $text );
1209                         }
1210
1211                         # Check if it is an internal message
1212                         $mwInt =& MagicWord::get( MAG_INT );
1213                         if ( $mwInt->matchStartAndRemove( $text ) ) {
1214                                 $text = wfMsg( $text );
1215                                 $found = true;
1216                         }
1217                 }
1218
1219                 # NS
1220                 if ( !$found ) {
1221                         # Check for NS: (namespace expansion)
1222                         $mwNs = MagicWord::get( MAG_NS );
1223                         if ( $mwNs->matchStartAndRemove( $text ) ) {
1224                                 if ( intval( $text ) ) {
1225                                         $text = $wgLang->getNsText( intval( $text ) );
1226                                         $found = true;
1227                                 } else {
1228                                         $index = Namespace::getCanonicalIndex( strtolower( $text ) );
1229                                         if ( !is_null( $index ) ) {
1230                                                 $text = $wgLang->getNsText( $index );
1231                                                 $found = true;
1232                                         }
1233                                 }
1234                         }
1235                 }
1236
1237                 # LOCALURL and LOCALURLE
1238                 if ( !$found ) {
1239                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1240                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1241
1242                         if ( $mwLocal->matchStartAndRemove( $text ) ) {
1243                                 $func = 'getLocalURL';
1244                         } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) {
1245                                 $func = 'escapeLocalURL';
1246                         } else {
1247                                 $func = '';
1248                         }
1249
1250                         if ( $func !== '' ) {
1251                                 $args = explode( "|", $text );
1252                                 $n = count( $args );
1253                                 if ( $n > 0 ) {
1254                                         $title = Title::newFromText( $args[0] );
1255                                         if ( !is_null( $title ) ) {
1256                                                 if ( $n > 1 ) {
1257                                                         $text = $title->$func( $args[1] );
1258                                                 } else {
1259                                                         $text = $title->$func();
1260                                                 }
1261                                                 $found = true;
1262                                         }
1263                                 }
1264                         }
1265                 }
1266
1267                 # Check for a match against internal variables
1268                 if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
1269                         $text = $this->mVariables[$text];
1270                         $found = true;
1271                         $this->mOutput->mContainsOldMagic = true;
1272                 }
1273
1274                 # Load from database
1275                 if ( !$found ) {
1276                         $title = Title::newFromText( $text, NS_TEMPLATE );
1277                         if ( !is_null( $text ) && !$title->isExternal() ) {
1278                                 # Check for excessive inclusion
1279                                 $dbk = $title->getPrefixedDBkey();
1280                                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1281                                         $this->mIncludeCount[$dbk] = 0;
1282                                 }
1283                                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1284                                         $article = new Article( $title );
1285                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1286                                         if ( $articleContent !== false ) {
1287                                                 $found = true;
1288                                                 $text = $articleContent;
1289
1290                                                 # Escaping and link table handling
1291                                                 # Not required for preSaveTransform()
1292                                                 if ( $this->mOutputType == OT_HTML ) {
1293                                                         if ( $nowiki ) {
1294                                                                 $text = wfEscapeWikiText( $text );
1295                                                         } else {
1296                                                                 $text = $this->removeHTMLtags( $text );
1297                                                         }
1298                                                         $wgLinkCache->suspend();
1299                                                         $text = $this->doTokenizedParser( $text );
1300                                                         $wgLinkCache->resume();
1301                                                         $wgLinkCache->addLinkObj( $title );
1302
1303                                                 }
1304                                         }
1305                                 }
1306
1307                                 # If the title is valid but undisplayable, make a link to it
1308                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1309                                         $text = "[[" . $title->getPrefixedText() . "]]";
1310                                         $found = true;
1311                                 }
1312                         }
1313                 }
1314
1315                 if ( !$found ) {
1316                         return $matches[0];
1317                 } else {
1318                         return $text;
1319                 }
1320         }
1321
1322         # Cleans up HTML, removes dangerous tags and attributes
1323         /* private */ function removeHTMLtags( $text )
1324         {
1325                 $fname = "Parser::removeHTMLtags";
1326                 wfProfileIn( $fname );
1327                 $htmlpairs = array( # Tags that must be closed
1328                         "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1329                         "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1330                         "strike", "strong", "tt", "var", "div", "center",
1331                         "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1332                         "ruby", "rt" , "rb" , "rp", "p"
1333                 );
1334                 $htmlsingle = array(
1335                         "br", "hr", "li", "dt", "dd"
1336                 );
1337                 $htmlnest = array( # Tags that can be nested--??
1338                         "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1339                         "dl", "font", "big", "small", "sub", "sup"
1340                 );
1341                 $tabletags = array( # Can only appear inside table
1342                         "td", "th", "tr"
1343                 );
1344
1345                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1346                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1347
1348                 $htmlattrs = $this->getHTMLattrs () ;
1349
1350                 # Remove HTML comments
1351                 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1352
1353                 $bits = explode( "<", $text );
1354                 $text = array_shift( $bits );
1355                 $tagstack = array(); $tablestack = array();
1356
1357                 foreach ( $bits as $x ) {
1358                         $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1359                         preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1360                           $x, $regs );
1361                         list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1362                         error_reporting( $prev );
1363
1364                         $badtag = 0 ;
1365                         if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1366                                 # Check our stack
1367                                 if ( $slash ) {
1368                                         # Closing a tag...
1369                                         if ( ! in_array( $t, $htmlsingle ) &&
1370                                           ( $ot = array_pop( $tagstack ) ) != $t ) {
1371                                                 array_push( $tagstack, $ot );
1372                                                 $badtag = 1;
1373                                         } else {
1374                                                 if ( $t == "table" ) {
1375                                                         $tagstack = array_pop( $tablestack );
1376                                                 }
1377                                                 $newparams = "";
1378                                         }
1379                                 } else {
1380                                         # Keep track for later
1381                                         if ( in_array( $t, $tabletags ) &&
1382                                           ! in_array( "table", $tagstack ) ) {
1383                                                 $badtag = 1;
1384                                         } else if ( in_array( $t, $tagstack ) &&
1385                                           ! in_array ( $t , $htmlnest ) ) {
1386                                                 $badtag = 1 ;
1387                                         } else if ( ! in_array( $t, $htmlsingle ) ) {
1388                                                 if ( $t == "table" ) {
1389                                                         array_push( $tablestack, $tagstack );
1390                                                         $tagstack = array();
1391                                                 }
1392                                                 array_push( $tagstack, $t );
1393                                         }
1394                                         # Strip non-approved attributes from the tag
1395                                         $newparams = $this->fixTagAttributes($params);
1396
1397                                 }
1398                                 if ( ! $badtag ) {
1399                                         $rest = str_replace( ">", "&gt;", $rest );
1400                                         $text .= "<$slash$t $newparams$brace$rest";
1401                                         continue;
1402                                 }
1403                         }
1404                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1405                 }
1406                 # Close off any remaining tags
1407                 while ( $t = array_pop( $tagstack ) ) {
1408                         $text .= "</$t>\n";
1409                         if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1410                 }
1411                 wfProfileOut( $fname );
1412                 return $text;
1413         }
1414
1415 /*
1416  *
1417  * This function accomplishes several tasks:
1418  * 1) Auto-number headings if that option is enabled
1419  * 2) Add an [edit] link to sections for logged in users who have enabled the option
1420  * 3) Add a Table of contents on the top for users who have enabled the option
1421  * 4) Auto-anchor headings
1422  *
1423  * It loops through all headlines, collects the necessary data, then splits up the
1424  * string and re-inserts the newly formatted headlines.
1425  *
1426  */
1427
1428         /* private */ function formatHeadings( $text )
1429         {
1430                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1431                 $doShowToc = $this->mOptions->getShowToc();
1432                 if( !$this->mTitle->userCanEdit() ) {
1433                         $showEditLink = 0;
1434                         $rightClickHack = 0;
1435                 } else {
1436                         $showEditLink = $this->mOptions->getEditSection();
1437                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1438                 }
1439
1440                 # Inhibit editsection links if requested in the page
1441                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1442                 if( $esw->matchAndRemove( $text ) ) {
1443                         $showEditLink = 0;
1444                 }
1445                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1446                 # do not add TOC
1447                 $mw =& MagicWord::get( MAG_NOTOC );
1448                 if( $mw->matchAndRemove( $text ) ) {
1449                         $doShowToc = 0;
1450                 }
1451
1452                 # never add the TOC to the Main Page. This is an entry page that should not
1453                 # be more than 1-2 screens large anyway
1454                 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1455                         $doShowToc = 0;
1456                 }
1457
1458                 # Get all headlines for numbering them and adding funky stuff like [edit]
1459                 # links - this is for later, but we need the number of headlines right now
1460                 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1461
1462                 # if there are fewer than 4 headlines in the article, do not show TOC
1463                 if( $numMatches < 4 ) {
1464                         $doShowToc = 0;
1465                 }
1466
1467                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1468                 # override above conditions and always show TOC
1469                 $mw =& MagicWord::get( MAG_FORCETOC );
1470                 if ($mw->matchAndRemove( $text ) ) {
1471                         $doShowToc = 1;
1472                 }
1473
1474
1475                 # We need this to perform operations on the HTML
1476                 $sk =& $this->mOptions->getSkin();
1477
1478                 # headline counter
1479                 $headlineCount = 0;
1480
1481                 # Ugh .. the TOC should have neat indentation levels which can be
1482                 # passed to the skin functions. These are determined here
1483                 $toclevel = 0;
1484                 $toc = "";
1485                 $full = "";
1486                 $head = array();
1487                 $sublevelCount = array();
1488                 $level = 0;
1489                 $prevlevel = 0;
1490                 foreach( $matches[3] as $headline ) {
1491                         $numbering = "";
1492                         if( $level ) {
1493                                 $prevlevel = $level;
1494                         }
1495                         $level = $matches[1][$headlineCount];
1496                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1497                                 # reset when we enter a new level
1498                                 $sublevelCount[$level] = 0;
1499                                 $toc .= $sk->tocIndent( $level - $prevlevel );
1500                                 $toclevel += $level - $prevlevel;
1501                         }
1502                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1503                                 # reset when we step back a level
1504                                 $sublevelCount[$level+1]=0;
1505                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
1506                                 $toclevel -= $prevlevel - $level;
1507                         }
1508                         # count number of headlines for each level
1509                         @$sublevelCount[$level]++;
1510                         if( $doNumberHeadings || $doShowToc ) {
1511                                 $dot = 0;
1512                                 for( $i = 1; $i <= $level; $i++ ) {
1513                                         if( !empty( $sublevelCount[$i] ) ) {
1514                                                 if( $dot ) {
1515                                                         $numbering .= ".";
1516                                                 }
1517                                                 $numbering .= $sublevelCount[$i];
1518                                                 $dot = 1;
1519                                         }
1520                                 }
1521                         }
1522
1523                         # The canonized header is a version of the header text safe to use for links
1524                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1525                         $canonized_headline = Parser::unstrip( $headline, $this->mStripState );
1526
1527                         # strip out HTML
1528                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1529                         $tocline = trim( $canonized_headline );
1530                         $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1531                         $refer[$headlineCount] = $canonized_headline;
1532
1533                         # count how many in assoc. array so we can track dupes in anchors
1534                         @$refers[$canonized_headline]++;
1535                         $refcount[$headlineCount]=$refers[$canonized_headline];
1536
1537                         # Prepend the number to the heading text
1538
1539                         if( $doNumberHeadings || $doShowToc ) {
1540                                 $tocline = $numbering . " " . $tocline;
1541
1542                                 # Don't number the heading if it is the only one (looks silly)
1543                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1544                                         # the two are different if the line contains a link
1545                                         $headline=$numbering . " " . $headline;
1546                                 }
1547                         }
1548
1549                         # Create the anchor for linking from the TOC to the section
1550                         $anchor = $canonized_headline;
1551                         if($refcount[$headlineCount] > 1 ) {
1552                                 $anchor .= "_" . $refcount[$headlineCount];
1553                         }
1554                         if( $doShowToc ) {
1555                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1556                         }
1557                         if( $showEditLink ) {
1558                                 if ( empty( $head[$headlineCount] ) ) {
1559                                         $head[$headlineCount] = "";
1560                                 }
1561                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1562                         }
1563
1564                         # Add the edit section span
1565                         if( $rightClickHack ) {
1566                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1567                         }
1568
1569                         # give headline the correct <h#> tag
1570                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1571
1572                         $headlineCount++;
1573                 }
1574
1575                 if( $doShowToc ) {
1576                         $toclines = $headlineCount;
1577                         $toc .= $sk->tocUnindent( $toclevel );
1578                         $toc = $sk->tocTable( $toc );
1579                 }
1580
1581                 # split up and insert constructed headlines
1582
1583                 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1584                 $i = 0;
1585
1586                 foreach( $blocks as $block ) {
1587                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1588                             # This is the [edit] link that appears for the top block of text when
1589                                 # section editing is enabled
1590                                 $full .= $sk->editSectionLink(0);
1591                         }
1592                         $full .= $block;
1593                         if( $doShowToc && !$i) {
1594                                 # Let's add a top anchor just in case we want to link to the top of the page
1595                                 $full = "<a name=\"top\"></a>".$full.$toc;
1596                         }
1597
1598                         if( !empty( $head[$i] ) ) {
1599                                 $full .= $head[$i];
1600                         }
1601                         $i++;
1602                 }
1603
1604                 return $full;
1605         }
1606
1607         /* private */ function doMagicISBN( &$tokenizer )
1608         {
1609                 global $wgLang;
1610
1611                 # Check whether next token is a text token
1612                 # If yes, fetch it and convert the text into a
1613                 # Special::BookSources link
1614                 $token = $tokenizer->previewToken();
1615                 while ( $token["type"] == "" )
1616                 {
1617                         $tokenizer->nextToken();
1618                         $token = $tokenizer->previewToken();
1619                 }
1620                 if ( $token["type"] == "text" )
1621                 {
1622                         $token = $tokenizer->nextToken();
1623                         $x = $token["text"];
1624                         $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1625
1626                         $isbn = $blank = "" ;
1627                         while ( " " == $x{0} ) {
1628                                 $blank .= " ";
1629                                 $x = substr( $x, 1 );
1630                         }
1631                         while ( strstr( $valid, $x{0} ) != false ) {
1632                                 $isbn .= $x{0};
1633                                 $x = substr( $x, 1 );
1634                         }
1635                         $num = str_replace( "-", "", $isbn );
1636                         $num = str_replace( " ", "", $num );
1637
1638                         if ( "" == $num ) {
1639                                 $text = "ISBN $blank$x";
1640                         } else {
1641                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1642                                 $text = "<a href=\"" .
1643                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1644                                         "\" class=\"internal\">ISBN $isbn</a>";
1645                                 $text .= $x;
1646                         }
1647                 } else {
1648                         $text = "ISBN ";
1649                 }
1650                 return $text;
1651         }
1652         /* private */ function doMagicRFC( &$tokenizer )
1653         {
1654                 global $wgLang;
1655
1656                 # Check whether next token is a text token
1657                 # If yes, fetch it and convert the text into a
1658                 # link to an RFC source
1659                 $token = $tokenizer->previewToken();
1660                 while ( $token["type"] == "" )
1661                 {
1662                         $tokenizer->nextToken();
1663                         $token = $tokenizer->previewToken();
1664                 }
1665                 if ( $token["type"] == "text" )
1666                 {
1667                         $token = $tokenizer->nextToken();
1668                         $x = $token["text"];
1669                         $valid = "0123456789";
1670
1671                         $rfc = $blank = "" ;
1672                         while ( " " == $x{0} ) {
1673                                 $blank .= " ";
1674                                 $x = substr( $x, 1 );
1675                         }
1676                         while ( strstr( $valid, $x{0} ) != false ) {
1677                                 $rfc .= $x{0};
1678                                 $x = substr( $x, 1 );
1679                         }
1680
1681                         if ( "" == $rfc ) {
1682                                 $text .= "RFC $blank$x";
1683                         } else {
1684                                 $url = wfmsg( "rfcurl" );
1685                                 $url = str_replace( "$1", $rfc, $url);
1686                                 $sk =& $this->mOptions->getSkin();
1687                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1688                                 $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1689                         }
1690                 } else {
1691                         $text = "RFC ";
1692                 }
1693                 return $text;
1694         }
1695
1696         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1697         {
1698                 $this->mOptions = $options;
1699                 $this->mTitle =& $title;
1700                 $this->mOutputType = OT_WIKI;
1701
1702                 if ( $clearState ) {
1703                         $this->clearState();
1704                 }
1705
1706                 $stripState = false;
1707                 $text = str_replace("\r\n", "\n", $text);
1708                 $text = $this->strip( $text, $stripState, false );
1709                 $text = $this->pstPass2( $text, $user );
1710                 $text = $this->unstrip( $text, $stripState );
1711                 return $text;
1712         }
1713
1714         /* private */ function pstPass2( $text, &$user )
1715         {
1716                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1717
1718                 # Variable replacement
1719                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1720                 $text = $this->replaceVariables( $text );
1721
1722                 # Signatures
1723                 #
1724                 $n = $user->getName();
1725                 $k = $user->getOption( "nickname" );
1726                 if ( "" == $k ) { $k = $n; }
1727                 if(isset($wgLocaltimezone)) {
1728                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1729                 }
1730                 /* Note: this is an ugly timezone hack for the European wikis */
1731                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1732                   " (" . date( "T" ) . ")";
1733                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1734
1735                 $text = preg_replace( "/~~~~~/", $d, $text );
1736                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1737                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1738                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1739                   Namespace::getUser() ) . ":$n|$k]]", $text );
1740
1741                 # Context links: [[|name]] and [[name (context)|]]
1742                 #
1743                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1744                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1745                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1746                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1747
1748                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1749                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1750                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1751                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1752                                                                                                                 # [[ns:page (cont)|]]
1753                 $context = "";
1754                 $t = $this->mTitle->getText();
1755                 if ( preg_match( $conpat, $t, $m ) ) {
1756                         $context = $m[2];
1757                 }
1758                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1759                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1760                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1761
1762                 if ( "" == $context ) {
1763                         $text = preg_replace( $p2, "[[\\1]]", $text );
1764                 } else {
1765                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1766                 }
1767
1768                 /*
1769                 $mw =& MagicWord::get( MAG_SUBST );
1770                 $wgCurParser = $this->fork();
1771                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1772                 $this->merge( $wgCurParser );
1773                 */
1774
1775                 # Trim trailing whitespace
1776                 # MAG_END (__END__) tag allows for trailing
1777                 # whitespace to be deliberately included
1778                 $text = rtrim( $text );
1779                 $mw =& MagicWord::get( MAG_END );
1780                 $mw->matchAndRemove( $text );
1781
1782                 return $text;
1783         }
1784
1785         # Set up some variables which are usually set up in parse()
1786         # so that an external function can call some class members with confidence
1787         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1788         {
1789                 $this->mTitle =& $title;
1790                 $this->mOptions = $options;
1791                 $this->mOutputType = $outputType;
1792                 if ( $clearState ) {
1793                         $this->clearState();
1794                 }
1795         }
1796
1797         function transformMsg( $text, $options ) {
1798                 global $wgTitle;
1799                 static $executing = false;
1800
1801                 # Guard against infinite recursion
1802                 if ( $executing ) {
1803                         return $text;
1804                 }
1805                 $executing = true;
1806
1807                 $this->mTitle = $wgTitle;
1808                 $this->mOptions = $options;
1809                 $this->mOutputType = OT_MSG;
1810                 $this->clearState();
1811                 $text = $this->replaceVariables( $text );
1812
1813                 $executing = false;
1814                 return $text;
1815         }
1816 }
1817
# Result of a parser run: the output text, the language and category links
# collected on the way, and a flag recording whether old-style magic
# variables were used.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: whether old-style magic variables were expanded
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one hands the member and the new value to wfSetVar()
        # and returns its result
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the links and the old-magic flag of another ParserOutput into
        # this one. The text is left alone and $other is not modified.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Category links come from $other's category links, not from
                # the language links that were just merged
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1847
# Bundle of settings handed to the parser. Site-wide switches are copied from
# globals and per-user preferences from a user object in initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors: return the stored value unchanged
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators: delegate to wfSetVar() (wfSetRef() for the skin) and hand
        # back whatever that helper returns
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a new ParserOptions and fill it in from $user via
        # initialiseFromUser(). Returns the new object.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter by reference,
                # so no "&" is needed (or, on newer PHP versions, allowed) here.
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Populate every option of this object.
        #
        # $userInput: user object supplying getSkin() and getOption(). If it is
        #             empty/false, a fresh User is created and flagged as loaded
        #             with setLoaded( true ), and that object is used instead.
        #
        # The TeX, category magic, dynamic dates, interwiki magic and external
        # image switches are copied from the corresponding $wg* globals; the
        # skin and the remaining options are read from the user object.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
1920
1921 # Regex callbacks, used in Parser::replaceVariables
# Plain-function adapter: forwards the match array to braceSubstitution()
# on the parser object held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        $expanded = $wgCurParser->braceSubstitution( $matches );
        return $expanded;
}
1927
1928 ?>