includes/Parser.php

   1 <?php
   2
   3 include_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         include_once('wikihiero.php');
   7 }
   8
   9 # PHP Parser
  10 #
  11 # Processes wiki markup
  12 #
  13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  15 #
  16 # Globals used:
  17 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  18 #
  19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  20 #
  21 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  22 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  23 #               $wgLocaltimezone
  24 #
  25 #      * only within ParserOptions
  26 #
  27 #
  28 #----------------------------------------
  29 #    Variable substitution O(N^2) attack
  30 #-----------------------------------------
  31 # Without countermeasures, it would be possible to attack the parser by saving a page
  32 # filled with a large number of inclusions of large pages. The size of the generated
  33 # page would be proportional to the square of the input size. Hence, we limit the number
  34 # of inclusions of any given page, thus bringing any attack back to O(N).
  35 #
  36
  # Cap on repeated inclusion of any given page; see the note on the
  # variable substitution O(N^2) attack directly above.
  define( 'MAX_INCLUDE_REPEAT', 5 );

  # Values that $mOutputType may take
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Prefix of the escaping markers; shared by at least two functions
  define( 'UNIQ_PREFIX', 'NaodW29' );
  46
  47 class Parser
  48 {
  49         # Cleared with clearState():
  50         var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
  51         var $mVariables, $mIncludeCount, $mArgStack;
  52
  53         # Temporary:
  54         var $mOptions, $mTitle, $mOutputType;
  55
  # Constructor (old-style: named after the class).
  # A new parser starts out in the state produced by clearState().
  function Parser()
  {
          $this->clearState();
  }
  60
  # Resets every per-parse member to its initial value: a fresh
  # ParserOutput, zeroed counters/flags and empty bookkeeping arrays.
  function clearState()
  {
          # Bookkeeping collections start out empty
          $this->mArgStack = array();
          $this->mStripState = array();
          $this->mIncludeCount = array();

          # Flags and counters
          $this->mVariables = false;
          $this->mDTopen = false;
          $this->mLastSection = "";
          $this->mAutonumber = 0;

          # Result container
          $this->mOutput = new ParserOutput();
  }
  72
  73         # First pass--just handle <nowiki> sections, pass the rest off
  74         # to internalParse() which does all the real work.
  75         #
  76         # Returns a ParserOutput
  77         #
  # Main entry point for producing HTML from wiki markup.
  #
  # Sections handled by strip() are taken out first, the remainder goes
  # through internalParse(), the stripped sections are put back with
  # unstrip(), and doBlockLevels() plus a final tag/ampersand cleanup
  # finish the job.
  #
  # $text        wiki markup to convert
  # $title       title object; kept by reference in $this->mTitle
  # $options     parser options; kept in $this->mOptions
  # $linestart   handed on to internalParse() and doBlockLevels()
  # $clearState  when true, clearState() is called before parsing
  #
  # Returns $this->mOutput with the generated text set on it.
  function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  {
          $fname = "Parser::parse";
          wfProfileIn( $fname );

          if ( $clearState ) {
                  $this->clearState();
          }

          $this->mOptions = $options;
          $this->mTitle =& $title;
          $this->mOutputType = OT_HTML;

          $text = $this->strip( $text, $this->mStripState );
          $text = $this->internalParse( $text, $linestart );
          # only once and next-to-last
          $text = $this->unstrip( $text, $this->mStripState );
          $text = $this->doBlockLevels( $text, $linestart );
          # Clean up special characters, only run once and last
          $fixtags = array(
                  "/<hr *>/i" => '<hr/>',
                  "/<br *>/i" => '<br/>',
                  "/<center *>/i"=>'<span style="text-align:center;">',
                  "/<\\/center *>/i" => '</span>',
                  # Clean up spare ampersands; note that we probably ought to be
                  # more careful about named entities.
                  # An ampersand is left alone only when it starts something that
                  # looks like an entity: &amp; a hexadecimal or decimal numeric
                  # reference, or an alphanumeric name followed by ";".
                  # The hex class is [0-9A-Fa-f]; a range written as "A-f" would
                  # also accept G-Z and several punctuation characters.
                  '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
          );
          $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

          $this->mOutput->setText( $text );
          wfProfileOut( $fname );
          return $this->mOutput;
  }
 113
 # Returns a random lowercase hexadecimal string made of two mt_rand()
 # draws (each from 0 to 0x7fffffff), converted with dechex() and joined.
 /* static */ function getRandomString()
 {
         $upperBound = 0x7fffffff;
         $firstHalf = dechex( mt_rand( 0, $upperBound ) );
         $secondHalf = dechex( mt_rand( 0, $upperBound ) );
         return $firstHalf . $secondHalf;
 }
 118
 119         # Replaces all occurrences of <$tag>content</$tag> in the text
 120         # with a random marker and returns the new text. the output parameter
 121         # $content will be an associative array filled with data on the form
 122         # $unique_marker => content.
 123
 124         # If $content is already set, the additional entries will be appended
 125
 # Pulls every <$tag>...</$tag> section out of $text.
 #
 # $tag          tag name, matched case-insensitively; whitespace is
 #               tolerated inside the angle brackets
 # $text         text to scan
 # $content      in/out: receives marker => inner text; existing entries
 #               are kept, a false/empty value is replaced by a new array
 # $uniq_prefix  string prepended to every generated marker
 #
 # Returns $text with each section replaced by its marker. An opening tag
 # without a closing tag captures everything up to the end of the text.
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
         $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
         if ( !$content ) {
                 $content = array( );
         }

         # Quote the tag name so that it is always matched literally
         $quotedTag = preg_quote( $tag, "/" );
         $openPattern = "/<\\s*{$quotedTag}\\s*>/i";
         $closePattern = "/<\\/\\s*{$quotedTag}\\s*>/i";

         $n = 1;
         $stripped = "";

         while ( "" != $text ) {
                 $p = preg_split( $openPattern, $text, 2 );
                 $stripped .= $p[0];
                 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                         # No (further) opening tag, or nothing follows it
                         $text = "";
                 } else {
                         $q = preg_split( $closePattern, $p[1], 2 );
                         $marker = $rnd . sprintf("%08X", $n++);
                         $content[$marker] = $q[0];
                         $stripped .= $marker;
                         # $q[1] is missing when the tag is never closed
                         $text = isset( $q[1] ) ? $q[1] : "";
                 }
         }
         return $stripped;
 }
 149
 150         # Strips <nowiki>, <pre> and <math>
 151         # Returns the text, and fills an array with data needed in unstrip()
 152         # If the $state is already a valid strip state, it adds to the state
 153         #
 # Replaces <nowiki>, <hiero>, <math> and <pre> sections with unique
 # markers and records what each marker stands for in $state.
 #
 # When the output type is OT_HTML the recorded value is the rendered
 # form of the section; otherwise it is the section wrapped in its
 # original tags. <hiero> is only handled when $wgUseWikiHiero is set,
 # <math> only when the parser options enable TeX.
 #
 # $text   text to process
 # $state  in/out strip state; an existing non-empty state is extended
 #
 # Returns the text with the sections replaced by markers.
 function strip( $text, &$state )
 {
         $render = ( OT_HTML == $this->mOutputType );
         $uniq_prefix = UNIQ_PREFIX;

         $nowiki_content = array();
         $hiero_content = array();
         $math_content = array();
         $pre_content = array();
         $item_content = array();

         # Escaping of placeholder text already present in the input is
         # currently switched off:
         #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

         # <nowiki>
         $text = Parser::extractTags( "nowiki", $text, $nowiki_content, $uniq_prefix );
         foreach ( array_keys( $nowiki_content ) as $marker ) {
                 $raw = $nowiki_content[$marker];
                 $nowiki_content[$marker] = $render
                         ? wfEscapeHTMLTagsOnly( $raw )
                         : "<nowiki>{$raw}</nowiki>";
         }

         # <hiero>
         if ( $GLOBALS['wgUseWikiHiero'] ) {
                 $text = Parser::extractTags( "hiero", $text, $hiero_content, $uniq_prefix );
                 foreach ( array_keys( $hiero_content ) as $marker ) {
                         $raw = $hiero_content[$marker];
                         $hiero_content[$marker] = $render
                                 ? WikiHiero( $raw, WH_MODE_HTML )
                                 : "<hiero>{$raw}</hiero>";
                 }
         }

         # <math>
         if ( $this->mOptions->getUseTeX() ) {
                 $text = Parser::extractTags( "math", $text, $math_content, $uniq_prefix );
                 foreach ( array_keys( $math_content ) as $marker ) {
                         $raw = $math_content[$marker];
                         $math_content[$marker] = $render
                                 ? renderMath( $raw )
                                 : "<math>{$raw}</math>";
                 }
         }

         # <pre>
         $text = Parser::extractTags( "pre", $text, $pre_content, $uniq_prefix );
         foreach ( array_keys( $pre_content ) as $marker ) {
                 $raw = $pre_content[$marker];
                 $pre_content[$marker] = $render
                         ? "<pre>" . wfEscapeHTMLTagsOnly( $raw ) . "</pre>"
                         : "<pre>{$raw}</pre>";
         }

         if ( !$state ) {
                 # No earlier state: start a new one
                 $state = array(
                         'nowiki' => $nowiki_content,
                         'hiero' => $hiero_content,
                         'math' => $math_content,
                         'pre' => $pre_content,
                         'item' => $item_content
                 );
         } else {
                 # Extend the earlier state; entries already present win
                 $state['nowiki'] += $nowiki_content;
                 $state['hiero'] += $hiero_content;
                 $state['math'] += $math_content;
                 $state['pre'] += $pre_content;
         }

         return $text;
 }
 224
 # Puts the content recorded in a strip state back into the text.
 #
 # $text   text containing markers
 # $state  strip state: an array of marker => content arrays
 #
 # Both the groups and the entries inside each group are expanded in
 # reverse order, otherwise nested tags will be corrupted.
 # The arrays are walked on reversed copies, so the caller's internal
 # array pointers are left alone and no entry value can end the walk
 # early. A state that is not an array leaves the text unchanged.
 #
 # Returns the text with all known markers replaced.
 function unstrip( $text, &$state )
 {
         if ( !is_array( $state ) ) {
                 return $text;
         }

         foreach ( array_reverse( $state, true ) as $contentDict ) {
                 if ( !is_array( $contentDict ) ) {
                         continue;
                 }
                 foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
                         $text = str_replace( $marker, $content, $text );
                 }
         }

         return $text;
 }
 237
 238         # Add an item to the strip state
 239         # Returns the unique tag which must be inserted into the stripped text
 240         # The tag will be replaced with the original text in unstrip()
 241
 # Stores $text in the 'item' group of the strip state under a newly
 # generated marker and returns that marker. An empty state is first
 # initialised with all five groups (nowiki, hiero, math, pre, item).
 function insertStripItem( $text, &$state )
 {
         $marker = UNIQ_PREFIX . '-item' . Parser::getRandomString();

         if ( !$state ) {
                 $state = array();
                 foreach ( array( 'nowiki', 'hiero', 'math', 'pre', 'item' ) as $group ) {
                         $state[$group] = array();
                 }
         }

         $state['item'][$marker] = $text;
         return $marker;
 }
 257
 # Builds the listing appended to pages whose title text starts with the
 # localised category prefix (wfMsg("category"), first letter uppercased).
 #
 # Pages found through the links and brokenlinks tables for this article
 # id are split into subcategories (their own title carries the category
 # prefix) and ordinary articles; each group gets a heading and a
 # comma-separated list of links.
 #
 # Returns the HTML, or "" when category magic is disabled in the parser
 # options or the current title does not carry the category prefix.
 function categoryMagic ()
 {
         global $wgLang ;
         if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;

         # The id is interpolated into SQL below, so force it to an integer
         $id = intval ( $this->mTitle->getArticleID() ) ;
         $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
         $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
         if ( $cat != $ti[0] ) return "" ;
         # Part after the prefix; absent when the title has no colon
         $catName = isset ( $ti[1] ) ? $ti[1] : "" ;

         # Double quotes, so that \n really is a newline in the output
         $r = "<br style=\"clear:both;\"/>\n" ;

         $articles = array() ;
         $children = array() ;

         # Skin comes from the parser options, as elsewhere in this class;
         # $wgUser is deliberately kept out of the parser.
         $sk =& $this->mOptions->getSkin() ;

         $queries = array (
                 "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id",
                 "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id"
         ) ;

         $data = array () ;
         foreach ( $queries AS $sql )
         {
                 $res = wfQuery ( $sql, DB_READ ) ;
                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
                 # Free each result set, not only the last one
                 wfFreeResult ( $res ) ;
         }

         foreach ( $data AS $x )
         {
                 $t = $wgLang->getNsText ( $x->cur_namespace ) ;
                 if ( $t != "" ) $t .= ":" ;
                 $t .= $x->cur_title ;

                 $y = explode ( ":" , $t , 2 ) ;
                 if ( count ( $y ) == 2 && $y[0] == $cat ) {
                         # Subcategory: link text is the name without the prefix
                         array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
                 } else {
                         array_push ( $articles , $sk->makeLink ( $t ) ) ;
                 }
         }

         # Children
         if ( count ( $children ) > 0 )
         {
                 asort ( $children ) ;
                 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                 $r .= implode ( ", " , $children ) ;
         }

         # Articles
         if ( count ( $articles ) > 0 )
         {
                 asort ( $articles ) ;
                 $h =  wfMsg( "category_header", $catName );
                 $r .= "<h2>{$h}</h2>\n" ;
                 $r .= implode ( ", " , $articles ) ;
         }

         return $r ;
 }
 323
 # Returns the list of attribute names that are allowed on HTML tags
 # (no scripting etc.). The list is assembled group by group; order and
 # content, including the repeated "width", are kept as they are.
 function getHTMLattrs ()
 {
         $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
         $breaksAndRules = array( "clear" /* BR */, "noshade" /* HR */ );
         $quotes = array( "cite" /* BLOCKQUOTE, Q */ );
         $font = array( "size", "face", "color" );
         # For various lists, mostly deprecated but safe
         $lists = array( "type", "start", "value", "compact" );
         $tables = array(
                 "summary", "width", "border", "frame", "rules",
                 "cellspacing", "cellpadding", "valign", "char",
                 "charoff", "colgroup", "col", "span", "abbr", "axis",
                 "headers", "scope", "rowspan", "colspan"
         );
         # For CSS
         $css = array( "id", "class", "name", "style" );

         return array_merge( $general, $breaksAndRules, $quotes, $font, $lists, $tables, $css ) ;
 }
 340
 # Cleans the attribute part of an HTML tag.
 #
 # Every name or name=value pair whose name is not in getHTMLattrs() is
 # removed. If what remains contains a style attribute followed by
 # "expression", something ending in "tp://" / "tps://", or "url(", all
 # attributes are dropped.
 #
 # The filtering is done with a callback instead of the /e (evaluate)
 # modifier: with /e the attribute value was substituted into PHP code
 # that was then executed, which is unsafe for user-supplied markup, and
 # the modifier no longer exists in current PHP versions.
 #
 # $t  raw attribute text
 #
 # Returns the trimmed, filtered attribute text ("" for blank input).
 function fixTagAttributes ( $t )
 {
         if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

         # Strip non-approved attributes from the tag
         $t = preg_replace_callback(
                 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
                 array( $this, 'fixTagAttributesCallback' ),
                 $t);

         # Strip javascript "expression" from stylesheets. Brute force approach:
         # If anything offensive is found, all attributes of the HTML tag are dropped
         if( preg_match(
                 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
                 wfMungeToUtf8( $t ) ) )
         {
                 $t="";
         }

         return trim ( $t ) ;
 }

 # preg_replace_callback() helper for fixTagAttributes().
 #
 # $m  match array: $m[1] is the attribute name, $m[3] (when present)
 #     the value, including its double quotes if it was quoted
 #
 # Returns "name=value" or just "name" for an allowed attribute, and ""
 # for any other name.
 function fixTagAttributesCallback ( $m )
 {
         if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) {
                 return '' ;
         }
         $value = isset ( $m[3] ) ? $m[3] : '' ;
         return ( $value !== '' ) ? "{$m[1]}={$value}" : $m[1] ;
 }
 363
 # Converts wiki table markup into HTML table markup, one line at a time.
 #
 # Recognised line starts:
 #   {|   opens a table; the rest of the line holds its attributes
 #   |}   closes the innermost table
 #   |-   starts a new row (extra dashes are ignored); the rest of the
 #        line holds the row attributes
 #   |+   caption
 #   |    data cell(s), several per line separated by ||
 #   !    header cell(s), several per line separated by !! or ||
 # Inside a cell, text before a single | is treated as its attributes.
 # Lines outside any table are left untouched. Tables still open at the
 # end of the text are closed.
 #
 # $t  text to convert
 #
 # Returns the converted text.
 function doTableStuff ( $t )
 {
         $t = explode ( "\n" , $t ) ;
         # Parallel stacks with one entry per currently open table
         $td = array () ; # Is currently a cell open?
         $ltd = array () ; # Tag name of that cell: td, th or caption
         $tr = array () ; # Is currently a tr tag open?
         $ltr = array () ; # Attributes for the next tr to be opened
         foreach ( $t AS $k => $x )
         {
                 $x = rtrim ( $x ) ;
                 $fc = substr ( $x , 0 , 1 ) ;
                 if ( "{|" == substr ( $x , 0 , 2 ) )
                 {
                         # Attributes start right after the two marker characters;
                         # fixTagAttributes() trims, so a separating space is optional
                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $tr , false ) ;
                         array_push ( $ltr , "" ) ;
                 }
                 else if ( count ( $td ) == 0 ) { } # Not inside a table: leave the line alone
                 else if ( "|}" == substr ( $x , 0 , 2 ) )
                 {
                         $z = "</table>\n" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                 }
                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
                 {
                         $x = substr ( $x , 1 ) ;
                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
                         $z = "" ;
                         $l = array_pop ( $ltd ) ;
                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                         array_pop ( $ltr ) ;
                         $t[$k] = $z ;
                         # The new row is opened lazily by its first cell
                         array_push ( $tr , false ) ;
                         array_push ( $td , false ) ;
                         array_push ( $ltd , "" ) ;
                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
                 }
                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Cells and caption
                 {
                         if ( "|+" == substr ( $x , 0 , 2 ) )
                         {
                                 $fc = "+" ;
                                 $x = substr ( $x , 1 ) ;
                         }
                         $after = substr ( $x , 1 ) ;
                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
                         $after = explode ( "||" , $after ) ;
                         $t[$k] = "" ;
                         foreach ( $after AS $theline )
                         {
                                 $z = "" ;
                                 if ( $fc != "+" )
                                 {
                                         # Cells need a row; open one if none is open
                                         $tra = array_pop ( $ltr ) ;
                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                                         array_push ( $tr , true ) ;
                                         array_push ( $ltr , "" ) ;
                                 }

                                 # Close the previous cell, if any, with its own tag name
                                 $l = array_pop ( $ltd ) ;
                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                                 if ( $fc == "|" ) $l = "td" ;
                                 else if ( $fc == "!" ) $l = "th" ;
                                 else if ( $fc == "+" ) $l = "caption" ;
                                 else $l = "" ;
                                 array_push ( $ltd , $l ) ;
                                 $y = explode ( "|" , $theline , 2 ) ;
                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                                 else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                                 $t[$k] .= $y ;
                                 array_push ( $td , true ) ;
                         }
                 }
         }

         # Closing open cell, tr && table
         while ( count ( $td ) > 0 )
         {
                 # Close the cell with the tag it was opened with (td, th or caption)
                 $l = array_pop ( $ltd ) ;
                 if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
                 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
                 array_pop ( $ltr ) ;
                 $t[] = "</table>" ;
         }

         $t = implode ( "\n" , $t ) ;
         return $t ;
 }
 463
 # Runs the fixed sequence of conversion passes over already-stripped
 # text and appends the output of categoryMagic().
 #
 # $text       text to convert
 # $linestart  accepted for the caller's benefit; not used in this body
 # $args       handed on to replaceVariables()
 #
 # Returns the converted text.
 function internalParse( $text, $linestart, $args = array() )
 {
         $fname = "Parser::internalParse";
         wfProfileIn( $fname );

         # HTML clean-up and variable expansion come first
         $text = $this->replaceVariables( $this->removeHTMLtags( $text ), $args );

         $text = $this->doHeadings( $text );

         # Date reformatting only when the options ask for it
         if ( $this->mOptions->getUseDynamicDates() ) {
                 global $wgDateFormatter;
                 $dateFormat = $this->mOptions->getDateFormat();
                 $text = $wgDateFormatter->reformat( $dateFormat, $text );
         }

         # External links are done ahead of the tokenized pass
         $text = $this->replaceExternalLinks( $text );
         $text = $this->doTokenizedParser( $text );
         $text = $this->doTableStuff( $text );
         $text = $this->formatHeadings( $text );

         # Let the skin have its say
         $sk =& $this->mOptions->getSkin();
         $text = $sk->transformContent( $text );

         $text = $text . $this->categoryMagic();

         wfProfileOut( $fname );
         return $text;
 }
 491
 492
 # Turns lines of the form ==Title== into <hN>Title</hN>, where N is
 # the number of equals signs on each side. Levels are tried from 6
 # down to 1 so that longer runs of equals signs are matched first.
 /* private */ function doHeadings( $text )
 {
         $level = 6;
         while ( $level >= 1 ) {
                 $marks = str_repeat( "=", $level );
                 $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
                 $replacement = "<h{$level}>\\1</h{$level}>\\2";
                 $text = preg_replace( $pattern, $replacement, $text );
                 --$level;
         }
         return $text;
 }
 502
 503         # Note: we have to do external links before the internal ones,
 504         # and otherwise take great care in the order of things here, so
 505         # that we don't end up interpreting some URLs twice.
 506
 # Applies subReplaceExternalLinks() once per supported protocol, in a
 # fixed order. The flag attached to each protocol is passed on as the
 # $autonumber argument; it is set for http and https only.
 /* private */ function replaceExternalLinks( $text )
 {
         $fname = "Parser::replaceExternalLinks";
         wfProfileIn( $fname );

         $protocols = array(
                 "http" => true,
                 "https" => true,
                 "ftp" => false,
                 "irc" => false,
                 "gopher" => false,
                 "news" => false,
                 "mailto" => false
         );
         foreach ( $protocols as $protocol => $autonumber ) {
                 $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
         }

         wfProfileOut( $fname );
         return $text;
 }
 521
 # Converts the external links of one protocol into HTML.
 #
 # First pass: URLs not preceded by "[" become links showing the URL;
 # when $autonumber is set and the options allow external images, URLs
 # ending in an image extension become images instead.
 # Second pass: bracketed links, either [url] or [url text].
 #
 # $s           text to process
 # $protocol    protocol name without the colon, e.g. "http"
 # $autonumber  when set, [url] links without text are shown as "[n]"
 #              using the running $this->mAutonumber counter
 #
 # Returns the processed text.
 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 {
         # Stands in for the protocol while the first pass runs, so that
         # text produced by one replacement is not matched again.
         $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
         # Characters allowed in a URL
         $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

         # Separators that should be ignored if they are the last character
         # of a URL but included if they occur within it, e.g. in
         # "go to www.foo.com, where .." the comma is not part of the URL,
         # but in "www.foo.com/123,2342,32.htm" it is.
         $sep = ",;\.:";
         # Characters allowed in a file name
         $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
         $images = "gif|png|jpg|jpeg";

         # PLEASE NOTE: The curly braces { } are not part of the regex,
         # they only tell PHP where to insert the variable's content.
         $imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
                 "((?i){$images})([^{$uc}]|$)/";
         $urlPattern = "/(^|[^\\[])({$protocol}:)(([{$uc}]|[{$sep}][{$uc}])+)" .
                 "([^{$uc}{$sep}]|[{$sep}]|$)/";

         $sk =& $this->mOptions->getSkin();

         # Use img tags only for HTTP urls
         if ( $autonumber && $this->mOptions->getAllowExternalImages() ) {
                 $imageHtml = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
                 $s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
         }

         $placeholderUrl = "{$unique}:\\3";
         $anchor = "\\1" . "<a href=\"{$placeholderUrl}\"" .
                 $sk->getExternalLinkAttributes( $placeholderUrl, wfEscapeHTML( $placeholderUrl ) ) .
                 ">" . wfEscapeHTML( $placeholderUrl ) . "</a>\\5";
         $s = preg_replace( $urlPattern, $anchor, $s );
         $s = str_replace( $unique, $protocol, $s );

         # Bracketed links: split at every "[protocol:"; the leading blank
         # keeps the first piece from being empty and is removed again.
         $pieces = explode( "[{$protocol}:", " " . $s );
         $s = substr( array_shift( $pieces ), 1 );

         $barePattern = "/^([{$uc}{$sep}]+)](.*)\$/sD";
         $labelledPattern = "/^([{$uc}{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

         foreach ( $pieces as $piece ) {
                 if ( preg_match( $barePattern, $piece, $m ) ) {
                         # [url]
                         $link = "{$protocol}:{$m[1]}";
                         $trail = $m[2];
                         $text = $autonumber
                                 ? ( "[" . ++$this->mAutonumber . "]" )
                                 : wfEscapeHTML( $link );
                 } elseif ( preg_match( $labelledPattern, $piece, $m ) ) {
                         # [url text]
                         $link = "{$protocol}:{$m[1]}";
                         $text = $m[2];
                         $trail = $m[3];
                 } else {
                         # Not a link after all: put the piece back unchanged
                         $s .= "[{$protocol}:" . $piece;
                         continue;
                 }

                 # No expansion when the text already is the URL
                 $textShowsUrl = ( $link == $text )
                         || preg_match( "!{$protocol}://" . preg_quote( $text, "/" ) . "/?$!", $link );
                 # Expand the URL for printable version
                 $paren = $textShowsUrl
                         ? ""
                         : "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";

                 $la = $sk->getExternalLinkAttributes( $link, $text );
                 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
         }
         return $s;
 }
 588
 # Handles a ''' (bold) token.
 #
 # $state  in/out array with "em" and "strong" entries; each is false
 #         when the tag is closed, otherwise the value recorded on opening
 # $token  current token; its "pos" entry, if set, is recorded on opening
 #
 # Returns the HTML to emit. If <em> was opened after <strong>, <em> is
 # closed and reopened around the closing </strong> to keep the tags nested.
 /* private */ function handle3Quotes( &$state, $token )
 {
         # Bold is not open yet: open it
         if ( $state["strong"] === false ) {
                 $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
                 return "<strong>";
         }

         # Bold is open: close it.  ''' lala ''lala '''
         $emOpenedInside = ( $state["em"] !== false ) && ( $state["em"] > $state["strong"] );
         $state["strong"] = FALSE;
         return $emOpenedInside ? "</em></strong><em>" : "</strong>";
 }
 606
 # Handles a '' (italic) token.
 #
 # $state  in/out array with "em" and "strong" entries; each is false
 #         when the tag is closed, otherwise the value recorded on opening
 # $token  current token; its "pos" entry, if set, is recorded on opening
 #
 # Returns the HTML to emit. If <strong> was opened after <em>, <strong>
 # is closed and reopened around the closing </em> to keep the tags nested.
 /* private */ function handle2Quotes( &$state, $token )
 {
         # Italics are not open yet: open them
         if ( $state["em"] === false ) {
                 $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
                 return "<em>";
         }

         # Italics are open: close them.  ''lala'''lala'' ....'''
         $strongOpenedInside = ( $state["strong"] !== false ) && ( $state["strong"] > $state["em"] );
         $state["em"] = FALSE;
         return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
 }
 625
 # Handles a ''''' (bold and italic) token.
 #
 # $state  in/out array with "em" and "strong" entries; each is false
 #         when the tag is closed, otherwise the value recorded on opening
 # $token  current token; its "pos" entry is recorded for any tag opened
 #         here, with true as the fallback when the token has no "pos"
 #         (the same fallback the 2- and 3-quote handlers use)
 #
 # Returns the HTML to emit:
 #   both open    close both, the later-opened one first
 #   only em      close em, open strong
 #   only strong  close strong, open em
 #   none open    open both
 /* private */ function handle5Quotes( &$state, $token )
 {
         $pos = isset($token["pos"]) ? $token["pos"] : true;
         $s = "";
         if ( $state["em"] !== false && $state["strong"] !== false ) {
                 if ( $state["em"] < $state["strong"] ) {
                         $s .= "</strong></em>";
                 } else {
                         $s .= "</em></strong>";
                 }
                 $state["strong"] = $state["em"] = FALSE;
         } elseif ( $state["em"] !== false ) {
                 $s .= "</em><strong>";
                 $state["em"] = FALSE;
                 $state["strong"] = $pos;
         } elseif ( $state["strong"] !== false ) {
                 $s .= "</strong><em>";
                 $state["strong"] = FALSE;
                 $state["em"] = $pos;
         } else { # not $em and not $strong
                 $s .= "<strong><em>";
                 $state["strong"] = $state["em"] = $pos;
         }
         return $s;
 }
 650
 651         /* private */ function doTokenizedParser( $str )
 652         {
 653                 global $wgLang; # for language specific parser hook
 654
 655                 $tokenizer=Tokenizer::newFromString( $str );
 656                 $tokenStack = array();
 657
 658                 $s="";
 659                 $state["em"]      = FALSE;
 660                 $state["strong"]  = FALSE;
 661                 $tagIsOpen = FALSE;
 662                 $threeopen = false;
 663
 664                 # The tokenizer splits the text into tokens and returns them one by one.
 665                 # Every call to the tokenizer returns a new token.
 666                 while ( $token = $tokenizer->nextToken() )
 667                 {
 668                         switch ( $token["type"] )
 669                         {
 670                                 case "text":
 671                                         # simple text with no further markup
 672                                         $txt = $token["text"];
 673                                         break;
 674                                 case "[[[":
 675                                         # remember the tag opened with 3 [
 676                                         $threeopen = true;
 677                                 case "[[":
 678                                         # link opening tag.
 679                                         # FIXME : Treat orphaned open tags (stack not empty when text is over)
 680                                         $tagIsOpen = TRUE;
 681                                         array_push( $tokenStack, $token );
 682                                         $txt="";
 683                                         break;
 684
 685                                 case "]]]":
 686                                 case "]]":
 687                                         # link close tag.
 688                                         # get text from stack, glue it together, and call the code to handle a
 689                                         # link
 690
 691                                         if ( count( $tokenStack ) == 0 )
 692                                         {
 693                                                 # stack empty. Found a ]] without an opening [[
 694                                                 $txt = "]]";
 695                                         } else {
 696                                                 $linkText = "";
 697                                                 $lastToken = array_pop( $tokenStack );
 698                                                 while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
 699                                                 {
 700                                                         if( !empty( $lastToken["text"] ) ) {
 701                                                                 $linkText = $lastToken["text"] . $linkText;
 702                                                         }
 703                                                         $lastToken = array_pop( $tokenStack );
 704                                                 }
 705
 706                                                 $txt = $linkText ."]]";
 707
 708                                                 if( isset( $lastToken["text"] ) ) {
 709                                                         $prefix = $lastToken["text"];
 710                                                 } else {
 711                                                         $prefix = "";
 712                                                 }
 713                                                 $nextToken = $tokenizer->previewToken();
 714                                                 if ( $nextToken["type"] == "text" )
 715                                                 {
 716                                                         # Preview just looks at it. Now we have to fetch it.
 717                                                         $nextToken = $tokenizer->nextToken();
 718                                                         $txt .= $nextToken["text"];
 719                                                 }
 720                                                 $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
 721
 722                                                 # did the tag start with 3 [ ?
 723                                                 if($threeopen) {
 724                                                         # show the first as text
 725                                                         $txt = "[".$txt;
 726                                                         $threeopen=false;
 727                                                 }
 728
 729                                         }
 730                                         $tagIsOpen = (count( $tokenStack ) != 0);
 731                                         break;
 732                                 case "----":
 733                                         $txt = "\n<hr />\n";
 734                                         break;
 735                                 case "'''":
 736                                         # This and the three next ones handle quotes
 737                                         $txt = $this->handle3Quotes( $state, $token );
 738                                         break;
 739                                 case "''":
 740                                         $txt = $this->handle2Quotes( $state, $token );
 741                                         break;
 742                                 case "'''''":
 743                                         $txt = $this->handle5Quotes( $state, $token );
 744                                         break;
 745                                 case "":
 746                                         # empty token
 747                                         $txt="";
 748                                         break;
 749                                 case "RFC ":
 750                                         if ( $tagIsOpen ) {
 751                                                 $txt = "RFC ";
 752                                         } else {
 753                                                 $txt = $this->doMagicRFC( $tokenizer );
 754                                         }
 755                                         break;
 756                                 case "ISBN ":
 757                                         if ( $tagIsOpen ) {
 758                                                 $txt = "ISBN ";
 759                                         } else {
 760                                                 $txt = $this->doMagicISBN( $tokenizer );
 761                                         }
 762                                         break;
 763                                 default:
 764                                         # Call language specific Hook.
 765                                         $txt = $wgLang->processToken( $token, $tokenStack );
 766                                         if ( NULL == $txt ) {
 767                                                 # An unkown token. Highlight.
 768                                                 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
 769                                                 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
 770                                         }
 771                                         break;
 772                         }
 773                         # If we're parsing the interior of a link, don't append the interior to $s,
 774                         # but push it to the stack so it can be processed when a ]] token is found.
 775                         if ( $tagIsOpen  && $txt != "" ) {
 776                                 $token["type"] = "text";
 777                                 $token["text"] = $txt;
 778                                 array_push( $tokenStack, $token );
 779                         } else {
 780                                 $s .= $txt;
 781                         }
 782                 } #end while
 783                 if ( count( $tokenStack ) != 0 )
 784                 {
 785                         # still objects on stack. opened [[ tag without closing ]] tag.
 786                         $txt = "";
 787                         while ( $lastToken = array_pop( $tokenStack ) )
 788                         {
 789                                 if ( $lastToken["type"] == "text" )
 790                                 {
 791                                         $txt = $lastToken["text"] . $txt;
 792                                 } else {
 793                                         $txt = $lastToken["type"] . $txt;
 794                                 }
 795                         }
 796                         $s .= $txt;
 797                 }
 798                 return $s;
 799         }
 800
 801         /* private */ function handleInternalLink( $line, $prefix )
 802         {
 803                 global $wgLang, $wgLinkCache;
 804                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 805                 static $fname = "Parser::handleInternalLink" ;
 806                 wfProfileIn( $fname );
 807
 808                 wfProfileIn( "$fname-setup" );
 809                 static $tc = FALSE;
 810                 if ( !$tc ) { $tc = Title::legalChars() . "#"; }
 811                 $sk =& $this->mOptions->getSkin();
 812
 813                 # Match a link having the form [[namespace:link|alternate]]trail
 814                 static $e1 = FALSE;
 815                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 816                 # Match the end of a line for a word that's not followed by whitespace,
 817                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 818                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 819                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 820                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 821
 822
 823                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 824                 static $image = FALSE;
 825                 static $special = FALSE;
 826                 static $media = FALSE;
 827                 static $category = FALSE;
 828                 if ( !$image ) { $image = Namespace::getImage(); }
 829                 if ( !$special ) { $special = Namespace::getSpecial(); }
 830                 if ( !$media ) { $media = Namespace::getMedia(); }
 831                 if ( !$category ) { $category = wfMsg ( "category" ) ; }
 832
 833                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 834
 835                 wfProfileOut( "$fname-setup" );
 836                 $s = "";
 837
 838                 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 839                         $text = $m[2];
 840                         $trail = $m[3];
 841                 } else { # Invalid form; output directly
 842                         $s .= $prefix . "[[" . $line ;
 843                         return $s;
 844                 }
 845
 846                 /* Valid link forms:
 847                 Foobar -- normal
 848                 :Foobar -- override special treatment of prefix (images, language links)
 849                 /Foobar -- convert to CurrentPage/Foobar
 850                 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 851                 */
 852                 $c = substr($m[1],0,1);
 853                 $noforce = ($c != ":");
 854                 if( $c == "/" ) { # subpage
 855                         if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 856                                 $m[1]=substr($m[1],1,strlen($m[1])-2);
 857                                 $noslash=$m[1];
 858                         } else {
 859                                 $noslash=substr($m[1],1);
 860                         }
 861                         if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
 862                                 $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 863                                 if( "" == $text ) {
 864                                         $text= $m[1];
 865                                 } # this might be changed for ugliness reasons
 866                         } else {
 867                                 $link = $noslash; # no subpage allowed, use standard link
 868                         }
 869                 } elseif( $noforce ) { # no subpage
 870                         $link = $m[1];
 871                 } else {
 872                         $link = substr( $m[1], 1 );
 873                 }
 874                 if( "" == $text )
 875                         $text = $link;
 876
 877                 $nt = Title::newFromText( $link );
 878                 if( !$nt ) {
 879                         $s .= $prefix . "[[" . $line;
 880                         return $s;
 881                 }
 882                 $ns = $nt->getNamespace();
 883                 $iw = $nt->getInterWiki();
 884                 if( $noforce ) {
 885                         if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 886                                 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 887                                 return (trim($s) == '')? '': $s;
 888                         }
 889                         if( $ns == $image ) {
 890                                 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 891                                 $wgLinkCache->addImageLinkObj( $nt );
 892                                 return $s;
 893                         }
 894                 }
 895                 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 896                     ( strpos( $link, "#" ) == FALSE ) ) {
 897                         $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
 898                         return $s;
 899                 }
 900
 901                 # Category feature
 902                 $catns = strtoupper ( $nt->getDBkey () ) ;
 903                 $catns = explode ( ":" , $catns ) ;
 904                 if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
 905                 else $catns = "" ;
 906                 if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
 907                         $t = explode ( ":" , $nt->getText() ) ;
 908                         array_shift ( $t ) ;
 909                         $t = implode ( ":" , $t ) ;
 910                         $t = $wgLang->ucFirst ( $t ) ;
 911                         $nnt = Title::newFromText ( $category.":".$t ) ;
 912                         $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
 913                         $this->mOutput->mCategoryLinks[] = $t ;
 914                         $s .= $prefix . $trail ;
 915                         return $s ;
 916                 }
 917
 918                 if( $ns == $media ) {
 919                         $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 920                         $wgLinkCache->addImageLinkObj( $nt );
 921                         return $s;
 922                 } elseif( $ns == $special ) {
 923                         $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 924                         return $s;
 925                 }
 926                 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 927
 928                 wfProfileOut( $fname );
 929                 return $s;
 930         }
 931
 932         # Some functions here used by doBlockLevels()
 933         #
 934         /* private */ function closeParagraph()
 935         {
 936                 $result = "";
 937                 if ( '' != $this->mLastSection ) {
 938                         $result = "</" . $this->mLastSection  . ">\n";
 939                 }
 940                 $this->mLastSection = "";
 941                 return $result;
 942         }
             # getCommon() returns the length of the longest common prefix of both
             # arguments, i.e. the number of leading characters they share.
 945         #
 946         /* private */ function getCommon( $st1, $st2 )
 947         {
 948                 $fl = strlen( $st1 );
 949                 $shorter = strlen( $st2 );
 950                 if ( $fl < $shorter ) { $shorter = $fl; }
 951
 952                 for ( $i = 0; $i < $shorter; ++$i ) {
 953                         if ( $st1{$i} != $st2{$i} ) { break; }
 954                 }
 955                 return $i;
 956         }
 957         # These next three functions open, continue, and close the list
 958         # element appropriate to the prefix character passed into them.
 959         #
 960         /* private */ function openList( $char )
 961     {
 962                 $result = $this->closeParagraph();
 963
 964                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 965                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 966                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 967                 else if ( ";" == $char ) {
 968                         $result .= "<dl><dt>";
 969                         $this->mDTopen = true;
 970                 }
 971                 else { $result = "<!-- ERR 1 -->"; }
 972
 973                 return $result;
 974         }
 975
 976         /* private */ function nextItem( $char )
 977         {
 978                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 979                 else if ( ":" == $char || ";" == $char ) {
 980                         $close = "</dd>";
 981                         if ( $this->mDTopen ) { $close = "</dt>"; }
 982                         if ( ";" == $char ) {
 983                                 $this->mDTopen = true;
 984                                 return $close . "<dt>";
 985                         } else {
 986                                 $this->mDTopen = false;
 987                                 return $close . "<dd>";
 988                         }
 989                 }
 990                 return "<!-- ERR 2 -->";
 991         }
 992
 993         /* private */function closeList( $char )
 994         {
 995                 if ( "*" == $char ) { $text = "</li></ul>"; }
 996                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 997                 else if ( ":" == $char ) {
 998                         if ( $this->mDTopen ) {
 999                                 $this->mDTopen = false;
1000                                 $text = "</dt></dl>";
1001                         } else {
1002                                 $text = "</dd></dl>";
1003                         }
1004                 }
1005                 else {  return "<!-- ERR 3 -->"; }
1006                 return $text."\n";
1007         }
1008
1009         /* private */ function doBlockLevels( $text, $linestart )
1010         {
1011                 $fname = "Parser::doBlockLevels";
1012                 wfProfileIn( $fname );
1013                 # Parsing through the text line by line.  The main thing
1014                 # happening here is handling of block-level elements p, pre,
1015                 # and making lists from lines starting with * # : etc.
1016                 #
1017                 $a = explode( "\n", $text );
1018
1019                 $lastPref = $text = $lastLine = '';
1020                 $this->mDTopen = $inBlockElem = false;
1021                 $npl = 0;
1022                 $pstack = false;
1023
1024                 if ( ! $linestart ) { $text .= array_shift( $a ); }
1025                 foreach ( $a as $t ) {
1026                         $oLine = $t;
1027                         $opl = strlen( $lastPref );
1028                         $npl = strspn( $t, "*#:;" );
1029                         $pref = substr( $t, 0, $npl );
1030                         $pref2 = str_replace( ";", ":", $pref );
1031                         $t = substr( $t, $npl );
1032                         // list generation
1033                         if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
1034                                 $text .= $this->nextItem( substr( $pref, -1 ) );
1035                                 if ( $pstack ) { $pstack = false; }
1036
1037                                 if ( ";" == substr( $pref, -1 ) ) {
1038                                         $cpos = strpos( $t, ":" );
1039                                         if ( false !== $cpos ) {
1040                                                 $term = substr( $t, 0, $cpos );
1041                                                 $text .= $term . $this->nextItem( ":" );
1042                                                 $t = substr( $t, $cpos + 1 );
1043                                         }
1044                                 }
1045                         } else if (0 != $npl || 0 != $opl) {
1046                                 $cpl = $this->getCommon( $pref, $lastPref );
1047                                 if ( $pstack ) { $pstack = false; }
1048
1049                                 while ( $cpl < $opl ) {
1050                                         $text .= $this->closeList( $lastPref{$opl-1} );
1051                                         --$opl;
1052                                 }
1053                                 if ( $npl <= $cpl && $cpl > 0 ) {
1054                                         $text .= $this->nextItem( $pref{$cpl-1} );
1055                                 }
1056                                 while ( $npl > $cpl ) {
1057                                         $char = substr( $pref, $cpl, 1 );
1058                                         $text .= $this->openList( $char );
1059
1060                                         if ( ";" == $char ) {
1061                                                 $cpos = strpos( $t, ":" );
1062                                                 if ( ! ( false === $cpos ) ) {
1063                                                         $term = substr( $t, 0, $cpos );
1064                                                         $text .= $term . $this->nextItem( ":" );
1065                                                         $t = substr( $t, $cpos + 1 );
1066                                                 }
1067                                         }
1068                                         ++$cpl;
1069                                 }
1070                                 $lastPref = $pref2;
1071                         }
1072                         if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
1073                                 $uniq_prefix = UNIQ_PREFIX;
1074                                 // XXX: use a stack for nestable elements like span, table and div
1075                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1076                                 $closematch = preg_match(
1077                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1078                                         "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1079                                 if ( $openmatch or $closematch ) {
1080                                         if ( $pstack ) { $pstack = false; }
1081                                         $text .= $this->closeParagraph();
1082                                         if ( $closematch  ) {
1083                                                 $inBlockElem = false;
1084                                         } else {
1085                                                 $inBlockElem = true;
1086                                         }
1087                                 } else if ( !$inBlockElem ) {
1088                                         if ( " " == $t{0} ) {
1089                                                 // pre
1090                                                 if ($this->mLastSection != 'pre') {
1091                                                         $pstack = false;
1092                                                         $text .= $this->closeParagraph().'<pre>';
1093                                                         $this->mLastSection = 'pre';
1094                                                 }
1095                                         } else {
1096                                                 // paragraph
1097                                                 if ( '' == trim($t) ) {
1098                                                         if ( $pstack ) {
1099                                                                 $text .= $pstack.'<br/>';
1100                                                                 $pstack = false;
1101                                                                 $this->mLastSection = 'p';
1102                                                         } else {
1103                                                                 if ($this->mLastSection != 'p' ) {
1104                                                                         $text .= $this->closeParagraph();
1105                                                                         $this->mLastSection = '';
1106                                                                         $pstack = "<p>";
1107                                                                 } else {
1108                                                                         $pstack = '</p><p>';
1109                                                                 }
1110                                                         }
1111                                                 } else {
1112                                                         if ( $pstack ) {
1113                                                                 $text .= $pstack;
1114                                                                 $pstack = false;
1115                                                                 $this->mLastSection = 'p';
1116                                                         } else if ($this->mLastSection != 'p') {
1117                                                                 $text .= $this->closeParagraph().'<p>';
1118                                                                 $this->mLastSection = 'p';
1119                                                         }
1120                                                 }
1121                                         }
1122                                 }
1123                         }
1124                         if ($pstack === false) {
1125                                 $text .= $t."\n";
1126                         }
1127                 }
1128                 while ( $npl ) {
1129                         $text .= $this->closeList( $pref2{$npl-1} );
1130                         --$npl;
1131                 }
1132                 if ( "" != $this->mLastSection ) {
1133                         $text .= "</" . $this->mLastSection . ">";
1134                         $this->mLastSection = "";
1135                 }
1136
1137                 wfProfileOut( $fname );
1138                 return $text;
1139         }
1140
1141         function getVariableValue( $index ) {
1142                 global $wgLang, $wgSitename, $wgServer;
1143
1144                 switch ( $index ) {
1145                         case MAG_CURRENTMONTH:
1146                                 return date( "m" );
1147                         case MAG_CURRENTMONTHNAME:
1148                                 return $wgLang->getMonthName( date("n") );
1149                         case MAG_CURRENTMONTHNAMEGEN:
1150                                 return $wgLang->getMonthNameGen( date("n") );
1151                         case MAG_CURRENTDAY:
1152                                 return date("j");
1153                         case MAG_CURRENTDAYNAME:
1154                                 return $wgLang->getWeekdayName( date("w")+1 );
1155                         case MAG_CURRENTYEAR:
1156                                 return date( "Y" );
1157                         case MAG_CURRENTTIME:
1158                                 return $wgLang->time( wfTimestampNow(), false );
1159                         case MAG_NUMBEROFARTICLES:
1160                                 return wfNumberOfArticles();
1161                         case MAG_SITENAME:
1162                                 return $wgSitename;
1163                         case MAG_SERVER:
1164                                 return $wgServer;
1165                         default:
1166                                 return NULL;
1167                 }
1168         }
1169
1170         function initialiseVariables()
1171         {
1172                 global $wgVariableIDs;
1173                 $this->mVariables = array();
1174                 foreach ( $wgVariableIDs as $id ) {
1175                         $mw =& MagicWord::get( $id );
1176                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1177                 }
1178         }
1179
1180         /* private */ function replaceVariables( $text, $args = array() )
1181         {
1182                 global $wgLang, $wgScript, $wgArticlePath;
1183
1184                 $fname = "Parser::replaceVariables";
1185                 wfProfileIn( $fname );
1186
1187                 $bail = false;
1188                 if ( !$this->mVariables ) {
1189                         $this->initialiseVariables();
1190                 }
1191                 $titleChars = Title::legalChars();
1192                 $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
1193
1194                 # This function is called recursively. To keep track of arguments we need a stack:
1195                 array_push( $this->mArgStack, $args );
1196
1197                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1198                 $GLOBALS['wgCurParser'] =& $this;
1199                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1200
1201                 array_pop( $this->mArgStack );
1202
1203                 return $text;
1204         }
1205
# Computes the replacement for a single {{...}} match.
#
# $matches is the preg match array built by the pattern in replaceVariables():
#   [0] the whole match (including the optional leading newline)
#   [1] the optional newline before the braces
#   [2] the part before the first |
#   [3] the |-separated arguments including the leading |, or ""
#
# The checks run in this order and the first one that succeeds wins:
# SUBST handling, MSGNW / MSG / INT, NS, LOCALURL / LOCALURLE, entries of
# $this->mVariables, arguments passed by the caller, and finally a page
# looked up through Title::newFromText( $part1, NS_TEMPLATE ).
#
# Returns the replacement text prefixed with the captured newline, or
# $matches[0] unchanged when nothing applies.
function braceSubstitution( $matches )
{
        global $wgLinkCache, $wgLang;
        $found = false;
        $nowiki = false;
        $title = NULL;
        $text = "";

        # $newline is an optional newline character before the braces
        # $part1 is the bit before the first |, and must contain only title characters
        # $args is a list of arguments, starting from index 0, not including $part1

        $newline = $matches[1];
        $part1 = $matches[2];
        # If the third subpattern matched anything, it will start with |
        if ( $matches[3] !== "" ) {
                $args = explode( "|", substr( $matches[3], 1 ) );
        } else {
                $args = array();
        }
        $argc = count( $args );

        # SUBST
        # In both "leave it alone" cases the original match is returned
        # immediately. It already contains the leading newline, so it must not
        # be prefixed with $newline again, and it must not be fed to the
        # recursive HTML parse further down.
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
                if ( $this->mOutputType != OT_WIKI ) {
                        # Invalid SUBST not replaced at PST time
                        # Return without further processing
                        return $matches[0];
                }
        } elseif ( $this->mOutputType == OT_WIKI ) {
                # SUBST not found in PST pass, do nothing
                return $matches[0];
        }

        # MSG, MSGNW and INT
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                $nowiki = true;
        } else {
                # Remove obsolete MSG:
                $mwMsg =& MagicWord::get( MAG_MSG );
                $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                        $text = wfMsgReal( $part1, $args, true );
                        $found = true;
                }
        }

        # NS
        if ( !$found ) {
                # Check for NS: (namespace expansion)
                $mwNs =& MagicWord::get( MAG_NS );
                if ( $mwNs->matchStartAndRemove( $part1 ) ) {
                        if ( intval( $part1 ) ) {
                                # Numeric namespace index
                                $text = $wgLang->getNsText( intval( $part1 ) );
                                $found = true;
                        } else {
                                # Otherwise try it as a canonical namespace name
                                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                                if ( !is_null( $index ) ) {
                                        $text = $wgLang->getNsText( $index );
                                        $found = true;
                                }
                        }
                }
        }

        # LOCALURL and LOCALURLE
        if ( !$found ) {
                $mwLocal =& MagicWord::get( MAG_LOCALURL );
                $mwLocalE =& MagicWord::get( MAG_LOCALURLE );

                if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
                        $func = 'getLocalURL';
                } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
                        $func = 'escapeLocalURL';
                } else {
                        $func = '';
                }

                if ( $func !== '' ) {
                        $title = Title::newFromText( $part1 );
                        if ( !is_null( $title ) ) {
                                # The first argument, if any, is handed to the URL method
                                if ( $argc > 0 ) {
                                        $text = $title->$func( $args[0] );
                                } else {
                                        $text = $title->$func();
                                }
                                $found = true;
                        }
                }
        }

        # Internal variables
        if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
                $text = $this->mVariables[$part1];
                $found = true;
                $this->mOutput->mContainsOldMagic = true;
        }

        # Arguments input from the caller
        $inputArgs = end( $this->mArgStack );
        if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
                $text = $inputArgs[$part1];
                $found = true;
        }

        # Load from database
        if ( !$found ) {
                $title = Title::newFromText( $part1, NS_TEMPLATE );
                if ( !is_null( $title ) && !$title->isExternal() ) {
                        # Check for excessive inclusion
                        $dbk = $title->getPrefixedDBkey();
                        if ( $this->incrementIncludeCount( $dbk ) ) {
                                $article = new Article( $title );
                                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                                if ( $articleContent !== false ) {
                                        $found = true;
                                        $text = $articleContent;
                                }
                        }

                        # If the title is valid but undisplayable, make a link to it
                        if ( $this->mOutputType == OT_HTML && !$found ) {
                                $text = "[[" . $title->getPrefixedText() . "]]";
                                $found = true;
                        }
                }
        }

        if ( !$found ) {
                return $matches[0];
        }

        # Recursive parsing, escaping and link table handling
        # Only for HTML output
        if ( $this->mOutputType == OT_HTML ) {
                if ( $nowiki ) {
                        $text = wfEscapeWikiText( $text );
                } else {
                        # Clean up argument array: "name=value" items are stored under
                        # their trimmed name, all other items get consecutive numbers
                        # starting at 1. trim() always yields a string, so no separate
                        # false checks are needed.
                        $assocArgs = array();
                        $index = 1;
                        foreach( $args as $arg ) {
                                $eqpos = strpos( $arg, "=" );
                                if ( $eqpos === false ) {
                                        $assocArgs[$index++] = $arg;
                                } else {
                                        $name = trim( substr( $arg, 0, $eqpos ) );
                                        $value = trim( substr( $arg, $eqpos+1 ) );
                                        $assocArgs[$name] = $value;
                                }
                        }

                        # Do not enter included links in link table
                        if ( !is_null( $title ) ) {
                                $wgLinkCache->suspend();
                        }

                        # Run full parser on the included text
                        $text = $this->strip( $text, $this->mStripState );
                        $text = $this->internalParse( $text, (bool)$newline, $assocArgs );

                        # Add the result to the strip state for re-inclusion after
                        # the rest of the processing
                        $text = $this->insertStripItem( $text, $this->mStripState );

                        # Resume the link cache and register the inclusion as a link
                        if ( !is_null( $title ) ) {
                                $wgLinkCache->resume();
                                $wgLinkCache->addLinkObj( $title );
                        }
                }
        }

        return $newline . $text;
}
1397
# Counts one more inclusion of the entity identified by $dbk in
# $this->mIncludeCount and reports whether that inclusion is still allowed:
# true while the running count is at most MAX_INCLUDE_REPEAT, false after.
function incrementIncludeCount( $dbk )
{
        # First sighting of this key: start its counter at zero
        if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
                $this->mIncludeCount[$dbk] = 0;
        }

        $this->mIncludeCount[$dbk]++;
        return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1410
1411
# Cleans up HTML, removes dangerous tags and attributes
#
# The text is split on "<". Each piece that parses as a tag from the
# whitelist below and fits the current nesting is written back with its
# attributes filtered through fixTagAttributes(); everything else has its
# "<" and ">" turned into &lt; and &gt;. HTML comments are dropped, and
# tags still open at the end of the text are closed.
#
# $text  the text to clean
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
        $fname = "Parser::removeHTMLtags";
        wfProfileIn( $fname );
        $htmlpairs = array( # Tags that must be closed
                "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
                "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
                "strike", "strong", "tt", "var", "div", "center",
                "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
                "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
                "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
                "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
                "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
                "td", "th", "tr"
        );

        # Table cells and rows are treated like single tags: they are never
        # put on the tag stack
        $htmlsingle = array_merge( $tabletags, $htmlsingle );
        $htmlelements = array_merge( $htmlsingle, $htmlpairs );

        # NOTE(review): the result is not used in this function; the call is
        # kept in case getHTMLattrs() has side effects -- confirm.
        $htmlattrs = $this->getHTMLattrs () ;

        # Remove HTML comments
        $text = preg_replace( "/<!--.*-->/sU", "", $text );

        $bits = explode( "<", $text );
        $text = array_shift( $bits );
        $tagstack = array(); $tablestack = array();

        foreach ( $bits as $x ) {
                # Split the piece into slash, tag name, attributes, closing
                # brace and trailing text. A piece that does not look like a
                # tag leaves the tag name empty and is escaped below; no
                # error_reporting() juggling is needed for that.
                $regs = array();
                if ( preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
                  $x, $regs ) ) {
                        list( , $slash, $t, $params, $brace, $rest ) = $regs;
                        $t = strtolower( $t );
                } else {
                        $slash = $t = $params = $brace = $rest = "";
                }

                $badtag = 0 ;
                if ( in_array( $t, $htmlelements ) ) {
                        # Check our stack
                        if ( $slash ) {
                                # Closing a tag...
                                if ( ! in_array( $t, $htmlsingle ) ) {
                                        $ot = array_pop( $tagstack );
                                        if ( $ot != $t ) {
                                                # Not the tag that is currently open: put
                                                # back what was popped (nothing if the
                                                # stack was empty) and reject the tag
                                                if ( !is_null( $ot ) ) {
                                                        array_push( $tagstack, $ot );
                                                }
                                                $badtag = 1;
                                        }
                                }
                                if ( ! $badtag ) {
                                        if ( $t == "table" ) {
                                                # Back to the stack saved when the table opened
                                                $tagstack = array_pop( $tablestack );
                                        }
                                        $newparams = "";
                                }
                        } else {
                                # Keep track for later
                                if ( in_array( $t, $tabletags ) &&
                                  ! in_array( "table", $tagstack ) ) {
                                        $badtag = 1;
                                } else if ( in_array( $t, $tagstack ) &&
                                  ! in_array ( $t , $htmlnest ) ) {
                                        $badtag = 1 ;
                                } else if ( ! in_array( $t, $htmlsingle ) ) {
                                        if ( $t == "table" ) {
                                                # A table starts with a fresh stack; the
                                                # outer one is restored when it closes
                                                array_push( $tablestack, $tagstack );
                                                $tagstack = array();
                                        }
                                        array_push( $tagstack, $t );
                                }
                                # Strip non-approved attributes from the tag
                                $newparams = $this->fixTagAttributes($params);

                        }
                        if ( ! $badtag ) {
                                $rest = str_replace( ">", "&gt;", $rest );
                                $text .= "<$slash$t $newparams$brace$rest";
                                continue;
                        }
                }
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( $t = array_pop( $tagstack ) ) {
                $text .= "</$t>\n";
                if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
        wfProfileOut( $fname );
        return $text;
}
1504
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title is editable and the option is enabled
 * 3) Add a table of contents after the first block of text if that option is enabled
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text is the already rendered HTML; the rewritten HTML is returned.
 *
 */

/* private */ function formatHeadings( $text )
{
        $doNumberHeadings = $this->mOptions->getNumberHeadings();
        $doShowToc = $this->mOptions->getShowToc();
        if( !$this->mTitle->userCanEdit() ) {
                $showEditLink = 0;
                $rightClickHack = 0;
        } else {
                $showEditLink = $this->mOptions->getEditSection();
                $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
        }

        # Inhibit editsection links if requested in the page
        $esw =& MagicWord::get( MAG_NOEDITSECTION );
        if( $esw->matchAndRemove( $text ) ) {
                $showEditLink = 0;
        }
        # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
        # do not add TOC
        $mw =& MagicWord::get( MAG_NOTOC );
        if( $mw->matchAndRemove( $text ) ) {
                $doShowToc = 0;
        }

        # never add the TOC to the Main Page. This is an entry page that should not
        # be more than 1-2 screens large anyway
        if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                $doShowToc = 0;
        }

        # Get all headlines for numbering them and adding funky stuff like [edit]
        # links - this is for later, but we need the number of headlines right now
        $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

        # if there are fewer than 4 headlines in the article, do not show TOC
        if( $numMatches < 4 ) {
                $doShowToc = 0;
        }

        # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
        # override above conditions and always show TOC
        $mw =& MagicWord::get( MAG_FORCETOC );
        if ($mw->matchAndRemove( $text ) ) {
                $doShowToc = 1;
        }

        # We need this to perform operations on the HTML
        $sk =& $this->mOptions->getSkin();

        # headline counter
        $headlineCount = 0;
        # total number of headlines, used to decide whether numbering is worthwhile
        $headlineTotal = count( $matches[3] );

        # Ugh .. the TOC should have neat indentation levels which can be
        # passed to the skin functions. These are determined here
        $toclevel = 0;
        $toc = "";
        $full = "";
        $head = array();
        $sublevelCount = array();
        # how often each canonized headline has been seen so far
        $refers = array();
        $level = 0;
        $prevlevel = 0;
        foreach( $matches[3] as $headline ) {
                $numbering = "";
                if( $level ) {
                        $prevlevel = $level;
                }
                $level = $matches[1][$headlineCount];
                if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                        # reset when we enter a new level
                        $sublevelCount[$level] = 0;
                        $toc .= $sk->tocIndent( $level - $prevlevel );
                        $toclevel += $level - $prevlevel;
                }
                if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                        # reset when we step back a level
                        $sublevelCount[$level+1]=0;
                        $toc .= $sk->tocUnindent( $prevlevel - $level );
                        $toclevel -= $prevlevel - $level;
                }
                # count number of headlines for each level
                if( !isset( $sublevelCount[$level] ) ) {
                        $sublevelCount[$level] = 0;
                }
                $sublevelCount[$level]++;
                if( $doNumberHeadings || $doShowToc ) {
                        # Join the counters of all levels up to this one with dots,
                        # skipping levels that have no headline yet
                        $dot = 0;
                        for( $i = 1; $i <= $level; $i++ ) {
                                if( !empty( $sublevelCount[$i] ) ) {
                                        if( $dot ) {
                                                $numbering .= ".";
                                        }
                                        $numbering .= $sublevelCount[$i];
                                        $dot = 1;
                                }
                        }
                }

                # The canonized header is a version of the header text safe to use for links
                # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                $canonized_headline = $this->unstrip( $headline, $this->mStripState );

                # strip out HTML
                $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                $tocline = trim( $canonized_headline );
                $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));

                # count how many in assoc. array so we can track dupes in anchors
                if( !isset( $refers[$canonized_headline] ) ) {
                        $refers[$canonized_headline] = 0;
                }
                $refers[$canonized_headline]++;
                $dupeCount = $refers[$canonized_headline];

                # Prepend the number to the heading text

                if( $doNumberHeadings || $doShowToc ) {
                        $tocline = $numbering . " " . $tocline;

                        # Don't number the heading if it is the only one (looks silly)
                        if( $doNumberHeadings && $headlineTotal > 1) {
                                # the two are different if the line contains a link
                                $headline=$numbering . " " . $headline;
                        }
                }

                # Create the anchor for linking from the TOC to the section;
                # repeated headlines get "_2", "_3", ... appended
                # NOTE(review): a double quote in the headline is not replaced
                # above and ends up inside the name="..." attribute -- confirm
                # whether callers guarantee it cannot occur.
                $anchor = $canonized_headline;
                if( $dupeCount > 1 ) {
                        $anchor .= "_" . $dupeCount;
                }
                if( $doShowToc ) {
                        $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                }

                # Build the replacement for this headline: optional edit link,
                # then the anchor and the heading itself
                $head[$headlineCount] = "";
                if( $showEditLink ) {
                        $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
                }

                # Add the edit section span
                if( $rightClickHack ) {
                        $headline = $sk->editSectionScript($headlineCount+1,$headline);
                }

                # give headline the correct <h#> tag
                $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                $headlineCount++;
        }

        if( $doShowToc ) {
                # Close the indentation levels that are still open and wrap the TOC
                $toc .= $sk->tocUnindent( $toclevel );
                $toc = $sk->tocTable( $toc );
        }

        # split up and insert constructed headlines

        $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
        $i = 0;

        foreach( $blocks as $block ) {
                # An [edit] link for the top block of text used to be added here;
                # it was disabled because it broke block formatting (for example
                # a bullet point in the top line).
                $full .= $block;
                if( $doShowToc && !$i) {
                        # The TOC goes right after the first block of text.
                        # Top anchor now in skin
                        $full = $full.$toc;
                }

                if( !empty( $head[$i] ) ) {
                        $full .= $head[$i];
                }
                $i++;
        }

        return $full;
}
1699
# Builds the output for the magic word "ISBN".
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed: leading blanks are removed, then the longest run of
# characters from $valid is taken as the ISBN. When that run is non-empty
# the result is a link to Special:Booksources followed by the rest of the
# token text; otherwise the text is returned prefixed with "ISBN ".
# If the next token is not a text token it is left alone and "ISBN " is
# returned.
#
# $tokenizer  object providing previewToken() and nextToken(), both
#             returning an array with "type" and "text" entries
/* private */ function doMagicISBN( &$tokenizer )
{
        # Check whether next token is a text token
        # If yes, fetch it and convert the text into a
        # Special::BookSources link
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" )
        {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" )
        {
                return "ISBN ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        # NOTE(review): all capital letters are accepted here, not only the
        # check character X -- kept as is, confirm whether that is intended.
        $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        # strspn() measures the leading runs without ever reading past the
        # end of the string, so empty or fully consumed text is handled
        # without notices. The casts cover substr() returning false.
        $blankLen = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $blankLen );
        $x = (string)substr( $x, $blankLen );

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        # The lookup uses the number without dashes ($valid has no blank)
        $num = str_replace( "-", "", $isbn );

        if ( "" == $num ) {
                $text = "ISBN $blank$x";
        } else {
                $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
                $text = "<a href=\"" .
                        $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                        "\" class=\"internal\">ISBN $isbn</a>";
                $text .= $x;
        }
        return $text;
}
# Builds the output for the magic word "RFC".
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed: leading blanks are removed, then the leading run of digits
# is taken as the RFC number. When a number is present the result is an
# external link whose URL is the "rfcurl" message with $1 replaced by the
# number, followed by the rest of the token text; otherwise the text is
# returned prefixed with "RFC ".
# If the next token is not a text token it is left alone and "RFC " is
# returned.
#
# $tokenizer  object providing previewToken() and nextToken(), both
#             returning an array with "type" and "text" entries
/* private */ function doMagicRFC( &$tokenizer )
{
        # Check whether next token is a text token
        # If yes, fetch it and convert the text into a
        # link to an RFC source
        $token = $tokenizer->previewToken();
        while ( $token["type"] == "" )
        {
                $tokenizer->nextToken();
                $token = $tokenizer->previewToken();
        }
        if ( $token["type"] != "text" )
        {
                return "RFC ";
        }

        $token = $tokenizer->nextToken();
        $x = (string)$token["text"];
        $valid = "0123456789";

        # strspn() measures the leading runs without ever reading past the
        # end of the string, so empty or fully consumed text is handled
        # without notices. The casts cover substr() returning false.
        $blankLen = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( "" == $rfc ) {
                # Plain assignment: $text has no previous value to append to
                $text = "RFC $blank$x";
        } else {
                $url = wfMsg( "rfcurl" );
                $url = str_replace( "$1", $rfc, $url);
                $sk =& $this->mOptions->getSkin();
                $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
                $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
        return $text;
}
1788
# Transforms wiki markup before it is saved and returns the altered markup.
#
# Steps: normalise line endings, normalise <br> variants, strip protected
# sections, run pstPass2() on the remaining text, then put the stripped
# sections back.
#
# $text        the wiki text to transform
# $title       title of the page, stored in $this->mTitle
# $user        the user saving the page, passed on to pstPass2()
# $options     parser options, stored in $this->mOptions
# $clearState  when true, clearState() is called before processing
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
        $this->mOptions = $options;
        $this->mTitle =& $title;
        $this->mOutputType = OT_WIKI;

        if ( $clearState ) {
                $this->clearState();
        }

        $stripState = false;

        # Plain string replacements
        $pairs = array(
                "\r\n" => "\n",
        );
        $text = str_replace(array_keys($pairs), array_values($pairs), $text);

        # now with regexes
        # The first pattern must stay inside a single tag, hence [^>]+ : a
        # greedy ".+" could run from an earlier <br> to a later
        # <br clear=all> on the same line and delete the text between them.
        $pairs = array(
                "/<br[^>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
                "/<br *?>/i" => "<br/>",
        );
        $text = preg_replace(array_keys($pairs), array_values($pairs), $text);

        # The strip state is local to this call; it is filled by strip() and
        # used again by unstrip() once pstPass2() is done
        $text = $this->strip( $text, $stripState, false );
        $text = $this->pstPass2( $text, $user );
        $text = $this->unstrip( $text, $stripState );
        return $text;
}
1815
1816         /* private */ function pstPass2( $text, &$user )
1817         {
1818                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1819
1820                 # Variable replacement
1821                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1822                 $text = $this->replaceVariables( $text );
1823
1824                 # Signatures
1825                 #
1826                 $n = $user->getName();
1827                 $k = $user->getOption( "nickname" );
1828                 if ( "" == $k ) { $k = $n; }
1829                 if(isset($wgLocaltimezone)) {
1830                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1831                 }
1832                 /* Note: this is an ugly timezone hack for the European wikis */
1833                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1834                   " (" . date( "T" ) . ")";
1835                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1836
1837                 $text = preg_replace( "/~~~~~/", $d, $text );
1838                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1839                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1840                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1841                   Namespace::getUser() ) . ":$n|$k]]", $text );
1842
1843                 # Context links: [[|name]] and [[name (context)|]]
1844                 #
1845                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1846                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1847                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1848                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1849
1850                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1851                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1852                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1853                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1854                                                                                                                 # [[ns:page (cont)|]]
1855                 $context = "";
1856                 $t = $this->mTitle->getText();
1857                 if ( preg_match( $conpat, $t, $m ) ) {
1858                         $context = $m[2];
1859                 }
1860                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1861                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1862                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1863
1864                 if ( "" == $context ) {
1865                         $text = preg_replace( $p2, "[[\\1]]", $text );
1866                 } else {
1867                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1868                 }
1869
1870                 /*
1871                 $mw =& MagicWord::get( MAG_SUBST );
1872                 $wgCurParser = $this->fork();
1873                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1874                 $this->merge( $wgCurParser );
1875                 */
1876
1877                 # Trim trailing whitespace
1878                 # MAG_END (__END__) tag allows for trailing
1879                 # whitespace to be deliberately included
1880                 $text = rtrim( $text );
1881                 $mw =& MagicWord::get( MAG_END );
1882                 $mw->matchAndRemove( $text );
1883
1884                 return $text;
1885         }
1886
1887         # Set up some variables which are usually set up in parse()
1888         # so that an external function can call some class members with confidence
1889         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1890         {
1891                 $this->mTitle =& $title;
1892                 $this->mOptions = $options;
1893                 $this->mOutputType = $outputType;
1894                 if ( $clearState ) {
1895                         $this->clearState();
1896                 }
1897         }
1898
1899         function transformMsg( $text, $options ) {
1900                 global $wgTitle;
1901                 static $executing = false;
1902
1903                 # Guard against infinite recursion
1904                 if ( $executing ) {
1905                         return $text;
1906                 }
1907                 $executing = true;
1908
1909                 $this->mTitle = $wgTitle;
1910                 $this->mOptions = $options;
1911                 $this->mOutputType = OT_MSG;
1912                 $this->clearState();
1913                 $text = $this->replaceVariables( $text );
1914
1915                 $executing = false;
1916                 return $text;
1917         }
1918 }
1919
# Value holder for the results of a parse: the output text, the language links
# and category links collected along the way, and a flag recording whether
# "old magic" was encountered.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # Constructor; every argument is optional and is stored as given.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar).
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link lists and the old-magic flag of another ParserOutput into
        # this one. Language links are appended to language links and category
        # links to category links; the flag is OR-ed. mText is left untouched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Must take the category links of $other; merging in our own language
                # links here dropped $other's categories and polluted the category list.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
1949
# Bundle of settings that control a parse. Values are normally filled in from
# the site-wide globals and a user's preferences via newFromUser() /
# initialiseFromUser(), and can be read or overridden through the accessors.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessors
        function getUseTeX() { return $this->mUseTeX; }
        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function getSkin() { return $this->mSkin; }
        function getDateFormat() { return $this->mDateFormat; }
        function getEditSection() { return $this->mEditSection; }
        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings() { return $this->mNumberHeadings; }
        function getShowToc() { return $this->mShowToc; }

        # Mutators; each returns whatever wfSetVar() / wfSetRef() returns
        # (presumably the previous value -- confirm against those helpers).
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Factory: build a ParserOptions object initialised from $user.
        # $user is handed straight to initialiseFromUser(); see there for how a
        # false/empty value is treated.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                # initialiseFromUser() already declares its parameter by reference, so
                # no '&' is written at the call site (call-time pass-by-reference is a
                # parse error on newer PHP versions).
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option: the site-wide switches come from the $wg* globals,
        # the remaining ones from the user's skin and preferences.
        #
        # $userInput - user object; when it evaluates to false a new User object is
        #              created and marked as loaded, and its values are used instead.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( !$userInput ) {
                        $user = new User;
                        $user->setLoaded( true );
                } else {
                        $user =& $userInput;
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }


}
2022
# Plain-function regex callback, used in Parser::replaceVariables.
# Forwards the match array to braceSubstitution() on the parser object held
# in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2029
2030 ?>