includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
        # Cleared with clearState():
        #   $mOutput      ParserOutput object that collects the result
        #   $mAutonumber  counter, starts at 0
        #   $mDTopen      boolean flag, starts out false
        #   $mStripState  placeholder marker => content maps filled by strip()
        #                 and insertStripItem(), consumed by unstrip()
        var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
        #   $mVariables   starts out false
        #   $mIncludeCount, $mArgStack  start out as empty arrays
        #   $mLastSection starts out as the empty string
        var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

        # Temporary: per-call context, assigned by parse()
        #   $mOptions     the options object passed to parse()
        #   $mTitle       reference to the title passed to parse()
        #   $mOutputType  one of the OT_* constants
        var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80         }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  87         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  88         {
  89                 global $wgUseTidy;
  90                 $fname = "Parser::parse";
  91                 wfProfileIn( $fname );
  92
  93                 if ( $clearState ) {
  94                         $this->clearState();
  95                 }
  96
  97                 $this->mOptions = $options;
  98                 $this->mTitle =& $title;
  99                 $this->mOutputType = OT_HTML;
 100
 101                 $stripState = NULL;
 102                 $text = $this->strip( $text, $this->mStripState );
 103                 $text = $this->internalParse( $text, $linestart );
 104                 $text = $this->unstrip( $text, $this->mStripState );
 105                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 106                 if(!$wgUseTidy) {
 107                         $fixtags = array(
 108                                 # french spaces, last one Guillemet-left
 109                                 # only if there is something before the space
 110                                 "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
 111                                 # french spaces, Guillemet-right
 112                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 113                                 "/<hr *>/i" => '<hr />',
 114                                 "/<br *>/i" => '<br />',
 115                                 "/<center *>/i"=>'<div class="center">',
 116                                 "/<\\/center *>/i" => '</div>',
 117                                 # Clean up spare ampersands; note that we probably ought to be
 118                                 # more careful about named entities.
 119                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 120                         );
 121                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 122                 } else {
 123                         $fixtags = array(
 124                                 # french spaces, last one Guillemet-left
 125                                 "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
 126                                 # french spaces, Guillemet-right
 127                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 128                                 "/<center *>/i"=>'<div class="center">',
 129                                 "/<\\/center *>/i" => '</div>'
 130                         );
 131                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 132                 }
 133                 # only once and last
 134                 $text = $this->doBlockLevels( $text, $linestart );
 135                 if($wgUseTidy) {
 136                         $text = $this->tidy($text);
 137                 }
 138                 $this->mOutput->setText( $text );
 139                 wfProfileOut( $fname );
 140                 return $this->mOutput;
 141         }
 142
 143         /* static */ function getRandomString()
 144         {
 145                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 146         }
 147
 148         # Replaces all occurrences of <$tag>content</$tag> in the text
 149         # with a random marker and returns the new text. the output parameter
 150         # $content will be an associative array filled with data on the form
 151         # $unique_marker => content.
 152
 153         # If $content is already set, the additional entries will be appended
 154
 155         # If $tag is set to STRIP_COMMENTS, the function will extract
 156         # <!-- HTML comments -->
 157
 158         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 159                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 160                 if ( !$content ) {
 161                         $content = array( );
 162                 }
 163                 $n = 1;
 164                 $stripped = "";
 165
 166                 while ( "" != $text ) {
 167                         if($tag==STRIP_COMMENTS) {
 168                                 $p = preg_split( "/<!--/i", $text, 2 );
 169                         } else {
 170                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 171                         }
 172                         $stripped .= $p[0];
 173                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 174                                 $text = "";
 175                         } else {
 176                                 if($tag==STRIP_COMMENTS) {
 177                                         $q = preg_split( "/-->/i", $p[1], 2 );
 178                                 } else {
 179                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 180                                 }
 181                                 $marker = $rnd . sprintf("%08X", $n++);
 182                                 $content[$marker] = $q[0];
 183                                 $stripped .= $marker;
 184                                 $text = $q[1];
 185                         }
 186                 }
 187                 return $stripped;
 188         }
 189
 190         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 191         # If $render is set, performs necessary rendering operations on plugins
 192         # Returns the text, and fills an array with data needed in unstrip()
 193         # If the $state is already a valid strip state, it adds to the state
 194
 195         # When $stripcomments is set, HTML comments <!-- like this -->
 196         # will be stripped in addition to other tags. This is important
 197         # for section editing, where these comments cause confusion when
 198         # counting the sections in the wikisource
 199         function strip( $text, &$state, $stripcomments = false )
 200         {
 201                 $render = ($this->mOutputType == OT_HTML);
 202                 $nowiki_content = array();
 203                 $hiero_content = array();
 204                 $timeline_content = array();
 205                 $math_content = array();
 206                 $pre_content = array();
 207                 $comment_content = array();
 208
 209                 # Replace any instances of the placeholders
 210                 $uniq_prefix = UNIQ_PREFIX;
 211                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 212
 213                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 214                 foreach( $nowiki_content as $marker => $content ){
 215                         if( $render ){
 216                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 217                         } else {
 218                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 219                         }
 220                 }
 221
 222                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 223                 foreach( $hiero_content as $marker => $content ){
 224                         if( $render && $GLOBALS['wgUseWikiHiero']){
 225                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 226                         } else {
 227                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 228                         }
 229                 }
 230
 231                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 232                 foreach( $timeline_content as $marker => $content ){
 233                         if( $render && $GLOBALS['wgUseTimeline']){
 234                                 $timeline_content[$marker] = renderTimeline( $content );
 235                         } else {
 236                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 237                         }
 238                 }
 239
 240                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 241                 foreach( $math_content as $marker => $content ){
 242                         if( $render ) {
 243                                 if( $this->mOptions->getUseTeX() ) {
 244                                         $math_content[$marker] = renderMath( $content );
 245                                 } else {
 246                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 247                                 }
 248                         } else {
 249                                 $math_content[$marker] = "<math>$content</math>";
 250                         }
 251                 }
 252
 253                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 254                 foreach( $pre_content as $marker => $content ){
 255                         if( $render ){
 256                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 257                         } else {
 258                                 $pre_content[$marker] = "<pre>$content</pre>";
 259                         }
 260                 }
 261                 if($stripcomments) {
 262                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 263                         foreach( $comment_content as $marker => $content ){
 264                                 $comment_content[$marker] = "<!--$content-->";
 265                         }
 266                 }
 267
 268                 # Merge state with the pre-existing state, if there is one
 269                 if ( $state ) {
 270                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 271                         $state['hiero'] = $state['hiero'] + $hiero_content;
 272                         $state['timeline'] = $state['timeline'] + $timeline_content;
 273                         $state['math'] = $state['math'] + $math_content;
 274                         $state['pre'] = $state['pre'] + $pre_content;
 275                         $state['comment'] = $state['comment'] + $comment_content;
 276                 } else {
 277                         $state = array(
 278                           'nowiki' => $nowiki_content,
 279                           'hiero' => $hiero_content,
 280                           'timeline' => $timeline_content,
 281                           'math' => $math_content,
 282                           'pre' => $pre_content,
 283                           'comment' => $comment_content
 284                         );
 285                 }
 286                 return $text;
 287         }
 288
 289         function unstrip( $text, &$state )
 290         {
 291                 # Must expand in reverse order, otherwise nested tags will be corrupted
 292                 $contentDict = end( $state );
 293                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 294                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 295                                 $text = str_replace( key( $contentDict ), $content, $text );
 296                         }
 297                 }
 298
 299                 return $text;
 300         }
 301
 302         # Add an item to the strip state
 303         # Returns the unique tag which must be inserted into the stripped text
 304         # The tag will be replaced with the original text in unstrip()
 305
 306         function insertStripItem( $text, &$state )
 307         {
 308                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 309                 if ( !$state ) {
 310                         $state = array(
 311                           'nowiki' => array(),
 312                           'hiero' => array(),
 313                           'math' => array(),
 314                           'pre' => array()
 315                         );
 316                 }
 317                 $state['item'][$rnd] = $text;
 318                 return $rnd;
 319         }
 320
 321         # This method generates the list of subcategories and pages for a category
 322         function categoryMagic ()
 323         {
 324                 global $wgLang , $wgUser ;
 325                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 326
 327                 $cns = Namespace::getCategory() ;
 328                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 329
 330                 $r = "<br style=\"clear:both;\"/>\n";
 331
 332
 333                 $sk =& $wgUser->getSkin() ;
 334
 335                 $articles = array() ;
 336                 $children = array() ;
 337                 $data = array () ;
 338                 $id = $this->mTitle->getArticleID() ;
 339
 340                 # FIXME: add limits
 341                 $t = wfStrencode( $this->mTitle->getDBKey() );
 342                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 343                 $res = wfQuery ( $sql, DB_READ ) ;
 344                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 345
 346                 # For all pages that link to this category
 347                 foreach ( $data AS $x )
 348                 {
 349                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 350                         if ( $t != "" ) $t .= ":" ;
 351                         $t .= $x->cur_title ;
 352
 353                         if ( $x->cur_namespace == $cns ) {
 354                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 355                         } else {
 356                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 357                         }
 358                 }
 359                 wfFreeResult ( $res ) ;
 360
 361                 # Showing subcategories
 362                 if ( count ( $children ) > 0 ) {
 363                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 364                         $r .= implode ( ", " , $children ) ;
 365                 }
 366
 367                 # Showing pages in this category
 368                 if ( count ( $articles ) > 0 ) {
 369                         $ti = $this->mTitle->getText() ;
 370                         $h =  wfMsg( "category_header", $ti );
 371                         $r .= "<h2>{$h}</h2>\n" ;
 372                         $r .= implode ( ", " , $articles ) ;
 373                 }
 374
 375
 376                 return $r ;
 377         }
 378
 379         function getHTMLattrs ()
 380         {
 381                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 382                                 "title", "align", "lang", "dir", "width", "height",
 383                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 384                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 385                                 /* FONT */ "type", "start", "value", "compact",
 386                                 /* For various lists, mostly deprecated but safe */
 387                                 "summary", "width", "border", "frame", "rules",
 388                                 "cellspacing", "cellpadding", "valign", "char",
 389                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 390                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 391                                 "id", "class", "name", "style" /* For CSS */
 392                                 );
 393                 return $htmlattrs ;
 394         }
 395
 396         function fixTagAttributes ( $t )
 397         {
 398                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 399                 $htmlattrs = $this->getHTMLattrs() ;
 400
 401                 # Strip non-approved attributes from the tag
 402                 $t = preg_replace(
 403                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 404                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 405                         $t);
 406                 # Strip javascript "expression" from stylesheets. Brute force approach:
 407                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 408
 409                 if( preg_match(
 410                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 411                         wfMungeToUtf8( $t ) ) )
 412                 {
 413                         $t="";
 414                 }
 415
 416                 return trim ( $t ) ;
 417         }
 418
 419         /* interface with html tidy, used if $wgUseTidy = true */
 420         function tidy ( $text ) {
 421                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 422                 global $wgInputEncoding, $wgOutputEncoding;
 423                 $fname = "Parser::tidy";
 424                 wfProfileIn( $fname );
 425
 426                 $cleansource = '';
 427                 switch(strtoupper($wgOutputEncoding)) {
 428                         case 'ISO-8859-1':
 429                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 430                                 break;
 431                         case 'UTF-8':
 432                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 433                                 break;
 434                         default:
 435                                 $wgTidyOpts .= ' -raw';
 436                         }
 437
 438                 $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 439 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 440 '<head><title>test</title></head><body>'.$text.'</body></html>';
 441                 $descriptorspec = array(
 442                         0 => array("pipe", "r"),
 443                         1 => array("pipe", "w"),
 444                         2 => array("file", "/dev/null", "a")
 445                 );
 446                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 447                 if (is_resource($process)) {
 448                         fwrite($pipes[0], $text);
 449                         fclose($pipes[0]);
 450                         while (!feof($pipes[1])) {
 451                                 $cleansource .= fgets($pipes[1], 1024);
 452                         }
 453                         fclose($pipes[1]);
 454                         $return_value = proc_close($process);
 455                 }
 456
 457                 wfProfileOut( $fname );
 458
 459                 if( $cleansource == '' && $text != '') {
 460                         wfDebug( "Tidy error detected!\n" );
 461                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 462                 } else {
 463                         return $cleansource;
 464                 }
 465         }
 466
 467         function doTableStuff ( $t )
 468         {
 469                 $t = explode ( "\n" , $t ) ;
 470                 $td = array () ; # Is currently a td tag open?
 471                         $ltd = array () ; # Was it TD or TH?
 472                         $tr = array () ; # Is currently a tr tag open?
 473                         $ltr = array () ; # tr attributes
 474                         foreach ( $t AS $k => $x )
 475                         {
 476                                 $x = trim ( $x ) ;
 477                                 $fc = substr ( $x , 0 , 1 ) ;
 478                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 479                                 {
 480                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 481                                         array_push ( $td , false ) ;
 482                                         array_push ( $ltd , "" ) ;
 483                                         array_push ( $tr , false ) ;
 484                                         array_push ( $ltr , "" ) ;
 485                                 }
 486                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 487                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 488                                 {
 489                                         $z = "</table>\n" ;
 490                                         $l = array_pop ( $ltd ) ;
 491                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 492                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 493                                         array_pop ( $ltr ) ;
 494                                         $t[$k] = $z ;
 495                                 }
 496                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 497                                                 {
 498                                                 $z = trim ( substr ( $x , 2 ) ) ;
 499                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 500                                                 }*/
 501                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 502                                 {
 503                                         $x = substr ( $x , 1 ) ;
 504                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 505                                         $z = "" ;
 506                                         $l = array_pop ( $ltd ) ;
 507                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 508                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 509                                         array_pop ( $ltr ) ;
 510                                         $t[$k] = $z ;
 511                                         array_push ( $tr , false ) ;
 512                                         array_push ( $td , false ) ;
 513                                         array_push ( $ltd , "" ) ;
 514                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 515                                 }
 516                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 517                                 {
 518                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 519                                         {
 520                                                 $fc = "+" ;
 521                                                 $x = substr ( $x , 1 ) ;
 522                                         }
 523                                         $after = substr ( $x , 1 ) ;
 524                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 525                                         $after = explode ( "||" , $after ) ;
 526                                         $t[$k] = "" ;
 527                                         foreach ( $after AS $theline )
 528                                         {
 529                                                 $z = "" ;
 530                                                 if ( $fc != "+" )
 531                                                 {
 532                                                         $tra = array_pop ( $ltr ) ;
 533                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 534                                                         array_push ( $tr , true ) ;
 535                                                         array_push ( $ltr , "" ) ;
 536                                                 }
 537
 538                                                 $l = array_pop ( $ltd ) ;
 539                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 540                                                 if ( $fc == "|" ) $l = "td" ;
 541                                                 else if ( $fc == "!" ) $l = "th" ;
 542                                                 else if ( $fc == "+" ) $l = "caption" ;
 543                                                 else $l = "" ;
 544                                                 array_push ( $ltd , $l ) ;
 545                                                 $y = explode ( "|" , $theline , 2 ) ;
 546                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 547                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 548                                                 $t[$k] .= $y ;
 549                                                 array_push ( $td , true ) ;
 550                                         }
 551                                 }
 552                         }
 553
 554                 # Closing open td, tr && table
 555                 while ( count ( $td ) > 0 )
 556                 {
 557                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 558                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 559                         $t[] = "</table>" ;
 560                 }
 561
 562                 $t = implode ( "\n" , $t ) ;
 563                 #               $t = $this->removeHTMLtags( $t );
 564                 return $t ;
 565         }
 566
 567         # Parses the text and adds the result to the strip state
 568         # Returns the strip tag
 569         function stripParse( $text, $newline, $args )
 570         {
 571                 $text = $this->strip( $text, $this->mStripState );
 572                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 573                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 574         }
 575
        # Run the main sequence of markup transformations over text that has
        # already been through strip().
        #
        #   $text       stripped wiki text
        #   $linestart  not used in this function's body
        #   $args       passed to replaceVariables()
        #   $isMain     passed to formatHeadings()
        # Returns the transformed text.
        #
        # The steps depend on each other's output, so their order matters.
        function internalParse( $text, $linestart, $args = array(), $isMain=true )
        {
                $fname = "Parser::internalParse";
                wfProfileIn( $fname );

                $text = $this->removeHTMLtags( $text );
                $text = $this->replaceVariables( $text, $args );

                # A line starting with four or more dashes becomes a horizontal rule
                $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

                $text = $this->doHeadings( $text );
                if($this->mOptions->getUseDynamicDates()) {
                        global $wgDateFormatter;
                        $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
                }
                $text = $this->doAllQuotes( $text );
                $text = $this->replaceExternalLinks( $text );
                # NOTE(review): replaceInternalLinks() is run twice in a row --
                # presumably on purpose; confirm before removing either call.
                $text = $this->replaceInternalLinks ( $text );
                $text = $this->replaceInternalLinks ( $text );
                //$text = $this->doTokenizedParser ( $text );
                $text = $this->doTableStuff ( $text ) ;
                $text = $this->magicISBN( $text );
                $text = $this->magicRFC( $text );
                $text = $this->formatHeadings( $text, $isMain );
                $sk =& $this->mOptions->getSkin();
                $text = $sk->transformContent( $text );

                # The category listing is appended at most once per Parser
                # object: nothing in the code visible here unsets the flag.
                if ( !isset ( $this->categoryMagicDone ) ) {
                        $text .= $this->categoryMagic () ;
                        $this->categoryMagicDone = true ;
                }

                wfProfileOut( $fname );
                return $text;
        }
 611
 612
 613         /* private */ function doHeadings( $text )
 614         {
 615                 for ( $i = 6; $i >= 1; --$i ) {
 616                         $h = substr( "======", 0, $i );
 617                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 618                           "<h{$i}>\\1</h{$i}>\\2", $text );
 619                 }
 620                 return $text;
 621         }
 622
 623         /* private */ function doAllQuotes( $text )
 624         {
 625                 $outtext = "";
 626                 $lines = explode( "\n", $text );
 627                 foreach ( $lines as $line ) {
 628                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\n";
 629                 }
 630                 return substr($outtext, 0,-1);
 631         }
 632
 633         /* private */ function doQuotes( $pre, $text, $mode )
 634         {
 635                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 636                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 637                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 638                         if ( substr ($m[2], 0, 1) == "'" ) {
 639                                 $m[2] = substr ($m[2], 1);
 640                                 if ($mode == "em") {
 641                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 642                                 } else if ($mode == "strong") {
 643                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 644                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 645                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 646                                 } else if ($mode == "strongem") {
 647                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 648                                 } else {
 649                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 650                                 }
 651                         } else {
 652                                 if ($mode == "strong") {
 653                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 654                                 } else if ($mode == "em") {
 655                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 656                                 } else if ($mode == "emstrong") {
 657                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 658                                 } else if (($mode == "strongem") || ($mode == "both")) {
 659                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 660                                 } else {
 661                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 662                                 }
 663                         }
 664                 } else {
 665                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 666                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 667                         if ($mode == "") {
 668                                 return $pre . $text;
 669                         } else if ($mode == "em") {
 670                                 return $pre . $text_em;
 671                         } else if ($mode == "strong") {
 672                                 return $pre . $text_strong;
 673                         } else if ($mode == "strongem") {
 674                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 675                         } else {
 676                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 677                         }
 678                 }
 679         }
 680
 681         # Note: we have to do external links before the internal ones,
 682         # and otherwise take great care in the order of things here, so
 683         # that we don't end up interpreting some URLs twice.
 684
 685         /* private */ function replaceExternalLinks( $text )
 686         {
 687                 $fname = "Parser::replaceExternalLinks";
 688                 wfProfileIn( $fname );
 689                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 690                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 691                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 692                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 693                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 694                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 695                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 696                 wfProfileOut( $fname );
 697                 return $text;
 698         }
 699
 700         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 701         {
 702                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 703                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 704
 705                 # this is  the list of separators that should be ignored if they
 706                 # are the last character of an URL but that should be included
 707                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 708                 # in this case, the last comma should not become part of the URL,
 709                 # but in "www.foo.com/123,2342,32.htm" it should.
 710                 $sep = ",;\.:";
 711                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 712                 $images = "gif|png|jpg|jpeg";
 713
 714                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 715                 # they are interpreted as part of the string (used to tell PHP
 716                 # that the content of the string should be inserted there).
 717                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 718                   "((?i){$images})([^{$uc}]|$)/";
 719
 720                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 721                 $sk =& $this->mOptions->getSkin();
 722
 723                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 724                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 725                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 726                 }
 727                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 728                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 729                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 730                   "</a>\\5", $s );
 731                 $s = str_replace( $unique, $protocol, $s );
 732
 733                 $a = explode( "[{$protocol}:", " " . $s );
 734                 $s = array_shift( $a );
 735                 $s = substr( $s, 1 );
 736
 737                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 738                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 739
 740                 foreach ( $a as $line ) {
 741                         if ( preg_match( $e1, $line, $m ) ) {
 742                                 $link = "{$protocol}:{$m[1]}";
 743                                 $trail = $m[2];
 744                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 745                                 else { $text = wfEscapeHTML( $link ); }
 746                         } else if ( preg_match( $e2, $line, $m ) ) {
 747                                 $link = "{$protocol}:{$m[1]}";
 748                                 $text = $m[2];
 749                                 $trail = $m[3];
 750                         } else {
 751                                 $s .= "[{$protocol}:" . $line;
 752                                 continue;
 753                         }
 754                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 755                                 $paren = "";
 756                         } else {
 757                                 # Expand the URL for printable version
 758                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 759                         }
 760                         $la = $sk->getExternalLinkAttributes( $link, $text );
 761                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 762
 763                 }
 764                 return $s;
 765         }
 766
 767
 768         /* private */ function replaceInternalLinks( $s )
 769         {
 770                 global $wgLang, $wgLinkCache;
 771                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 772                 static $fname = "Parser::replaceInternalLink" ;
 773                 wfProfileIn( $fname );
 774
 775                 wfProfileIn( "$fname-setup" );
 776                 static $tc = FALSE;
 777                 # the % is needed to support urlencoded titles as well
 778                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 779                 $sk =& $this->mOptions->getSkin();
 780
 781                 $a = explode( "[[", " " . $s );
 782                 $s = array_shift( $a );
 783                 $s = substr( $s, 1 );
 784
 785                 # Match a link having the form [[namespace:link|alternate]]trail
 786                 static $e1 = FALSE;
 787                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 788                 # Match the end of a line for a word that's not followed by whitespace,
 789                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 790                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 791                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 792                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 793
 794
 795                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 796                 static $image = FALSE;
 797                 static $special = FALSE;
 798                 static $media = FALSE;
 799                 static $category = FALSE;
 800                 if ( !$image ) { $image = Namespace::getImage(); }
 801                 if ( !$special ) { $special = Namespace::getSpecial(); }
 802                 if ( !$media ) { $media = Namespace::getMedia(); }
 803                 if ( !$category ) { $category = Namespace::getCategory(); }
 804
 805                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 806
 807                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 808                         $new_prefix = $m[2];
 809                         $s = $m[1];
 810                 } else {
 811                         $new_prefix="";
 812                 }
 813
 814                 wfProfileOut( "$fname-setup" );
 815
 816                 foreach ( $a as $line ) {
 817                         $prefix = $new_prefix;
 818
 819                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 820                                 $text = $m[2];
 821                                 # fix up urlencoded title texts
 822                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 823                                 $trail = $m[3];
 824                         } else { # Invalid form; output directly
 825                                 $s .= $prefix . "[[" . $line ;
 826                                 wfProfileOut( $fname );
 827                                 continue;
 828                         }
 829
 830                         /* Valid link forms:
 831                         Foobar -- normal
 832                         :Foobar -- override special treatment of prefix (images, language links)
 833                         /Foobar -- convert to CurrentPage/Foobar
 834                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 835                         */
 836                         $c = substr($m[1],0,1);
 837                         $noforce = ($c != ":");
 838                         if( $c == "/" ) { # subpage
 839                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 840                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 841                                         $noslash=$m[1];
 842                                 } else {
 843                                         $noslash=substr($m[1],1);
 844                                 }
 845                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 846                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 847                                         if( "" == $text ) {
 848                                                 $text= $m[1];
 849                                         } # this might be changed for ugliness reasons
 850                                 } else {
 851                                         $link = $noslash; # no subpage allowed, use standard link
 852                                 }
 853                         } elseif( $noforce ) { # no subpage
 854                                 $link = $m[1];
 855                         } else {
 856                                 $link = substr( $m[1], 1 );
 857                         }
 858                         $wasblank = ( "" == $text );
 859                         if( $wasblank )
 860                         $text = $link;
 861
 862                         $nt = Title::newFromText( $link );
 863                         if( !$nt ) {
 864                                 $s .= $prefix . "[[" . $line;
 865                                 wfProfileOut( $fname );
 866                                 continue;
 867                         }
 868                         $ns = $nt->getNamespace();
 869                         $iw = $nt->getInterWiki();
 870                         if( $noforce ) {
 871                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 872                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 873                                         $tmp = $prefix . $trail ;
 874                                         wfProfileOut( $fname );
 875                                         $s .= (trim($tmp) == '')? '': $tmp;
 876                                         continue;
 877                                 }
 878                                 if ( $ns == $image ) {
 879                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 880                                         $wgLinkCache->addImageLinkObj( $nt );
 881                                         wfProfileOut( $fname );
 882                                         continue;
 883                                 }
 884                                 if ( $ns == $category ) {
 885                                         $t = $nt->getText() ;
 886                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 887
 888                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 889                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 890                                         $wgLinkCache->resume();
 891
 892                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 893                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 894                                         $this->mOutput->mCategoryLinks[] = $t ;
 895                                         $s .= $prefix . $trail ;
 896                                         wfProfileOut( $fname );
 897                                         continue;
 898                                 }
 899                         }
 900                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 901                         ( strpos( $link, "#" ) == FALSE ) ) {
 902                                 # Self-links are handled specially; generally de-link and change to bold.
 903                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 904                                 wfProfileOut( $fname );
 905                                 continue;
 906                         }
 907
 908                         if( $ns == $media ) {
 909                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 910                                 $wgLinkCache->addImageLinkObj( $nt );
 911                                 wfProfileOut( $fname );
 912                                 continue;
 913                         } elseif( $ns == $special ) {
 914                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 915                                 wfProfileOut( $fname );
 916                                 continue;
 917                         }
 918                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 919                 }
 920                 wfProfileOut( $fname );
 921                 return $s;
 922         }
 923
 924         # Some functions here used by doBlockLevels()
 925         #
 926         /* private */ function closeParagraph()
 927         {
 928                 $result = "";
 929                 if ( '' != $this->mLastSection ) {
 930                         $result = "</" . $this->mLastSection  . ">\n";
 931                 }
 932                 $this->mInPre = false;
 933                 $this->mLastSection = "";
 934                 return $result;
 935         }
 936         # getCommon() returns the length of the longest common substring
 937         # of both arguments, starting at the beginning of both.
 938         #
 939         /* private */ function getCommon( $st1, $st2 )
 940         {
 941                 $fl = strlen( $st1 );
 942                 $shorter = strlen( $st2 );
 943                 if ( $fl < $shorter ) { $shorter = $fl; }
 944
 945                 for ( $i = 0; $i < $shorter; ++$i ) {
 946                         if ( $st1{$i} != $st2{$i} ) { break; }
 947                 }
 948                 return $i;
 949         }
 950         # These next three functions open, continue, and close the list
 951         # element appropriate to the prefix character passed into them.
 952         #
 953         /* private */ function openList( $char )
 954     {
 955                 $result = $this->closeParagraph();
 956
 957                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 958                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 959                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 960                 else if ( ";" == $char ) {
 961                         $result .= "<dl><dt>";
 962                         $this->mDTopen = true;
 963                 }
 964                 else { $result = "<!-- ERR 1 -->"; }
 965
 966                 return $result;
 967         }
 968
 969         /* private */ function nextItem( $char )
 970         {
 971                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 972                 else if ( ":" == $char || ";" == $char ) {
 973                         $close = "</dd>";
 974                         if ( $this->mDTopen ) { $close = "</dt>"; }
 975                         if ( ";" == $char ) {
 976                                 $this->mDTopen = true;
 977                                 return $close . "<dt>";
 978                         } else {
 979                                 $this->mDTopen = false;
 980                                 return $close . "<dd>";
 981                         }
 982                 }
 983                 return "<!-- ERR 2 -->";
 984         }
 985
 986         /* private */function closeList( $char )
 987         {
 988                 if ( "*" == $char ) { $text = "</li></ul>"; }
 989                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 990                 else if ( ":" == $char ) {
 991                         if ( $this->mDTopen ) {
 992                                 $this->mDTopen = false;
 993                                 $text = "</dt></dl>";
 994                         } else {
 995                                 $text = "</dd></dl>";
 996                         }
 997                 }
 998                 else {  return "<!-- ERR 3 -->"; }
 999                 return $text."\n";
1000         }
1001
1002         /* private */ function doBlockLevels( $text, $linestart ) {
1003                 $fname = "Parser::doBlockLevels";
1004                 wfProfileIn( $fname );
1005
1006                 # Parsing through the text line by line.  The main thing
1007                 # happening here is handling of block-level elements p, pre,
1008                 # and making lists from lines starting with * # : etc.
1009                 #
1010                 $textLines = explode( "\n", $text );
1011
1012                 $lastPrefix = $output = $lastLine = '';
1013                 $this->mDTopen = $inBlockElem = false;
1014                 $prefixLength = 0;
1015                 $paragraphStack = false;
1016
1017                 if ( !$linestart ) {
1018                         $output .= array_shift( $textLines );
1019                 }
1020                 foreach ( $textLines as $oLine ) {
1021                         $lastPrefixLength = strlen( $lastPrefix );
1022                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1023                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1024                         if (!$this->mInPre) {
1025                                 $this->mInPre = !empty($preOpenMatch);
1026                         }
1027                         if ( !$this->mInPre ) {
1028                                 # Multiple prefixes may abut each other for nested lists.
1029                                 $prefixLength = strspn( $oLine, "*#:;" );
1030                                 $pref = substr( $oLine, 0, $prefixLength );
1031
1032                                 # eh?
1033                                 $pref2 = str_replace( ";", ":", $pref );
1034                                 $t = substr( $oLine, $prefixLength );
1035                         } else {
1036                                 # Don't interpret any other prefixes in preformatted text
1037                                 $prefixLength = 0;
1038                                 $pref = $pref2 = '';
1039                                 $t = $oLine;
1040                         }
1041
1042                         # List generation
1043                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1044                                 # Same as the last item, so no need to deal with nesting or opening stuff
1045                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1046                                 $paragraphStack = false;
1047
1048                                 if ( ";" == substr( $pref, -1 ) ) {
1049                                         # The one nasty exception: definition lists work like this:
1050                                         # ; title : definition text
1051                                         # So we check for : in the remainder text to split up the
1052                                         # title and definition, without b0rking links.
1053                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1054                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1055                                                 $term = $match[1];
1056                                                 $output .= $term . $this->nextItem( ":" );
1057                                                 $t = $match[2];
1058                                         }
1059                                 }
1060                         } elseif( $prefixLength || $lastPrefixLength ) {
1061                                 # Either open or close a level...
1062                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1063                                 $paragraphStack = false;
1064
1065                                 while( $commonPrefixLength < $lastPrefixLength ) {
1066                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1067                                         --$lastPrefixLength;
1068                                 }
1069                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1070                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1071                                 }
1072                                 while ( $prefixLength > $commonPrefixLength ) {
1073                                         $char = substr( $pref, $commonPrefixLength, 1 );
1074                                         $output .= $this->openList( $char );
1075
1076                                         if ( ";" == $char ) {
1077                                                 # FIXME: This is dupe of code above
1078                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1079                                                         $term = $match[1];
1080                                                         $output .= $term . $this->nextItem( ":" );
1081                                                         $t = $match[2];
1082                                                 }
1083                                         }
1084                                         ++$commonPrefixLength;
1085                                 }
1086                                 $lastPrefix = $pref2;
1087                         }
1088                         if( 0 == $prefixLength ) {
1089                                 # No prefix (not in list)--go to paragraph mode
1090                                 $uniq_prefix = UNIQ_PREFIX;
1091                                 // XXX: use a stack for nestable elements like span, table and div
1092                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1093                                 $closematch = preg_match(
1094                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1095                                         "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1096                                 if ( $openmatch or $closematch ) {
1097                                         $paragraphStack = false;
1098                                         $output .= $this->closeParagraph();
1099                                         if($preOpenMatch and !$preCloseMatch) {
1100                                                 $this->mInPre = true;
1101                                         }
1102                                         if ( $closematch  ) {
1103                                                 $inBlockElem = false;
1104                                         } else {
1105                                                 $inBlockElem = true;
1106                                         }
1107                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1108                                         if ( " " == $t{0} and trim($t) != '' ) {
1109                                                 // pre
1110                                                 if ($this->mLastSection != 'pre') {
1111                                                         $paragraphStack = false;
1112                                                         $output .= $this->closeParagraph().'<pre>';
1113                                                         $this->mLastSection = 'pre';
1114                                                 }
1115                                         } else {
1116                                                 // paragraph
1117                                                 if ( '' == trim($t) ) {
1118                                                         if ( $paragraphStack ) {
1119                                                                 $output .= $paragraphStack.'<br />';
1120                                                                 $paragraphStack = false;
1121                                                                 $this->mLastSection = 'p';
1122                                                         } else {
1123                                                                 if ($this->mLastSection != 'p' ) {
1124                                                                         $output .= $this->closeParagraph();
1125                                                                         $this->mLastSection = '';
1126                                                                         $paragraphStack = "<p>";
1127                                                                 } else {
1128                                                                         $paragraphStack = '</p><p>';
1129                                                                 }
1130                                                         }
1131                                                 } else {
1132                                                         if ( $paragraphStack ) {
1133                                                                 $output .= $paragraphStack;
1134                                                                 $paragraphStack = false;
1135                                                                 $this->mLastSection = 'p';
1136                                                         } else if ($this->mLastSection != 'p') {
1137                                                                 $output .= $this->closeParagraph().'<p>';
1138                                                                 $this->mLastSection = 'p';
1139                                                         }
1140                                                 }
1141                                         }
1142                                 }
1143                         }
1144                         if ($paragraphStack === false) {
1145                                 $output .= $t."\n";
1146                         }
1147                 }
1148                 while ( $prefixLength ) {
1149                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1150                         --$prefixLength;
1151                 }
1152                 if ( "" != $this->mLastSection ) {
1153                         $output .= "</" . $this->mLastSection . ">";
1154                         $this->mLastSection = "";
1155                 }
1156
1157                 wfProfileOut( $fname );
1158                 return $output;
1159         }
1160
1161         function getVariableValue( $index ) {
1162                 global $wgLang, $wgSitename, $wgServer;
1163
1164                 switch ( $index ) {
1165                         case MAG_CURRENTMONTH:
1166                                 return date( "m" );
1167                         case MAG_CURRENTMONTHNAME:
1168                                 return $wgLang->getMonthName( date("n") );
1169                         case MAG_CURRENTMONTHNAMEGEN:
1170                                 return $wgLang->getMonthNameGen( date("n") );
1171                         case MAG_CURRENTDAY:
1172                                 return date("j");
1173                         case MAG_PAGENAME:
1174                                 return $this->mTitle->getText();
1175                         case MAG_NAMESPACE:
1176                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1177                                 return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
1178                         case MAG_CURRENTDAYNAME:
1179                                 return $wgLang->getWeekdayName( date("w")+1 );
1180                         case MAG_CURRENTYEAR:
1181                                 return date( "Y" );
1182                         case MAG_CURRENTTIME:
1183                                 return $wgLang->time( wfTimestampNow(), false );
1184                         case MAG_NUMBEROFARTICLES:
1185                                 return wfNumberOfArticles();
1186                         case MAG_SITENAME:
1187                                 return $wgSitename;
1188                         case MAG_SERVER:
1189                                 return $wgServer;
1190                         default:
1191                                 return NULL;
1192                 }
1193         }
1194
1195         function initialiseVariables()
1196         {
1197                 global $wgVariableIDs;
1198                 $this->mVariables = array();
1199                 foreach ( $wgVariableIDs as $id ) {
1200                         $mw =& MagicWord::get( $id );
1201                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1202                 }
1203         }
1204
1205         /* private */ function replaceVariables( $text, $args = array() )
1206         {
1207                 global $wgLang, $wgScript, $wgArticlePath;
1208
1209                 $fname = "Parser::replaceVariables";
1210                 wfProfileIn( $fname );
1211
1212                 $bail = false;
1213                 if ( !$this->mVariables ) {
1214                         $this->initialiseVariables();
1215                 }
1216                 $titleChars = Title::legalChars();
1217                 $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );
1218
1219                 # This function is called recursively. To keep track of arguments we need a stack:
1220                 array_push( $this->mArgStack, $args );
1221
1222                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1223                 $GLOBALS['wgCurParser'] =& $this;
1224
1225
1226                 if ( $this->mOutputType == OT_HTML ) {
1227                         # Variable substitution
1228                         $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );
1229
1230                         # Argument substitution
1231                         $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
1232                 }
1233                 # Template substitution
1234                 $regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
1235                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
1236
1237                 array_pop( $this->mArgStack );
1238
1239                 wfProfileOut( $fname );
1240                 return $text;
1241         }
1242
1243         function variableSubstitution( $matches )
1244         {
1245                 if ( array_key_exists( $matches[1], $this->mVariables ) ) {
1246                         $text = $this->mVariables[$matches[1]];
1247                         $this->mOutput->mContainsOldMagic = true;
1248                 } else {
1249                         $text = $matches[0];
1250                 }
1251                 return $text;
1252         }
1253
1254         function braceSubstitution( $matches )
1255         {
1256                 global $wgLinkCache, $wgLang;
1257                 $fname = "Parser::braceSubstitution";
1258                 $found = false;
1259                 $nowiki = false;
1260                 $noparse = false;
1261
1262                 $title = NULL;
1263
1264                 # $newline is an optional newline character before the braces
1265                 # $part1 is the bit before the first |, and must contain only title characters
1266                 # $args is a list of arguments, starting from index 0, not including $part1
1267
1268                 $newline = $matches[1];
1269                 $part1 = $matches[2];
1270                 # If the third subpattern matched anything, it will start with |
1271                 if ( $matches[3] !== "" ) {
1272                         $args = explode( "|", substr( $matches[3], 1 ) );
1273                 } else {
1274                         $args = array();
1275                 }
1276                 $argc = count( $args );
1277
1278                 # {{{}}}
1279                 if ( strpos( $matches[0], "{{{" ) !== false ) {
1280                         $text = $matches[0];
1281                         $found = true;
1282                         $noparse = true;
1283                 }
1284
1285                 # SUBST
1286                 if ( !$found ) {
1287                         $mwSubst =& MagicWord::get( MAG_SUBST );
1288                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1289                                 if ( $this->mOutputType != OT_WIKI ) {
1290                                         # Invalid SUBST not replaced at PST time
1291                                         # Return without further processing
1292                                         $text = $matches[0];
1293                                         $found = true;
1294                                         $noparse= true;
1295                                 }
1296                         } elseif ( $this->mOutputType == OT_WIKI ) {
1297                                 # SUBST not found in PST pass, do nothing
1298                                 $text = $matches[0];
1299                                 $found = true;
1300                         }
1301                 }
1302
1303                 # MSG, MSGNW and INT
1304                 if ( !$found ) {
1305                         # Check for MSGNW:
1306                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1307                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1308                                 $nowiki = true;
1309                         } else {
1310                                 # Remove obsolete MSG:
1311                                 $mwMsg =& MagicWord::get( MAG_MSG );
1312                                 $mwMsg->matchStartAndRemove( $part1 );
1313                         }
1314
1315                         # Check if it is an internal message
1316                         $mwInt =& MagicWord::get( MAG_INT );
1317                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1318                                 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1319                                         $text = wfMsgReal( $part1, $args, true );
1320                                         $found = true;
1321                                 }
1322                         }
1323                 }
1324
1325                 # NS
1326                 if ( !$found ) {
1327                         # Check for NS: (namespace expansion)
1328                         $mwNs = MagicWord::get( MAG_NS );
1329                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1330                                 if ( intval( $part1 ) ) {
1331                                         $text = $wgLang->getNsText( intval( $part1 ) );
1332                                         $found = true;
1333                                 } else {
1334                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1335                                         if ( !is_null( $index ) ) {
1336                                                 $text = $wgLang->getNsText( $index );
1337                                                 $found = true;
1338                                         }
1339                                 }
1340                         }
1341                 }
1342
1343                 # LOCALURL and LOCALURLE
1344                 if ( !$found ) {
1345                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1346                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1347
1348                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1349                                 $func = 'getLocalURL';
1350                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1351                                 $func = 'escapeLocalURL';
1352                         } else {
1353                                 $func = '';
1354                         }
1355
1356                         if ( $func !== '' ) {
1357                                 $title = Title::newFromText( $part1 );
1358                                 if ( !is_null( $title ) ) {
1359                                         if ( $argc > 0 ) {
1360                                                 $text = $title->$func( $args[0] );
1361                                         } else {
1362                                                 $text = $title->$func();
1363                                         }
1364                                         $found = true;
1365                                 }
1366                         }
1367                 }
1368
1369                 # Internal variables
1370                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1371                         $text = $this->mVariables[$part1];
1372                         $found = true;
1373                         $this->mOutput->mContainsOldMagic = true;
1374                 }
1375 /*
1376                 # Arguments input from the caller
1377                 $inputArgs = end( $this->mArgStack );
1378                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1379                         $text = $inputArgs[$part1];
1380                         $found = true;
1381                 }
1382 */
1383                 # Load from database
1384                 if ( !$found ) {
1385                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1386                         if ( !is_null( $title ) && !$title->isExternal() ) {
1387                                 # Check for excessive inclusion
1388                                 $dbk = $title->getPrefixedDBkey();
1389                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1390                                         $article = new Article( $title );
1391                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1392                                         if ( $articleContent !== false ) {
1393                                                 $found = true;
1394                                                 $text = $articleContent;
1395
1396                                         }
1397                                 }
1398
1399                                 # If the title is valid but undisplayable, make a link to it
1400                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1401                                         $text = "[[" . $title->getPrefixedText() . "]]";
1402                                         $found = true;
1403                                 }
1404                         }
1405                 }
1406
1407                 # Recursive parsing, escaping and link table handling
1408                 # Only for HTML output
1409                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1410                         $text = wfEscapeWikiText( $text );
1411                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1412                         # Clean up argument array
1413                         $assocArgs = array();
1414                         $index = 1;
1415                         foreach( $args as $arg ) {
1416                                 $eqpos = strpos( $arg, "=" );
1417                                 if ( $eqpos === false ) {
1418                                         $assocArgs[$index++] = $arg;
1419                                 } else {
1420                                         $name = trim( substr( $arg, 0, $eqpos ) );
1421                                         $value = trim( substr( $arg, $eqpos+1 ) );
1422                                         if ( $value === false ) {
1423                                                 $value = "";
1424                                         }
1425                                         if ( $name !== false ) {
1426                                                 $assocArgs[$name] = $value;
1427                                         }
1428                                 }
1429                         }
1430
1431                         # Do not enter included links in link table
1432                         if ( !is_null( $title ) ) {
1433                                 $wgLinkCache->suspend();
1434                         }
1435
1436                         # Run full parser on the included text
1437                         $text = $this->stripParse( $text, $newline, $assocArgs );
1438
1439                         # Resume the link cache and register the inclusion as a link
1440                         if ( !is_null( $title ) ) {
1441                                 $wgLinkCache->resume();
1442                                 $wgLinkCache->addLinkObj( $title );
1443                         }
1444                 }
1445
1446                 if ( !$found ) {
1447                         return $matches[0];
1448                 } else {
1449                         return $text;
1450                 }
1451         }
1452
1453         # Triple brace replacement -- used for template arguments
1454         function argSubstitution( $matches )
1455         {
1456                 $newline = $matches[1];
1457                 $arg = trim( $matches[2] );
1458                 $text = $matches[0];
1459                 $inputArgs = end( $this->mArgStack );
1460
1461                 if ( array_key_exists( $arg, $inputArgs ) ) {
1462                         $text = $this->stripParse( $inputArgs[$arg], $newline, array() );
1463                 }
1464
1465                 return $text;
1466         }
1467
1468         # Returns true if the function is allowed to include this entity
1469         function incrementIncludeCount( $dbk )
1470         {
1471                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1472                         $this->mIncludeCount[$dbk] = 0;
1473                 }
1474                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1475                         return true;
1476                 } else {
1477                         return false;
1478                 }
1479         }
1480
1481
1482         # Cleans up HTML, removes dangerous tags and attributes
1483         /* private */ function removeHTMLtags( $text )
1484         {
1485                 global $wgUseTidy, $wgUserHtml;
1486                 $fname = "Parser::removeHTMLtags";
1487                 wfProfileIn( $fname );
1488
1489                 if( $wgUserHtml ) {
1490                         $htmlpairs = array( # Tags that must be closed
1491                                 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1492                                 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1493                                 "strike", "strong", "tt", "var", "div", "center",
1494                                 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1495                                 "ruby", "rt" , "rb" , "rp", "p"
1496                         );
1497                         $htmlsingle = array(
1498                                 "br", "hr", "li", "dt", "dd"
1499                         );
1500                         $htmlnest = array( # Tags that can be nested--??
1501                                 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1502                                 "dl", "font", "big", "small", "sub", "sup"
1503                         );
1504                         $tabletags = array( # Can only appear inside table
1505                                 "td", "th", "tr"
1506                         );
1507                 } else {
1508                         $htmlpairs = array();
1509                         $htmlsingle = array();
1510                         $htmlnest = array();
1511                         $tabletags = array();
1512                 }
1513
1514                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1515                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1516
1517                 $htmlattrs = $this->getHTMLattrs () ;
1518
1519                 # Remove HTML comments
1520                 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
1521
1522                 $bits = explode( "<", $text );
1523                 $text = array_shift( $bits );
1524                 if(!$wgUseTidy) {
1525                         $tagstack = array(); $tablestack = array();
1526                         foreach ( $bits as $x ) {
1527                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1528                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1529                                 $x, $regs );
1530                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1531                                 error_reporting( $prev );
1532
1533                                 $badtag = 0 ;
1534                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1535                                         # Check our stack
1536                                         if ( $slash ) {
1537                                                 # Closing a tag...
1538                                                 if ( ! in_array( $t, $htmlsingle ) &&
1539                                                 ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) {
1540                                                         if(!empty($ot)) array_push( $tagstack, $ot );
1541                                                         $badtag = 1;
1542                                                 } else {
1543                                                         if ( $t == "table" ) {
1544                                                                 $tagstack = array_pop( $tablestack );
1545                                                         }
1546                                                         $newparams = "";
1547                                                 }
1548                                         } else {
1549                                                 # Keep track for later
1550                                                 if ( in_array( $t, $tabletags ) &&
1551                                                 ! in_array( "table", $tagstack ) ) {
1552                                                         $badtag = 1;
1553                                                 } else if ( in_array( $t, $tagstack ) &&
1554                                                 ! in_array ( $t , $htmlnest ) ) {
1555                                                         $badtag = 1 ;
1556                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
1557                                                         if ( $t == "table" ) {
1558                                                                 array_push( $tablestack, $tagstack );
1559                                                                 $tagstack = array();
1560                                                         }
1561                                                         array_push( $tagstack, $t );
1562                                                 }
1563                                                 # Strip non-approved attributes from the tag
1564                                                 $newparams = $this->fixTagAttributes($params);
1565
1566                                         }
1567                                         if ( ! $badtag ) {
1568                                                 $rest = str_replace( ">", "&gt;", $rest );
1569                                                 $text .= "<$slash$t $newparams$brace$rest";
1570                                                 continue;
1571                                         }
1572                                 }
1573                                 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1574                         }
1575                         # Close off any remaining tags
1576                         while ( $t = array_pop( $tagstack ) ) {
1577                                 $text .= "</$t>\n";
1578                                 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1579                         }
1580                 } else {
1581                         # this might be possible using tidy itself
1582                         foreach ( $bits as $x ) {
1583                                 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1584                                 $x, $regs );
1585                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1586                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1587                                         $newparams = $this->fixTagAttributes($params);
1588                                         $rest = str_replace( ">", "&gt;", $rest );
1589                                         $text .= "<$slash$t $newparams$brace$rest";
1590                                 } else {
1591                                         $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1592                                 }
1593                         }
1594                 }
1595                 wfProfileOut( $fname );
1596                 return $text;
1597         }
1598
1599
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if the title can be edited by the user and
 *    the option is enabled (and __NOEDITSECTION__ is not present)
 * 3) Add a table of contents on the top if the option is enabled
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text   is the HTML to process
 * $isMain set to false keeps the table of contents from being inserted
 *
 * Returns the processed HTML.
 *
 */

        /* private */ function formatHeadings( $text, $isMain=true )
        {
                global $wgInputEncoding;

                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
                        $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
                        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }

                # Inhibit editsection links if requested in the page
                $esw =& MagicWord::get( MAG_NOEDITSECTION );
                if( $esw->matchAndRemove( $text ) ) {
                        $showEditLink = 0;
                }
                # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
                        $doShowToc = 0;
                }

                # never add the TOC to the Main Page. This is an entry page that should not
                # be more than 1-2 screens large anyway
                if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
                        $doShowToc = 0;
                }

                # Get all headlines for numbering them and adding funky stuff like [edit]
                # links - this is for later, but we need the number of headlines right now
                # $matches[1] holds the levels, $matches[2] the rest of the opening tags
                # and $matches[3] the headline contents
                $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
                        $doShowToc = 0;
                }

                # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                # override above conditions and always show TOC
                $mw =& MagicWord::get( MAG_FORCETOC );
                if ($mw->matchAndRemove( $text ) ) {
                        $doShowToc = 1;
                }


                # We need this to perform operations on the HTML
                $sk =& $this->mOptions->getSkin();

                # headline counter
                $headlineCount = 0;

                # Ugh .. the TOC should have neat indentation levels which can be
                # passed to the skin functions. These are determined here
                $toclevel = 0;
                $toc = "";
                $full = "";
                $head = array();
                $sublevelCount = array();
                # number of times each canonized headline has been seen so far
                $refers = array();
                $level = 0;
                $prevlevel = 0;
                foreach( $matches[3] as $headline ) {
                        $numbering = "";
                        if( $level ) {
                                $prevlevel = $level;
                        }
                        $level = $matches[1][$headlineCount];
                        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
                                # reset when we enter a new level
                                $sublevelCount[$level] = 0;
                                $toc .= $sk->tocIndent( $level - $prevlevel );
                                $toclevel += $level - $prevlevel;
                        }
                        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
                                # reset when we step back a level
                                $sublevelCount[$level+1]=0;
                                $toc .= $sk->tocUnindent( $prevlevel - $level );
                                $toclevel -= $prevlevel - $level;
                        }
                        # count number of headlines for each level
                        if( !isset( $sublevelCount[$level] ) ) {
                                $sublevelCount[$level] = 0;
                        }
                        $sublevelCount[$level]++;
                        if( $doNumberHeadings || $doShowToc ) {
                                # Join the counters of all levels up to this one with dots,
                                # leaving out levels that have not been used
                                $dot = 0;
                                for( $i = 1; $i <= $level; $i++ ) {
                                        if( !empty( $sublevelCount[$i] ) ) {
                                                if( $dot ) {
                                                        $numbering .= ".";
                                                }
                                                $numbering .= $sublevelCount[$i];
                                                $dot = 1;
                                        }
                                }
                        }

                        # The canonized header is a version of the header text safe to use for links
                        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
                        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
                        # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
                        $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

                        # count how many in assoc. array so we can track dupes in anchors
                        if( !isset( $refers[$canonized_headline] ) ) {
                                $refers[$canonized_headline] = 0;
                        }
                        $refers[$canonized_headline]++;
                        $dupeCount = $refers[$canonized_headline];

                        # Prepend the number to the heading text

                        if( $doNumberHeadings || $doShowToc ) {
                                $tocline = $numbering . " " . $tocline;

                                # Don't number the heading if it is the only one (looks silly)
                                if( $doNumberHeadings && count( $matches[3] ) > 1) {
                                        # the two are different if the line contains a link
                                        $headline=$numbering . " " . $headline;
                                }
                        }

                        # Create the anchor for linking from the TOC to the section
                        # Repeated headlines get "_2", "_3", ... appended
                        $anchor = $canonized_headline;
                        if( $dupeCount > 1 ) {
                                $anchor .= "_" . $dupeCount;
                        }
                        if( $doShowToc ) {
                                $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
                        }

                        # Build the replacement for this headline
                        $head[$headlineCount] = "";
                        if( $showEditLink ) {
                                $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
                        }

                        # Add the edit section span
                        if( $rightClickHack ) {
                                $headline = $sk->editSectionScript($headlineCount+1,$headline);
                        }

                        # give headline the correct <h#> tag
                        $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

                        $headlineCount++;
                }

                if( $doShowToc ) {
                        # Close the indentation levels still open, then wrap it all up
                        $toc .= $sk->tocUnindent( $toclevel );
                        $toc = $sk->tocTable( $toc );
                }

                # split up and insert constructed headlines

                $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
                $i = 0;

                foreach( $blocks as $block ) {
                        # An [edit] link for the top block of text used to be added here
                        # when section editing is enabled.
                        # Disabled because it broke block formatting
                        # For example, a bullet point in the top line
                        $full .= $block;
                        if( $doShowToc && !$i && $isMain) {
                                # The TOC goes right after the text before the first headline
                                # Top anchor now in skin
                                $full = $full.$toc;
                        }

                        if( !empty( $head[$i] ) ) {
                                $full .= $head[$i];
                        }
                        $i++;
                }

                return $full;
        }
1798
1799         /* private */ function magicISBN( $text )
1800         {
1801                 global $wgLang;
1802
1803                 $a = split( "ISBN ", " $text" );
1804                 if ( count ( $a ) < 2 ) return $text;
1805                 $text = substr( array_shift( $a ), 1);
1806                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1807
1808                 foreach ( $a as $x ) {
1809                         $isbn = $blank = "" ;
1810                         while ( " " == $x{0} ) {
1811                                 $blank .= " ";
1812                                 $x = substr( $x, 1 );
1813                         }
1814                         while ( strstr( $valid, $x{0} ) != false ) {
1815                                 $isbn .= $x{0};
1816                                 $x = substr( $x, 1 );
1817                         }
1818                         $num = str_replace( "-", "", $isbn );
1819                         $num = str_replace( " ", "", $num );
1820
1821                         if ( "" == $num ) {
1822                                 $text .= "ISBN $blank$x";
1823                         } else {
1824                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1825                                 $text .= "<a href=\"" .
1826                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1827                                         "\" class=\"internal\">ISBN $isbn</a>";
1828                                 $text .= $x;
1829                         }
1830                 }
1831                 return $text;
1832         }
1833         /* private */ function magicRFC( $text )
1834         {
1835                 global $wgLang;
1836
1837                 $a = split( "RFC ", " $text" );
1838                 if ( count ( $a ) < 2 ) return $text;
1839                 $text = substr( array_shift( $a ), 1);
1840                 $valid = "0123456789";
1841
1842                 foreach ( $a as $x ) {
1843                         $rfc = $blank = "" ;
1844                         while ( " " == $x{0} ) {
1845                                 $blank .= " ";
1846                                 $x = substr( $x, 1 );
1847                         }
1848                         while ( strstr( $valid, $x{0} ) != false ) {
1849                                 $rfc .= $x{0};
1850                                 $x = substr( $x, 1 );
1851                         }
1852
1853                         if ( "" == $rfc ) {
1854                                 $text .= "RFC $blank$x";
1855                         } else {
1856                                 $url = wfmsg( "rfcurl" );
1857                                 $url = str_replace( "$1", $rfc, $url);
1858                                 $sk =& $this->mOptions->getSkin();
1859                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1860                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1861                         }
1862                 }
1863                 return $text;
1864         }
1865
1866         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1867         {
1868                 $this->mOptions = $options;
1869                 $this->mTitle =& $title;
1870                 $this->mOutputType = OT_WIKI;
1871
1872                 if ( $clearState ) {
1873                         $this->clearState();
1874                 }
1875
1876                 $stripState = false;
1877                 $pairs = array(
1878                         "\r\n" => "\n",
1879                         );
1880                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1881                 // now with regexes
1882                 /*
1883                 $pairs = array(
1884                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1885                         "/<br *?>/i" => "<br />",
1886                 );
1887                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1888                 */
1889                 $text = $this->strip( $text, $stripState, false );
1890                 $text = $this->pstPass2( $text, $user );
1891                 $text = $this->unstrip( $text, $stripState );
1892                 return $text;
1893         }
1894
1895         /* private */ function pstPass2( $text, &$user )
1896         {
1897                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1898
1899                 # Variable replacement
1900                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1901                 $text = $this->replaceVariables( $text );
1902
1903                 # Signatures
1904                 #
1905                 $n = $user->getName();
1906                 $k = $user->getOption( "nickname" );
1907                 if ( "" == $k ) { $k = $n; }
1908                 if(isset($wgLocaltimezone)) {
1909                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1910                 }
1911                 /* Note: this is an ugly timezone hack for the European wikis */
1912                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1913                   " (" . date( "T" ) . ")";
1914                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1915
1916                 $text = preg_replace( "/~~~~~/", $d, $text );
1917                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1918                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1919                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1920                   Namespace::getUser() ) . ":$n|$k]]", $text );
1921
1922                 # Context links: [[|name]] and [[name (context)|]]
1923                 #
1924                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1925                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1926                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1927                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1928
1929                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1930                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1931                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1932                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1933                                                                                                                 # [[ns:page (cont)|]]
1934                 $context = "";
1935                 $t = $this->mTitle->getText();
1936                 if ( preg_match( $conpat, $t, $m ) ) {
1937                         $context = $m[2];
1938                 }
1939                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1940                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1941                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1942
1943                 if ( "" == $context ) {
1944                         $text = preg_replace( $p2, "[[\\1]]", $text );
1945                 } else {
1946                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1947                 }
1948
1949                 /*
1950                 $mw =& MagicWord::get( MAG_SUBST );
1951                 $wgCurParser = $this->fork();
1952                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1953                 $this->merge( $wgCurParser );
1954                 */
1955
1956                 # Trim trailing whitespace
1957                 # MAG_END (__END__) tag allows for trailing
1958                 # whitespace to be deliberately included
1959                 $text = rtrim( $text );
1960                 $mw =& MagicWord::get( MAG_END );
1961                 $mw->matchAndRemove( $text );
1962
1963                 return $text;
1964         }
1965
1966         # Set up some variables which are usually set up in parse()
1967         # so that an external function can call some class members with confidence
1968         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1969         {
1970                 $this->mTitle =& $title;
1971                 $this->mOptions = $options;
1972                 $this->mOutputType = $outputType;
1973                 if ( $clearState ) {
1974                         $this->clearState();
1975                 }
1976         }
1977
1978         function transformMsg( $text, $options ) {
1979                 global $wgTitle;
1980                 static $executing = false;
1981
1982                 # Guard against infinite recursion
1983                 if ( $executing ) {
1984                         return $text;
1985                 }
1986                 $executing = true;
1987
1988                 $this->mTitle = $wgTitle;
1989                 $this->mOptions = $options;
1990                 $this->mOutputType = OT_MSG;
1991                 $this->clearState();
1992                 $text = $this->replaceVariables( $text );
1993
1994                 $executing = false;
1995                 return $text;
1996         }
1997 }
1998
# Container for the result of a parse: the output text plus the language
# links, the category links and the "contains old magic" flag that go with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor. Every argument is optional; the cache time starts out
        # as an empty string.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Read accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Write accessors; each one hands the member and the new value to
        # wfSetVar() and returns its result.
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the link data of another ParserOutput into this one.
        #
        # Language links and category links of $other are appended to the
        # respective lists of this object, and the old-magic flag is the OR of
        # both flags. The text and the cache time are left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Categories come from $other's category list; the language
                # links must not leak into the categories.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2032
# Bundle of settings that control a parse. The values are filled in from the
# site configuration globals and from a user's preferences by
# initialiseFromUser(), and can be read and changed through the accessors.
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Read accessors: each returns the member of the same name.

        function getUseTeX()
        {
                return $this->mUseTeX;
        }

        function getUseCategoryMagic()
        {
                return $this->mUseCategoryMagic;
        }

        function getUseDynamicDates()
        {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic()
        {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages()
        {
                return $this->mAllowExternalImages;
        }

        function getSkin()
        {
                return $this->mSkin;
        }

        function getDateFormat()
        {
                return $this->mDateFormat;
        }

        function getEditSection()
        {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick()
        {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings()
        {
                return $this->mNumberHeadings;
        }

        function getShowToc()
        {
                return $this->mShowToc;
        }

        # Write accessors: each passes the member and the new value to
        # wfSetVar() (wfSetRef() for the skin) and returns its result.

        function setUseTeX( $x )
        {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseCategoryMagic( $x )
        {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function setUseDynamicDates( $x )
        {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x )
        {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x )
        {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setSkin( $x )
        {
                return wfSetRef( $this->mSkin, $x );
        }

        function setDateFormat( $x )
        {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x )
        {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x )
        {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x )
        {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x )
        {
                return wfSetVar( $this->mShowToc, $x );
        }

        # Build a new ParserOptions object initialised from the given user.
        /* static */ function newFromUser( &$user )
        {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill in every option.
        #
        # The skin, date format, edit section, right-click editing, heading
        # numbering and TOC settings are read from the user; the remaining
        # ones are copied from the $wg* configuration globals. When
        # $userInput is empty, a fresh User object marked as loaded is used
        # in its place.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Per-user preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );

                # Site-wide configuration
                $this->mAllowExternalImages = $wgAllowExternalImages;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseTeX = $wgUseTeX;
        }

}
2105
2106 # Regex callbacks, used in Parser::replaceVariables
# Plain-function callback that forwards a regex match array to
# braceSubstitution() on the parser held in the global $wgCurParser and
# returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2112
# Plain-function callback that forwards a regex match array to
# argSubstitution() on the parser held in the global $wgCurParser and
# returns its result.
function wfArgSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2118
# Plain-function callback that forwards a regex match array to
# variableSubstitution() on the parser held in the global $wgCurParser and
# returns its result.
function wfVariableSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2124
2125 ?>