includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Processes wiki markup
   8 #
   9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  11 #
  12 # Globals used:
  13 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  14 #
  15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  16 #
  17 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  18 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  19 #               $wgLocaltimezone
  20 #
  21 #      * only within ParserOptions
  22 #
  23 #
  24 #----------------------------------------
  25 #    Variable substitution O(N^2) attack
  26 #-----------------------------------------
  27 # Without countermeasures, it would be possible to attack the parser by saving a page
  28 # filled with a large number of inclusions of large pages. The size of the generated
  29 # page would be proportional to the square of the input size. Hence, we limit the number
  30 # of inclusions of any given page, thus bringing any attack back to O(N).
  31 #
  32
  33 define( "MAX_INCLUDE_REPEAT", 20 );
  34 define( "MAX_INCLUDE_SIZE", 1000000 ); // 1 Million
  35
  36 # Allowed values for $mOutputType
  37 define( "OT_HTML", 1 );
  38 define( "OT_WIKI", 2 );
  39 define( "OT_MSG", 3 );
  40
  41 # string parameter for extractTags which will cause it
  42 # to strip HTML comments in addition to regular
  43 # <XML>-style tags. This should not be anything we
  44 # may want to use in wikisyntax
  45 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  46
  47 # prefix for escaping, used in two functions at least
  48 define( 'UNIQ_PREFIX', 'NaodW29');
  49
  50
  51 # Constants needed for external link processing
  52
  53 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  54 define( 'HTTP_PROTOCOLS', 'http|https' );
  55 # Everything except bracket, space, or control characters
  56 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  57 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  58 # Including space
  59 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  60 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  61 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
  62 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  63 define( 'EXT_IMAGE_REGEX',
  64         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  65         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  66         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  67 );
  68
  69 class Parser
  70 {
  71         # Persistent:
  72         var $mTagHooks;
  73
  74         # Cleared with clearState():
  75         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  76         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  77
  78         # Temporary:
  79         var $mOptions, $mTitle, $mOutputType,
  80             $mTemplates,        // cache of already loaded templates, avoids
  81                                 // multiple SQL queries for the same string
  82             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  83                                 // in this path. Used for loop detection.
  84
  85         function Parser() {
  86                 $this->mTemplates = array();
  87                 $this->mTemplatePath = array();
  88                 $this->mTagHooks = array();
  89                 $this->clearState();
  90         }
  91
  92         function clearState() {
  93                 $this->mOutput = new ParserOutput;
  94                 $this->mAutonumber = 0;
  95                 $this->mLastSection = "";
  96                 $this->mDTopen = false;
  97                 $this->mVariables = false;
  98                 $this->mIncludeCount = array();
  99                 $this->mStripState = array();
 100                 $this->mArgStack = array();
 101                 $this->mInPre = false;
 102         }
 103
 104         # First pass--just handle <nowiki> sections, pass the rest off
 105         # to internalParse() which does all the real work.
 106         #
 107         # Returns a ParserOutput
 108         #
 109         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
 110                 global $wgUseTidy;
 111                 $fname = "Parser::parse";
 112                 wfProfileIn( $fname );
 113
 114                 if ( $clearState ) {
 115                         $this->clearState();
 116                 }
 117
 118                 $this->mOptions = $options;
 119                 $this->mTitle =& $title;
 120                 $this->mOutputType = OT_HTML;
 121
 122                 $stripState = NULL;
 123                 $text = $this->strip( $text, $this->mStripState );
 124                 $text = $this->internalParse( $text, $linestart );
 125                 $text = $this->unstrip( $text, $this->mStripState );
 126                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 127                 if(!$wgUseTidy) {
 128                         $fixtags = array(
 129                                 # french spaces, last one Guillemet-left
 130                                 # only if there is something before the space
 131                                 '/(.) (\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
 132                                 # french spaces, Guillemet-right
 133                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 134                                 '/<hr *>/i' => '<hr />',
 135                                 '/<br *>/i' => '<br />',
 136                                 '/<center *>/i' => '<div class="center">',
 137                                 '/<\\/center *>/i' => '</div>',
 138                                 # Clean up spare ampersands; note that we probably ought to be
 139                                 # more careful about named entities.
 140                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 141                         );
 142                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 143                 } else {
 144                         $fixtags = array(
 145                                 # french spaces, last one Guillemet-left
 146                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 147                                 # french spaces, Guillemet-right
 148                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 149                                 '/([^> ]+(&#x30(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
 150                                 '/<center *>/i' => '<div class="center">',
 151                                 '/<\\/center *>/i' => '</div>'
 152                         );
 153                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 154                 }
 155                 # only once and last
 156                 $text = $this->doBlockLevels( $text, $linestart );
 157                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 158                 if($wgUseTidy) {
 159                         $text = $this->tidy($text);
 160                 }
 161                 $this->mOutput->setText( $text );
 162                 wfProfileOut( $fname );
 163                 return $this->mOutput;
 164         }
 165
 166         /* static */ function getRandomString() {
 167                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 168         }
 169
 170         # Replaces all occurrences of <$tag>content</$tag> in the text
 171         # with a random marker and returns the new text. The output parameter
 172         # $content will be an associative array filled with data of the form
 173         # $unique_marker => content.
 174
 175         # If $content is already set, the additional entries will be appended
 176
 177         # If $tag is set to STRIP_COMMENTS, the function will extract
 178         # <!-- HTML comments -->
 179
 180         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 181                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 182                 if ( !$content ) {
 183                         $content = array( );
 184                 }
 185                 $n = 1;
 186                 $stripped = '';
 187
 188                 while ( '' != $text ) {
 189                         if($tag==STRIP_COMMENTS) {
 190                                 $p = preg_split( '/<!--/i', $text, 2 );
 191                         } else {
 192                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 193                         }
 194                         $stripped .= $p[0];
 195                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 196                                 $text = '';
 197                         } else {
 198                                 if($tag==STRIP_COMMENTS) {
 199                                         $q = preg_split( '/-->/i', $p[1], 2 );
 200                                 } else {
 201                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 202                                 }
 203                                 $marker = $rnd . sprintf('%08X', $n++);
 204                                 $content[$marker] = $q[0];
 205                                 $stripped .= $marker;
 206                                 $text = $q[1];
 207                         }
 208                 }
 209                 return $stripped;
 210         }
 211
 212         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 213         # If $render is set, performs necessary rendering operations on plugins
 214         # Returns the text, and fills an array with data needed in unstrip()
 215         # If the $state is already a valid strip state, it adds to the state
 216
 217         # When $stripcomments is set, HTML comments <!-- like this -->
 218         # will be stripped in addition to other tags. This is important
 219         # for section editing, where these comments cause confusion when
 220         # counting the sections in the wikisource
 221         function strip( $text, &$state, $stripcomments = false ) {
 222                 $render = ($this->mOutputType == OT_HTML);
 223                 $html_content = array();
 224                 $nowiki_content = array();
 225                 $math_content = array();
 226                 $pre_content = array();
 227                 $comment_content = array();
 228                 $ext_content = array();
 229
 230                 # Replace any instances of the placeholders
 231                 $uniq_prefix = UNIQ_PREFIX;
 232                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 233
 234                 # html
 235                 global $wgRawHtml;
 236                 if( $wgRawHtml ) {
 237                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 238                         foreach( $html_content as $marker => $content ) {
 239                                 if ($render ) {
 240                                         # Raw and unchecked for validity.
 241                                         $html_content[$marker] = $content;
 242                                 } else {
 243                                         $html_content[$marker] = "<html>$content</html>";
 244                                 }
 245                         }
 246                 }
 247
 248                 # nowiki
 249                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 250                 foreach( $nowiki_content as $marker => $content ) {
 251                         if( $render ){
 252                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 253                         } else {
 254                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 255                         }
 256                 }
 257
 258                 # math
 259                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 260                 foreach( $math_content as $marker => $content ){
 261                         if( $render ) {
 262                                 if( $this->mOptions->getUseTeX() ) {
 263                                         $math_content[$marker] = renderMath( $content );
 264                                 } else {
 265                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 266                                 }
 267                         } else {
 268                                 $math_content[$marker] = "<math>$content</math>";
 269                         }
 270                 }
 271
 272                 # pre
 273                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 274                 foreach( $pre_content as $marker => $content ){
 275                         if( $render ){
 276                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 277                         } else {
 278                                 $pre_content[$marker] = "<pre>$content</pre>";
 279                         }
 280                 }
 281
 282                 # Comments
 283                 if($stripcomments) {
 284                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 285                         foreach( $comment_content as $marker => $content ){
 286                                 $comment_content[$marker] = "<!--$content-->";
 287                         }
 288                 }
 289
 290                 # Extensions
 291                 foreach ( $this->mTagHooks as $tag => $callback ) {
 292                         $ext_contents[$tag] = array();
 293                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 294                         foreach( $ext_content[$tag] as $marker => $content ) {
 295                                 if ( $render ) {
 296                                         $ext_content[$tag][$marker] = $callback( $content );
 297                                 } else {
 298                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 299                                 }
 300                         }
 301                 }
 302
 303                 # Merge state with the pre-existing state, if there is one
 304                 if ( $state ) {
 305                         $state['html'] = $state['html'] + $html_content;
 306                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 307                         $state['math'] = $state['math'] + $math_content;
 308                         $state['pre'] = $state['pre'] + $pre_content;
 309                         $state['comment'] = $state['comment'] + $comment_content;
 310
 311                         foreach( $ext_content as $tag => $array ) {
 312                                 if ( array_key_exists( $tag, $state ) ) {
 313                                         $state[$tag] = $state[$tag] + $array;
 314                                 }
 315                         }
 316                 } else {
 317                         $state = array(
 318                           'html' => $html_content,
 319                           'nowiki' => $nowiki_content,
 320                           'math' => $math_content,
 321                           'pre' => $pre_content,
 322                           'comment' => $comment_content,
 323                         ) + $ext_content;
 324                 }
 325                 return $text;
 326         }
 327
 328         # always call unstripNoWiki() after this one
 329         function unstrip( $text, &$state ) {
 330                 # Must expand in reverse order, otherwise nested tags will be corrupted
 331                 $contentDict = end( $state );
 332                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 333                         if( key($state) != 'nowiki' && key($state) != 'html') {
 334                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 335                                         $text = str_replace( key( $contentDict ), $content, $text );
 336                                 }
 337                         }
 338                 }
 339
 340                 return $text;
 341         }
 342         # always call this after unstrip() to preserve the order
 343         function unstripNoWiki( $text, &$state ) {
 344                 # Must expand in reverse order, otherwise nested tags will be corrupted
 345                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 346                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 347                 }
 348
 349                 global $wgRawHtml;
 350                 if ($wgRawHtml) {
 351                         for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
 352                                 $text = str_replace( key( $state['html'] ), $content, $text );
 353                         }
 354                 }
 355
 356                 return $text;
 357         }
 358
 359         # Add an item to the strip state
 360         # Returns the unique tag which must be inserted into the stripped text
 361         # The tag will be replaced with the original text in unstrip()
 362
 363         function insertStripItem( $text, &$state ) {
 364                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 365                 if ( !$state ) {
 366                         $state = array(
 367                           'html' => array(),
 368                           'nowiki' => array(),
 369                           'math' => array(),
 370                           'pre' => array()
 371                         );
 372                 }
 373                 $state['item'][$rnd] = $text;
 374                 return $rnd;
 375         }
 376
 377         # categoryMagic
 378         # generate a list of subcategories and pages for a category
 379         # depending on wfMsg("usenewcategorypage") it either calls the new
 380         # or the old code. The new code will not work properly for some
 381         # languages due to sorting issues, so they might want to turn it
 382         # off.
 383         function categoryMagic() {
 384                 $msg = wfMsg('usenewcategorypage');
 385                 if ( '0' == @$msg[0] )
 386                 {
 387                         return $this->oldCategoryMagic();
 388                 } else {
 389                         return $this->newCategoryMagic();
 390                 }
 391         }
 392
 393         # This method generates the list of subcategories and pages for a category
 394         function oldCategoryMagic () {
 395                 global $wgLang ;
 396                 $fname = 'Parser::oldCategoryMagic';
 397
 398                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 399
 400                 if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return "" ; # This ain't a category page
 401
 402                 $r = "<br style=\"clear:both;\"/>\n";
 403
 404
 405                 $sk =& $this->mOptions->getSkin() ;
 406
 407                 $articles = array() ;
 408                 $children = array() ;
 409                 $data = array () ;
 410                 $id = $this->mTitle->getArticleID() ;
 411
 412                 # FIXME: add limits
 413                 $dbr =& wfGetDB( DB_SLAVE );
 414                 $cur = $dbr->tableName( 'cur' );
 415                 $categorylinks = $dbr->tableName( 'categorylinks' );
 416
 417                 $t = $dbr->strencode( $this->mTitle->getDBKey() );
 418                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
 419                         "WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 420                 $res = $dbr->query( $sql, $fname ) ;
 421                 while ( $x = $dbr->fetchObject ( $res ) ) $data[] = $x ;
 422
 423                 # For all pages that link to this category
 424                 foreach ( $data AS $x )
 425                 {
 426                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 427                         if ( $t != "" ) $t .= ":" ;
 428                         $t .= $x->cur_title ;
 429
 430                         if ( $x->cur_namespace == NS_CATEGORY ) {
 431                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 432                         } else {
 433                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 434                         }
 435                 }
 436                 $dbr->freeResult ( $res ) ;
 437
 438                 # Showing subcategories
 439                 if ( count ( $children ) > 0 ) {
 440                         $r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
 441                         $r .= implode ( ', ' , $children ) ;
 442                 }
 443
 444                 # Showing pages in this category
 445                 if ( count ( $articles ) > 0 ) {
 446                         $ti = $this->mTitle->getText() ;
 447                         $h =  wfMsg( 'category_header', $ti );
 448                         $r .= "<h2>{$h}</h2>\n" ;
 449                         $r .= implode ( ', ' , $articles ) ;
 450                 }
 451
 452                 return $r ;
 453         }
 454
 455
 456
 457         function newCategoryMagic () {
 458                 global $wgLang;
 459                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 460
 461                 if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return '' ; # This ain't a category page
 462
 463                 $r = "<br style=\"clear:both;\"/>\n";
 464
 465
 466                 $sk =& $this->mOptions->getSkin() ;
 467
 468                 $articles = array() ;
 469                 $articles_start_char = array();
 470                 $children = array() ;
 471                 $children_start_char = array();
 472                 $data = array () ;
 473                 $id = $this->mTitle->getArticleID() ;
 474
 475                 # FIXME: add limits
 476                 $dbr =& wfGetDB( DB_SLAVE );
 477                 $cur = $dbr->tableName( 'cur' );
 478                 $categorylinks = $dbr->tableName( 'categorylinks' );
 479
 480                 $t = $dbr->strencode( $this->mTitle->getDBKey() );
 481                 $sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
 482                         "$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 483                 $res = $dbr->query ( $sql ) ;
 484                 while ( $x = $dbr->fetchObject ( $res ) )
 485                 {
 486                         $t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
 487                         if ( $t != '' ) $t .= ':' ;
 488                         $t .= $x->cur_title ;
 489
 490                         if ( $x->cur_namespace == NS_CATEGORY ) {
 491                                 $ctitle = str_replace( '_',' ',$x->cur_title );
 492                                 array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory
 493
 494                                 // If there's a link from Category:A to Category:B, the sortkey of the resulting
 495                                 // entry in the categorylinks table is Category:A, not A, which it SHOULD be.
 496                                 // Workaround: If sortkey == "Category:".$title, than use $title for sorting,
 497                                 // else use sortkey...
 498                                 if ( ($ns.":".$ctitle) ==  $x->cl_sortkey ) {
 499                                         array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
 500                                 } else {
 501                                         array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
 502                                 }
 503                         } else {
 504                                 array_push ( $articles , $sk->makeKnownLink ( $t ) ) ; # Page in this category
 505                                 array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
 506                         }
 507                 }
 508                 $dbr->freeResult ( $res ) ;
 509
 510                 $ti = $this->mTitle->getText() ;
 511
 512                 # Don't show subcategories section if there are none.
 513                 if ( count ( $children ) > 0 )
 514                 {
 515                         # Showing subcategories
 516                         $r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
 517
 518                         $numchild = count( $children );
 519                         if($numchild == 1) {
 520                                 $r .= wfMsg( 'subcategorycount1', 1 );
 521                         } else {
 522                                 $r .= wfMsg( 'subcategorycount' , $numchild );
 523                         }
 524                         unset($numchild);
 525
 526                         if ( count ( $children ) > 6 ) {
 527
 528                                 // divide list into three equal chunks
 529                                 $chunk = (int) (count ( $children ) / 3);
 530
 531                                 // get and display header
 532                                 $r .= '<table width="100%"><tr valign="top">';
 533
 534                                 $startChunk = 0;
 535                                 $endChunk = $chunk;
 536
 537                                 // loop through the chunks
 538                                 for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
 539                                         $chunkIndex < 3;
 540                                         $chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1)
 541                                 {
 542
 543                                         $r .= '<td><ul>';
 544                                         // output all subcategories to category
 545                                         for ($index = $startChunk ;
 546                                                 $index < $endChunk && $index < count($children);
 547                                                 $index++ )
 548                                         {
 549                                                 // check for change of starting letter or begging of chunk
 550                                                 if ( ($children_start_char[$index] != $children_start_char[$index - 1])
 551                                                         || ($index == $startChunk) )
 552                                                 {
 553                                                         $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
 554                                                 }
 555
 556                                                 $r .= "<li>{$children[$index]}</li>";
 557                                         }
 558                                         $r .= '</ul></td>';
 559
 560
 561                                 }
 562                                 $r .= '</tr></table>';
 563                         } else {
 564                                 // for short lists of subcategories to category.
 565
 566                                 $r .= "<h3>{$children_start_char[0]}</h3>\n";
 567                                 $r .= '<ul><li>'.$children[0].'</li>';
 568                                 for ($index = 1; $index < count($children); $index++ )
 569                                 {
 570                                         if ($children_start_char[$index] != $children_start_char[$index - 1])
 571                                         {
 572                                                 $r .= "</ul><h3>{$children_start_char[$index]}</h3>\n<ul>";
 573                                         }
 574
 575                                         $r .= "<li>{$children[$index]}</li>";
 576                                 }
 577                                 $r .= '</ul>';
 578                         }
 579                 } # END of if ( count($children) > 0 )
 580
 581                 $r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
 582
 583                 $numart = count( $articles );
 584                 if($numart == 1) {
 585                         $r .= wfMsg( 'categoryarticlecount1', 1 );
 586                 } else {
 587                         $r .= wfMsg( 'categoryarticlecount' , $numart );
 588                 }
 589                 unset($numart);
 590
 591                 # Showing articles in this category
 592                 if ( count ( $articles ) > 6) {
 593                         $ti = $this->mTitle->getText() ;
 594
 595                         // divide list into three equal chunks
 596                         $chunk = (int) (count ( $articles ) / 3);
 597
 598                         // get and display header
 599                         $r .= '<table width="100%"><tr valign="top">';
 600
 601                         // loop through the chunks
 602                         for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
 603                                 $chunkIndex < 3;
 604                                 $chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1)
 605                         {
 606
 607                                 $r .= '<td><ul>';
 608
 609                                 // output all articles in category
 610                                 for ($index = $startChunk ;
 611                                         $index < $endChunk && $index < count($articles);
 612                                         $index++ )
 613                                 {
 614                                         // check for change of starting letter or begging of chunk
 615                                         if ( ($articles_start_char[$index] != $articles_start_char[$index - 1])
 616                                                 || ($index == $startChunk) )
 617                                         {
 618                                                 $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
 619                                         }
 620
 621                                         $r .= "<li>{$articles[$index]}</li>";
 622                                 }
 623                                 $r .= '</ul></td>';
 624
 625
 626                         }
 627                         $r .= '</tr></table>';
 628                 } elseif ( count ( $articles )  > 0) {
 629                         // for short lists of articles in categories.
 630                         $ti = $this->mTitle->getText() ;
 631
 632                         $r .= '<h3>'.$articles_start_char[0]."</h3>\n";
 633                         $r .= '<ul><li>'.$articles[0].'</li>';
 634                         for ($index = 1; $index < count($articles); $index++ )
 635                         {
 636                                 if ($articles_start_char[$index] != $articles_start_char[$index - 1])
 637                                 {
 638                                         $r .= "</ul><h3>{$articles_start_char[$index]}</h3>\n<ul>";
 639                                 }
 640
 641                                 $r .= "<li>{$articles[$index]}</li>";
 642                         }
 643                         $r .= '</ul>';
 644                 }
 645
 646
 647                 return $r ;
 648         }
 649
 650         # Return allowed HTML attributes
 651         function getHTMLattrs () {
 652                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 653                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 654                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 655                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 656                                 /* FONT */ 'type', 'start', 'value', 'compact',
 657                                 /* For various lists, mostly deprecated but safe */
 658                                 'summary', 'width', 'border', 'frame', 'rules',
 659                                 'cellspacing', 'cellpadding', 'valign', 'char',
 660                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 661                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 662                                 'id', 'class', 'name', 'style' /* For CSS */
 663                                 );
 664                 return $htmlattrs ;
 665         }
 666
 667         # Remove non approved attributes and javascript in css
 668         function fixTagAttributes ( $t ) {
 669                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 670                 $htmlattrs = $this->getHTMLattrs() ;
 671
 672                 # Strip non-approved attributes from the tag
 673                 $t = preg_replace(
 674                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
 675                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 676                         $t);
 677
 678                 $t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557
 679
 680                 # Strip javascript "expression" from stylesheets. Brute force approach:
 681                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 682
 683                 if( preg_match(
 684                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 685                         wfMungeToUtf8( $t ) ) )
 686                 {
 687                         $t='';
 688                 }
 689
 690                 return trim ( $t ) ;
 691         }
 692
 693         # interface with html tidy, used if $wgUseTidy = true
 694         function tidy ( $text ) {
 695                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 696                 global $wgInputEncoding, $wgOutputEncoding;
 697                 $fname = 'Parser::tidy';
 698                 wfProfileIn( $fname );
 699
 700                 $cleansource = '';
 701                 switch(strtoupper($wgOutputEncoding)) {
 702                         case 'ISO-8859-1':
 703                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 704                                 break;
 705                         case 'UTF-8':
 706                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 707                                 break;
 708                         default:
 709                                 $wgTidyOpts .= ' -raw';
 710                         }
 711
 712                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 713 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 714 '<head><title>test</title></head><body>'.$text.'</body></html>';
 715                 $descriptorspec = array(
 716                         0 => array('pipe', 'r'),
 717                         1 => array('pipe', 'w'),
 718                         2 => array('file', '/dev/null', 'a')
 719                 );
 720                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 721                 if (is_resource($process)) {
 722                         fwrite($pipes[0], $wrappedtext);
 723                         fclose($pipes[0]);
 724                         while (!feof($pipes[1])) {
 725                                 $cleansource .= fgets($pipes[1], 1024);
 726                         }
 727                         fclose($pipes[1]);
 728                         $return_value = proc_close($process);
 729                 }
 730
 731                 wfProfileOut( $fname );
 732
 733                 if( $cleansource == '' && $text != '') {
 734                         wfDebug( "Tidy error detected!\n" );
 735                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 736                 } else {
 737                         return $cleansource;
 738                 }
 739         }
 740
 741         # parse the wiki syntax used to render tables
 742         function doTableStuff ( $t ) {
 743                 $fname = 'Parser::doTableStuff';
 744                 wfProfileIn( $fname );
 745
 746                 $t = explode ( "\n" , $t ) ;
 747                 $td = array () ; # Is currently a td tag open?
 748                 $ltd = array () ; # Was it TD or TH?
 749                 $tr = array () ; # Is currently a tr tag open?
 750                 $ltr = array () ; # tr attributes
 751                 $indent_level = 0; # indent level of the table
 752                 foreach ( $t AS $k => $x )
 753                 {
 754                         $x = trim ( $x ) ;
 755                         $fc = substr ( $x , 0 , 1 ) ;
 756                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) )
 757                         {
 758                                 $indent_level = strlen( $matches[1] );
 759                                 $t[$k] = "\n" .
 760                                         str_repeat( "<dl><dd>", $indent_level ) .
 761                                         "<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 762                                 array_push ( $td , false ) ;
 763                                 array_push ( $ltd , '' ) ;
 764                                 array_push ( $tr , false ) ;
 765                                 array_push ( $ltr , '' ) ;
 766                         }
 767                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 768                         else if ( '|}' == substr ( $x , 0 , 2 ) )
 769                         {
 770                                 $z = "</table>\n" ;
 771                                 $l = array_pop ( $ltd ) ;
 772                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 773                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 774                                 array_pop ( $ltr ) ;
 775                                 $t[$k] = $z . str_repeat( "</dd></dl>", $indent_level );
 776                         }
 777                         else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 778                         {
 779                                 $x = substr ( $x , 1 ) ;
 780                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 781                                 $z = '' ;
 782                                 $l = array_pop ( $ltd ) ;
 783                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 784                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 785                                 array_pop ( $ltr ) ;
 786                                 $t[$k] = $z ;
 787                                 array_push ( $tr , false ) ;
 788                                 array_push ( $td , false ) ;
 789                                 array_push ( $ltd , '' ) ;
 790                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 791                         }
 792                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
 793                         {
 794                                 if ( '|+' == substr ( $x , 0 , 2 ) )
 795                                 {
 796                                         $fc = '+' ;
 797                                         $x = substr ( $x , 1 ) ;
 798                                 }
 799                                 $after = substr ( $x , 1 ) ;
 800                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 801                                 $after = explode ( '||' , $after ) ;
 802                                 $t[$k] = '' ;
 803                                 foreach ( $after AS $theline )
 804                                 {
 805                                         $z = '' ;
 806                                         if ( $fc != '+' )
 807                                         {
 808                                                 $tra = array_pop ( $ltr ) ;
 809                                                 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 810                                                 array_push ( $tr , true ) ;
 811                                                 array_push ( $ltr , '' ) ;
 812                                         }
 813
 814                                         $l = array_pop ( $ltd ) ;
 815                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 816                                         if ( $fc == '|' ) $l = 'td' ;
 817                                         else if ( $fc == '!' ) $l = 'th' ;
 818                                         else if ( $fc == '+' ) $l = 'caption' ;
 819                                         else $l = '' ;
 820                                         array_push ( $ltd , $l ) ;
 821                                         $y = explode ( '|' , $theline , 2 ) ;
 822                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 823                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 824                                         $t[$k] .= $y ;
 825                                         array_push ( $td , true ) ;
 826                                 }
 827                         }
 828                 }
 829
 830                 # Closing open td, tr && table
 831                 while ( count ( $td ) > 0 )
 832                 {
 833                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 834                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 835                         $t[] = '</table>' ;
 836                 }
 837
 838                 $t = implode ( "\n" , $t ) ;
 839                 #               $t = $this->removeHTMLtags( $t );
 840                 wfProfileOut( $fname );
 841                 return $t ;
 842         }
 843
 844         # Parses the text and adds the result to the strip state
 845         # Returns the strip tag
 846         function stripParse( $text, $newline, $args )
 847         {
 848                 $text = $this->strip( $text, $this->mStripState );
 849                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 850                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 851         }
 852
             # Run the wiki-markup-to-HTML pipeline on $text and return the result.
             #
             # $linestart  not referenced inside this function
             # $args       handed on to replaceVariables()
             # $isMain     handed on to formatHeadings(); when true, the output of
             #             categoryMagic() is appended, guarded by
             #             $this->categoryMagicDone so that it happens only once
             #
             # The steps run in a fixed order; in particular external links are
             # replaced before internal ones.
             function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
                     $fname = 'Parser::internalParse';
                     wfProfileIn( $fname );

                     $text = $this->removeHTMLtags( $text );
                     $text = $this->replaceVariables( $text, $args );

                     # Four or more dashes at the start of the text or of a line become
                     # a horizontal rule; anything after the dashes stays on the line
                     $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

                     $text = $this->doHeadings( $text );
                     if($this->mOptions->getUseDynamicDates()) {
                             global $wgDateFormatter;
                             $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
                     }
                     $text = $this->doAllQuotes( $text );
                     $text = $this->replaceExternalLinks( $text );
                     $text = $this->doMagicLinks( $text );
                     # NOTE(review): replaceInternalLinks() is called twice in a row.
                     # Presumably the second pass picks up links that only become
                     # visible after the first one -- confirm before removing either.
                     $text = $this->replaceInternalLinks ( $text );
                     $text = $this->replaceInternalLinks ( $text );

                     # Material set aside in $this->mStripState is handed back to the
                     # unstrip functions here, i.e. after all of the link handling above
                     $text = $this->unstrip( $text, $this->mStripState );
                     $text = $this->unstripNoWiki( $text, $this->mStripState );

                     $text = $this->doTableStuff( $text );
                     $text = $this->formatHeadings( $text, $isMain );
                     $sk =& $this->mOptions->getSkin();
                     $text = $sk->transformContent( $text );

                     # Append the category output once, and only for the main text
                     if ( $isMain && !isset ( $this->categoryMagicDone ) ) {
                             $text .= $this->categoryMagic () ;
                             $this->categoryMagicDone = true ;
                     }

                     wfProfileOut( $fname );
                     return $text;
             }
 889
 890         /* private */ function &doMagicLinks( &$text ) {
 891                 $text = $this->magicISBN( $text );
 892                 $text = $this->magicGEO( $text );
 893                 $text = $this->magicRFC( $text );
 894                 return $text;
 895         }
 896
 897         # Parse ^^ tokens and return html
 898         /* private */ function doExponent ( $text )
 899         {
 900                 $fname = 'Parser::doExponent';
 901                 wfProfileIn( $fname);
 902                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 903                 wfProfileOut( $fname);
 904                 return $text;
 905         }
 906
 907     # Parse headers and return html
 908         /* private */ function doHeadings( $text ) {
 909                 $fname = 'Parser::doHeadings';
 910                 wfProfileIn( $fname );
 911                 for ( $i = 6; $i >= 1; --$i ) {
 912                         $h = substr( '======', 0, $i );
 913                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 914                           "<h{$i}>\\1</h{$i}>\\2", $text );
 915                 }
 916                 wfProfileOut( $fname );
 917                 return $text;
 918         }
 919
 920         /* private */ function doAllQuotes( $text ) {
 921                 $fname = 'Parser::doAllQuotes';
 922                 wfProfileIn( $fname );
 923                 $outtext = '';
 924                 $lines = explode( "\n", $text );
 925                 foreach ( $lines as $line ) {
 926                         $outtext .= $this->doQuotes ( $line ) . "\n";
 927                 }
 928                 $outtext = substr($outtext, 0,-1);
 929                 wfProfileOut( $fname );
 930                 return $outtext;
 931         }
 932
 933         /* private */ function doQuotes( $text ) {
 934                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 935                 if (count ($arr) == 1)
 936                         return $text;
 937                 else
 938                 {
 939                         # First, do some preliminary work. This may shift some apostrophes from
 940                         # being mark-up to being text. It also counts the number of occurrences
 941                         # of bold and italics mark-ups.
 942                         $i = 0;
 943                         $numbold = 0;
 944                         $numitalics = 0;
 945                         foreach ($arr as $r)
 946                         {
 947                                 if (($i % 2) == 1)
 948                                 {
 949                                         # If there are ever four apostrophes, assume the first is supposed to
 950                                         # be text, and the remaining three constitute mark-up for bold text.
 951                                         if (strlen ($arr[$i]) == 4)
 952                                         {
 953                                                 $arr[$i-1] .= "'";
 954                                                 $arr[$i] = "'''";
 955                                         }
 956                                         # If there are more than 5 apostrophes in a row, assume they're all
 957                                         # text except for the last 5.
 958                                         else if (strlen ($arr[$i]) > 5)
 959                                         {
 960                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 961                                                 $arr[$i] = "'''''";
 962                                         }
 963                                         # Count the number of occurrences of bold and italics mark-ups.
 964                                         # We are not counting sequences of five apostrophes.
 965                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 966                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 967                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 968                                 }
 969                                 $i++;
 970                         }
 971
 972                         # If there is an odd number of both bold and italics, it is likely
 973                         # that one of the bold ones was meant to be an apostrophe followed
 974                         # by italics. Which one we cannot know for certain, but it is more
 975                         # likely to be one that has a single-letter word before it.
 976                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 977                         {
 978                                 $i = 0;
 979                                 $firstsingleletterword = -1;
 980                                 $firstmultiletterword = -1;
 981                                 $firstspace = -1;
 982                                 foreach ($arr as $r)
 983                                 {
 984                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 985                                         {
 986                                                 $x1 = substr ($arr[$i-1], -1);
 987                                                 $x2 = substr ($arr[$i-1], -2, 1);
 988                                                 if ($x1 == " ") {
 989                                                         if ($firstspace == -1) $firstspace = $i;
 990                                                 } else if ($x2 == " ") {
 991                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 992                                                 } else {
 993                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 994                                                 }
 995                                         }
 996                                         $i++;
 997                                 }
 998
 999                                 # If there is a single-letter word, use it!
1000                                 if ($firstsingleletterword > -1)
1001                                 {
1002                                         $arr [ $firstsingleletterword ] = "''";
1003                                         $arr [ $firstsingleletterword-1 ] .= "'";
1004                                 }
1005                                 # If not, but there's a multi-letter word, use that one.
1006                                 else if ($firstmultiletterword > -1)
1007                                 {
1008                                         $arr [ $firstmultiletterword ] = "''";
1009                                         $arr [ $firstmultiletterword-1 ] .= "'";
1010                                 }
1011                                 # ... otherwise use the first one that has neither.
1012                                 # (notice that it is possible for all three to be -1 if, for example,
1013                                 # there is only one pentuple-apostrophe in the line)
1014                                 else if ($firstspace > -1)
1015                                 {
1016                                         $arr [ $firstspace ] = "''";
1017                                         $arr [ $firstspace-1 ] .= "'";
1018                                 }
1019                         }
1020
1021                         # Now let's actually convert our apostrophic mush to HTML!
1022                         $output = '';
1023                         $buffer = '';
1024                         $state = '';
1025                         $i = 0;
1026                         foreach ($arr as $r)
1027                         {
1028                                 if (($i % 2) == 0)
1029                                 {
1030                                         if ($state == 'both')
1031                                                 $buffer .= $r;
1032                                         else
1033                                                 $output .= $r;
1034                                 }
1035                                 else
1036                                 {
1037                                         if (strlen ($r) == 2)
1038                                         {
1039                                                 if ($state == 'em')
1040                                                 { $output .= "</em>"; $state = ''; }
1041                                                 else if ($state == 'strongem')
1042                                                 { $output .= "</em>"; $state = 'strong'; }
1043                                                 else if ($state == 'emstrong')
1044                                                 { $output .= "</strong></em><strong>"; $state = 'strong'; }
1045                                                 else if ($state == 'both')
1046                                                 { $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
1047                                                 else # $state can be 'strong' or ''
1048                                                 { $output .= "<em>"; $state .= 'em'; }
1049                                         }
1050                                         else if (strlen ($r) == 3)
1051                                         {
1052                                                 if ($state == 'strong')
1053                                                 { $output .= "</strong>"; $state = ''; }
1054                                                 else if ($state == 'strongem')
1055                                                 { $output .= "</em></strong><em>"; $state = 'em'; }
1056                                                 else if ($state == 'emstrong')
1057                                                 { $output .= "</strong>"; $state = 'em'; }
1058                                                 else if ($state == 'both')
1059                                                 { $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
1060                                                 else # $state can be 'em' or ''
1061                                                 { $output .= "<strong>"; $state .= 'strong'; }
1062                                         }
1063                                         else if (strlen ($r) == 5)
1064                                         {
1065                                                 if ($state == 'strong')
1066                                                 { $output .= "</strong><em>"; $state = 'em'; }
1067                                                 else if ($state == 'em')
1068                                                 { $output .= "</em><strong>"; $state = 'strong'; }
1069                                                 else if ($state == 'strongem')
1070                                                 { $output .= "</em></strong>"; $state = ''; }
1071                                                 else if ($state == 'emstrong')
1072                                                 { $output .= "</strong></em>"; $state = ''; }
1073                                                 else if ($state == 'both')
1074                                                 { $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
1075                                                 else # ($state == '')
1076                                                 { $buffer = ''; $state = 'both'; }
1077                                         }
1078                                 }
1079                                 $i++;
1080                         }
1081                         # Now close all remaining tags.  Notice that the order is important.
1082                         if ($state == 'strong' || $state == 'emstrong')
1083                                 $output .= "</strong>";
1084                         if ($state == 'em' || $state == 'strongem' || $state == 'emstrong')
1085                                 $output .= "</em>";
1086                         if ($state == 'strongem')
1087                                 $output .= "</strong>";
1088                         if ($state == 'both')
1089                                 $output .= "<strong><em>{$buffer}</em></strong>";
1090                         return $output;
1091                 }
1092         }
1093
1094         # Note: we have to do external links before the internal ones,
1095         # and otherwise take great care in the order of things here, so
1096         # that we don't end up interpreting some URLs twice.
1097
1098         /* private */ function replaceExternalLinks( $text ) {
1099                 $fname = 'Parser::replaceExternalLinks';
1100                 wfProfileIn( $fname );
1101
1102                 $sk =& $this->mOptions->getSkin();
1103                 $linktrail = wfMsg('linktrail');
1104                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1105
1106                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
1107
1108                 $i = 0;
1109                 while ( $i<count( $bits ) ) {
1110                         $url = $bits[$i++];
1111                         $protocol = $bits[$i++];
1112                         $text = $bits[$i++];
1113                         $trail = $bits[$i++];
1114
1115                         # If the link text is an image URL, replace it with an <img> tag
1116                         # This happened by accident in the original parser, but some people used it extensively
1117                         $img = $this->maybeMakeImageLink( $text );
1118                         if ( $img !== false ) {
1119                                 $text = $img;
1120                         }
1121
1122                         $dtrail = '';
1123
1124                         # No link text, e.g. [http://domain.tld/some.link]
1125                         if ( $text == '' ) {
1126                                 # Autonumber if allowed
1127                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
1128                                         $text = "[" . ++$this->mAutonumber . "]";
1129                                 } else {
1130                                         # Otherwise just use the URL
1131                                         $text = htmlspecialchars( $url );
1132                                 }
1133                         } else {
1134                                 # Have link text, e.g. [http://domain.tld/some.link text]s
1135                                 # Check for trail
1136                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
1137                                         $dtrail = $m2[1];
1138                                         $trail = $m2[2];
1139                                 }
1140                         }
1141
1142                         $encUrl = htmlspecialchars( $url );
1143                         # Bit in parentheses showing the URL for the printable version
1144                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $url ) ) {
1145                                 $paren = '';
1146                         } else {
1147                                 # Expand the URL for printable version
1148                                 if ( ! $sk->suppressUrlExpansion() ) {
1149                                     $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
1150                             } else {
1151                                         $paren = '';
1152                             }
1153                         }
1154
1155                         # Process the trail (i.e. everything after this link up until start of the next link),
1156                         # replacing any non-bracketed links
1157                         $trail = $this->replaceFreeExternalLinks( $trail );
1158
1159                         $la = $sk->getExternalLinkAttributes( $url, $text );
1160
1161                         # Use the encoded URL
1162                         # This means that users can paste URLs directly into the text
1163                         # Funny characters like &ouml; aren't valid in URLs anyway
1164                         # This was changed in August 2004
1165                         $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
1166                 }
1167
1168                 wfProfileOut( $fname );
1169                 return $s;
1170         }
1171
1172         # Replace anything that looks like a URL with a link
1173         function replaceFreeExternalLinks( $text ) {
1174                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1175                 $s = array_shift( $bits );
1176                 $i = 0;
1177
1178                 $sk =& $this->mOptions->getSkin();
1179
1180                 while ( $i < count( $bits ) ){
1181                         $protocol = $bits[$i++];
1182                         $remainder = $bits[$i++];
1183
1184                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
1185                                 # Found some characters after the protocol that look promising
1186                                 $url = $protocol . $m[1];
1187                                 $trail = $m[2];
1188
1189                                 # Move trailing punctuation to $trail
1190                                 $sep = ',;\.:!?';
1191                                 # If there is no left bracket, then consider right brackets fair game too
1192                                 if ( strpos( $url, '(' ) === false ) {
1193                                         $sep .= ')';
1194                                 }
1195
1196                                 $numSepChars = strspn( strrev( $url ), $sep );
1197                                 if ( $numSepChars ) {
1198                                         $trail = substr( $url, -$numSepChars ) . $trail;
1199                                         $url = substr( $url, 0, -$numSepChars );
1200                                 }
1201
1202                                 # Replace &amp; from obsolete syntax with &
1203                                 $url = str_replace( '&amp;', '&', $url );
1204
1205                                 # Is this an external image?
1206                                 $text = $this->maybeMakeImageLink( $url );
1207                                 if ( $text === false ) {
1208                                         # Not an image, make a link
1209                                         $text = $sk->makeExternalLink( $url, $url );
1210                                 }
1211                                 $s .= $text . $trail;
1212                         } else {
1213                                 $s .= $protocol . $remainder;
1214                         }
1215                 }
1216                 return $s;
1217         }
1218
1219         function maybeMakeImageLink( $url ) {
1220                 $sk =& $this->mOptions->getSkin();
1221                 $text = false;
1222                 if ( $this->mOptions->getAllowExternalImages() ) {
1223                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
1224                                 # Image found
1225                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
1226                         }
1227                 }
1228                 return $text;
1229         }
1230
1231         /* private */ function replaceInternalLinks( $s ) {
1232                 global $wgLang, $wgLinkCache;
1233                 global $wgNamespacesWithSubpages, $wgLanguageCode;
1234                 static $fname = 'Parser::replaceInternalLinks' ;
1235                 wfProfileIn( $fname );
1236
1237                 wfProfileIn( $fname.'-setup' );
1238                 static $tc = FALSE;
1239                 # the % is needed to support urlencoded titles as well
1240                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
1241                 $sk =& $this->mOptions->getSkin();
1242
1243                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
1244
1245                 $a = explode( '[[', ' ' . $s );
1246                 $s = array_shift( $a );
1247                 $s = substr( $s, 1 );
1248
1249                 # Match a link having the form [[namespace:link|alternate]]trail
1250                 static $e1 = FALSE;
1251                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1252                 # Match the end of a line for a word that's not followed by whitespace,
1253                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1254                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1255
1256                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
1257                 # Special and Media are pseudo-namespaces; no pages actually exist in them
1258
1259                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
1260
1261                 if ( $useLinkPrefixExtension ) {
1262                         if ( preg_match( $e2, $s, $m ) ) {
1263                                 $first_prefix = $m[2];
1264                                 $s = $m[1];
1265                         } else {
1266                                 $first_prefix = false;
1267                         }
1268                 } else {
1269                         $prefix = '';
1270                 }
1271
1272                 wfProfileOut( $fname.'-setup' );
1273
1274                 foreach ( $a as $line ) {
1275                         wfProfileIn( $fname.'-prefixhandling' );
1276                         if ( $useLinkPrefixExtension ) {
1277                                 if ( preg_match( $e2, $s, $m ) ) {
1278                                         $prefix = $m[2];
1279                                         $s = $m[1];
1280                                 } else {
1281                                         $prefix='';
1282                                 }
1283                                 # first link
1284                                 if($first_prefix) {
1285                                         $prefix = $first_prefix;
1286                                         $first_prefix = false;
1287                                 }
1288                         }
1289                         wfProfileOut( $fname.'-prefixhandling' );
1290
1291                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1292                                 $text = $m[2];
1293                                 # fix up urlencoded title texts
1294                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1295                                 $trail = $m[3];
1296                         } else { # Invalid form; output directly
1297                                 $s .= $prefix . '[[' . $line ;
1298                                 continue;
1299                         }
1300
1301                         /* Valid link forms:
1302                         Foobar -- normal
1303                         :Foobar -- override special treatment of prefix (images, language links)
1304                         /Foobar -- convert to CurrentPage/Foobar
1305                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1306                         */
1307                         $c = substr($m[1],0,1);
1308                         $noforce = ($c != ':');
1309                         if( $c == '/' ) { # subpage
1310                                 if(substr($m[1],-1,1)=='/') {                 # / at end means we don't want the slash to be shown
1311                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
1312                                         $noslash=$m[1];
1313                                 } else {
1314                                         $noslash=substr($m[1],1);
1315                                 }
1316                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
1317                                         $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1318                                         if( '' == $text ) {
1319                                                 $text= $m[1];
1320                                         } # this might be changed for ugliness reasons
1321                                 } else {
1322                                         $link = $noslash; # no subpage allowed, use standard link
1323                                 }
1324                         } elseif( $noforce ) { # no subpage
1325                                 $link = $m[1];
1326                         } else {
1327                                 $link = substr( $m[1], 1 );
1328                         }
1329                         $wasblank = ( '' == $text );
1330                         if( $wasblank )
1331                         $text = $link;
1332
1333                         $nt = Title::newFromText( $link );
1334                         if( !$nt ) {
1335                                 $s .= $prefix . '[[' . $line;
1336                                 continue;
1337                         }
1338                         $ns = $nt->getNamespace();
1339                         $iw = $nt->getInterWiki();
1340                         if( $noforce ) {
1341                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1342                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1343                                         $tmp = $prefix . $trail ;
1344                                         $s .= (trim($tmp) == '')? '': $tmp;
1345                                         continue;
1346                                 }
1347                                 if ( $ns == NS_IMAGE ) {
1348                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1349                                         $wgLinkCache->addImageLinkObj( $nt );
1350                                         continue;
1351                                 }
1352                                 if ( $ns == NS_CATEGORY ) {
1353                                         $t = $nt->getText() ;
1354                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).":".$t ) ;
1355
1356                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1357                                         $pPLC=$sk->postParseLinkColour();
1358                                         $sk->postParseLinkColour( false );
1359                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1360                                         $sk->postParseLinkColour( $pPLC );
1361                                         $wgLinkCache->resume();
1362
1363                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
1364                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1365                                         $this->mOutput->mCategoryLinks[] = $t ;
1366                                         $s .= $prefix . $trail ;
1367                                         continue;
1368                                 }
1369                         }
1370                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
1371                         ( strpos( $link, '#' ) == FALSE ) ) {
1372                                 # Self-links are handled specially; generally de-link and change to bold.
1373                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1374                                 continue;
1375                         }
1376
1377                         if( $ns == NS_MEDIA ) {
1378                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1379                                 $wgLinkCache->addImageLinkObj( $nt );
1380                                 continue;
1381                         } elseif( $ns == NS_SPECIAL ) {
1382                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1383                                 continue;
1384                         }
1385                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1386                 }
1387                 wfProfileOut( $fname );
1388                 return $s;
1389         }
1390
1391         # Some functions here used by doBlockLevels()
1392         #
1393         /* private */ function closeParagraph() {
1394                 $result = '';
1395                 if ( '' != $this->mLastSection ) {
1396                         $result = '</' . $this->mLastSection  . ">\n";
1397                 }
1398                 $this->mInPre = false;
1399                 $this->mLastSection = '';
1400                 return $result;
1401         }
1402         # getCommon() returns the length of the longest common substring
1403         # of both arguments, starting at the beginning of both.
1404         #
1405         /* private */ function getCommon( $st1, $st2 ) {
1406                 $fl = strlen( $st1 );
1407                 $shorter = strlen( $st2 );
1408                 if ( $fl < $shorter ) { $shorter = $fl; }
1409
1410                 for ( $i = 0; $i < $shorter; ++$i ) {
1411                         if ( $st1{$i} != $st2{$i} ) { break; }
1412                 }
1413                 return $i;
1414         }
1415         # These next three functions open, continue, and close the list
1416         # element appropriate to the prefix character passed into them.
1417         #
1418         /* private */ function openList( $char )
1419     {
1420                 $result = $this->closeParagraph();
1421
1422                 if ( '*' == $char ) { $result .= '<ul><li>'; }
1423                 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1424                 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1425                 else if ( ';' == $char ) {
1426                         $result .= '<dl><dt>';
1427                         $this->mDTopen = true;
1428                 }
1429                 else { $result = '<!-- ERR 1 -->'; }
1430
1431                 return $result;
1432         }
1433
1434         /* private */ function nextItem( $char ) {
1435                 if ( '*' == $char || '#' == $char ) { return '</li><li>'; }
1436                 else if ( ':' == $char || ';' == $char ) {
1437                         $close = "</dd>";
1438                         if ( $this->mDTopen ) { $close = '</dt>'; }
1439                         if ( ';' == $char ) {
1440                                 $this->mDTopen = true;
1441                                 return $close . '<dt>';
1442                         } else {
1443                                 $this->mDTopen = false;
1444                                 return $close . '<dd>';
1445                         }
1446                 }
1447                 return '<!-- ERR 2 -->';
1448         }
1449
1450         /* private */function closeList( $char ) {
1451                 if ( '*' == $char ) { $text = '</li></ul>'; }
1452                 else if ( '#' == $char ) { $text = '</li></ol>'; }
1453                 else if ( ':' == $char ) {
1454                         if ( $this->mDTopen ) {
1455                                 $this->mDTopen = false;
1456                                 $text = '</dt></dl>';
1457                         } else {
1458                                 $text = '</dd></dl>';
1459                         }
1460                 }
1461                 else {  return '<!-- ERR 3 -->'; }
1462                 return $text."\n";
1463         }
1464
1465         /* private */ function doBlockLevels( $text, $linestart ) {
1466                 $fname = 'Parser::doBlockLevels';
1467                 wfProfileIn( $fname );
1468
1469                 # Parsing through the text line by line.  The main thing
1470                 # happening here is handling of block-level elements p, pre,
1471                 # and making lists from lines starting with * # : etc.
1472                 #
1473                 $textLines = explode( "\n", $text );
1474
1475                 $lastPrefix = $output = $lastLine = '';
1476                 $this->mDTopen = $inBlockElem = false;
1477                 $prefixLength = 0;
1478                 $paragraphStack = false;
1479
1480                 if ( !$linestart ) {
1481                         $output .= array_shift( $textLines );
1482                 }
1483                 foreach ( $textLines as $oLine ) {
1484                         $lastPrefixLength = strlen( $lastPrefix );
1485                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1486                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1487                         if ( !$this->mInPre ) {
1488                                 # Multiple prefixes may abut each other for nested lists.
1489                                 $prefixLength = strspn( $oLine, '*#:;' );
1490                                 $pref = substr( $oLine, 0, $prefixLength );
1491
1492                                 # eh?
1493                                 $pref2 = str_replace( ';', ':', $pref );
1494                                 $t = substr( $oLine, $prefixLength );
1495                                 $this->mInPre = !empty($preOpenMatch);
1496                         } else {
1497                                 # Don't interpret any other prefixes in preformatted text
1498                                 $prefixLength = 0;
1499                                 $pref = $pref2 = '';
1500                                 $t = $oLine;
1501                         }
1502
1503                         # List generation
1504                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1505                                 # Same as the last item, so no need to deal with nesting or opening stuff
1506                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1507                                 $paragraphStack = false;
1508
1509                                 if ( ";" == substr( $pref, -1 ) ) {
1510                                         # The one nasty exception: definition lists work like this:
1511                                         # ; title : definition text
1512                                         # So we check for : in the remainder text to split up the
1513                                         # title and definition, without b0rking links.
1514                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1515                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1516                                                 $term = $match[1];
1517                                                 $output .= $term . $this->nextItem( ':' );
1518                                                 $t = $match[2];
1519                                         }
1520                                 }
1521                         } elseif( $prefixLength || $lastPrefixLength ) {
1522                                 # Either open or close a level...
1523                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1524                                 $paragraphStack = false;
1525
1526                                 while( $commonPrefixLength < $lastPrefixLength ) {
1527                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1528                                         --$lastPrefixLength;
1529                                 }
1530                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1531                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1532                                 }
1533                                 while ( $prefixLength > $commonPrefixLength ) {
1534                                         $char = substr( $pref, $commonPrefixLength, 1 );
1535                                         $output .= $this->openList( $char );
1536
1537                                         if ( ';' == $char ) {
1538                                                 # FIXME: This is dupe of code above
1539                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1540                                                         $term = $match[1];
1541                                                         $output .= $term . $this->nextItem( ":" );
1542                                                         $t = $match[2];
1543                                                 }
1544                                         }
1545                                         ++$commonPrefixLength;
1546                                 }
1547                                 $lastPrefix = $pref2;
1548                         }
1549                         if( 0 == $prefixLength ) {
1550                                 # No prefix (not in list)--go to paragraph mode
1551                                 $uniq_prefix = UNIQ_PREFIX;
1552                                 // XXX: use a stack for nestable elements like span, table and div
1553                                 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1554                                 $closematch = preg_match(
1555                                         '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1556                                         '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1557                                 if ( $openmatch or $closematch ) {
1558                                         $paragraphStack = false;
1559                                         $output .= $this->closeParagraph();
1560                                         if($preOpenMatch and !$preCloseMatch) {
1561                                                 $this->mInPre = true;
1562                                         }
1563                                         if ( $closematch  ) {
1564                                                 $inBlockElem = false;
1565                                         } else {
1566                                                 $inBlockElem = true;
1567                                         }
1568                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1569                                         if ( " " == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
1570                                                 // pre
1571                                                 if ($this->mLastSection != 'pre') {
1572                                                         $paragraphStack = false;
1573                                                         $output .= $this->closeParagraph().'<pre>';
1574                                                         $this->mLastSection = 'pre';
1575                                                 }
1576                                         } else {
1577                                                 // paragraph
1578                                                 if ( '' == trim($t) ) {
1579                                                         if ( $paragraphStack ) {
1580                                                                 $output .= $paragraphStack.'<br />';
1581                                                                 $paragraphStack = false;
1582                                                                 $this->mLastSection = 'p';
1583                                                         } else {
1584                                                                 if ($this->mLastSection != 'p' ) {
1585                                                                         $output .= $this->closeParagraph();
1586                                                                         $this->mLastSection = '';
1587                                                                         $paragraphStack = '<p>';
1588                                                                 } else {
1589                                                                         $paragraphStack = '</p><p>';
1590                                                                 }
1591                                                         }
1592                                                 } else {
1593                                                         if ( $paragraphStack ) {
1594                                                                 $output .= $paragraphStack;
1595                                                                 $paragraphStack = false;
1596                                                                 $this->mLastSection = 'p';
1597                                                         } else if ($this->mLastSection != 'p') {
1598                                                                 $output .= $this->closeParagraph().'<p>';
1599                                                                 $this->mLastSection = 'p';
1600                                                         }
1601                                                 }
1602                                         }
1603                                 }
1604                         }
1605                         if ($paragraphStack === false) {
1606                                 $output .= $t."\n";
1607                         }
1608                 }
1609                 while ( $prefixLength ) {
1610                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1611                         --$prefixLength;
1612                 }
1613                 if ( '' != $this->mLastSection ) {
1614                         $output .= '</' . $this->mLastSection . '>';
1615                         $this->mLastSection = '';
1616                 }
1617
1618                 wfProfileOut( $fname );
1619                 return $output;
1620         }
1621
1622         # Return value of a magic variable (like PAGENAME)
1623         function getVariableValue( $index ) {
1624                 global $wgLang, $wgSitename, $wgServer;
1625
1626                 switch ( $index ) {
1627                         case MAG_CURRENTMONTH:
1628                                 return $wgLang->formatNum( date( 'm' ) );
1629                         case MAG_CURRENTMONTHNAME:
1630                                 return $wgLang->getMonthName( date('n') );
1631                         case MAG_CURRENTMONTHNAMEGEN:
1632                                 return $wgLang->getMonthNameGen( date('n') );
1633                         case MAG_CURRENTDAY:
1634                                 return $wgLang->formatNum( date('j') );
1635                         case MAG_PAGENAME:
1636                                 return $this->mTitle->getText();
1637                         case MAG_PAGENAMEE:
1638                                 return $this->mTitle->getPartialURL();
1639                         case MAG_NAMESPACE:
1640                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1641                                 return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
1642                         case MAG_CURRENTDAYNAME:
1643                                 return $wgLang->getWeekdayName( date('w')+1 );
1644                         case MAG_CURRENTYEAR:
1645                                 return $wgLang->formatNum( date( 'Y' ) );
1646                         case MAG_CURRENTTIME:
1647                                 return $wgLang->time( wfTimestampNow(), false );
1648                         case MAG_NUMBEROFARTICLES:
1649                                 return $wgLang->formatNum( wfNumberOfArticles() );
1650                         case MAG_SITENAME:
1651                                 return $wgSitename;
1652                         case MAG_SERVER:
1653                                 return $wgServer;
1654                         default:
1655                                 return NULL;
1656                 }
1657         }
1658
1659         # initialise the magic variables (like CURRENTMONTHNAME)
1660         function initialiseVariables() {
1661                 global $wgVariableIDs;
1662                 $this->mVariables = array();
1663                 foreach ( $wgVariableIDs as $id ) {
1664                         $mw =& MagicWord::get( $id );
1665                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1666                 }
1667         }
1668
1669         /* private */ function replaceVariables( $text, $args = array() ) {
1670                 global $wgLang, $wgScript, $wgArticlePath;
1671
1672                 # Prevent too big inclusions
1673                 if(strlen($text)> MAX_INCLUDE_SIZE)
1674                    return $text;
1675
1676                 $fname = 'Parser::replaceVariables';
1677                 wfProfileIn( $fname );
1678
1679                 $bail = false;
1680                 $titleChars = Title::legalChars();
1681                 $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
1682
1683                 # This function is called recursively. To keep track of arguments we need a stack:
1684                 array_push( $this->mArgStack, $args );
1685
1686                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1687                 $GLOBALS['wgCurParser'] =& $this;
1688
1689
1690                 if ( $this->mOutputType == OT_HTML ) {
1691                         # Variable substitution
1692                         $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
1693
1694                         # Argument substitution
1695                         $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1696                 }
1697                 # Template substitution
1698                 $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
1699                 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1700
1701                 array_pop( $this->mArgStack );
1702
1703                 wfProfileOut( $fname );
1704                 return $text;
1705         }
1706
1707         function variableSubstitution( $matches ) {
1708                 if ( !$this->mVariables ) {
1709                         $this->initialiseVariables();
1710                 }
1711                 if ( array_key_exists( $matches[1], $this->mVariables ) ) {
1712                         $text = $this->mVariables[$matches[1]];
1713                         $this->mOutput->mContainsOldMagic = true;
1714                 } else {
1715                         $text = $matches[0];
1716                 }
1717                 return $text;
1718         }
1719
1720         # Split template arguments
1721         function getTemplateArgs( $argsString ) {
1722                 if ( $argsString === '' ) {
1723                         return array();
1724                 }
1725
1726                 $args = explode( '|', substr( $argsString, 1 ) );
1727
1728                 # If any of the arguments contains a '[[' but no ']]', it needs to be
1729                 # merged with the next arg because the '|' character between belongs
1730                 # to the link syntax and not the template parameter syntax.
1731                 $argc = count($args);
1732                 $i = 0;
1733                 for ( $i = 0; $i < $argc-1; $i++ ) {
1734                         if ( substr_count ( $args[$i], "[[" ) != substr_count ( $args[$i], "]]" ) ) {
1735                                 $args[$i] .= "|".$args[$i+1];
1736                                 array_splice($args, $i+1, 1);
1737                                 $i--;
1738                                 $argc--;
1739                         }
1740                 }
1741
1742                 return $args;
1743         }
1744
# Expand one double-brace construct.
#
# $matches comes from the brace regex:
#   $matches[0]  the complete matched text
#   $matches[1]  an optional newline character before the braces
#   $matches[2]  the bit before the first '|'
#   $matches[3]  the argument text; if it matched anything it starts with '|'
#
# The handlers below are tried in order (triple braces, SUBST, MSG/MSGNW/INT,
# NS, LOCALURL/LOCALURLE, internal variables, template cache, database); the
# first one that sets $found wins. Returns the replacement text, or
# $matches[0] unchanged when nothing applied.
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgLang;
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	$newline = $matches[1];
	$part1 = $matches[2];

	# $args is a list of arguments, starting from index 0, not including $part1
	$args = $this->getTemplateArgs( $matches[3] );
	$argc = count( $args );

	# {{{}}}
	# Anything containing a triple brace is passed through untouched
	if ( strpos( $matches[0], '{{{' ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse = true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, if any, is handed to the URL method
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Template table test

	# Did we encounter this template already? If yes, it is in the cache
	# and we need to check for loops.
	# Only consulted when no earlier handler produced a result; otherwise a
	# cached template would overwrite text that was deliberately left alone
	# (e.g. a plain {{Foo}} during the pre-save pass).
	if ( !$found && isset( $this->mTemplates[$part1] ) ) {
		# Infinite loop test
		if ( isset( $this->mTemplatePath[$part1] ) ) {
			$noparse = true;
		}
		# set $text to cached message.
		$text = $this->mTemplates[$part1];
		$found = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = '[[' . $title->getPrefixedText() . ']]';
				$found = true;
			}

			# Template cache array insertion
			# Nothing is cached when no text was produced
			if ( $found ) {
				$this->mTemplates[$part1] = $text;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
		# Clean up argument array: "name=value" arguments are stored under
		# their name, all others are numbered from 1
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Add a new element to the template recursion path. Remember whether
		# this call added it so that only our own entry is removed afterwards.
		$addedToPath = !isset( $this->mTemplatePath[$part1] );
		$this->mTemplatePath[$part1] = 1;

		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Leave the entries of enclosing expansions in place. Wiping the whole
		# path here would disable the loop test for everything that follows in
		# the enclosing template.
		if ( $addedToPath ) {
			unset( $this->mTemplatePath[$part1] );
		}

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1958
1959         # Triple brace replacement -- used for template arguments
# Triple brace replacement -- used for template arguments.
#
# $matches[1] is handed to stripParse() as the newline argument, $matches[2]
# is the argument name (trimmed before lookup). If the innermost entry of the
# argument stack has a value under that name, the parsed value is returned;
# otherwise the matched text $matches[0] is returned unchanged.
function argSubstitution( $matches ) {
	$newline = $matches[1];
	$arg = trim( $matches[2] );

	# end() yields false when the argument stack is empty; in that case there
	# is nothing to look the name up in
	$inputArgs = end( $this->mArgStack );

	if ( is_array( $inputArgs ) && array_key_exists( $arg, $inputArgs ) ) {
		return $this->stripParse( $inputArgs[$arg], $newline, array() );
	}

	return $matches[0];
}
1972
1973         # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity identified by $dbk and reports
# whether it is still within the MAX_INCLUDE_REPEAT limit (true = allowed).
function incrementIncludeCount( $dbk ) {
	$seen = array_key_exists( $dbk, $this->mIncludeCount )
		? $this->mIncludeCount[$dbk]
		: 0;

	$seen++;
	$this->mIncludeCount[$dbk] = $seen;

	return $seen <= MAX_INCLUDE_REPEAT;
}
1984
1985
1986         # Cleans up HTML, removes dangerous tags and attributes
# Cleans up HTML, removes dangerous tags and attributes.
#
# HTML comments are stripped first. The text is then split on '<'; every
# fragment that is a whitelisted tag is re-emitted with its attributes passed
# through fixTagAttributes(), everything else is emitted with '<' and '>'
# escaped. With $wgUserHtml off the whitelist is empty, so every tag is
# escaped. Without $wgUseTidy, nesting is tracked on a stack and tags still
# open at the end are closed; with $wgUseTidy no nesting checks are done here.
/* private */ function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table cells and rows are treated like single tags: they are never put
	# on the tag stack
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function; the call is kept
	# in case getHTMLattrs() has side effects -- confirm and remove if not.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '', $text );

	# Groups: 1 = optional '/', 2 = tag name, 3 = attributes, 4 = closing
	# brace, 5 = text following the tag
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

	$bits = explode( '<', $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			if ( !preg_match( $tagPattern, $x, $regs ) ) {
				# Not a tag at all: escape it
				$text .= '&lt;' . str_replace( '>', '&gt;', $x);
				continue;
			}
			list( , $slash, $t, $params, $brace, $rest ) = $regs;
			$t = strtolower( $t );

			$badtag = 0 ;
			if ( in_array( $t, $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( ! in_array( $t, $htmlsingle ) &&
					( $ot = array_pop( $tagstack ) ) != $t ) {
						# Does not match the innermost open tag: put that
						# tag back (if there was one) and reject this one
						if ( !is_null( $ot ) ) {
							array_push( $tagstack, $ot );
						}
						$badtag = 1;
					} else {
						if ( $t == 'table' ) {
							# Back to the stack that was active before the table
							$tagstack = array_pop( $tablestack );
						}
						$newparams = '';
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					! in_array( 'table', $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == 'table' ) {
							# A table starts with a fresh stack of its own
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);

				}
				if ( ! $badtag ) {
					$rest = str_replace( '>', '&gt;', $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= '&lt;' . str_replace( '>', '&gt;', $x);
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			if ( !preg_match( $tagPattern, $x, $regs ) ) {
				# Not a tag at all: escape it
				$text .= '&lt;' . str_replace( '>', '&gt;', $x);
				continue;
			}
			list( , $slash, $t, $params, $brace, $rest ) = $regs;
			$t = strtolower( $t );

			if ( in_array( $t, $htmlelements ) ) {
				$newparams = $this->fixTagAttributes($params);
				$rest = str_replace( '>', '&gt;', $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= '&lt;' . str_replace( '>', '&gt;', $x);
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2101
2102
2103 /*
2104  *
2105  * This function accomplishes several tasks:
2106  * 1) Auto-number headings if that option is enabled
2107  * 2) Add an [edit] link to sections for logged in users who have enabled the option
2108  * 3) Add a Table of contents on the top for users who have enabled the option
2109  * 4) Auto-anchor headings
2110  *
2111  * It loops through all headlines, collects the necessary data, then splits up the
2112  * string and re-inserts the newly formatted headlines.
2113  *
2114  */
2115
# Rewrites the headings found in the HTML $text.
#
# Depending on the parser options this numbers the headings, adds section
# edit links, builds the table of contents and gives every heading an anchor.
# It loops through all headlines, collects the necessary data, then splits up
# the string and re-inserts the newly formatted headlines.
#
# $isMain: the table of contents is only inserted automatically (after the
# first block of text) when this is true; a __TOC__ placement is honoured
# regardless of it.
#
# Returns the rewritten text.
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# (the pattern is split so that the source never contains a PHP close tag)
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();          # replacement HTML per headline index
	$sublevelCount = array(); # number of headlines seen so far, per level
	$refers = array();        # how often each anchor name has been used
	$refcount = array();      # occurrence number of the anchor, per headline index
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Join the counters of all levels up to this one with dots,
			# skipping levels that have no headline yet
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		# (the second call works on the result of the first one)
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount] = $refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		$head[$headlineCount] = '';
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text used to be emitted here
		# when section editing is enabled. It is disabled because it broke
		# block formatting, for example a bullet point in the top line.
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full .= $toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
2318
2319         # Return an HTML link for the "ISBN 123456" text
# Return $text with every "ISBN 123456" occurrence turned into an HTML link
# to Special:Booksources.
#
# After the literal "ISBN " any further blanks are skipped; the number is the
# following run of digits, upper-case letters and hyphens. Hyphens are removed
# for the link target but kept in the link text. If no number follows, the
# original text is left as it was.
/* private */ function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# The leading blank guarantees a first chunk even if the text starts
	# with "ISBN "; it is cut off again below
	$a = explode( 'ISBN ', " $text" );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = substr( array_shift( $a ), 1);
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
	$titleObj = null; # created on first use, the same for all links

	foreach ( $a as $x ) {
		# Leading blanks, then the run of characters allowed in an ISBN
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$isbnLen = strspn( $x, $valid, $blankLen );
		$isbn = (string)substr( $x, $blankLen, $isbnLen );
		$x = (string)substr( $x, $blankLen + $isbnLen );

		$num = str_replace( '-', '', $isbn );

		if ( $num === '' ) {
			# Nothing usable (at most hyphens): put everything back,
			# including the hyphens that were consumed above
			$text .= "ISBN $blank$isbn$x";
		} else {
			if ( is_null( $titleObj ) ) {
				$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			}
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2359
2360         # Return an HTML link for the "GEO ..." text
2361         /* private */ function magicGEO( $text ) {
2362                 global $wgLang, $wgUseGeoMode;
2363                 if ( !isset ( $wgUseGeoMode ) || !$wgUseGeoMode ) return $text ;
2364                 $fname = 'Parser::magicGEO';
2365                 wfProfileIn( $fname );
2366
2367                 # These next five lines are only for the ~35000 U.S. Census Rambot pages...
2368                 $directions = array ( "N" => "North" , "S" => "South" , "E" => "East" , "W" => "West" ) ;
2369                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2370                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2371                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2372                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2373
2374                 $a = split( 'GEO ', " $text" );
2375                 if ( count ( $a ) < 2 ) {
2376                         wfProfileOut( $fname );
2377                         return $text;
2378                 }
2379                 $text = substr( array_shift( $a ), 1);
2380                 $valid = '0123456789.+-:';
2381
2382                 foreach ( $a as $x ) {
2383                         $geo = $blank = '' ;
2384                         while ( ' ' == $x{0} ) {
2385                                 $blank .= ' ';
2386                                 $x = substr( $x, 1 );
2387                         }
2388                         while ( strstr( $valid, $x{0} ) != false ) {
2389                                 $geo .= $x{0};
2390                                 $x = substr( $x, 1 );
2391                         }
2392                         $num = str_replace( '+', '', $geo );
2393                         $num = str_replace( ' ', '', $num );
2394
2395                         if ( '' == $num || count ( explode ( ":" , $num , 3 ) ) < 2 ) {
2396                                 $text .= "GEO $blank$x";
2397                         } else {
2398                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
2399                                 $text .= '<a href="' .
2400                                 $titleObj->escapeLocalUrl( "coordinates={$num}" ) .
2401                                         "\" class=\"internal\">GEO $geo</a>";
2402                                 $text .= $x;
2403                         }
2404                 }
2405                 wfProfileOut( $fname );
2406                 return $text;
2407         }
2408
2409         # Return an HTML link for the "RFC 1234" text
2410         /* private */ function magicRFC( $text ) {
2411                 global $wgLang;
2412
2413                 $a = split( 'RFC ', ' '.$text );
2414                 if ( count ( $a ) < 2 ) return $text;
2415                 $text = substr( array_shift( $a ), 1);
2416                 $valid = '0123456789';
2417
2418                 foreach ( $a as $x ) {
2419                         $rfc = $blank = '' ;
2420                         while ( ' ' == $x{0} ) {
2421                                 $blank .= ' ';
2422                                 $x = substr( $x, 1 );
2423                         }
2424                         while ( strstr( $valid, $x{0} ) != false ) {
2425                                 $rfc .= $x{0};
2426                                 $x = substr( $x, 1 );
2427                         }
2428
2429                         if ( '' == $rfc ) {
2430                                 $text .= "RFC $blank$x";
2431                         } else {
2432                                 $url = wfmsg( 'rfcurl' );
2433                                 $url = str_replace( '$1', $rfc, $url);
2434                                 $sk =& $this->mOptions->getSkin();
2435                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
2436                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
2437                         }
2438                 }
2439                 return $text;
2440         }
2441
2442         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2443                 $this->mOptions = $options;
2444                 $this->mTitle =& $title;
2445                 $this->mOutputType = OT_WIKI;
2446
2447                 if ( $clearState ) {
2448                         $this->clearState();
2449                 }
2450
2451                 $stripState = false;
2452                 $pairs = array(
2453                         "\r\n" => "\n",
2454                         );
2455                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2456                 // now with regexes
2457                 /*
2458                 $pairs = array(
2459                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2460                         "/<br *?>/i" => "<br />",
2461                 );
2462                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2463                 */
2464                 $text = $this->strip( $text, $stripState, false );
2465                 $text = $this->pstPass2( $text, $user );
2466                 $text = $this->unstrip( $text, $stripState );
2467                 $text = $this->unstripNoWiki( $text, $stripState );
2468                 return $text;
2469         }
2470
2471         /* private */ function pstPass2( $text, &$user ) {
2472                 global $wgLang, $wgLocaltimezone, $wgCurParser;
2473
2474                 # Variable replacement
2475                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
2476                 $text = $this->replaceVariables( $text );
2477
2478                 # Signatures
2479                 #
2480                 $n = $user->getName();
2481                 $k = $user->getOption( 'nickname' );
2482                 if ( '' == $k ) { $k = $n; }
2483                 if(isset($wgLocaltimezone)) {
2484                         $oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
2485                 }
2486                 /* Note: this is an ugly timezone hack for the European wikis */
2487                 $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
2488                   ' (' . date( 'T' ) . ')';
2489                 if(isset($wgLocaltimezone)) putenv('TZ='.$oldtzs);
2490
2491                 $text = preg_replace( '/~~~~~/', $d, $text );
2492                 $text = preg_replace( '/~~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
2493                 $text = preg_replace( '/~~~/', '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]", $text );
2494
2495                 # Context links: [[|name]] and [[name (context)|]]
2496                 #
2497                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
2498                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
2499                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
2500                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
2501
2502                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
2503                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
2504                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
2505                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
2506                                                                                                                 # [[ns:page (cont)|]]
2507                 $context = "";
2508                 $t = $this->mTitle->getText();
2509                 if ( preg_match( $conpat, $t, $m ) ) {
2510                         $context = $m[2];
2511                 }
2512                 $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
2513                 $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
2514                 $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );
2515
2516                 if ( '' == $context ) {
2517                         $text = preg_replace( $p2, '[[\\1]]', $text );
2518                 } else {
2519                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
2520                 }
2521
2522                 /*
2523                 $mw =& MagicWord::get( MAG_SUBST );
2524                 $wgCurParser = $this->fork();
2525                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
2526                 $this->merge( $wgCurParser );
2527                 */
2528
2529                 # Trim trailing whitespace
2530                 # MAG_END (__END__) tag allows for trailing
2531                 # whitespace to be deliberately included
2532                 $text = rtrim( $text );
2533                 $mw =& MagicWord::get( MAG_END );
2534                 $mw->matchAndRemove( $text );
2535
2536                 return $text;
2537         }
2538
2539         # Set up some variables which are usually set up in parse()
2540         # so that an external function can call some class members with confidence
2541         function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
2542                 $this->mTitle =& $title;
2543                 $this->mOptions = $options;
2544                 $this->mOutputType = $outputType;
2545                 if ( $clearState ) {
2546                         $this->clearState();
2547                 }
2548         }
2549
2550         function transformMsg( $text, $options ) {
2551                 global $wgTitle;
2552                 static $executing = false;
2553
2554                 # Guard against infinite recursion
2555                 if ( $executing ) {
2556                         return $text;
2557                 }
2558                 $executing = true;
2559
2560                 $this->mTitle = $wgTitle;
2561                 $this->mOptions = $options;
2562                 $this->mOutputType = OT_MSG;
2563                 $this->clearState();
2564                 $text = $this->replaceVariables( $text );
2565
2566                 $executing = false;
2567                 return $text;
2568         }
2569
2570         # Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2571         # Callback will be called with the text within
2572         # Transform and return the text within
2573         function setHook( $tag, $callback ) {
2574                 $oldVal = @$this->mTagHooks[$tag];
2575                 $this->mTagHooks[$tag] = $callback;
2576                 return $oldVal;
2577         }
2578 }
2579
# Container for the result of a parser run: the produced text together with
# the language links, the category links, the "contains old magic" flag and
# a cache time stamp.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # Constructor; every argument is optional. mCacheTime starts out as "".
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one hands back whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold another ParserOutput into this one: its language links and its
        # category links are appended to ours, and the old-magic flag becomes
        # true if it is true on either side. Text and cache time are not touched.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Take the category links from $other (this used to append our own
                # language links to the category list instead).
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2613
# Settings for one parser run. The values are either copied from site-wide
# globals or read from a user object; see initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # Accessor pairs, one per option. Every setter except setSkin() hands
        # back whatever wfSetVar() returns.

        function getUseTeX() {
                return $this->mUseTeX;
        }
        function setUseTeX( $x ) {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function getUseCategoryMagic() {
                return $this->mUseCategoryMagic;
        }
        function setUseCategoryMagic( $x ) {
                return wfSetVar( $this->mUseCategoryMagic, $x );
        }

        function getUseDynamicDates() {
                return $this->mUseDynamicDates;
        }
        function setUseDynamicDates( $x ) {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function getInterwikiMagic() {
                return $this->mInterwikiMagic;
        }
        function setInterwikiMagic( $x ) {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function getAllowExternalImages() {
                return $this->mAllowExternalImages;
        }
        function setAllowExternalImages( $x ) {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function getDateFormat() {
                return $this->mDateFormat;
        }
        function setDateFormat( $x ) {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function getEditSection() {
                return $this->mEditSection;
        }
        function setEditSection( $x ) {
                return wfSetVar( $this->mEditSection, $x );
        }

        function getEditSectionOnRightClick() {
                return $this->mEditSectionOnRightClick;
        }
        function setEditSectionOnRightClick( $x ) {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function getNumberHeadings() {
                return $this->mNumberHeadings;
        }
        function setNumberHeadings( $x ) {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function getShowToc() {
                return $this->mShowToc;
        }
        function setShowToc( $x ) {
                return wfSetVar( $this->mShowToc, $x );
        }

        # The skin is stored by reference and the setter returns nothing
        function getSkin() {
                return $this->mSkin;
        }
        function setSkin( &$x ) {
                $this->mSkin =& $x;
        }

        # Build a new ParserOptions object initialised from $user
        /* static */ function newFromUser( &$user ) {
                $options = new ParserOptions;
                $options->initialiseFromUser( $user );
                return $options;
        }

        # Fill in every option. The first five are copied from the site-wide
        # globals; the skin and the remaining options are read from the user.
        # When $userInput is empty, a fresh User object marked as loaded is used
        # in its place.
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user settings
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );
        }

}
2685
# Regex callbacks, used in Parser::replaceVariables
#
# Hands the match array to braceSubstitution() of the parser object held in
# the global $wgCurParser and returns that method's result.
function wfBraceSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->braceSubstitution( $matches );
}
2692
# Hands the match array to argSubstitution() of the parser object held in
# the global $wgCurParser and returns that method's result.
function wfArgSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->argSubstitution( $matches );
}
2698
# Hands the match array to variableSubstitution() of the parser object held
# in the global $wgCurParser and returns that method's result.
function wfVariableSubstitution( $matches )
{
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->variableSubstitution( $matches );
}
2704
# Return the global $wgNumberOfArticles, calling wfLoadSiteStats() first so
# that it has the chance to fill the value in.
function wfNumberOfArticles()
{
        wfLoadSiteStats();

        return $GLOBALS['wgNumberOfArticles'];
}
2712
# Fill the globals $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles from
# the site_stats row with ss_row_id 1. Nothing is queried unless
# $wgNumberOfArticles compares equal to -1 (presumably the "not loaded yet"
# marker - verify where the global is initialised), and the globals are left
# alone when the lookup returns false.
/* private */ function wfLoadSiteStats()
{
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        if ( $wgNumberOfArticles != -1 ) {
                return;
        }

        $db =& wfGetDB( DB_SLAVE );
        $fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
        $conds = array( 'ss_row_id' => 1 );
        $row = $db->getArray( 'site_stats', $fields, $conds, 'wfLoadSiteStats' );

        if ( $row !== false ) {
                $wgTotalViews = $row->ss_total_views;
                $wgTotalEdits = $row->ss_total_edits;
                $wgNumberOfArticles = $row->ss_good_articles;
        }
}
2733
# Replace double quotes and angle brackets with their HTML entities.
# Ampersands are left alone, so entities already present in the input are
# not escaped a second time.
function wfEscapeHTMLTagsOnly( $in ) {
        $entities = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;',
        );
        return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2740
2741
2742 ?>