includes/Parser.php

   1 <?php
   2
   3 /**
   4  * File for Parser and related classes
   5  *
   6  * @package MediaWiki
   7  * @version $Id$
   8  */
   9
  10 /**
  11  * Variable substitution O(N^2) attack
  12  *
  13  * Without countermeasures, it would be possible to attack the parser by saving
  14  * a page filled with a large number of inclusions of large pages. The size of
  15  * the generated page would be proportional to the square of the input size.
  16  * Hence, we limit the number of inclusions of any given page, thus bringing any
  17  * attack back to O(N).
  18  */
  19 define( 'MAX_INCLUDE_REPEAT', 100 );
  20 define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million
  21
  22 # Allowed values for $mOutputType
  23 define( 'OT_HTML', 1 );
  24 define( 'OT_WIKI', 2 );
  25 define( 'OT_MSG' , 3 );
  26
  27 # string parameter for extractTags which will cause it
  28 # to strip HTML comments in addition to regular
  29 # <XML>-style tags. This should not be anything we
  30 # may want to use in wikisyntax
  31 define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  32
  33 # prefix for escaping, used in two functions at least
  34 define( 'UNIQ_PREFIX', 'NaodW29');
  35
  36 # Constants needed for external link processing
  37 define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  38 define( 'HTTP_PROTOCOLS', 'http|https' );
  39 # Everything except bracket, space, or control characters
  40 define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  41 define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  42 # Including space
  43 define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
  44 define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  45 define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
  46 define( 'EXT_LINK_BRACKETED',  '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  47 define( 'EXT_IMAGE_REGEX',
  48         '/^('.HTTP_PROTOCOLS.':)'.  # Protocol
  49         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
  50         '('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
  51 );
  52
  53 /**
  54  * PHP Parser
  55  *
  56  * Processes wiki markup
  57  *
  58  * <pre>
  59  * There are three main entry points into the Parser class:
  60  * parse()
  61  *   produces HTML output
  62  * preSaveTransform().
  63  *   produces altered wiki markup.
  64  * transformMsg()
  65  *   performs brace substitution on MediaWiki messages
  66  *
  67  * Globals used:
  68  *    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  69  *
  70  * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  71  *
  72  * settings:
  73  *  $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  74  *  $wgNamespacesWithSubpages, $wgAllowExternalImages*,
  75  *  $wgLocaltimezone
  76  *
  77  *  * only within ParserOptions
  78  * </pre>
  79  *
  80  * @package MediaWiki
  81  */
  82 class Parser
  83 {
  84         /**#@+
  85          * @access private
  86          */
  87         # Persistent:
  88         var $mTagHooks;
  89
  90         # Cleared with clearState():
  91         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  92         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  93
  94         # Temporary:
  95         var $mOptions, $mTitle, $mOutputType,
  96             $mTemplates,        // cache of already loaded templates, avoids
  97                                 // multiple SQL queries for the same string
  98             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  99                                 // in this path. Used for loop detection.
 100
 101         /**#@-*/
 102
 103         /**
 104          * Constructor
 105          *
 106          * @access public
 107          */
 108         function Parser() {
 109                 $this->mTemplates = array();
 110                 $this->mTemplatePath = array();
 111                 $this->mTagHooks = array();
 112                 $this->clearState();
 113         }
 114
 115         /**
 116          * Clear Parser state
 117          *
 118          * @access private
 119          */
 120         function clearState() {
 121                 $this->mOutput = new ParserOutput;
 122                 $this->mAutonumber = 0;
 123                 $this->mLastSection = "";
 124                 $this->mDTopen = false;
 125                 $this->mVariables = false;
 126                 $this->mIncludeCount = array();
 127                 $this->mStripState = array();
 128                 $this->mArgStack = array();
 129                 $this->mInPre = false;
 130         }
 131
 132         /**
 133          * First pass--just handle <nowiki> sections, pass the rest off
 134          * to internalParse() which does all the real work.
 135          *
 136          * @access private
 137          * @return ParserOutput a ParserOutput
 138          */
 139         function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
 140                 global $wgUseTidy;
 141                 $fname = 'Parser::parse';
 142                 wfProfileIn( $fname );
 143
 144                 if ( $clearState ) {
 145                         $this->clearState();
 146                 }
 147
 148                 $this->mOptions = $options;
 149                 $this->mTitle =& $title;
 150                 $this->mOutputType = OT_HTML;
 151
 152                 $stripState = NULL;
 153                 $text = $this->strip( $text, $this->mStripState );
 154                 $text = $this->internalParse( $text, $linestart );
 155                 $text = $this->unstrip( $text, $this->mStripState );
 156                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 157                 if(!$wgUseTidy) {
 158                         $fixtags = array(
 159                                 # french spaces, last one Guillemet-left
 160                                 # only if there is something before the space
 161                                 '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
 162                                 # french spaces, Guillemet-right
 163                                 "/(\\302\\253) /i"=>"\\1&nbsp;",
 164                                 '/<hr *>/i' => '<hr />',
 165                                 '/<br *>/i' => '<br />',
 166                                 '/<center *>/i' => '<div class="center">',
 167                                 '/<\\/center *>/i' => '</div>',
 168                                 # Clean up spare ampersands; note that we probably ought to be
 169                                 # more careful about named entities.
 170                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 171                         );
 172                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 173                 } else {
 174                         $fixtags = array(
 175                                 # french spaces, last one Guillemet-left
 176                                 '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
 177                                 # french spaces, Guillemet-right
 178                                 '/(\\302\\253) /i' => '\\1&nbsp;',
 179                                 '/<center *>/i' => '<div class="center">',
 180                                 '/<\\/center *>/i' => '</div>'
 181                         );
 182                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 183                 }
 184                 # only once and last
 185                 $text = $this->doBlockLevels( $text, $linestart );
 186                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 187                 $this->mOutput->setText( $text );
 188                 wfProfileOut( $fname );
 189                 return $this->mOutput;
 190         }
 191
 192         /**
 193          * Get a random string
 194          *
 195          * @access private
 196          * @static
 197          */
 198         function getRandomString() {
 199                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 200         }
 201
 202         /**
 203          * Replaces all occurrences of <$tag>content</$tag> in the text
 204          * with a random marker and returns the new text. the output parameter
 205          * $content will be an associative array filled with data on the form
 206          * $unique_marker => content.
 207          *
 208          * If $content is already set, the additional entries will be appended
 209          * If $tag is set to STRIP_COMMENTS, the function will extract
 210          * <!-- HTML comments -->
 211          *
 212          * @access private
 213          * @static
 214          */
 215         function extractTags($tag, $text, &$content, $uniq_prefix = ''){
 216                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 217                 if ( !$content ) {
 218                         $content = array( );
 219                 }
 220                 $n = 1;
 221                 $stripped = '';
 222
 223                 while ( '' != $text ) {
 224                         if($tag==STRIP_COMMENTS) {
 225                                 $p = preg_split( '/<!--/i', $text, 2 );
 226                         } else {
 227                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 228                         }
 229                         $stripped .= $p[0];
 230                         if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
 231                                 $text = '';
 232                         } else {
 233                                 if($tag==STRIP_COMMENTS) {
 234                                         $q = preg_split( '/-->/i', $p[1], 2 );
 235                                 } else {
 236                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 237                                 }
 238                                 $marker = $rnd . sprintf('%08X', $n++);
 239                                 $content[$marker] = $q[0];
 240                                 $stripped .= $marker;
 241                                 $text = $q[1];
 242                         }
 243                 }
 244                 return $stripped;
 245         }
 246
 247         /**
 248          * Strips and renders nowiki, pre, math, hiero
 249          * If $render is set, performs necessary rendering operations on plugins
 250          * Returns the text, and fills an array with data needed in unstrip()
 251          * If the $state is already a valid strip state, it adds to the state
 252          *
 253          * @param bool $stripcomments when set, HTML comments <!-- like this -->
 254          *  will be stripped in addition to other tags. This is important
 255          *  for section editing, where these comments cause confusion when
 256          *  counting the sections in the wikisource
 257          *
 258          * @access private
 259          */
 260         function strip( $text, &$state, $stripcomments = false ) {
 261                 $render = ($this->mOutputType == OT_HTML);
 262                 $html_content = array();
 263                 $nowiki_content = array();
 264                 $math_content = array();
 265                 $pre_content = array();
 266                 $comment_content = array();
 267                 $ext_content = array();
 268
 269                 # Replace any instances of the placeholders
 270                 $uniq_prefix = UNIQ_PREFIX;
 271                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 272
 273                 # html
 274                 global $wgRawHtml, $wgWhitelistEdit;
 275                 if( $wgRawHtml && $wgWhitelistEdit ) {
 276                         $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
 277                         foreach( $html_content as $marker => $content ) {
 278                                 if ($render ) {
 279                                         # Raw and unchecked for validity.
 280                                         $html_content[$marker] = $content;
 281                                 } else {
 282                                         $html_content[$marker] = '<html>'.$content.'</html>';
 283                                 }
 284                         }
 285                 }
 286
 287                 # nowiki
 288                 $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
 289                 foreach( $nowiki_content as $marker => $content ) {
 290                         if( $render ){
 291                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 292                         } else {
 293                                 $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
 294                         }
 295                 }
 296
 297                 # math
 298                 $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
 299                 foreach( $math_content as $marker => $content ){
 300                         if( $render ) {
 301                                 if( $this->mOptions->getUseTeX() ) {
 302                                         $math_content[$marker] = renderMath( $content );
 303                                 } else {
 304                                         $math_content[$marker] = '&lt;math&gt;'.$content.'&lt;math&gt;';
 305                                 }
 306                         } else {
 307                                 $math_content[$marker] = '<math>'.$content.'</math>';
 308                         }
 309                 }
 310
 311                 # pre
 312                 $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
 313                 foreach( $pre_content as $marker => $content ){
 314                         if( $render ){
 315                                 $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
 316                         } else {
 317                                 $pre_content[$marker] = '<pre>'.$content.'</pre>';
 318                         }
 319                 }
 320
 321                 # Comments
 322                 if($stripcomments) {
 323                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 324                         foreach( $comment_content as $marker => $content ){
 325                                 $comment_content[$marker] = '<!--'.$content.'-->';
 326                         }
 327                 }
 328
 329                 # Extensions
 330                 foreach ( $this->mTagHooks as $tag => $callback ) {
 331                         $ext_contents[$tag] = array();
 332                         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
 333                         foreach( $ext_content[$tag] as $marker => $content ) {
 334                                 if ( $render ) {
 335                                         $ext_content[$tag][$marker] = $callback( $content );
 336                                 } else {
 337                                         $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
 338                                 }
 339                         }
 340                 }
 341
 342                 # Merge state with the pre-existing state, if there is one
 343                 if ( $state ) {
 344                         $state['html'] = $state['html'] + $html_content;
 345                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 346                         $state['math'] = $state['math'] + $math_content;
 347                         $state['pre'] = $state['pre'] + $pre_content;
 348                         $state['comment'] = $state['comment'] + $comment_content;
 349
 350                         foreach( $ext_content as $tag => $array ) {
 351                                 if ( array_key_exists( $tag, $state ) ) {
 352                                         $state[$tag] = $state[$tag] + $array;
 353                                 }
 354                         }
 355                 } else {
 356                         $state = array(
 357                           'html' => $html_content,
 358                           'nowiki' => $nowiki_content,
 359                           'math' => $math_content,
 360                           'pre' => $pre_content,
 361                           'comment' => $comment_content,
 362                         ) + $ext_content;
 363                 }
 364                 return $text;
 365         }
 366
 367         /**
 368          * restores pre, math, and heiro removed by strip()
 369          *
 370          * always call unstripNoWiki() after this one
 371          * @access private
 372          */
 373         function unstrip( $text, &$state ) {
 374                 # Must expand in reverse order, otherwise nested tags will be corrupted
 375                 $contentDict = end( $state );
 376                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 377                         if( key($state) != 'nowiki' && key($state) != 'html') {
 378                                 for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 379                                         $text = str_replace( key( $contentDict ), $content, $text );
 380                                 }
 381                         }
 382                 }
 383
 384                 return $text;
 385         }
 386
 387         /**
 388          * always call this after unstrip() to preserve the order
 389          *
 390          * @access private
 391          */
 392         function unstripNoWiki( $text, &$state ) {
 393                 # Must expand in reverse order, otherwise nested tags will be corrupted
 394                 for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
 395                         $text = str_replace( key( $state['nowiki'] ), $content, $text );
 396                 }
 397
 398                 global $wgRawHtml;
 399                 if ($wgRawHtml) {
 400                         for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
 401                                 $text = str_replace( key( $state['html'] ), $content, $text );
 402                         }
 403                 }
 404
 405                 return $text;
 406         }
 407
 408         /**
 409          * Add an item to the strip state
 410          * Returns the unique tag which must be inserted into the stripped text
 411          * The tag will be replaced with the original text in unstrip()
 412          *
 413          * @access private
 414          */
 415         function insertStripItem( $text, &$state ) {
 416                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 417                 if ( !$state ) {
 418                         $state = array(
 419                           'html' => array(),
 420                           'nowiki' => array(),
 421                           'math' => array(),
 422                           'pre' => array()
 423                         );
 424                 }
 425                 $state['item'][$rnd] = $text;
 426                 return $rnd;
 427         }
 428
 429         /**
 430          * Return allowed HTML attributes
 431          *
 432          * @access private
 433          */
 434         function getHTMLattrs () {
 435                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 436                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 437                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 438                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 439                                 /* FONT */ 'type', 'start', 'value', 'compact',
 440                                 /* For various lists, mostly deprecated but safe */
 441                                 'summary', 'width', 'border', 'frame', 'rules',
 442                                 'cellspacing', 'cellpadding', 'valign', 'char',
 443                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 444                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 445                                 'id', 'class', 'name', 'style' /* For CSS */
 446                                 );
 447                 return $htmlattrs ;
 448         }
 449
 450         /**
 451          * Remove non approved attributes and javascript in css
 452          *
 453          * @access private
 454          */
 455         function fixTagAttributes ( $t ) {
 456                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 457                 $htmlattrs = $this->getHTMLattrs() ;
 458
 459                 # Strip non-approved attributes from the tag
 460                 $t = preg_replace(
 461                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
 462                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 463                         $t);
 464
 465                 $t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557
 466
 467                 # Strip javascript "expression" from stylesheets. Brute force approach:
 468                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 469
 470                 if( preg_match(
 471                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 472                         wfMungeToUtf8( $t ) ) )
 473                 {
 474                         $t='';
 475                 }
 476
 477                 return trim ( $t ) ;
 478         }
 479
 480         /**
 481          * interface with html tidy, used if $wgUseTidy = true
 482          *
 483          * @access public
 484          * @static
 485          */
 486         function tidy ( $text ) {
 487                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 488                 global $wgInputEncoding, $wgOutputEncoding;
 489                 $fname = 'Parser::tidy';
 490                 wfProfileIn( $fname );
 491
 492                 $cleansource = '';
 493                 $opts = '';
 494                 switch(strtoupper($wgOutputEncoding)) {
 495                         case 'ISO-8859-1':
 496                                 $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 497                                 break;
 498                         case 'UTF-8':
 499                                 $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 500                                 break;
 501                         default:
 502                                 $opts .= ' -raw';
 503                         }
 504
 505                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 506 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 507 '<head><title>test</title></head><body>'.$text.'</body></html>';
 508                 $descriptorspec = array(
 509                         0 => array('pipe', 'r'),
 510                         1 => array('pipe', 'w'),
 511                         2 => array('file', '/dev/null', 'a')
 512                 );
 513                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
 514                 if (is_resource($process)) {
 515                         fwrite($pipes[0], $wrappedtext);
 516                         fclose($pipes[0]);
 517                         while (!feof($pipes[1])) {
 518                                 $cleansource .= fgets($pipes[1], 1024);
 519                         }
 520                         fclose($pipes[1]);
 521                         $return_value = proc_close($process);
 522                 }
 523
 524                 wfProfileOut( $fname );
 525
 526                 if( $cleansource == '' && $text != '') {
 527                         wfDebug( "Tidy error detected!\n" );
 528                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 529                 } else {
 530                         return $cleansource;
 531                 }
 532         }
 533
 534         /**
 535          * parse the wiki syntax used to render tables
 536          *
 537          * @access private
 538          */
 539         function doTableStuff ( $t ) {
 540                 $fname = 'Parser::doTableStuff';
 541                 wfProfileIn( $fname );
 542
 543                 $t = explode ( "\n" , $t ) ;
 544                 $td = array () ; # Is currently a td tag open?
 545                 $ltd = array () ; # Was it TD or TH?
 546                 $tr = array () ; # Is currently a tr tag open?
 547                 $ltr = array () ; # tr attributes
 548                 $indent_level = 0; # indent level of the table
 549                 foreach ( $t AS $k => $x )
 550                 {
 551                         $x = trim ( $x ) ;
 552                         $fc = substr ( $x , 0 , 1 ) ;
 553                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
 554                                 $indent_level = strlen( $matches[1] );
 555                                 $t[$k] = "\n" .
 556                                         str_repeat( '<dl><dd>', $indent_level ) .
 557                                         '<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 558                                 array_push ( $td , false ) ;
 559                                 array_push ( $ltd , '' ) ;
 560                                 array_push ( $tr , false ) ;
 561                                 array_push ( $ltr , '' ) ;
 562                         }
 563                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 564                         else if ( '|}' == substr ( $x , 0 , 2 ) ) {
 565                                 $z = "</table>\n" ;
 566                                 $l = array_pop ( $ltd ) ;
 567                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 568                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 569                                 array_pop ( $ltr ) ;
 570                                 $t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
 571                         }
 572                         else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
 573                                 $x = substr ( $x , 1 ) ;
 574                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 575                                 $z = '' ;
 576                                 $l = array_pop ( $ltd ) ;
 577                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 578                                 if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 579                                 array_pop ( $ltr ) ;
 580                                 $t[$k] = $z ;
 581                                 array_push ( $tr , false ) ;
 582                                 array_push ( $td , false ) ;
 583                                 array_push ( $ltd , '' ) ;
 584                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 585                         }
 586                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
 587                                 # $x is a table row
 588                                 if ( '|+' == substr ( $x , 0 , 2 ) ) {
 589                                         $fc = '+' ;
 590                                         $x = substr ( $x , 1 ) ;
 591                                 }
 592                                 $after = substr ( $x , 1 ) ;
 593                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 594                                 $after = explode ( '||' , $after ) ;
 595                                 $t[$k] = '' ;
 596
 597                                 # Loop through each table cell
 598                                 foreach ( $after AS $theline )
 599                                 {
 600                                         $z = '' ;
 601                                         if ( $fc != '+' )
 602                                         {
 603                                                 $tra = array_pop ( $ltr ) ;
 604                                                 if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
 605                                                 array_push ( $tr , true ) ;
 606                                                 array_push ( $ltr , '' ) ;
 607                                         }
 608
 609                                         $l = array_pop ( $ltd ) ;
 610                                         if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
 611                                         if ( $fc == '|' ) $l = 'td' ;
 612                                         else if ( $fc == '!' ) $l = 'th' ;
 613                                         else if ( $fc == '+' ) $l = 'caption' ;
 614                                         else $l = '' ;
 615                                         array_push ( $ltd , $l ) ;
 616
 617                                         # Cell parameters
 618                                         $y = explode ( '|' , $theline , 2 ) ;
 619                                         # Note that a '|' inside an invalid link should not
 620                                         # be mistaken as delimiting cell parameters
 621                                         if ( strpos( $y[0], '[[' ) !== false ) {
 622                                                 $y = array ($theline);
 623                                         }
 624                                         if ( count ( $y ) == 1 )
 625                                                 $y = "{$z}<{$l}>{$y[0]}" ;
 626                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 627                                         $t[$k] .= $y ;
 628                                         array_push ( $td , true ) ;
 629                                 }
 630                         }
 631                 }
 632
 633                 # Closing open td, tr && table
 634                 while ( count ( $td ) > 0 )
 635                 {
 636                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 637                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 638                         $t[] = '</table>' ;
 639                 }
 640
 641                 $t = implode ( "\n" , $t ) ;
 642                 #               $t = $this->removeHTMLtags( $t );
 643                 wfProfileOut( $fname );
 644                 return $t ;
 645         }
 646
 647         /**
 648          * Helper function for parse() that transforms wiki markup into
 649          * HTML. Only called for $mOutputType == OT_HTML.
 650          *
 651          * @access private
 652          */
 653         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
 654         global $wgContLang;
 655
 656                 $fname = 'Parser::internalParse';
 657                 wfProfileIn( $fname );
 658
 659                 $text = $this->removeHTMLtags( $text );
 660                 $text = $this->replaceVariables( $text, $args );
 661
 662                 $text = $wgContLang->convert($text);
 663
 664                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 665
 666                 $text = $this->doHeadings( $text );
 667                 if($this->mOptions->getUseDynamicDates()) {
 668                         global $wgDateFormatter;
 669                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 670                 }
 671                 $text = $this->doAllQuotes( $text );
 672                 $text = $this->replaceInternalLinks ( $text );
 673                 # Another call to replace links and images inside captions of images
 674                 $text = $this->replaceInternalLinks ( $text );
 675                 $text = $this->replaceExternalLinks( $text );
 676                 $text = $this->doMagicLinks( $text );
 677                 $text = $this->doTableStuff( $text );
 678                 $text = $this->formatHeadings( $text, $isMain );
 679                 $sk =& $this->mOptions->getSkin();
 680                 $text = $sk->transformContent( $text );
 681
 682                 wfProfileOut( $fname );
 683                 return $text;
 684         }
 685
 686         /**
 687          * Replace special strings like "ISBN xxx" and "RFC xxx" with
 688          * magic external links.
 689          *
 690          * @access private
 691          */
 692         function &doMagicLinks( &$text ) {
 693                 global $wgUseGeoMode;
 694                 $text = $this->magicISBN( $text );
 695                 if ( isset( $wgUseGeoMode ) && $wgUseGeoMode ) {
 696                         $text = $this->magicGEO( $text );
 697                 }
 698                 $text = $this->magicRFC( $text );
 699                 return $text;
 700         }
 701
 702         /**
 703          * Parse ^^ tokens and return html
 704          *
 705          * @access private
 706          */
 707         function doExponent ( $text ) {
 708                 $fname = 'Parser::doExponent';
 709                 wfProfileIn( $fname);
 710                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 711                 wfProfileOut( $fname);
 712                 return $text;
 713         }
 714
 715         /**
 716          * Parse headers and return html
 717          *
 718          * @access private
 719          */
 720         function doHeadings( $text ) {
 721                 $fname = 'Parser::doHeadings';
 722                 wfProfileIn( $fname );
 723                 for ( $i = 6; $i >= 1; --$i ) {
 724                         $h = substr( '======', 0, $i );
 725                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 726                           "<h{$i}>\\1</h{$i}>\\2", $text );
 727                 }
 728                 wfProfileOut( $fname );
 729                 return $text;
 730         }
 731
 732         /**
 733          * Replace single quotes with HTML markup
 734          * @access private
 735          * @return string the altered text
 736          */
 737         function doAllQuotes( $text ) {
 738                 $fname = 'Parser::doAllQuotes';
 739                 wfProfileIn( $fname );
 740                 $outtext = '';
 741                 $lines = explode( "\n", $text );
 742                 foreach ( $lines as $line ) {
 743                         $outtext .= $this->doQuotes ( $line ) . "\n";
 744                 }
 745                 $outtext = substr($outtext, 0,-1);
 746                 wfProfileOut( $fname );
 747                 return $outtext;
 748         }
 749
 750         /**
 751          * Helper function for doAllQuotes()
 752          * @access private
 753          */
 754         function doQuotes( $text ) {
 755                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 756                 if (count ($arr) == 1)
 757                         return $text;
 758                 else
 759                 {
 760                         # First, do some preliminary work. This may shift some apostrophes from
 761                         # being mark-up to being text. It also counts the number of occurrences
 762                         # of bold and italics mark-ups.
 763                         $i = 0;
 764                         $numbold = 0;
 765                         $numitalics = 0;
 766                         foreach ($arr as $r)
 767                         {
 768                                 if (($i % 2) == 1)
 769                                 {
 770                                         # If there are ever four apostrophes, assume the first is supposed to
 771                                         # be text, and the remaining three constitute mark-up for bold text.
 772                                         if (strlen ($arr[$i]) == 4)
 773                                         {
 774                                                 $arr[$i-1] .= "'";
 775                                                 $arr[$i] = "'''";
 776                                         }
 777                                         # If there are more than 5 apostrophes in a row, assume they're all
 778                                         # text except for the last 5.
 779                                         else if (strlen ($arr[$i]) > 5)
 780                                         {
 781                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 782                                                 $arr[$i] = "'''''";
 783                                         }
 784                                         # Count the number of occurrences of bold and italics mark-ups.
 785                                         # We are not counting sequences of five apostrophes.
 786                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 787                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 788                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 789                                 }
 790                                 $i++;
 791                         }
 792
 793                         # If there is an odd number of both bold and italics, it is likely
 794                         # that one of the bold ones was meant to be an apostrophe followed
 795                         # by italics. Which one we cannot know for certain, but it is more
 796                         # likely to be one that has a single-letter word before it.
 797                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 798                         {
 799                                 $i = 0;
 800                                 $firstsingleletterword = -1;
 801                                 $firstmultiletterword = -1;
 802                                 $firstspace = -1;
 803                                 foreach ($arr as $r)
 804                                 {
 805                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 806                                         {
 807                                                 $x1 = substr ($arr[$i-1], -1);
 808                                                 $x2 = substr ($arr[$i-1], -2, 1);
 809                                                 if ($x1 == ' ') {
 810                                                         if ($firstspace == -1) $firstspace = $i;
 811                                                 } else if ($x2 == ' ') {
 812                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 813                                                 } else {
 814                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 815                                                 }
 816                                         }
 817                                         $i++;
 818                                 }
 819
 820                                 # If there is a single-letter word, use it!
 821                                 if ($firstsingleletterword > -1)
 822                                 {
 823                                         $arr [ $firstsingleletterword ] = "''";
 824                                         $arr [ $firstsingleletterword-1 ] .= "'";
 825                                 }
 826                                 # If not, but there's a multi-letter word, use that one.
 827                                 else if ($firstmultiletterword > -1)
 828                                 {
 829                                         $arr [ $firstmultiletterword ] = "''";
 830                                         $arr [ $firstmultiletterword-1 ] .= "'";
 831                                 }
 832                                 # ... otherwise use the first one that has neither.
 833                                 # (notice that it is possible for all three to be -1 if, for example,
 834                                 # there is only one pentuple-apostrophe in the line)
 835                                 else if ($firstspace > -1)
 836                                 {
 837                                         $arr [ $firstspace ] = "''";
 838                                         $arr [ $firstspace-1 ] .= "'";
 839                                 }
 840                         }
 841
 842                         # Now let's actually convert our apostrophic mush to HTML!
 843                         $output = '';
 844                         $buffer = '';
 845                         $state = '';
 846                         $i = 0;
 847                         foreach ($arr as $r)
 848                         {
 849                                 if (($i % 2) == 0)
 850                                 {
 851                                         if ($state == 'both')
 852                                                 $buffer .= $r;
 853                                         else
 854                                                 $output .= $r;
 855                                 }
 856                                 else
 857                                 {
 858                                         if (strlen ($r) == 2)
 859                                         {
 860                                                 if ($state == 'i')
 861                                                 { $output .= '</i>'; $state = ''; }
 862                                                 else if ($state == 'bi')
 863                                                 { $output .= '</i>'; $state = 'b'; }
 864                                                 else if ($state == 'ib')
 865                                                 { $output .= '</b></i><b>'; $state = 'b'; }
 866                                                 else if ($state == 'both')
 867                                                 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
 868                                                 else # $state can be 'b' or ''
 869                                                 { $output .= '<i>'; $state .= 'i'; }
 870                                         }
 871                                         else if (strlen ($r) == 3)
 872                                         {
 873                                                 if ($state == 'b')
 874                                                 { $output .= '</b>'; $state = ''; }
 875                                                 else if ($state == 'bi')
 876                                                 { $output .= '</i></b><i>'; $state = 'i'; }
 877                                                 else if ($state == 'ib')
 878                                                 { $output .= '</b>'; $state = 'i'; }
 879                                                 else if ($state == 'both')
 880                                                 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
 881                                                 else # $state can be 'i' or ''
 882                                                 { $output .= '<b>'; $state .= 'b'; }
 883                                         }
 884                                         else if (strlen ($r) == 5)
 885                                         {
 886                                                 if ($state == 'b')
 887                                                 { $output .= '</b><i>'; $state = 'i'; }
 888                                                 else if ($state == 'i')
 889                                                 { $output .= '</i><b>'; $state = 'b'; }
 890                                                 else if ($state == 'bi')
 891                                                 { $output .= '</i></b>'; $state = ''; }
 892                                                 else if ($state == 'ib')
 893                                                 { $output .= '</b></i>'; $state = ''; }
 894                                                 else if ($state == 'both')
 895                                                 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
 896                                                 else # ($state == '')
 897                                                 { $buffer = ''; $state = 'both'; }
 898                                         }
 899                                 }
 900                                 $i++;
 901                         }
 902                         # Now close all remaining tags.  Notice that the order is important.
 903                         if ($state == 'b' || $state == 'ib')
 904                                 $output .= '</b>';
 905                         if ($state == 'i' || $state == 'bi' || $state == 'ib')
 906                                 $output .= '</i>';
 907                         if ($state == 'bi')
 908                                 $output .= '</b>';
 909                         if ($state == 'both')
 910                                 $output .= '<b><i>'.$buffer.'</i></b>';
 911                         return $output;
 912                 }
 913         }
 914
 915         /**
 916          * Replace external links
 917          *
 918          * Note: we have to do external links before the internal ones,
 919          * and otherwise take great care in the order of things here, so
 920          * that we don't end up interpreting some URLs twice.
 921          *
 922          * @access private
 923          */
 924         function replaceExternalLinks( $text ) {
 925                 $fname = 'Parser::replaceExternalLinks';
 926                 wfProfileIn( $fname );
 927
 928                 $sk =& $this->mOptions->getSkin();
 929                 $linktrail = wfMsgForContent('linktrail');
 930                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
 931
 932                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
 933
 934                 $i = 0;
 935                 while ( $i<count( $bits ) ) {
 936                         $url = $bits[$i++];
 937                         $protocol = $bits[$i++];
 938                         $text = $bits[$i++];
 939                         $trail = $bits[$i++];
 940
 941                         # If the link text is an image URL, replace it with an <img> tag
 942                         # This happened by accident in the original parser, but some people used it extensively
 943                         $img = $this->maybeMakeImageLink( $text );
 944                         if ( $img !== false ) {
 945                                 $text = $img;
 946                         }
 947
 948                         $dtrail = '';
 949
 950                         # No link text, e.g. [http://domain.tld/some.link]
 951                         if ( $text == '' ) {
 952                                 # Autonumber if allowed
 953                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
 954                                         $text = '[' . ++$this->mAutonumber . ']';
 955                                 } else {
 956                                         # Otherwise just use the URL
 957                                         $text = htmlspecialchars( $url );
 958                                 }
 959                         } else {
 960                                 # Have link text, e.g. [http://domain.tld/some.link text]s
 961                                 # Check for trail
 962                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
 963                                         $dtrail = $m2[1];
 964                                         $trail = $m2[2];
 965                                 }
 966                         }
 967
 968                         $encUrl = htmlspecialchars( $url );
 969                         # Bit in parentheses showing the URL for the printable version
 970                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
 971                                 $paren = '';
 972                         } else {
 973                                 # Expand the URL for printable version
 974                                 if ( ! $sk->suppressUrlExpansion() ) {
 975                                         $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
 976                                 } else {
 977                                         $paren = '';
 978                                 }
 979                         }
 980
 981                         # Process the trail (i.e. everything after this link up until start of the next link),
 982                         # replacing any non-bracketed links
 983                         $trail = $this->replaceFreeExternalLinks( $trail );
 984
 985                         $la = $sk->getExternalLinkAttributes( $url, $text );
 986
 987                         # Use the encoded URL
 988                         # This means that users can paste URLs directly into the text
 989                         # Funny characters like &ouml; aren't valid in URLs anyway
 990                         # This was changed in August 2004
 991                         $s .= "<a href=\"{$url}\"{$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
 992                 }
 993
 994                 wfProfileOut( $fname );
 995                 return $s;
 996         }
 997
 998         /**
 999          * Replace anything that looks like a URL with a link
1000          * @access private
1001          */
1002         function replaceFreeExternalLinks( $text ) {
1003                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1004                 $s = array_shift( $bits );
1005                 $i = 0;
1006
1007                 $sk =& $this->mOptions->getSkin();
1008
1009                 while ( $i < count( $bits ) ){
1010                         $protocol = $bits[$i++];
1011                         $remainder = $bits[$i++];
1012
1013                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
1014                                 # Found some characters after the protocol that look promising
1015                                 $url = $protocol . $m[1];
1016                                 $trail = $m[2];
1017
1018                                 # Move trailing punctuation to $trail
1019                                 $sep = ',;\.:!?';
1020                                 # If there is no left bracket, then consider right brackets fair game too
1021                                 if ( strpos( $url, '(' ) === false ) {
1022                                         $sep .= ')';
1023                                 }
1024
1025                                 $numSepChars = strspn( strrev( $url ), $sep );
1026                                 if ( $numSepChars ) {
1027                                         $trail = substr( $url, -$numSepChars ) . $trail;
1028                                         $url = substr( $url, 0, -$numSepChars );
1029                                 }
1030
1031                                 # Replace &amp; from obsolete syntax with &
1032                                 $url = str_replace( '&amp;', '&', $url );
1033
1034                                 # Is this an external image?
1035                                 $text = $this->maybeMakeImageLink( $url );
1036                                 if ( $text === false ) {
1037                                         # Not an image, make a link
1038                                         $text = $sk->makeExternalLink( $url, $url );
1039                                 }
1040                                 $s .= $text . $trail;
1041                         } else {
1042                                 $s .= $protocol . $remainder;
1043                         }
1044                 }
1045                 return $s;
1046         }
1047
1048         /**
1049          * make an image if it's allowed
1050          * @access private
1051          */
1052         function maybeMakeImageLink( $url ) {
1053                 $sk =& $this->mOptions->getSkin();
1054                 $text = false;
1055                 if ( $this->mOptions->getAllowExternalImages() ) {
1056                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
1057                                 # Image found
1058                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
1059                         }
1060                 }
1061                 return $text;
1062         }
1063
1064         /**
1065          * Process [[ ]] wikilinks
1066          *
1067          * @access private
1068          */
1069         function replaceInternalLinks( $s ) {
1070                 global $wgLang, $wgContLang, $wgLinkCache;
1071                 static $fname = 'Parser::replaceInternalLinks' ;
1072                 wfProfileIn( $fname );
1073
1074                 wfProfileIn( $fname.'-setup' );
1075                 static $tc = FALSE;
1076                 # the % is needed to support urlencoded titles as well
1077                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
1078                 $sk =& $this->mOptions->getSkin();
1079
1080                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
1081
1082                 $a = explode( '[[', ' ' . $s );
1083                 $s = array_shift( $a );
1084                 $s = substr( $s, 1 );
1085
1086                 # Match a link having the form [[namespace:link|alternate]]trail
1087                 static $e1 = FALSE;
1088                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1089                 # Match the end of a line for a word that's not followed by whitespace,
1090                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1091                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1092
1093                 $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
1094                 # Special and Media are pseudo-namespaces; no pages actually exist in them
1095
1096                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
1097
1098                 if ( $useLinkPrefixExtension ) {
1099                         if ( preg_match( $e2, $s, $m ) ) {
1100                                 $first_prefix = $m[2];
1101                                 $s = $m[1];
1102                         } else {
1103                                 $first_prefix = false;
1104                         }
1105                 } else {
1106                         $prefix = '';
1107                 }
1108
1109                 wfProfileOut( $fname.'-setup' );
1110
1111                 # start procedeeding each line
1112                 foreach ( $a as $line ) {
1113                         wfProfileIn( $fname.'-prefixhandling' );
1114                         if ( $useLinkPrefixExtension ) {
1115                                 if ( preg_match( $e2, $s, $m ) ) {
1116                                         $prefix = $m[2];
1117                                         $s = $m[1];
1118                                 } else {
1119                                         $prefix='';
1120                                 }
1121                                 # first link
1122                                 if($first_prefix) {
1123                                         $prefix = $first_prefix;
1124                                         $first_prefix = false;
1125                                 }
1126                         }
1127                         wfProfileOut( $fname.'-prefixhandling' );
1128
1129                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1130                                 $text = $m[2];
1131                                 # fix up urlencoded title texts
1132                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1133                                 $trail = $m[3];
1134                         } else { # Invalid form; output directly
1135                                 $s .= $prefix . '[[' . $line ;
1136                                 continue;
1137                         }
1138
1139                         # Don't allow internal links to pages containing
1140                         # PROTO: where PROTO is a valid URL protocol; these
1141                         # should be external links.
1142                         if (preg_match('/((?:'.URL_PROTOCOLS.'):)/', $m[1])) {
1143                                 $s .= $prefix . '[[' . $line ;
1144                                 continue;
1145                         }
1146
1147                         # Make subpage if necessary
1148                         $link = $this->maybeDoSubpageLink( $m[1], $text );
1149
1150                         $noforce = (substr($m[1], 0, 1) != ':');
1151                         if (!$noforce) {
1152                                 # Strip off leading ':'
1153                                 $link = substr($link, 1);
1154                         }
1155
1156                         $wasblank = ( '' == $text );
1157                         if( $wasblank ) $text = $link;
1158
1159                         $nt = Title::newFromText( $link );
1160                         if( !$nt ) {
1161                                 $s .= $prefix . '[[' . $line;
1162                                 continue;
1163                         }
1164
1165                         $ns = $nt->getNamespace();
1166                         $iw = $nt->getInterWiki();
1167
1168                         # Link not escaped by : , create the various objects
1169                         if( $noforce ) {
1170
1171                                 # Interwikis
1172                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
1173                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1174                                         $tmp = $prefix . $trail ;
1175                                         $s .= (trim($tmp) == '')? '': $tmp;
1176                                         continue;
1177                                 }
1178
1179                                 if ( $ns == NS_IMAGE ) {
1180                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1181                                         $wgLinkCache->addImageLinkObj( $nt );
1182                                         continue;
1183                                 }
1184
1185                                 if ( $ns == NS_CATEGORY ) {
1186                                         $t = $nt->getText() ;
1187                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;
1188
1189                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1190                                         $pPLC=$sk->postParseLinkColour();
1191                                         $sk->postParseLinkColour( false );
1192                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1193                                         $sk->postParseLinkColour( $pPLC );
1194                                         $wgLinkCache->resume();
1195
1196                                         if ( $wasblank ) {
1197                                                 if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
1198                                                         $sortkey = $this->mTitle->getText();
1199                                                 } else {
1200                                                         $sortkey = $this->mTitle->getPrefixedText();
1201                                                 }
1202                                         } else {
1203                                                 $sortkey = $text;
1204                                         }
1205                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1206                                         $this->mOutput->mCategoryLinks[] = $t ;
1207                                         $s .= $prefix . $trail ;
1208                                         continue;
1209                                 }
1210                         }
1211
1212                         if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
1213                             ( strpos( $link, '#' ) === FALSE ) ) {
1214                                 # Self-links are handled specially; generally de-link and change to bold.
1215                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1216                                 continue;
1217                         }
1218
1219                         if( $ns == NS_MEDIA ) {
1220                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1221                                 $wgLinkCache->addImageLinkObj( $nt );
1222                                 continue;
1223                         } elseif( $ns == NS_SPECIAL ) {
1224                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1225                                 continue;
1226                         }
1227                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1228                 }
1229                 wfProfileOut( $fname );
1230                 return $s;
1231         }
1232
1233         /**
1234          * Handle link to subpage if necessary.
1235          * Valid link forms:
1236          *   Foobar   -- normal
1237          *   :Foobar  -- override special treatment of prefix (images, language links)
1238          *   /Foobar  -- convert to CurrentPage/Foobar
1239          *   /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1240          * @param $target string the source of the link
1241          * @param &$text string the link text; if it is '' and the current namespace
1242          *   allows subpages, it is set to the target (slashes stripped in the /Foobar/ form)
1243          * @return string the full name of the link
1244          * @access private
1245          */
1246         function maybeDoSubpageLink($target, &$text) {
1247                 global $wgNamespacesWithSubpages;
1248                 $fname = 'Parser::maybeDoSubpageLink';
1249                 wfProfileIn( $fname );
1250                 # Look at the first character; substr() raises no notice for an empty target
1251                 if( substr( $target, 0, 1 ) == '/' ) {
1252                         # / at end means we don't want the slash to be shown
1253                         if(substr($target,-1,1)=='/') {
1254                                 $target=substr($target,1,-1);
1255                                 $noslash=$target;
1256                         } else {
1257                                 $noslash=substr($target,1);
1258                         }
1259
1260                         # Some namespaces don't allow subpages
1261                         if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
1262                                 # subpages allowed here
1263                                 $ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1264                                 if( '' === $text ) {
1265                                         $text = $target;
1266                                 } # this might be changed for ugliness reasons
1267                         } else {
1268                                 # no subpage allowed, use standard link
1269                                 $ret = $target;
1270                         }
1271                 } else {
1272                         # no subpage
1273                         $ret = $target;
1274                 }
1275
1276                 wfProfileOut( $fname );
1277                 return $ret;
1278         }
1279
1280         /**#@+
1281          * Used by doBlockLevels()
1282          * @access private
1283          */
1284         /* private */ function closeParagraph() {
1285                 # Returns the closing tag (plus newline) for the section recorded in
1286                 # mLastSection, or '' when none is open, and resets the section state
1287                 $closingTag = ( '' != $this->mLastSection ) ? '</' . $this->mLastSection . ">\n" : '';
1288                 # Nothing is open afterwards, and we are no longer inside a <pre>
1289                 $this->mLastSection = '';
1290                 $this->mInPre = false;
1291                 return $closingTag;
1292         }
1293         # getCommon() returns the length of the longest common prefix of both
1294         # arguments, i.e. how many leading characters (bytes) they share.
1295         # String offsets use [] because the {} form is a parse error as of PHP 8.
1296         /* private */ function getCommon( $st1, $st2 ) {
1297                 $fl = strlen( $st1 );
1298                 $shorter = strlen( $st2 );
1299                 if ( $fl < $shorter ) { $shorter = $fl; }
1300
1301                 for ( $i = 0; $i < $shorter; ++$i ) {
1302                         if ( $st1[$i] !== $st2[$i] ) { break; }
1303                 }
1304                 return $i;
1305         }
1306         # These next three functions open, continue, and close the list
1307         # element appropriate to the prefix character passed into them.
1308         #
1309         /* private */ function openList( $char ) {
1310                 # Any open paragraph/pre has to be closed before a list can start
1311                 $html = $this->closeParagraph();
1312                 switch ( $char ) {
1313                         case '*': return $html . '<ul><li>';
1314                         case '#': return $html . '<ol><li>';
1315                         case ':': return $html . '<dl><dd>';
1316                         case ';':
1317                                 # Remember that a definition term, not a definition, is open
1318                                 $this->mDTopen = true;
1319                                 return $html . '<dl><dt>';
1320                 }
1321                 return '<!-- ERR 1 -->'; # unknown prefix: the closing tag is discarded
1322         }
1323
1324         /* private */ function nextItem( $char ) {
1325                 # Plain list items just close the previous <li> and open another
1326                 if ( '*' == $char || '#' == $char ) {
1327                         return '</li><li>';
1328                 }
1329                 if ( ':' != $char && ';' != $char ) {
1330                         return '<!-- ERR 2 -->';
1331                 }
1332                 # Definition lists: close whichever of <dt>/<dd> is currently open,
1333                 # then record whether the element being opened is a term (<dt>)
1334                 $closeTag = $this->mDTopen ? '</dt>' : '</dd>';
1335                 $isTerm = ( ';' == $char );
1336                 $this->mDTopen = $isTerm;
1337                 return $closeTag . ( $isTerm ? '<dt>' : '<dd>' );
1338         }
1339
1340         /* private */ function closeList( $char ) {
1341                 switch ( $char ) {
1342                         case '*': $tags = '</li></ul>'; break;
1343                         case '#': $tags = '</li></ol>'; break;
1344                         case ':':
1345                                 # mDTopen says whether a <dt> rather than a <dd> is still open
1346                                 $tags = $this->mDTopen ? '</dt></dl>' : '</dd></dl>';
1347                                 if ( $this->mDTopen ) { $this->mDTopen = false; }
1348                                 break;
1349                         default:
1350                                 return '<!-- ERR 3 -->'; # includes ';', which has no branch here
1351                 }
1352                 return $tags . "\n";
1353         }
1354         /**#@-*/
1355
1356         /**
1357          * Make lists from lines starting with ':', '*', '#', etc., and wrap the
1358          * remaining lines in p / pre block-level elements.
1359          *
1360          * @param $text string the text to process, handled line by line
1361          * @param $linestart bool if false, the first line is appended to the output unprocessed
1362          * @return string the lists rendered as HTML
1363          * @access private
1364          */
1365         function doBlockLevels( $text, $linestart ) {
1366                 $fname = 'Parser::doBlockLevels';
1367                 wfProfileIn( $fname );
1368
1369                 # String offsets below use [] / substr(): the {} form no longer parses in PHP 8
1370                 $textLines = explode( "\n", $text );
1371
1372                 $lastPrefix = $output = $lastLine = '';
1373                 $this->mDTopen = $inBlockElem = false;
1374                 $prefixLength = 0;
1375                 $paragraphStack = false;
1376
1377                 if ( !$linestart ) {
1378                         $output .= array_shift( $textLines );
1379                 }
1380                 foreach ( $textLines as $oLine ) {
1381                         $lastPrefixLength = strlen( $lastPrefix );
1382                         $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
1383                         $preOpenMatch = preg_match('/<pre/i', $oLine );
1384                         if ( !$this->mInPre ) {
1385                                 # Multiple prefixes may abut each other for nested lists.
1386                                 $prefixLength = strspn( $oLine, '*#:;' );
1387                                 $pref = substr( $oLine, 0, $prefixLength );
1388
1389                                 # Compare prefixes with ';' folded into ':' (both mean a definition list)
1390                                 $pref2 = str_replace( ';', ':', $pref );
1391                                 $t = substr( $oLine, $prefixLength );
1392                                 $this->mInPre = !empty($preOpenMatch);
1393                         } else {
1394                                 # Don't interpret any other prefixes in preformatted text
1395                                 $prefixLength = 0;
1396                                 $pref = $pref2 = '';
1397                                 $t = $oLine;
1398                         }
1399
1400                         # List generation
1401                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1402                                 # Same as the last item, so no need to deal with nesting or opening stuff
1403                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1404                                 $paragraphStack = false;
1405
1406                                 if ( substr( $pref, -1 ) == ';') {
1407                                         # The one nasty exception: definition lists work like this:
1408                                         # ; title : definition text
1409                                         # So we check for : in the remainder text to split up the
1410                                         # title and definition, without b0rking links.
1411                                         if ($this->findColonNoLinks($t, $term, $t2) !== false) {
1412                                                 $t = $t2;
1413                                                 $output .= $term . $this->nextItem( ':' );
1414                                         }
1415                                 }
1416                         } elseif( $prefixLength || $lastPrefixLength ) {
1417                                 # Either open or close a level...
1418                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1419                                 $paragraphStack = false;
1420
1421                                 while( $commonPrefixLength < $lastPrefixLength ) {
1422                                         $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
1423                                         --$lastPrefixLength;
1424                                 }
1425                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1426                                         $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
1427                                 }
1428                                 while ( $prefixLength > $commonPrefixLength ) {
1429                                         $char = substr( $pref, $commonPrefixLength, 1 );
1430                                         $output .= $this->openList( $char );
1431
1432                                         if ( ';' == $char ) {
1433                                                 # FIXME: This is dupe of code above
1434                                                 if ($this->findColonNoLinks($t, $term, $t2) !== false) {
1435                                                         $t = $t2;
1436                                                         $output .= $term . $this->nextItem( ':' );
1437                                                 }
1438                                         }
1439                                         ++$commonPrefixLength;
1440                                 }
1441                                 $lastPrefix = $pref2;
1442                         }
1443                         if( 0 == $prefixLength ) {
1444                                 # No prefix (not in list)--go to paragraph mode
1445                                 $uniq_prefix = UNIQ_PREFIX;
1446                                 // XXX: use a stack for nestable elements like span, table and div
1447                                 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1448                                 $closematch = preg_match(
1449                                         '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1450                                         '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1451                                 if ( $openmatch or $closematch ) {
1452                                         $paragraphStack = false;
1453                                         $output .= $this->closeParagraph();
1454                                         if($preOpenMatch and !$preCloseMatch) {
1455                                                 $this->mInPre = true;
1456                                         }
1457                                         if ( $closematch ) {
1458                                                 $inBlockElem = false;
1459                                         } else {
1460                                                 $inBlockElem = true;
1461                                         }
1462                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1463                                         if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
1464                                                 // pre; substr() above tolerates an empty $t, which an offset would not
1465                                                 if ($this->mLastSection != 'pre') {
1466                                                         $paragraphStack = false;
1467                                                         $output .= $this->closeParagraph().'<pre>';
1468                                                         $this->mLastSection = 'pre';
1469                                                 }
1470                                                 $t = substr( $t, 1 );
1471                                         } else {
1472                                                 // paragraph
1473                                                 if ( '' == trim($t) ) {
1474                                                         if ( $paragraphStack ) {
1475                                                                 $output .= $paragraphStack.'<br />';
1476                                                                 $paragraphStack = false;
1477                                                                 $this->mLastSection = 'p';
1478                                                         } else {
1479                                                                 if ($this->mLastSection != 'p' ) {
1480                                                                         $output .= $this->closeParagraph();
1481                                                                         $this->mLastSection = '';
1482                                                                         $paragraphStack = '<p>';
1483                                                                 } else {
1484                                                                         $paragraphStack = '</p><p>';
1485                                                                 }
1486                                                         }
1487                                                 } else {
1488                                                         if ( $paragraphStack ) {
1489                                                                 $output .= $paragraphStack;
1490                                                                 $paragraphStack = false;
1491                                                                 $this->mLastSection = 'p';
1492                                                         } else if ($this->mLastSection != 'p') {
1493                                                                 $output .= $this->closeParagraph().'<p>';
1494                                                                 $this->mLastSection = 'p';
1495                                                         }
1496                                                 }
1497                                         }
1498                                 }
1499                         }
1500                         if ($paragraphStack === false) {
1501                                 $output .= $t."\n";
1502                         }
1503                 }
1504                 while ( $prefixLength ) {
1505                         $output .= $this->closeList( $pref2[$prefixLength-1] );
1506                         --$prefixLength;
1507                 }
1508                 if ( '' != $this->mLastSection ) {
1509                         $output .= '</' . $this->mLastSection . '>';
1510                         $this->mLastSection = '';
1511                 }
1512
1513                 wfProfileOut( $fname );
1514                 return $output;
1515         }
1516
1517         /**
1518          * Split up a string on ':', ignoring any occurrences inside
1519          * <a>..</a> or <span>...</span>
1520          *
1521          * Only those two tags are tracked, so a ':' in the middle of italics or
1522          * other such formatting is still matched (limitation noted by Wil).
1523          *
1524          * @param $str string the string to split
1525          * @param &$before string set to everything before the ':'
1526          * @param &$after string set to everything after the ':'
1527          *   (both are also overwritten for each ':' that is skipped, so they are
1528          *   only meaningful when the return value is not false)
1529          * @return mixed the position of the ':', or false if none found
1530          */
1531         function findColonNoLinks($str, &$before, &$after) {
1532                 $fname = 'Parser::findColonNoLinks';
1533                 wfProfileIn( $fname );
1534
1535                 # Try each ':' from left to right; strpos() yields false once none is left,
1536                 # which both ends the loop and becomes the return value
1537                 $searchFrom = 0;
1538                 while ( ( $colon = strpos( $str, ':', $searchFrom ) ) !== false ) {
1539                         $before = substr( $str, 0, $colon );
1540                         $after = substr( $str, $colon + 1 );
1541
1542                         # The ':' is usable only if no <a or <span opened before it is still open
1543                         $openAnchor = substr_count( $before, '<a' ) > substr_count( $before, '</a>' );
1544                         $openSpan = substr_count( $before, '<span' ) > substr_count( $before, '</span>' );
1545                         if ( !$openAnchor && !$openSpan ) {
1546                                 # Tags are balanced before ':'; ok
1547                                 break;
1548                         }
1549                         # Inside a link or span: resume the search just past this ':'
1550                         $searchFrom = $colon + 1;
1551                 }
1552
1553                 wfProfileOut( $fname );
1554                 return $colon;
1555         }
1556
1557         /**
1558          * Return value of a magic variable (like PAGENAME)
1559          *
1560          * Date parts come from PHP's date(); the time comes from wfTimestampNow().
1561          *
1562          * @param $index mixed the MAG_* identifier of the variable
1563          * @return mixed the current value of the variable, or NULL if $index is not handled here
1564          * @access private
1565          */
1566         function getVariableValue( $index ) {
1567                 global $wgContLang, $wgSitename, $wgServer;
1568
1569                 # The identifiers are compared loosely and in this fixed order, which is
1570                 # what a switch statement over the same constants does
1571
1572                 # Date parts; the numeric ones go through the content language's formatNum()
1573                 if ( $index == MAG_CURRENTMONTH ) { return $wgContLang->formatNum( date( 'm' ) ); }
1574                 if ( $index == MAG_CURRENTMONTHNAME ) { return $wgContLang->getMonthName( date('n') ); }
1575                 if ( $index == MAG_CURRENTMONTHNAMEGEN ) { return $wgContLang->getMonthNameGen( date('n') ); }
1576                 if ( $index == MAG_CURRENTDAY ) { return $wgContLang->formatNum( date('j') ); }
1577
1578                 # The page being parsed
1579                 if ( $index == MAG_PAGENAME ) { return $this->mTitle->getText(); }
1580                 if ( $index == MAG_PAGENAMEE ) { return $this->mTitle->getPartialURL(); }
1581                 # Localised namespace name rather than the canonical one (patch by Dori)
1582                 if ( $index == MAG_NAMESPACE ) { return $wgContLang->getNsText($this->mTitle->getNamespace()); }
1583
1584                 # date('w') counts from 0 (Sunday); the language method is given it shifted by one
1585                 if ( $index == MAG_CURRENTDAYNAME ) { return $wgContLang->getWeekdayName( date('w')+1 ); }
1586                 if ( $index == MAG_CURRENTYEAR ) { return $wgContLang->formatNum( date( 'Y' ) ); }
1587                 if ( $index == MAG_CURRENTTIME ) { return $wgContLang->time( wfTimestampNow(), false ); }
1588
1589                 # Site-wide values
1590                 if ( $index == MAG_NUMBEROFARTICLES ) { return $wgContLang->formatNum( wfNumberOfArticles() ); }
1591                 if ( $index == MAG_SITENAME ) { return $wgSitename; }
1592                 if ( $index == MAG_SERVER ) { return $wgServer; }
1593
1594                 # Not a variable handled here
1595                 return NULL;
1596         }
1597
1598         /**
1599          * Initialise the magic variables (like CURRENTMONTHNAME): empty $this->mVariables, then pass it and
1600          * each variable's current value to addToArray() of every magic word listed in $wgVariableIDs.
1601          * @access private
1602          */
1603         function initialiseVariables() {
1604                 global $wgVariableIDs;
1605                 $fname = 'Parser::initialiseVariables';
1606                 wfProfileIn( $fname );
1607                 $this->mVariables = array();
1608                 foreach ( $wgVariableIDs as $variableId ) {
1609                         $magicWord =& MagicWord::get( $variableId );
1610                         $magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
1611                 }
1612                 wfProfileOut( $fname );
1613         }
1614
1615         /**
1616          * Replace magic variables, templates, and template arguments
1617          * with the appropriate text. Templates are substituted recursively,
1618          * taking care to avoid infinite loops.
1619          *
1620          * Note that the substitution depends on value of $mOutputType:
1621          *  OT_WIKI: only {{subst:}} templates
1622          *  OT_MSG: only magic variables
1623          *  OT_HTML: all templates and magic variables
1624          *
1625          * @param string $text The text to transform
1626          * @param array $args Key-value pairs representing template parameters to substitute
1627          * @return string The transformed text (returned untouched if longer than MAX_INCLUDE_SIZE bytes)
1628          * @access private
1629          */
1630         function replaceVariables( $text, $args = array() ) {
1631                 # Prevent too big inclusions
1632                 if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
1633                         return $text;
1634                 }
1635
1636                 $fname = 'Parser::replaceVariables';
1637                 wfProfileIn( $fname );
1638
1639                 $titleChars = Title::legalChars();
1640
1641                 # This function is called recursively. To keep track of arguments we need a stack:
1642                 array_push( $this->mArgStack, $args );
1643
1644                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1645                 $GLOBALS['wgCurParser'] =& $this;
1646
1647                 # Variable substitution
1648                 $text = preg_replace_callback( "/{{([$titleChars]*?)}}/", 'wfVariableSubstitution', $text );
1649
1650                 if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) {
1651                         # Argument substitution
1652                         $text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1653                 }
1654                 # Template substitution
1655                 $regex = '/(\\n|{)?{{(['.$titleChars.']*)(\\|.*?|)}}/s';
1656                 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1657
1658                 array_pop( $this->mArgStack );
1659
1660                 wfProfileOut( $fname );
1661                 return $text;
1662         }
1663
1664         /**
1665          * Replace magic variables
1666          *
1667          * @param array $matches regex match; [0] is used as the whole {{...}} text, [1] as the variable name
1668          * @return string the variable's value, or the unchanged match when nothing is substituted
1669          * @access private
1670          */
1671         function variableSubstitution( $matches ) {
1672                 if ( !$this->mVariables ) {
1673                         $this->initialiseVariables();
1674                 }
1675                 if ( $this->mOutputType == OT_WIKI ) {
1676                         # Do only magic variables prefixed by SUBST
1677                         $mwSubst =& MagicWord::get( MAG_SUBST );
1678                         if ( !$mwSubst->matchStartAndRemove( $matches[1] ) ) {
1679                                 return $matches[0];
1680                         }
1681                 }
1682                 if ( !array_key_exists( $matches[1], $this->mVariables ) ) {
1683                         # Returning the whole match leaves the {{subst:}} magic word in
1684                         # place, in case it is a template rather than a magic variable
1685                         return $matches[0];
1686                 }
1687                 $this->mOutput->mContainsOldMagic = true;
1688                 return $this->mVariables[$matches[1]];
1689         }
1690
1691         # Split template arguments: $argsString is the raw argument text including its
1692         # leading '|'; returns the list of arguments (an empty array for '').
1693         function getTemplateArgs( $argsString ) {
1694                 if ( $argsString === '' ) {
1695                         return array();
1696                 }
1697
1698                 $args = array();
1699                 $pending = NULL;
1700                 foreach ( explode( '|', substr( $argsString, 1 ) ) as $piece ) {
1701                         $pending = is_null( $pending ) ? $piece : $pending . '|' . $piece;
1702                         # While the '[[' and ']]' counts differ, the next '|' belongs to the
1703                         # link syntax and not the template parameter syntax: keep collecting
1704                         if ( substr_count( $pending, '[[' ) == substr_count( $pending, ']]' ) ) {
1705                                 $args[] = $pending;
1706                                 $pending = NULL;
1707                         }
1708                 }
1709                 if ( !is_null( $pending ) ) {
1710                         # The input ended while a link was still unbalanced
1711                         $args[] = $pending;
1712                 }
1713                 return $args;
1714         }
1715
1716         /**
1717          * Return the text of a template, after recursively
1718          * replacing any variables or templates within the template.
1719          *
1720          * @param array $matches The parts of the template
1721          *  $matches[1]: a newline or "{" directly before the braces, if any; $matches[2]: the title,
1722          *  i.e. the part before the |; $matches[3]: the parameters (including a leading |), if any
1723          * @return string the text of the template
1724          * @access private
1725          */
1726         function braceSubstitution( $matches ) {
1727                 global $wgLinkCache, $wgContLang;
1728                 $fname = 'Parser::braceSubstitution';
1729                 $found = false;
1730                 $nowiki = false;
1731                 $noparse = false;
1732
1733                 $title = NULL;
1734
1735                 # Need to know if the template comes at the start of a line,
1736                 # to treat the beginning of the template like the beginning
1737                 # of a line for tables and block-level elements.
1738                 $linestart = $matches[1];
1739
1740                 # $part1 is the bit before the first |, and must contain only title characters
1741                 # $args is a list of arguments, starting from index 0, not including $part1
1742
1743                 $part1 = $matches[2];
1744                 # If the third subpattern matched anything, it will start with |
1745
1746                 $args = $this->getTemplateArgs($matches[3]);
1747                 $argc = count( $args );
1748
1749                 # Don't parse {{{}}} because that's only for template arguments
1750                 if ( $linestart === '{' ) {
1751                         $text = $matches[0];
1752                         $found = true;
1753                         $noparse = true;
1754                 }
1755
1756                 # SUBST
1757                 if ( !$found ) {
1758                         $mwSubst =& MagicWord::get( MAG_SUBST );
1759                         if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
1760                                 # One of two possibilities is true:
1761                                 # 1) Found SUBST but not in the PST phase
1762                                 # 2) Didn't find SUBST and in the PST phase
1763                                 # In either case, return without further processing
1764                                 $text = $matches[0];
1765                                 $found = true;
1766                                 $noparse = true;
1767                         }
1768                 }
1769
1770                 # MSG, MSGNW and INT
1771                 if ( !$found ) {
1772                         # Check for MSGNW:
1773                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1774                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1775                                 $nowiki = true;
1776                         } else {
1777                                 # Remove obsolete MSG:
1778                                 $mwMsg =& MagicWord::get( MAG_MSG );
1779                                 $mwMsg->matchStartAndRemove( $part1 );
1780                         }
1781
1782                         # Check if it is an internal message
1783                         $mwInt =& MagicWord::get( MAG_INT );
1784                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1785                                 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1786                                         $text = $linestart . wfMsgReal( $part1, $args, true );
1787                                         $found = true;
1788                                 }
1789                         }
1790                 }
1791
1792                 # NS
1793                 if ( !$found ) {
1794                         # Check for NS: (namespace expansion)
1795                         $mwNs = MagicWord::get( MAG_NS );
1796                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1797                                 if ( intval( $part1 ) ) {
1798                                         $text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
1799                                         $found = true;
1800                                 } else {
1801                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1802                                         if ( !is_null( $index ) ) {
1803                                                 $text = $linestart . $wgContLang->getNsText( $index );
1804                                                 $found = true;
1805                                         }
1806                                 }
1807                         }
1808                 }
1809
1810                 # LOCALURL and LOCALURLE
1811                 if ( !$found ) {
1812                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1813                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1814
1815                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1816                                 $func = 'getLocalURL';
1817                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1818                                 $func = 'escapeLocalURL';
1819                         } else {
1820                                 $func = '';
1821                         }
1822
1823                         if ( $func !== '' ) {
1824                                 $title = Title::newFromText( $part1 );
1825                                 if ( !is_null( $title ) ) {
1826                                         if ( $argc > 0 ) {
1827                                                 $text = $linestart . $title->$func( $args[0] );
1828                                         } else {
1829                                                 $text = $linestart . $title->$func();
1830                                         }
1831                                         $found = true;
1832                                 }
1833                         }
1834                 }
1835
1836                 # GRAMMAR
1837                 if ( !$found && $argc == 1 ) {
1838                         $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
1839                         if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
1840                                 $text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
1841                                 $found = true;
1842                         }
1843                 }
1844
1845                 # Template table test
1846
1847                 # Did we encounter this template already? If yes, it is in the cache
1848                 # and we need to check for loops.
1849                 if ( !$found && isset( $this->mTemplates[$part1] ) ) {
1850                         # set $text to cached message.
1851                         $text = $linestart . $this->mTemplates[$part1];
1852                         $found = true;
1853
1854                         # Infinite loop test
1855                         if ( isset( $this->mTemplatePath[$part1] ) ) {
1856                                 $noparse = true;
1857                                 $found = true;
1858                                 $text .= '<!-- WARNING: template loop detected -->';
1859                         }
1860                 }
1861
1862                 # Load from database
1863                 $itcamefromthedatabase = false;
1864                 if ( !$found ) {
1865                         $ns = NS_TEMPLATE;
1866                         $part1 = $this->maybeDoSubpageLink( $part1, $subpage='' );
1867                         if ($subpage !== '') {
1868                                 $ns = $this->mTitle->getNamespace();
1869                         }
1870                         $title = Title::newFromText( $part1, $ns );
1871                         if ( !is_null( $title ) && !$title->isExternal() ) {
1872                                 # Check for excessive inclusion
1873                                 $dbk = $title->getPrefixedDBkey();
1874                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1875                                         # This should never be reached.
1876                                         $article = new Article( $title );
1877                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1878                                         if ( $articleContent !== false ) {
1879                                                 $found = true;
1880                                                 $text = $linestart . $articleContent;
1881                                                 $itcamefromthedatabase = true;
1882                                         }
1883                                 }
1884
1885                                 # If the title is valid but undisplayable, make a link to it
1886                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1887                                         $text = $linestart . '[['.$title->getPrefixedText().']]';
1888                                         $found = true;
1889                                 }
1890
1891                                 # Template cache array insertion
1892                                 $this->mTemplates[$part1] = $text;
1893                         }
1894                 }
1895
1896                 # Recursive parsing, escaping and link table handling
1897                 # Only for HTML output
1898                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1899                         $text = wfEscapeWikiText( $text );
1900                 } elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
1901                         # Clean up argument array
1902                         $assocArgs = array();
1903                         $index = 1;
1904                         foreach( $args as $arg ) {
1905                                 $eqpos = strpos( $arg, '=' );
1906                                 if ( $eqpos === false ) {
1907                                         $assocArgs[$index++] = $arg;
1908                                 } else {
1909                                         $name = trim( substr( $arg, 0, $eqpos ) );
1910                                         $value = trim( substr( $arg, $eqpos+1 ) );
1911                                         if ( $value === false ) {
1912                                                 $value = '';
1913                                         }
1914                                         if ( $name !== false ) {
1915                                                 $assocArgs[$name] = $value;
1916                                         }
1917                                 }
1918                         }
1919
1920                         # Add a new element to the templace recursion path
1921                         $this->mTemplatePath[$part1] = 1;
1922
1923                         $text = $this->strip( $text, $this->mStripState );
1924                         $text = $this->removeHTMLtags( $text );
1925                         $text = $this->replaceVariables( $text, $assocArgs );
1926
1927                         # Resume the link cache and register the inclusion as a link
1928                         if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
1929                                 $wgLinkCache->addLinkObj( $title );
1930                         }
1931
1932                         # If the template begins with a table or block-level
1933                         # element, it should be treated as beginning a new line.
1934                         if ($linestart !== '\n' && preg_match('/^({\\||:|;|#|\*)/', $text)) {
1935                                 $text = "\n" . $text;
1936                         }
1937                 }
1938
1939                 # Empties the template path
1940                 $this->mTemplatePath = array();
1941                 if ( !$found ) {
1942                         return $matches[0];
1943                 } else {
1944                         # replace ==section headers==
1945                         # XXX this needs to go away once we have a better parser.
1946                         if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
1947                                 if( !is_null( $title ) )
1948                                         $encodedname = base64_encode($title->getPrefixedDBkey());
1949                                 else
1950                                         $encodedname = base64_encode("");
1951                                 $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
1952                                         PREG_SPLIT_DELIM_CAPTURE);
1953                                 $text = '';
1954                                 $nsec = 0;
1955                                 for( $i = 0; $i < count($m); $i += 2 ) {
1956                                         $text .= $m[$i];
1957                                         if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
1958                                         $hl = $m[$i + 1];
1959                                         if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
1960                                                 $text .= $hl;
1961                                                 continue;
1962                                         }
1963                                         preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
1964                                         $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
1965                                                 . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];
1966
1967                                         $nsec++;
1968                                 }
1969                         }
1970                 }
1971
1972                 # Empties the template path
1973                 $this->mTemplatePath = array();
1974                 if ( !$found ) {
1975                         return $matches[0];
1976                 } else {
1977                         return $text;
1978                 }
1979         }
1980
1981         /**
1982          * Triple brace replacement -- used for template arguments
1983          * @access private
1984          */
1985         function argSubstitution( $matches ) {
1986                 $arg = trim( $matches[1] );
1987                 $text = $matches[0];
1988                 $inputArgs = end( $this->mArgStack );
1989
1990                 if ( array_key_exists( $arg, $inputArgs ) ) {
1991                         $text = $inputArgs[$arg];
1992                 }
1993
1994                 return $text;
1995         }
1996
1997         /**
1998          * Returns true if the function is allowed to include this entity
1999          * @access private
2000          */
2001         function incrementIncludeCount( $dbk ) {
2002                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
2003                         $this->mIncludeCount[$dbk] = 0;
2004                 }
2005                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
2006                         return true;
2007                 } else {
2008                         return false;
2009                 }
2010         }
2011
2012
2013         /**
2014          * Cleans up HTML, removes dangerous tags and attributes, and
2015          * removes HTML comments
2016          * @access private
2017          */
2018         function removeHTMLtags( $text ) {
2019                 global $wgUseTidy, $wgUserHtml;
2020                 $fname = 'Parser::removeHTMLtags';
2021                 wfProfileIn( $fname );
2022
2023                 if( $wgUserHtml ) {
2024                         $htmlpairs = array( # Tags that must be closed
2025                                 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
2026                                 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
2027                                 'strike', 'strong', 'tt', 'var', 'div', 'center',
2028                                 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
2029                                 'ruby', 'rt' , 'rb' , 'rp', 'p'
2030                         );
2031                         $htmlsingle = array(
2032                                 'br', 'hr', 'li', 'dt', 'dd'
2033                         );
2034                         $htmlnest = array( # Tags that can be nested--??
2035                                 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
2036                                 'dl', 'font', 'big', 'small', 'sub', 'sup'
2037                         );
2038                         $tabletags = array( # Can only appear inside table
2039                                 'td', 'th', 'tr'
2040                         );
2041                 } else {
2042                         $htmlpairs = array();
2043                         $htmlsingle = array();
2044                         $htmlnest = array();
2045                         $tabletags = array();
2046                 }
2047
2048                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
2049                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
2050
2051                 $htmlattrs = $this->getHTMLattrs () ;
2052
2053                 # Remove HTML comments
2054                 $text = $this->removeHTMLcomments( $text );
2055
2056                 $bits = explode( '<', $text );
2057                 $text = array_shift( $bits );
2058                 if(!$wgUseTidy) {
2059                         $tagstack = array(); $tablestack = array();
2060                         foreach ( $bits as $x ) {
2061                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
2062                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2063                                 $x, $regs );
2064                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2065                                 error_reporting( $prev );
2066
2067                                 $badtag = 0 ;
2068                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2069                                         # Check our stack
2070                                         if ( $slash ) {
2071                                                 # Closing a tag...
2072                                                 if ( ! in_array( $t, $htmlsingle ) &&
2073                                                 ( $ot = @array_pop( $tagstack ) ) != $t ) {
2074                                                         @array_push( $tagstack, $ot );
2075                                                         $badtag = 1;
2076                                                 } else {
2077                                                         if ( $t == 'table' ) {
2078                                                                 $tagstack = array_pop( $tablestack );
2079                                                         }
2080                                                         $newparams = '';
2081                                                 }
2082                                         } else {
2083                                                 # Keep track for later
2084                                                 if ( in_array( $t, $tabletags ) &&
2085                                                 ! in_array( 'table', $tagstack ) ) {
2086                                                         $badtag = 1;
2087                                                 } else if ( in_array( $t, $tagstack ) &&
2088                                                 ! in_array ( $t , $htmlnest ) ) {
2089                                                         $badtag = 1 ;
2090                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
2091                                                         if ( $t == 'table' ) {
2092                                                                 array_push( $tablestack, $tagstack );
2093                                                                 $tagstack = array();
2094                                                         }
2095                                                         array_push( $tagstack, $t );
2096                                                 }
2097                                                 # Strip non-approved attributes from the tag
2098                                                 $newparams = $this->fixTagAttributes($params);
2099
2100                                         }
2101                                         if ( ! $badtag ) {
2102                                                 $rest = str_replace( '>', '&gt;', $rest );
2103                                                 $text .= "<$slash$t $newparams$brace$rest";
2104                                                 continue;
2105                                         }
2106                                 }
2107                                 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2108                         }
2109                         # Close off any remaining tags
2110                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2111                                 $text .= "</$t>\n";
2112                                 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2113                         }
2114                 } else {
2115                         # this might be possible using tidy itself
2116                         foreach ( $bits as $x ) {
2117                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2118                                 $x, $regs );
2119                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2120                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2121                                         $newparams = $this->fixTagAttributes($params);
2122                                         $rest = str_replace( '>', '&gt;', $rest );
2123                                         $text .= "<$slash$t $newparams$brace$rest";
2124                                 } else {
2125                                         $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2126                                 }
2127                         }
2128                 }
2129                 wfProfileOut( $fname );
2130                 return $text;
2131         }
2132
2133         /**
2134          * Remove '<!--', '-->', and everything between.
2135          * To avoid leaving blank lines, when a comment is both preceded
2136          * and followed by a newline (ignoring spaces), trim leading and
2137          * trailing spaces and one of the newlines.
2138          *
2139          * @access private
2140          */
2141         function removeHTMLcomments( $text ) {
2142                 $fname='Parser::removeHTMLcomments';
2143                 wfProfileIn( $fname );
2144                 while (($start = strpos($text, '<!--')) !== false) {
2145                         $end = strpos($text, '-->', $start + 4);
2146                         if ($end === false) {
2147                                 # Unterminated comment; bail out
2148                                 break;
2149                         }
2150
2151                         $end += 3;
2152
2153                         # Trim space and newline if the comment is both
2154                         # preceded and followed by a newline
2155                         $spaceStart = max($start - 1, 0);
2156                         $spaceLen = $end - $spaceStart;
2157                         while (substr($text, $spaceStart, 1) === ' ' && $spaceStart > 0) {
2158                                 $spaceStart--;
2159                                 $spaceLen++;
2160                         }
2161                         while (substr($text, $spaceStart + $spaceLen, 1) === ' ')
2162                                 $spaceLen++;
2163                         if (substr($text, $spaceStart, 1) === "\n" and substr($text, $spaceStart + $spaceLen, 1) === "\n") {
2164                                 # Remove the comment, leading and trailing
2165                                 # spaces, and leave only one newline.
2166                                 $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
2167                         }
2168                         else {
2169                                 # Remove just the comment.
2170                                 $text = substr_replace($text, '', $start, $end - $start);
2171                         }
2172                 }
2173                 wfProfileOut( $fname );
2174                 return $text;
2175         }
2176
2177         /**
2178          * This function accomplishes several tasks:
2179          * 1) Auto-number headings if that option is enabled
2180          * 2) Add an [edit] link to sections for logged in users who have enabled the option
2181          * 3) Add a Table of contents on the top for users who have enabled the option
2182          * 4) Auto-anchor headings
2183          *
2184          * It loops through all headlines, collects the necessary data, then splits up the
2185          * string and re-inserts the newly formatted headlines.
2186          * @access private
2187          */
2188         /* private */ function formatHeadings( $text, $isMain=true ) {
2189                 global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;
2190
2191                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
2192                 $doShowToc = $this->mOptions->getShowToc();
2193                 $forceTocHere = false;
2194                 if( !$this->mTitle->userCanEdit() ) {
2195                         $showEditLink = 0;
2196                         $rightClickHack = 0;
2197                 } else {
2198                         $showEditLink = $this->mOptions->getEditSection();
2199                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
2200                 }
2201
2202                 # Inhibit editsection links if requested in the page
2203                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
2204                 if( $esw->matchAndRemove( $text ) ) {
2205                         $showEditLink = 0;
2206                 }
2207                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2208                 # do not add TOC
2209                 $mw =& MagicWord::get( MAG_NOTOC );
2210                 if( $mw->matchAndRemove( $text ) ) {
2211                         $doShowToc = 0;
2212                 }
2213
2214                 # never add the TOC to the Main Page. This is an entry page that should not
2215                 # be more than 1-2 screens large anyway
2216                 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
2217                         $doShowToc = 0;
2218                 }
2219
2220                 # Get all headlines for numbering them and adding funky stuff like [edit]
2221                 # links - this is for later, but we need the number of headlines right now
2222                 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2223
2224                 # if there are fewer than 4 headlines in the article, do not show TOC
2225                 if( $numMatches < 4 ) {
2226                         $doShowToc = 0;
2227                 }
2228
2229                 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2230                 # override above conditions and always show TOC at that place
2231                 $mw =& MagicWord::get( MAG_TOC );
2232                 if ($mw->match( $text ) ) {
2233                         $doShowToc = 1;
2234                         $forceTocHere = true;
2235                 } else {
2236                         # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2237                         # override above conditions and always show TOC above first header
2238                         $mw =& MagicWord::get( MAG_FORCETOC );
2239                         if ($mw->matchAndRemove( $text ) ) {
2240                                 $doShowToc = 1;
2241                         }
2242                 }
2243
2244
2245
2246                 # We need this to perform operations on the HTML
2247                 $sk =& $this->mOptions->getSkin();
2248
2249                 # headline counter
2250                 $headlineCount = 0;
2251                 $sectionCount = 0; # headlineCount excluding template sections
2252
2253                 # Ugh .. the TOC should have neat indentation levels which can be
2254                 # passed to the skin functions. These are determined here
2255                 $toclevel = 0;
2256                 $toc = '';
2257                 $full = '';
2258                 $head = array();
2259                 $sublevelCount = array();
2260                 $level = 0;
2261                 $prevlevel = 0;
2262                 foreach( $matches[3] as $headline ) {
2263                         $istemplate = 0;
2264                         $templatetitle = "";
2265                         $templatesection = 0;
2266
2267                         if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
2268                                 $istemplate = 1;
2269                                 $templatetitle = base64_decode($mat[1]);
2270                                 $templatesection = 1 + (int)base64_decode($mat[2]);
2271                                 $headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
2272                         }
2273
2274                         $numbering = '';
2275                         if( $level ) {
2276                                 $prevlevel = $level;
2277                         }
2278                         $level = $matches[1][$headlineCount];
2279                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
2280                                 # reset when we enter a new level
2281                                 $sublevelCount[$level] = 0;
2282                                 $toc .= $sk->tocIndent( $level - $prevlevel );
2283                                 $toclevel += $level - $prevlevel;
2284                         }
2285                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
2286                                 # reset when we step back a level
2287                                 $sublevelCount[$level+1]=0;
2288                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
2289                                 $toclevel -= $prevlevel - $level;
2290                         }
2291                         # count number of headlines for each level
2292                         @$sublevelCount[$level]++;
2293                         if( $doNumberHeadings || $doShowToc ) {
2294                                 $dot = 0;
2295                                 for( $i = 1; $i <= $level; $i++ ) {
2296                                         if( !empty( $sublevelCount[$i] ) ) {
2297                                                 if( $dot ) {
2298                                                         $numbering .= '.';
2299                                                 }
2300                                                 $numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
2301                                                 $dot = 1;
2302                                         }
2303                                 }
2304                         }
2305
2306                         # The canonized header is a version of the header text safe to use for links
2307                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
2308                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
2309                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
2310
2311                         # Remove link placeholders by the link text.
2312                         #     <!--LINK number-->
2313                         # turns into
2314                         #     link text with suffix
2315                         $canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
2316                                                             "\$wgLinkHolders['texts'][\$1]",
2317                                                             $canonized_headline );
2318
2319                         # strip out HTML
2320                         $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2321                         $tocline = trim( $canonized_headline );
2322                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
2323                         $replacearray = array(
2324                                 '%3A' => ':',
2325                                 '%' => '.'
2326                         );
2327                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2328                         $refer[$headlineCount] = $canonized_headline;
2329
2330                         # count how many in assoc. array so we can track dupes in anchors
2331                         @$refers[$canonized_headline]++;
2332                         $refcount[$headlineCount]=$refers[$canonized_headline];
2333
2334                         # Prepend the number to the heading text
2335
2336                         if( $doNumberHeadings || $doShowToc ) {
2337                                 $tocline = $numbering . ' ' . $tocline;
2338
2339                                 # Don't number the heading if it is the only one (looks silly)
2340                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2341                                         # the two are different if the line contains a link
2342                                         $headline=$numbering . ' ' . $headline;
2343                                 }
2344                         }
2345
2346                         # Create the anchor for linking from the TOC to the section
2347                         $anchor = $canonized_headline;
2348                         if($refcount[$headlineCount] > 1 ) {
2349                                 $anchor .= '_' . $refcount[$headlineCount];
2350                         }
2351                         if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
2352                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2353                         }
2354                         if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
2355                                 if ( empty( $head[$headlineCount] ) ) {
2356                                         $head[$headlineCount] = '';
2357                                 }
2358                                 if( $istemplate )
2359                                         $head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
2360                                 else
2361                                         $head[$headlineCount] .= $sk->editSectionLink($sectionCount+1);
2362                         }
2363
2364                         # Add the edit section span
2365                         if( $rightClickHack ) {
2366                                 if( $istemplate )
2367                                         $headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
2368                                 else
2369                                         $headline = $sk->editSectionScript($sectionCount+1,$headline);
2370                         }
2371
2372                         # give headline the correct <h#> tag
2373                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
2374
2375                         $headlineCount++;
2376                         if( !$istemplate )
2377                                 $sectionCount++;
2378                 }
2379
2380                 if( $doShowToc ) {
2381                         $toclines = $headlineCount;
2382                         $toc .= $sk->tocUnindent( $toclevel );
2383                         $toc = $sk->tocTable( $toc );
2384                 }
2385
2386                 # split up and insert constructed headlines
2387
2388                 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2389                 $i = 0;
2390
2391                 foreach( $blocks as $block ) {
2392                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2393                                 # This is the [edit] link that appears for the top block of text when
2394                                 # section editing is enabled
2395
2396                                 # Disabled because it broke block formatting
2397                                 # For example, a bullet point in the top line
2398                                 # $full .= $sk->editSectionLink(0);
2399                         }
2400                         $full .= $block;
2401                         if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2402                         # Top anchor now in skin
2403                                 $full = $full.$toc;
2404                         }
2405
2406                         if( !empty( $head[$i] ) ) {
2407                                 $full .= $head[$i];
2408                         }
2409                         $i++;
2410                 }
2411                 if($forceTocHere) {
2412                         $mw =& MagicWord::get( MAG_TOC );
2413                         return $mw->replace( $toc, $full );
2414                 } else {
2415                         return $full;
2416                 }
2417         }
2418
2419         /**
2420          * Return an HTML link for the "ISBN 123456" text
2421          * @access private
2422          */
2423         function magicISBN( $text ) {
2424                 global $wgLang;
2425                 $fname = 'Parser::magicISBN';
2426                 wfProfileIn( $fname );
2427
2428                 $a = split( 'ISBN ', ' '.$text );
2429                 if ( count ( $a ) < 2 ) {
2430                         wfProfileOut( $fname );
2431                         return $text;
2432                 }
2433                 $text = substr( array_shift( $a ), 1);
2434                 $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
2435
2436                 foreach ( $a as $x ) {
2437                         $isbn = $blank = '' ;
2438                         while ( ' ' == $x{0} ) {
2439                                 $blank .= ' ';
2440                                 $x = substr( $x, 1 );
2441                         }
2442                         if ( $x == '' ) { # blank isbn
2443                                 $text .= "ISBN $blank";
2444                                 continue;
2445                         }
2446                         while ( strstr( $valid, $x{0} ) != false ) {
2447                                 $isbn .= $x{0};
2448                                 $x = substr( $x, 1 );
2449                         }
2450                         $num = str_replace( '-', '', $isbn );
2451                         $num = str_replace( ' ', '', $num );
2452
2453                         if ( '' == $num ) {
2454                                 $text .= "ISBN $blank$x";
2455                         } else {
2456                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
2457                                 $text .= '<a href="' .
2458                                 $titleObj->escapeLocalUrl( 'isbn='.$num ) .
2459                                         "\" class=\"internal\">ISBN $isbn</a>";
2460                                 $text .= $x;
2461                         }
2462                 }
2463                 wfProfileOut( $fname );
2464                 return $text;
2465         }
2466
2467         /**
2468          * Return an HTML link for the "GEO ..." text
2469          * @access private
2470          */
2471         function magicGEO( $text ) {
2472                 global $wgLang, $wgUseGeoMode;
2473                 $fname = 'Parser::magicGEO';
2474                 wfProfileIn( $fname );
2475
2476                 # These next five lines are only for the ~35000 U.S. Census Rambot pages...
2477                 $directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
2478                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2479                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2480                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
2481                 $text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
2482
2483                 $a = split( 'GEO ', ' '.$text );
2484                 if ( count ( $a ) < 2 ) {
2485                         wfProfileOut( $fname );
2486                         return $text;
2487                 }
2488                 $text = substr( array_shift( $a ), 1);
2489                 $valid = '0123456789.+-:';
2490
2491                 foreach ( $a as $x ) {
2492                         $geo = $blank = '' ;
2493                         while ( ' ' == $x{0} ) {
2494                                 $blank .= ' ';
2495                                 $x = substr( $x, 1 );
2496                         }
2497                         while ( strstr( $valid, $x{0} ) != false ) {
2498                                 $geo .= $x{0};
2499                                 $x = substr( $x, 1 );
2500                         }
2501                         $num = str_replace( '+', '', $geo );
2502                         $num = str_replace( ' ', '', $num );
2503
2504                         if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
2505                                 $text .= "GEO $blank$x";
2506                         } else {
2507                                 $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
2508                                 $text .= '<a href="' .
2509                                 $titleObj->escapeLocalUrl( 'coordinates='.$num ) .
2510                                         "\" class=\"internal\">GEO $geo</a>";
2511                                 $text .= $x;
2512                         }
2513                 }
2514                 wfProfileOut( $fname );
2515                 return $text;
2516         }
2517
2518         /**
2519          * Return an HTML link for the "RFC 1234" text
2520          * @access private
2521          * @param string $text text to be processed
2522          */
2523         function magicRFC( $text ) {
2524                 global $wgLang;
2525
2526                 $valid = '0123456789';
2527                 $internal = false;
2528
2529                 $a = split( 'RFC ', ' '.$text );
2530                 if ( count ( $a ) < 2 ) return $text;
2531                 $text = substr( array_shift( $a ), 1);
2532
2533                 /* Check if RFC keyword is preceed by [[.
2534                  * This test is made here cause of the array_shift above
2535                  * that prevent the test to be done in the foreach.
2536                  */
2537                 if(substr($text, -2) == '[[') { $internal = true; }
2538
2539                 foreach ( $a as $x ) {
2540                         /* token might be empty if we have RFC RFC 1234 */
2541                         if($x=='') {
2542                                 $text.='RFC ';
2543                                 continue;
2544                                 }
2545
2546                         $rfc = $blank = '' ;
2547
2548                         /** remove and save whitespaces in $blank */
2549                         while ( $x{0} == ' ' ) {
2550                                 $blank .= ' ';
2551                                 $x = substr( $x, 1 );
2552                         }
2553
2554                         /** remove and save the rfc number in $rfc */
2555                         while ( strstr( $valid, $x{0} ) != false ) {
2556                                 $rfc .= $x{0};
2557                                 $x = substr( $x, 1 );
2558                         }
2559
2560                         if ( $rfc == '') {
2561                                 /* call back stripped spaces*/
2562                                 $text .= "RFC $blank$x";
2563                         } elseif( $internal) {
2564                                 /* normal link */
2565                                 $text .= "RFC $rfc$x";
2566                         } else {
2567                                 /* build the external link*/
2568                                 $url = wfmsg( 'rfcurl' );
2569                                 $url = str_replace( '$1', $rfc, $url);
2570                                 $sk =& $this->mOptions->getSkin();
2571                                 $la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
2572                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
2573                         }
2574
2575                         /* Check if the next RFC keyword is preceed by [[ */
2576                         $internal = (substr($x,-2) == '[[');
2577                 }
2578                 return $text;
2579         }
2580
2581         /**
2582          * Transform wiki markup when saving a page by doing \r\n -> \n
2583          * conversion, substituting signatures, {{subst:}} templates, etc.
2584          *
2585          * @param string $text the text to transform
2586          * @param Title &$title the Title object for the current article
2587          * @param User &$user the User object describing the current user
2588          * @param ParserOptions $options parsing options
2589          * @param bool $clearState whether to clear the parser state first
2590          * @return string the altered wiki markup
2591          * @access public
2592          */
2593         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
2594                 $this->mOptions = $options;
2595                 $this->mTitle =& $title;
2596                 $this->mOutputType = OT_WIKI;
2597
2598                 if ( $clearState ) {
2599                         $this->clearState();
2600                 }
2601
2602                 $stripState = false;
2603                 $pairs = array(
2604                         "\r\n" => "\n",
2605                         );
2606                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2607                 // now with regexes
2608                 /*
2609                 $pairs = array(
2610                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2611                         "/<br *?>/i" => "<br />",
2612                 );
2613                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
2614                 */
2615                 $text = $this->strip( $text, $stripState, false );
2616                 $text = $this->pstPass2( $text, $user );
2617                 $text = $this->unstrip( $text, $stripState );
2618                 $text = $this->unstripNoWiki( $text, $stripState );
2619                 return $text;
2620         }
2621
2622         /**
2623          * Pre-save transform helper function
2624          * @access private
2625          */
2626         function pstPass2( $text, &$user ) {
2627                 global $wgLang, $wgContLang, $wgLocaltimezone, $wgCurParser;
2628
2629                 # Variable replacement
2630                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
2631                 $text = $this->replaceVariables( $text );
2632
2633                 # Signatures
2634                 #
2635                 $n = $user->getName();
2636                 $k = $user->getOption( 'nickname' );
2637                 if ( '' == $k ) { $k = $n; }
2638                 if(isset($wgLocaltimezone)) {
2639                         $oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
2640                 }
2641                 /* Note: this is an ugly timezone hack for the European wikis */
2642                 $d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
2643                   ' (' . date( 'T' ) . ')';
2644                 if(isset($wgLocaltimezone)) putenv('TZ='.$oldtzs);
2645
2646                 $text = preg_replace( '/~~~~~/', $d, $text );
2647                 $text = preg_replace( '/~~~~/', '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
2648                 $text = preg_replace( '/~~~/', '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]", $text );
2649
2650                 # Context links: [[|name]] and [[name (context)|]]
2651                 #
2652                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
2653                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
2654                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
2655                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
2656
2657                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
2658                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
2659                 $p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";                # [[namespace:page|]] and [[:namespace:page|]]
2660                 $p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]
2661                 $context = '';
2662                 $t = $this->mTitle->getText();
2663                 if ( preg_match( $conpat, $t, $m ) ) {
2664                         $context = $m[2];
2665                 }
2666                 $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
2667                 $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
2668                 $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );
2669
2670                 if ( '' == $context ) {
2671                         $text = preg_replace( $p2, '[[\\1]]', $text );
2672                 } else {
2673                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
2674                 }
2675
2676                 # Trim trailing whitespace
2677                 # MAG_END (__END__) tag allows for trailing
2678                 # whitespace to be deliberately included
2679                 $text = rtrim( $text );
2680                 $mw =& MagicWord::get( MAG_END );
2681                 $mw->matchAndRemove( $text );
2682
2683                 return $text;
2684         }
2685
2686         /**
2687          * Set up some variables which are usually set up in parse()
2688          * so that an external function can call some class members with confidence
2689          * @access public
2690          */
2691         function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
2692                 $this->mTitle =& $title;
2693                 $this->mOptions = $options;
2694                 $this->mOutputType = $outputType;
2695                 if ( $clearState ) {
2696                         $this->clearState();
2697                 }
2698         }
2699
2700         /**
2701          * Transform a MediaWiki message by replacing magic variables.
2702          *
2703          * @param string $text the text to transform
2704          * @param ParserOptions $options  options
2705          * @return string the text with variables substituted
2706          * @access public
2707          */
2708         function transformMsg( $text, $options ) {
2709                 global $wgTitle;
2710                 static $executing = false;
2711
2712                 # Guard against infinite recursion
2713                 if ( $executing ) {
2714                         return $text;
2715                 }
2716                 $executing = true;
2717
2718                 $this->mTitle = $wgTitle;
2719                 $this->mOptions = $options;
2720                 $this->mOutputType = OT_MSG;
2721                 $this->clearState();
2722                 $text = $this->replaceVariables( $text );
2723
2724                 $executing = false;
2725                 return $text;
2726         }
2727
2728         /**
2729          * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
2730          * Callback will be called with the text within
2731          * Transform and return the text within
2732          * @access public
2733          */
2734         function setHook( $tag, $callback ) {
2735                 $oldVal = @$this->mTagHooks[$tag];
2736                 $this->mTagHooks[$tag] = $callback;
2737                 return $oldVal;
2738         }
2739 }
2740
2741 /**
2742  * @todo document
2743  * @package MediaWiki
2744  */
class ParserOutput
{
        var $mText,             # The output text
            $mLanguageLinks,    # Array of language links
            $mCategoryLinks,    # Array of category links
            $mContainsOldMagic; # Boolean flag, OR-ed together by merge()
        var $mCacheTime; # Used in ParserCache

        /**
         * Constructor; every argument is optional.
         *
         * @param string $text             output text
         * @param array  $languageLinks    language links
         * @param array  $categoryLinks    category links
         * @param bool   $containsOldMagic initial value of the old-magic flag
         */
        function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = '';
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one returns whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        /**
         * Fold the links and the old-magic flag of another ParserOutput into
         * this one. The text and the cache time are left untouched.
         *
         * @param ParserOutput $other object whose links are appended
         */
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Take the categories from $other. This used to merge in
                # $this->mLanguageLinks, which dropped $other's categories and
                # polluted the category list with language links.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2778
2779 /**
2780  * Set options of the Parser
2781  * @todo document
2782  * @package MediaWiki
2783  */
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # ---- Read accessors, one per option ----

        function getUseTeX() {
                return $this->mUseTeX;
        }

        function getUseDynamicDates() {
                return $this->mUseDynamicDates;
        }

        function getInterwikiMagic() {
                return $this->mInterwikiMagic;
        }

        function getAllowExternalImages() {
                return $this->mAllowExternalImages;
        }

        function getSkin() {
                return $this->mSkin;
        }

        function getDateFormat() {
                return $this->mDateFormat;
        }

        function getEditSection() {
                return $this->mEditSection;
        }

        function getEditSectionOnRightClick() {
                return $this->mEditSectionOnRightClick;
        }

        function getNumberHeadings() {
                return $this->mNumberHeadings;
        }

        function getShowToc() {
                return $this->mShowToc;
        }

        # ---- Write accessors; each returns whatever wfSetVar() returns ----

        function setUseTeX( $x ) {
                return wfSetVar( $this->mUseTeX, $x );
        }

        function setUseDynamicDates( $x ) {
                return wfSetVar( $this->mUseDynamicDates, $x );
        }

        function setInterwikiMagic( $x ) {
                return wfSetVar( $this->mInterwikiMagic, $x );
        }

        function setAllowExternalImages( $x ) {
                return wfSetVar( $this->mAllowExternalImages, $x );
        }

        function setDateFormat( $x ) {
                return wfSetVar( $this->mDateFormat, $x );
        }

        function setEditSection( $x ) {
                return wfSetVar( $this->mEditSection, $x );
        }

        function setEditSectionOnRightClick( $x ) {
                return wfSetVar( $this->mEditSectionOnRightClick, $x );
        }

        function setNumberHeadings( $x ) {
                return wfSetVar( $this->mNumberHeadings, $x );
        }

        function setShowToc( $x ) {
                return wfSetVar( $this->mShowToc, $x );
        }

        /**
         * Store the skin by reference; unlike the other setters this one
         * returns nothing.
         */
        function setSkin( &$x ) {
                $this->mSkin =& $x;
        }

        /**
         * Build a new ParserOptions object initialised from a user.
         * Meant to be called statically.
         *
         * @param User &$user user to take the options from
         * @return ParserOptions
         */
        /* static */ function newFromUser( &$user ) {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        /**
         * Fill every option, partly from site-wide globals and partly from the
         * given user's skin and preferences. A false-ish $userInput is replaced
         * by a fresh User object that is flagged as loaded.
         *
         * @param User &$userInput user to read from, or a false-ish value
         */
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                $fname = 'ParserOptions::initialiseFromUser';
                wfProfileIn( $fname );

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Fetching the skin is profiled as its own section
                wfProfileIn( $fname.'-skin' );
                $this->mSkin =& $user->getSkin();
                wfProfileOut( $fname.'-skin' );

                # Per-user preferences
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );

                wfProfileOut( $fname );
        }

}
2858
2859 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
        # Plain-function entry point for regex callbacks: forwards the match
        # array to braceSubstitution() on the parser held in $wgCurParser.
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->braceSubstitution( $matches );
}
2864
function wfArgSubstitution( $matches ) {
        # Plain-function entry point for regex callbacks: forwards the match
        # array to argSubstitution() on the parser held in $wgCurParser.
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->argSubstitution( $matches );
}
2869
function wfVariableSubstitution( $matches ) {
        # Plain-function entry point for regex callbacks: forwards the match
        # array to variableSubstitution() on the parser held in $wgCurParser.
        $parser =& $GLOBALS['wgCurParser'];
        return $parser->variableSubstitution( $matches );
}
2874
2875 /**
2876  * Return the total number of articles
2877  */
function wfNumberOfArticles() {
        # Let wfLoadSiteStats() fill in the statistics globals, then report the
        # article counter it maintains.
        wfLoadSiteStats();
        return $GLOBALS['wgNumberOfArticles'];
}
2884
2885 /**
2886  * Get various statistics from the database
2887  * @private
2888  */
function wfLoadSiteStats() {
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        # Only query when the article counter still holds -1; any other value
        # makes this a no-op.
        if ( $wgNumberOfArticles != -1 ) {
                return;
        }

        # Read row 1 of site_stats
        $dbr =& wfGetDB( DB_SLAVE );
        $row = $dbr->getArray(
                'site_stats',
                array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' ),
                array( 'ss_row_id' => 1 ),
                'wfLoadSiteStats'
        );

        # A false result leaves all three globals as they were
        if ( $row !== false ) {
                $wgTotalViews = $row->ss_total_views;
                $wgTotalEdits = $row->ss_total_edits;
                $wgNumberOfArticles = $row->ss_good_articles;
        }
}
2908
function wfEscapeHTMLTagsOnly( $in ) {
        # Escape only the double quote and the two angle brackets; everything
        # else, ampersands included, passes through unchanged.
        $entities = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;',
        );
        return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}
2915
2916 ?>