Split link generation, table of contents, and image functions from the
[mediawiki.git] / includes / Parser.php
bloba1f2ae3ea23a5485dfb4d94fb7a3dd4e7959d2a9
1 <?php
3 /**
4 * File for Parser and related classes
6 * @package MediaWiki
7 */
/**
 * Update this version number when the ParserOutput format
 * changes in an incompatible way, so the parser cache
 * can automatically discard old data.
 */
define( 'MW_PARSER_VERSION', '1.4.0' );

/**
 * Variable substitution O(N^2) attack
 *
 * Without countermeasures, it would be possible to attack the parser by saving
 * a page filled with a large number of inclusions of large pages. The size of
 * the generated page would be proportional to the square of the input size.
 * Hence, we limit the number of inclusions of any given page, thus bringing any
 * attack back to O(N).
 */
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

define( 'RLH_FOR_UPDATE', 1 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );

# string parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# prefix for escaping, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29');

# Constants needed for external link processing
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]<>"\\x00-\\x20\\x7F]' );
# Including space
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Captures: 1 = full URL, 2 = protocol, 3 = link text
define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
# The protocol alternation is wrapped in its own non-capturing group so that
# the colon is required for every protocol. Without that inner group the
# pattern reads "http" OR "https:", which lets strings such as
# "httpfoo/bar.png" pass as image URLs. Capture numbering is unchanged.
define( 'EXT_IMAGE_REGEX',
	'/^((?:'.HTTP_PROTOCOLS.'):)'.  # Protocol
	'('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);
61 /**
62 * PHP Parser
64 * Processes wiki markup
66 * <pre>
67 * There are three main entry points into the Parser class:
68 * parse()
69 * produces HTML output
70 * preSaveTransform()
71 * produces altered wiki markup.
72 * transformMsg()
73 * performs brace substitution on MediaWiki messages
75 * Globals used:
76 * objects: $wgLang, $wgDateFormatter, $wgLinkCache
78 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
80 * settings:
81 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
82 * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
83 * $wgLocaltimezone
85 * * only within ParserOptions
86 * </pre>
88 * @package MediaWiki
90 class Parser
92 /**#@+
93 * @access private
95 # Persistent:
96 var $mTagHooks;
98 # Cleared with clearState():
99 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
100 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
102 # Temporary:
103 var $mOptions, $mTitle, $mOutputType,
104 $mTemplates, // cache of already loaded templates, avoids
105 // multiple SQL queries for the same string
106 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
107 // in this path. Used for loop detection.
109 /**#@-*/
/**
 * Constructor
 *
 * Starts with empty tag-hook and template bookkeeping, then resets the
 * per-parse state through clearState().
 *
 * @access public
 */
function Parser() {
	# Persistent across parses
	$this->mTagHooks = array();

	# Template cache and loop-detection path
	$this->mTemplates = array();
	$this->mTemplatePath = array();

	$this->clearState();
}
/**
 * Clear Parser state
 *
 * Replaces the output object with a fresh ParserOutput and resets every
 * member listed as "Cleared with clearState()" to its initial value.
 *
 * @access private
 */
function clearState() {
	# Fresh output container
	$this->mOutput = new ParserOutput;

	# Scalars and flags
	$this->mAutonumber = 0;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mLastSection = "";
	$this->mVariables = false;

	# Collections
	$this->mArgStack = array();
	$this->mIncludeCount = array();
	$this->mStripState = array();
}
/**
 * First pass--just handle <nowiki> sections, pass the rest off
 * to internalParse() which does all the real work.
 *
 * Processing order: strip() -> internalParse() -> unstrip() -> character
 * clean-up -> doBlockLevels() -> replaceLinkHolders() -> language
 * conversion -> unstripNoWiki() -> optional tidy(). The resulting HTML is
 * stored in $this->mOutput.
 *
 * @param string $text        wikitext to convert
 * @param object $title       stored by reference in $this->mTitle
 * @param object $options     stored in $this->mOptions
 * @param bool   $linestart   forwarded to internalParse() and doBlockLevels()
 * @param bool   $clearState  when true, clearState() is called first
 *
 * @access private
 * @return ParserOutput a ParserOutput
 */
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy, $wgContLang;
	$fname = 'Parser::parse';
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );

	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			# (the lookahead captures nothing, so only \1 is referenced)
			'/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# An ampersand is left alone only when it starts a hexadecimal,
			# decimal or named character reference.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );
	$text = $wgContLang->convert($text);

	# nowiki (and raw html) sections go back in last
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if ($wgUseTidy) {
		$text = Parser::tidy($text);
	}

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
/**
 * Get a random string
 *
 * Concatenates the hexadecimal forms of two independent mt_rand() draws,
 * each taken from the range 0..0x7fffffff.
 *
 * @access private
 * @static
 * @return string lowercase hexadecimal digits
 */
function getRandomString() {
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
/**
 * Replaces all occurrences of <$tag>content</$tag> in the text
 * with a random marker and returns the new text. the output parameter
 * $content will be an associative array filled with data on the form
 * $unique_marker => content.
 *
 * If $content is already set, the additional entries will be appended
 * If $tag is set to STRIP_COMMENTS, the function will extract
 * <!-- HTML comments -->
 *
 * An opening tag with no matching closing tag swallows the rest of the
 * text as its content. An opening tag with nothing at all after it is
 * dropped without producing a marker.
 *
 * @param string $tag          tag name, or STRIP_COMMENTS
 * @param string $text         text to scan
 * @param array  $content      (in/out) marker => extracted content
 * @param string $uniq_prefix  prefix for the generated markers
 * @return string the text with each tag pair replaced by its marker
 *
 * @access private
 * @static
 */
function extractTags($tag, $text, &$content, $uniq_prefix = ''){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# Build the delimiter patterns once; the tag name is quoted so that
	# names containing regex metacharacters cannot break the pattern.
	if ( $tag === STRIP_COMMENTS ) {
		$startPattern = '/<!--/';
		$endPattern = '/-->/';
	} else {
		$quotedTag = preg_quote( $tag, '/' );
		$startPattern = "/<\\s*$quotedTag\\s*>/i";
		$endPattern = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $startPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			$text = '';
		} else {
			$q = preg_split( $endPattern, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# No closing tag: everything was consumed as content
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
/**
 * Strips and renders html (when enabled), nowiki, math, pre, gallery,
 * optionally HTML comments, and every registered extension tag.
 *
 * When the output type is OT_HTML the extracted content is rendered;
 * otherwise it is stored wrapped in its original tags so that unstrip()
 * restores the source form.
 * Returns the text, and fills an array with data needed in unstrip()
 * If the $state is already a valid strip state, it adds to the state
 *
 * @param string $text           text to process
 * @param array  $state          (in/out) strip state, keyed by tag type
 * @param bool   $stripcomments  when set, HTML comments <!-- like this -->
 *  will be stripped in addition to other tags. This is important
 *  for section editing, where these comments cause confusion when
 *  counting the sections in the wikisource
 * @return string the text with stripped sections replaced by markers
 *
 * @access private
 */
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();
	$gallery_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# html
	global $wgRawHtml, $wgWhitelistEdit;
	if( $wgRawHtml && $wgWhitelistEdit ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		foreach( $html_content as $marker => $content ) {
			if ($render ) {
				# Raw and unchecked for validity.
				$html_content[$marker] = $content;
			} else {
				$html_content[$marker] = '<html>'.$content.'</html>';
			}
		}
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source, with a proper closing tag
				$math_content[$marker] = '&lt;math&gt;'.$content.'&lt;/math&gt;';
			}
		} else {
			$math_content[$marker] = '<math>'.$content.'</math>';
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = '<pre>'.$content.'</pre>';
		}
	}

	# gallery
	$text = Parser::extractTags('gallery', $text, $gallery_content, $uniq_prefix);
	foreach( $gallery_content as $marker => $content ) {
		# Loaded only when a gallery is actually present
		require_once( 'ImageGallery.php' );
		if ( $render ) {
			$gallery_content[$marker] = Parser::renderImageGallery( $content );
		} else {
			$gallery_content[$marker] = '<gallery>'.$content.'</gallery>';
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = '<!--'.$content.'-->';
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	# Built-in types first; an extension tag cannot override one of them
	$stripped = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
		'gallery' => $gallery_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $stripped as $type => $items ) {
			if ( isset( $state[$type] ) && is_array( $state[$type] ) ) {
				$state[$type] = $state[$type] + $items;
			} else {
				# Type not seen before (e.g. a state started by
				# insertStripItem(), or a newly registered tag hook)
				$state[$type] = $items;
			}
		}
	} else {
		$state = $stripped;
	}
	return $text;
}
/**
 * Restores everything removed by strip() except the 'nowiki' and 'html'
 * sections, which are left for unstripNoWiki().
 *
 * always call unstripNoWiki() after this one
 *
 * @param string $text   text containing strip markers
 * @param array  $state  strip state: type => ( marker => content )
 * @return string the text with markers replaced by their content
 *
 * @access private
 */
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# Iterating a reversed copy (instead of end()/prev()) means a stored
	# value of false cannot cut the loop short.
	foreach ( array_reverse( $state, true ) as $type => $contentDict ) {
		if ( $type === 'nowiki' || $type === 'html' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}
	return $text;
}
/**
 * Restores the 'nowiki' sections and, when $wgRawHtml is enabled, the
 * 'html' sections removed by strip().
 *
 * always call this after unstrip() to preserve the order
 *
 * @param string $text   text containing strip markers
 * @param array  $state  strip state: type => ( marker => content )
 * @return string the text with markers replaced by their content
 *
 * @access private
 */
function unstripNoWiki( $text, &$state ) {
	global $wgRawHtml;

	$types = array( 'nowiki' );
	if ($wgRawHtml) {
		$types[] = 'html';
	}

	foreach ( $types as $type ) {
		# Nothing stored for this type (or strip() was never run)
		if ( empty( $state[$type] ) || !is_array( $state[$type] ) ) {
			continue;
		}
		# Must expand in reverse order, otherwise nested tags will be corrupted
		foreach ( array_reverse( $state[$type], true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}
	return $text;
}
/**
 * Add an item to the strip state
 * Returns the unique tag which must be inserted into the stripped text
 * The tag will be replaced with the original text in unstrip()
 *
 * @param string $text   content to store
 * @param array  $state  (in/out) strip state; created when empty
 * @return string the generated marker
 *
 * @access private
 */
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same set of built-in types that strip() creates, so that a state
		# started here can later be merged into by strip()
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'gallery' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
/**
 * Return allowed HTML attributes
 *
 * @return array list of lowercase attribute names; each name appears once
 *
 * @access private
 */
function getHTMLattrs () {
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		'title', 'align', 'lang', 'dir', 'width', 'height',
		'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
		'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
		/* FONT */ 'type', 'start', 'value', 'compact',
		/* For various lists, mostly deprecated but safe */
		'summary', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan', /* Tables */
		'id', 'class', 'name', 'style' /* For CSS */
	);
	return $htmlattrs ;
}
/**
 * Remove non approved attributes and javascript in css
 *
 * Each name or name=value pair in $t is kept only if the lowercased name
 * is in getHTMLattrs(). Afterwards, if a style attribute appears to
 * contain an "expression", a URL scheme or "url(", every attribute is
 * dropped.
 *
 * @param string $t  raw attribute text of a tag
 * @return string the filtered, trimmed attribute text
 *
 * @access private
 */
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier so that attribute text
	# is never evaluated as PHP code.
	$t = preg_replace_callback(
		'/(\w+)(\s*=\s*([^\s">]+|"[^">]*"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t);

	$t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

/**
 * preg_replace_callback() helper for fixTagAttributes(): decides the fate
 * of a single attribute match.
 *
 * @param array $matches  1 = attribute name, 3 = value (absent when the
 *  attribute has no value)
 * @return string "name" or "name=value" for approved attributes, '' otherwise
 *
 * @access private
 */
function fixTagAttributesCallback( $matches ) {
	if ( !in_array( strtolower( $matches[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	# Trailing unmatched groups are omitted from $matches
	$value = isset( $matches[3] ) ? $matches[3] : '';
	if ( $value !== '' ) {
		return $matches[1] . '=' . $value;
	}
	return $matches[1];
}
/**
 * interface with html tidy, used if $wgUseTidy = true
 *
 * Wraps $text in a minimal XHTML document, pipes it to the configured
 * tidy binary and returns whatever tidy writes to its standard output.
 * If nothing comes back for non-empty input, the original text is
 * returned with an HTML comment noting the failure.
 *
 * @param string $text  HTML fragment to clean
 * @return string tidy's output, or $text plus an error comment
 *
 * @access public
 * @static
 */
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	# Pick tidy's encoding switch; anything unknown, or any mismatch
	# between input and output encoding, falls back to -raw
	$encodingSwitches = array(
		'ISO-8859-1' => ' -latin1',
		'UTF-8' => ' -utf8'
	);
	$outputEncoding = strtoupper($wgOutputEncoding);
	if ( isset( $encodingSwitches[$outputEncoding] ) && $wgInputEncoding == $wgOutputEncoding ) {
		$opts = $encodingSwitches[$outputEncoding];
	} else {
		$opts = ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';

	# stdin and stdout are pipes, stderr is discarded
	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$command = "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts";

	$cleansource = '';
	$process = proc_open($command, $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	}
	return $cleansource;
}
/**
 * parse the wiki syntax used to render tables
 *
 * Works line by line. Four parallel stacks (one entry per currently open
 * table) track whether a cell and a row are open, which cell tag was
 * used, and the attributes waiting for the next row. Tables still open
 * at the end of the text are closed automatically.
 *
 * NOTE(review): $indent_level is a single value, not a stack, so a nested
 * table overwrites the indent of its parent -- confirm whether intended.
 *
 * @param string $t  text possibly containing {| ... |} table markup
 * @return string the text with table markup converted to HTML
 *
 * @access private
 */
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
			# Table start, optionally indented with leading colons
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( '<dl><dd>', $indent_level ) .
				'<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) ) {
			# Table end: close the open cell and row first
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
			# Row separator; whatever follows the dashes is the row's attributes
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
			# $x is a table row
			if ( '|+' == substr ( $x , 0 , 2 ) ) {
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;

			# Loop through each table cell
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Open a row if none is open yet (captions need no row)
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, then remember the new cell's tag
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;

				# Cell parameters
				$y = explode ( '|' , $theline , 2 ) ;
				# Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $y[0], '[[' ) !== false ) {
					$y = array ($theline);
				}
				if ( count ( $y ) == 1 )
					$y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Use the tag that was actually opened (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		array_pop ( $ltr ) ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	#		$t = $this->removeHTMLtags( $t );
	wfProfileOut( $fname );
	return $t ;
}
/**
 * Helper function for parse() that transforms wiki markup into
 * HTML. Only called for $mOutputType == OT_HTML.
 *
 * The passes run in a fixed order: HTML tag removal, variable
 * replacement, horizontal rules, headings, optional date reformatting,
 * quotes, internal links, external links, magic links, tables and
 * finally heading formatting.
 *
 * @param string $text       wikitext, already stripped
 * @param bool   $linestart  not used inside this method
 * @param array  $args       passed to replaceVariables()
 * @param bool   $isMain     passed to formatHeadings()
 * @return string the transformed text
 *
 * @access private
 */
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	global $wgContLang;

	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a rule
	$rulePattern = '/(^|\n)-----*/';
	$text = preg_replace( $rulePattern, '\\1<hr />', $text );

	$text = $this->doHeadings( $text );

	$options =& $this->mOptions;
	if ( $options->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $options->getDateFormat(), $text );
	}

	$text = $this->doAllQuotes( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace("http-noparse://","http://",$text);

	$text = $this->doMagicLinks( $text );
	$text = $this->doTableStuff( $text );
	$text = $this->formatHeadings( $text, $isMain );

	wfProfileOut( $fname );
	return $text;
}
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * Runs magicISBN(), then magicGEO() when $wgUseGeoMode is enabled, then
 * magicRFC() for the "RFC " and "PMID " keywords. $text is modified in
 * place and also returned by reference.
 *
 * @access private
 */
function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );

	if ( !empty( $wgUseGeoMode ) ) {
		$text = $this->magicGEO( $text );
	}

	# keyword => message key handed to magicRFC()
	$rfcStyleLinks = array(
		'RFC ' => 'rfcurl',
		'PMID ' => 'pubmedurl'
	);
	foreach ( $rfcStyleLinks as $keyword => $urlMessage ) {
		$text = $this->magicRFC( $text, $keyword, $urlMessage );
	}

	return $text;
}
/**
 * Parse ^^ tokens and return html
 *
 * Text enclosed in a pair of ^^ markers is wrapped in
 * <small><sup>...</sup></small>. The match is greedy.
 *
 * @param string $text
 * @return string
 *
 * @access private
 */
function doExponent( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname );

	$pattern = '/\^\^(.*)\^\^/';
	$replacement = '<small><sup>\\1</sup></small>';
	$converted = preg_replace( $pattern, $replacement, $text );

	wfProfileOut( $fname );
	return $converted;
}
/**
 * Parse headers and return html
 *
 * Handles heading levels from 6 down to 1, so that longer runs of "="
 * are consumed before shorter ones.
 *
 * @param string $text
 * @return string the text with ==heading== lines turned into <hN> elements
 *
 * @access private
 */
function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );

	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( '=', $level );
		$pattern = '/^' . $marks . '(.+)' . $marks . '(\\s|$)/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>\\2';
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}

	wfProfileOut( $fname );
	return $text;
}
/**
 * Replace single quotes with HTML markup
 *
 * Applies doQuotes() to every line of the text independently.
 *
 * @access private
 * @param string $text
 * @return string the altered text
 */
function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( $line );
	}
	$outtext = implode( "\n", $converted );

	wfProfileOut( $fname );
	return $outtext;
}
/**
 * Helper function for doAllQuotes()
 *
 * Converts the apostrophe mark-up of a single line ('' italics,
 * ''' bold, ''''' both) into <i> and <b> tags, closing any tag that is
 * still open at the end of the line.
 *
 * @param string $text  one line of text
 * @return string the line with apostrophe mark-up converted
 *
 * @access private
 */
function doQuotes( $text ) {
	# Even indexes hold plain text, odd indexes hold runs of apostrophes
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$total = count( $arr );
	if ( $total == 1 ) {
		return $text;
	}

	# First, do some preliminary work. This may shift some apostrophes from
	# being mark-up to being text. It also counts the number of occurrences
	# of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $total; $i += 2 ) {
		$len = strlen( $arr[$i] );
		if ( $len == 4 ) {
			# Four apostrophes: the first is text, the other three are bold
			$arr[$i-1] .= "'";
			$arr[$i] = "'''";
		} else if ( $len > 5 ) {
			# More than five: all but the last five are text
			$arr[$i-1] .= str_repeat( "'", $len - 5 );
			$arr[$i] = "'''''";
		}
		# Runs of five count as one bold and one italics
		switch ( strlen( $arr[$i] ) ) {
			case 2:
				$numitalics++;
				break;
			case 3:
				$numbold++;
				break;
			case 5:
				$numitalics++;
				$numbold++;
				break;
		}
	}

	# If there is an odd number of both bold and italics, it is likely
	# that one of the bold ones was meant to be an apostrophe followed
	# by italics. Which one we cannot know for certain, but it is more
	# likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $total; $i += 2 ) {
			if ( strlen( $arr[$i] ) != 3 ) {
				continue;
			}
			$x1 = substr ($arr[$i-1], -1);
			$x2 = substr ($arr[$i-1], -2, 1);
			if ($x1 == ' ') {
				if ($firstspace == -1) $firstspace = $i;
			} else if ($x2 == ' ') {
				if ($firstsingleletterword == -1) $firstsingleletterword = $i;
			} else {
				if ($firstmultiletterword == -1) $firstmultiletterword = $i;
			}
		}

		# Preference: single-letter word, then multi-letter word, then the
		# first one preceded by a space. All three can be -1, for example
		# when the line contains only one quintuple apostrophe.
		$demote = -1;
		if ($firstsingleletterword > -1) {
			$demote = $firstsingleletterword;
		} else if ($firstmultiletterword > -1) {
			$demote = $firstmultiletterword;
		} else if ($firstspace > -1) {
			$demote = $firstspace;
		}
		if ( $demote > -1 ) {
			$arr [ $demote ] = "''";
			$arr [ $demote-1 ] .= "'";
		}
	}

	# Now let's actually convert our apostrophic mush to HTML!
	# $state names the open tags in opening order ('bi' = <b> then <i>);
	# 'both' means a quintuple was seen and its text is being buffered
	# until we learn which tag has to close first.
	$output = '';
	$buffer = '';
	$state = '';
	for ( $i = 0; $i < $total; $i++ ) {
		$r = $arr[$i];
		if ( ( $i % 2 ) == 0 ) {
			if ($state == 'both') {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		switch ( strlen( $r ) ) {
			case 2:
				switch ( $state ) {
					case 'i':
						$output .= '</i>'; $state = '';
						break;
					case 'bi':
						$output .= '</i>'; $state = 'b';
						break;
					case 'ib':
						$output .= '</b></i><b>'; $state = 'b';
						break;
					case 'both':
						$output .= '<b><i>'.$buffer.'</i>'; $state = 'b';
						break;
					default: # $state can be 'b' or ''
						$output .= '<i>'; $state .= 'i';
				}
				break;
			case 3:
				switch ( $state ) {
					case 'b':
						$output .= '</b>'; $state = '';
						break;
					case 'bi':
						$output .= '</i></b><i>'; $state = 'i';
						break;
					case 'ib':
						$output .= '</b>'; $state = 'i';
						break;
					case 'both':
						$output .= '<i><b>'.$buffer.'</b>'; $state = 'i';
						break;
					default: # $state can be 'i' or ''
						$output .= '<b>'; $state .= 'b';
				}
				break;
			case 5:
				switch ( $state ) {
					case 'b':
						$output .= '</b><i>'; $state = 'i';
						break;
					case 'i':
						$output .= '</i><b>'; $state = 'b';
						break;
					case 'bi':
						$output .= '</i></b>'; $state = '';
						break;
					case 'ib':
						$output .= '</b></i>'; $state = '';
						break;
					case 'both':
						$output .= '<i><b>'.$buffer.'</b></i>'; $state = '';
						break;
					default: # ($state == '')
						$buffer = ''; $state = 'both';
				}
				break;
		}
	}

	# Now close all remaining tags. Notice that the order is important.
	switch ( $state ) {
		case 'b':
			$output .= '</b>';
			break;
		case 'ib':
			$output .= '</b></i>';
			break;
		case 'i':
			$output .= '</i>';
			break;
		case 'bi':
			$output .= '</i></b>';
			break;
		case 'both':
			$output .= '<b><i>'.$buffer.'</i></b>';
			break;
	}
	return $output;
}
/**
 * Replace external links
 *
 * Handles bracketed links of the form [url] and [url text]; the text
 * between bracketed links is passed to replaceFreeExternalLinks().
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run maintenance/parserTests.php if you change this code.
 *
 * @param string $text
 * @return string the text with external links converted to HTML
 *
 * @access private
 */
function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	global $wgContLang;
	$linktrail = $wgContLang->linkTrail();

	# Each match contributes four entries: URL, protocol, link text, and
	# the plain text that follows up to the next bracketed link
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	$i = 0;
	while ( $i<count( $bits ) ) {
		$url = $bits[$i++];
		$protocol = $bits[$i++];
		$text = $bits[$i++];
		$trail = $bits[$i++];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
			$text = substr($url, $m2[0][1]) . ' ' . $text;
			$url = substr($url, 0, $m2[0][1]);
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';

		# No link text, e.g. [http://domain.tld/some.link]
		if ( $text == '' ) {
			# Autonumber if allowed
			if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
				$text = '[' . ++$this->mAutonumber . ']';
			} else {
				# Otherwise just use the URL
				$text = htmlspecialchars( $url );
			}
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			if ( preg_match( $linktrail, $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		}

		$encUrl = htmlspecialchars( $url );
		# Bit in parentheses showing the URL for the printable version
		if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '!' ) . "/?$!", $url ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			if ( ! $sk->suppressUrlExpansion() ) {
				# $encUrl is already escaped; escaping it again would show
				# "&amp;" in the printed URL instead of "&"
				$paren = "<span class='urlexpansion'> (<i>" . $encUrl . "</i>)</span>";
			} else {
				$paren = '';
			}
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		# The raw URL is handed to the skin here, not $encUrl
		$s .= $sk->makeExternalLink( $url, $text, false ) . $dtrail. $paren . $trail;
	}

	wfProfileOut( $fname );
	return $s;
}
/**
 * Replace anything that looks like a URL with a link
 *
 * Splits the text on every known protocol prefix. When URL-like
 * characters follow, trailing punctuation is moved out of the URL and the
 * URL is turned into an external image or an external link.
 *
 * @param string $text
 * @return string the text with free URLs converted to HTML
 *
 * @access private
 */
function replaceFreeExternalLinks( $text ) {
	$fname = 'Parser::replaceFreeExternalLinks';
	wfProfileIn( $fname );

	$bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$s = array_shift( $bits );
	$i = 0;

	$sk =& $this->mOptions->getSkin();

	while ( $i < count( $bits ) ){
		$protocol = $bits[$i++];
		$remainder = $bits[$i++];

		if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Found some characters after the protocol that look promising
			$url = $protocol . $m[1];
			$trail = $m[2];

			# The characters '<' and '>' (which were escaped by
			# removeHTMLtags()) should not be included in
			# URLs, per RFC 2396.
			if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
				$trail = substr($url, $m2[0][1]) . $trail;
				$url = substr($url, 0, $m2[0][1]);
			}

			# Move trailing punctuation to $trail
			# NOTE(review): inside single quotes '\.' is a backslash plus a
			# dot, so a trailing backslash also counts as punctuation --
			# confirm whether that is intended.
			$sep = ',;\.:!?';
			# If there is no left bracket, then consider right brackets fair game too
			if ( strpos( $url, '(' ) === false ) {
				$sep .= ')';
			}

			$numSepChars = strspn( strrev( $url ), $sep );
			if ( $numSepChars ) {
				$trail = substr( $url, -$numSepChars ) . $trail;
				$url = substr( $url, 0, -$numSepChars );
			}

			# Replace &amp; from obsolete syntax with &.
			# All HTML entities will be escaped by makeExternalLink()
			# or maybeMakeImageLink()
			$url = str_replace( '&amp;', '&', $url );

			# Is this an external image?
			$text = $this->maybeMakeImageLink( $url );
			if ( $text === false ) {
				# Not an image, make a link
				$text = $sk->makeExternalLink( $url, $url );
			}
			$s .= $text . $trail;
		} else {
			# Protocol prefix with nothing usable after it: leave as is
			$s .= $protocol . $remainder;
		}
	}
	# Pass the same name given to wfProfileIn() above
	wfProfileOut( $fname );
	return $s;
}
/**
 * Render $url as an inline image if external images are enabled and the
 * URL matches EXT_IMAGE_REGEX.
 *
 * @param string $url the candidate URL
 * @return mixed the image HTML from the skin, or false if no image was made
 * @access private
 */
function maybeMakeImageLink( $url ) {
	$skin =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}

	# Image found
	return $skin->makeImage( htmlspecialchars( $url ) );
}
/**
 * Process [[ ]] wikilinks
 *
 * The text is split on '[[' and every fragment is matched against the
 * "[[target|text]]trail" form. Depending on the target title the fragment
 * becomes a language link, an image, a category registration, a self link,
 * a media/special link or an ordinary internal link. Fragments that cannot
 * be interpreted are written back to the output unchanged.
 *
 * @param string $s the wikitext to process
 * @return string the text with internal links replaced
 * @access private
 */
function replaceInternalLinks( $s ) {
	global $wgContLang, $wgLinkCache, $wgUseOldExistenceCheck;
	static $fname = 'Parser::replaceInternalLinks' ;

	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }

	$sk =& $this->mOptions->getSkin();
	# "Post-parse link colour check" works only on wiki text since it's now
	# in Parser. Enable it, then disable it when we're done.
	$saveParseColour = $sk->postParseLinkColour( !$wgUseOldExistenceCheck );

	#split the entire text string on occurences of [[
	$a = explode( '[[', ' ' . $s );
	#get the first element (all text up to first [[), and remove the space we added
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match cases where there is no "]]", which might still be images
	static $e1_img = FALSE;
	if ( !$e1_img ) { $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $useLinkPrefixExtension ) {
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$selflink = $this->mTitle->getPrefixedText();
	wfProfileOut( $fname.'-setup' );

	$checkVariantLink = sizeof($wgContLang->getVariants())>1;
	$useSubpages = $this->areSubpagesAllowed();

	# Loop for each link
	for ($k = 0; isset( $a[$k] ); $k++) {
		$line = $a[$k];
		if ( $useLinkPrefixExtension ) {
			wfProfileIn( $fname.'-prefixhandling' );
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
			wfProfileOut( $fname.'-prefixhandling' );
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} elseif( preg_match($e1_img, $line, $m) ) { # Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if (preg_match('/^((?:'.URL_PROTOCOLS.'):)/', $m[1])) {
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		# Make subpage if necessary
		if( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $m[1], $text );
		} else {
			$link = $m[1];
		}

		# A leading ':' forces a plain link (no image/category/interlanguage magic)
		$noforce = (substr($m[1], 0, 1) != ':');
		if (!$noforce) {
			# Strip off leading ':'
			$link = substr($link, 1);
		}

		$nt =& Title::newFromText( $this->unstripNoWiki($link, $this->mStripState) );
		if( !$nt ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		#check other language variants of the link
		#if the article does not exist
		if( $checkVariantLink
			&& $nt->getArticleID() == 0 ) {
			$wgContLang->findVariantLink($link, $nt);
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();

		if ($might_be_img) { # if this is actually an invalid link
			if ($ns == NS_IMAGE && $noforce) { #but might be an image
				$found = false;
				while (isset ($a[$k+1]) ) {
					#look at the next 'line' to see if we can close it there
					# array_shift() takes its argument by reference, so the
					# spliced-out element needs a real variable to live in.
					$spliced = array_splice( $a, $k + 1, 1 );
					$next_line = array_shift( $spliced );
					if( preg_match("/^(.*?]].*?)]](.*)$/sD", $next_line, $m) ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= '[[' . $m[1];
						$trail = $m[2];
						break;
					} elseif( preg_match("/^.*?]].*$/sD", $next_line, $m) ) {
						#if there's exactly one ]] that's fine, we'll keep looking
						$text .= '[[' . $m[0];
					} else {
						#if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					#but don't ignore what might be perfectly normal links in the text we've examined
					$text = $this->replaceInternalLinks($text);
					$s .= $prefix . '[[' . $link . '|' . $text;
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { #it's not an image, so output it raw
				$s .= $prefix . '[[' . $link . '|' . $text;
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( '' == $text );
		if( $wasblank ) $text = $link;


		# Link not escaped by : , create the various objects
		if( $noforce ) {

			# Interwikis
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
				array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}

			if ( $ns == NS_IMAGE ) {
				wfProfileIn( "$fname-image" );

				# recursively parse links inside the image caption
				# actually, this will parse them in any other parameters, too,
				# but it might be hard to fix that, and it doesn't matter ATM
				$text = $this->replaceExternalLinks($text);
				$text = $this->replaceInternalLinks($text);

				# replace the image with a link-holder so that replaceExternalLinks() can't mess with it
				$s .= $prefix . $this->insertStripItem( $sk->makeImageLinkObj( $nt, $text ), $this->mStripState ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );

				wfProfileOut( "$fname-image" );
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				wfProfileIn( "$fname-category" );
				$t = $nt->getText();

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$pPLC=$sk->postParseLinkColour();
				$sk->postParseLinkColour( false );
				$t = $sk->makeLinkObj( $nt, $t, '', '' , $prefix );
				$sk->postParseLinkColour( $pPLC );
				$wgLinkCache->resume();

				# Without explicit link text the sort key is derived from this page's own title
				if ( $wasblank ) {
					if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
						$sortkey = $this->mTitle->getText();
					} else {
						$sortkey = $this->mTitle->getPrefixedText();
					}
				} else {
					$sortkey = $text;
				}
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->addCategoryLink( $t );
				$s .= $prefix . $trail ;

				wfProfileOut( "$fname-category" );
				continue;
			}
		}

		if( ( $nt->getPrefixedText() === $selflink ) &&
			( $nt->getFragment() === '' ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# Special and Media are pseudo-namespaces; no pages actually exist in them
		if( $ns == NS_MEDIA ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text, true ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == NS_SPECIAL ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	# Restore the skin's link-colour mode that was saved during setup
	$sk->postParseLinkColour( $saveParseColour );
	wfProfileOut( $fname );
	return $s;
}
/**
 * Return true if subpage links should be expanded on this page.
 *
 * The answer is looked up in $wgNamespacesWithSubpages under the namespace
 * of the title currently being parsed; a missing or empty entry means "no".
 *
 * @return bool
 */
function areSubpagesAllowed() {
	global $wgNamespacesWithSubpages;

	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return isset( $wgNamespacesWithSubpages[$namespace] )
		&& (bool)$wgNamespacesWithSubpages[$namespace];
}
/**
 * Handle link to subpage if necessary
 *
 * Valid link forms:
 *   Foobar     -- normal
 *   :Foobar    -- override special treatment of prefix (images, language links)
 *   /Foobar    -- convert to CurrentPage/Foobar
 *   /Foobar/   -- convert to CurrentPage/Foobar, strip the initial / from text
 *   ../        -- convert to CurrentPage, from CurrentPage/CurrentSubPage
 *   ../Foobar  -- convert to CurrentPage/Foobar, from CurrentPage/CurrentSubPage
 *
 * Nothing is rewritten unless areSubpagesAllowed() is true for the page
 * being parsed.
 *
 * @param string $target the source of the link
 * @param string &$text the link text, modified as necessary (only when it
 *                      is passed in as an empty string)
 * @return string the full name of the link
 * @access private
 */
function maybeDoSubpageLink($target, &$text) {
	$fname = 'Parser::maybeDoSubpageLink';
	wfProfileIn( $fname );
	$ret = $target; # default return value is no change

	# Some namespaces don't allow subpages,
	# so only perform processing if subpages are allowed
	if( $this->areSubpagesAllowed() ) {
		# Look at the first character
		if( $target != '' && substr( $target, 0, 1 ) == '/' ) {
			# / at end means we don't want the slash to be shown
			if( substr( $target, -1, 1 ) == '/' ) {
				# Cast: for a lone "/" substr() returns false before PHP 8,
				# which would otherwise leak into $text.
				$target = (string)substr( $target, 1, -1 );
				$noslash = $target;
			} else {
				$noslash = substr( $target, 1 );
			}

			$ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
			if( '' === $text ) {
				$text = $target;
			} # this might be changed for ugliness reasons
		} else {
			# check for .. subpage backlinks
			$dotdotcount = 0;
			$nodotdot = $target;
			while( strncmp( $nodotdot, "../", 3 ) == 0 ) {
				++$dotdotcount;
				$nodotdot = substr( $nodotdot, 3 );
			}
			if($dotdotcount > 0) {
				$exploded = explode( '/', $this->mTitle->getPrefixedText() );
				if( count( $exploded ) > $dotdotcount ) { # not allowed to go below top level page
					$ret = implode( '/', array_slice( $exploded, 0, -$dotdotcount ) );
					# / at the end means don't show full path
					if( substr( $nodotdot, -1, 1 ) == '/' ) {
						$nodotdot = substr( $nodotdot, 0, -1 );
						if( '' === $text ) {
							$text = $nodotdot;
						}
					}
					$nodotdot = trim( $nodotdot );
					if( $nodotdot != '' ) {
						$ret .= '/' . $nodotdot;
					}
				}
			}
		}
	}

	wfProfileOut( $fname );
	return $ret;
}
/**#@+
 * Used by doBlockLevels()
 * @access private
 */
# closeParagraph() returns the closing tag (plus newline) for the section
# recorded in mLastSection, or '' if none is open, and resets both
# mLastSection and mInPre.
/* private */ function closeParagraph() {
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mInPre = false;
	$this->mLastSection = '';

	return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters at which they agree.
# Characters are read with [] offsets; the {} offset form was removed
# from the language in PHP 8.
/* private */ function getCommon( $st1, $st2 ) {
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.

# openList() first closes any open paragraph, then appends the opening tags
# for the list type selected by $char. For ';' it also flags that a <dt> is
# open. An unknown character yields only an error comment: the
# paragraph-closing output is discarded in that case.
/* private */ function openList( $char ) {
	$result = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$result .= '<ul><li>';
			break;
		case '#':
			$result .= '<ol><li>';
			break;
		case ':':
			$result .= '<dl><dd>';
			break;
		case ';':
			$result .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			$result = '<!-- ERR 1 -->';
	}

	return $result;
}
# nextItem() returns the markup that ends the current list item and starts
# the next one. For definition lists the closing tag depends on whether a
# <dt> is currently open (mDTopen), and mDTopen is updated to reflect the
# element that has just been opened.
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}

	if ( ':' == $char || ';' == $char ) {
		$close = $this->mDTopen ? '</dt>' : '</dd>';
		$startsTerm = ( ';' == $char );
		$this->mDTopen = $startsTerm;
		return $close . ( $startsTerm ? '<dt>' : '<dd>' );
	}

	return '<!-- ERR 2 -->';
}
# closeList() returns the closing tags, followed by a newline, for the list
# type selected by $char. Only '*', '#' and ':' are recognised; anything
# else (including ';') yields an error comment without a newline.
/* private */ function closeList( $char ) {
	if ( '*' == $char ) {
		return '</li></ul>' . "\n";
	}
	if ( '#' == $char ) {
		return '</li></ol>' . "\n";
	}
	if ( ':' == $char ) {
		if ( !$this->mDTopen ) {
			return '</dd></dl>' . "\n";
		}
		# A term is still open: close it instead of a definition
		$this->mDTopen = false;
		return '</dt></dl>' . "\n";
	}
	return '<!-- ERR 3 -->';
}
1526 /**#@-*/
/**
 * Make lists from lines starting with ':', '*', '#', etc.
 *
 * Works through the text line by line, opening and closing list elements
 * according to each line's prefix characters and wrapping unprefixed lines
 * in <p> or <pre> sections.
 *
 * @param string $text the text to process
 * @param bool $linestart whether the first line of $text begins a line; if
 *                        false, the first line is copied to the output
 *                        untouched
 * @access private
 * @return string the lists rendered as HTML
 */
function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.

	$textLines = explode( "\n", $text );

	$lastPrefix = $output = $lastLine = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Holds a pending paragraph tag while blank lines are seen; false = nothing pending
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match('/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match('/<pre/i', $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 treats ';' and ':' alike, so a term line and a
			# definition line compare as the same list level
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( substr( $pref, -1 ) == ';') {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				if ($this->findColonNoLinks($t, $term, $t2) !== false) {
					$t = $t2;
					$output .= $term . $this->nextItem( ':' );
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if ($this->findColonNoLinks($t, $term, $t2) !== false) {
						$t = $t2;
						$output .= $term . $this->nextItem( ':' );
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			wfProfileIn( "$fname-paragraph" );
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/iS', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() rather than an offset read: $t may be an empty
				# string here, and reading offset 0 of '' is an error.
				if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
					$t = substr( $t, 1 );
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
			wfProfileOut( "$fname-paragraph" );
		}
		# While a paragraph tag is pending the (blank) line itself is not emitted
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close whatever list levels are still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
/**
 * Split up a string on ':', ignoring any occurences inside
 * <a>..</a> or <span>...</span>
 *
 * A colon is accepted as soon as the text in front of it contains no more
 * '<a' than '</a>' and no more '<span' than '</span>'. $before and $after
 * are overwritten for every colon that is examined, so their contents are
 * only meaningful when the return value is not false.
 *
 * @param string $str the string to split
 * @param string &$before set to everything before the ':'
 * @param string &$after set to everything after the ':'
 * @return mixed the position (int) of the ':', or false if none found
 */
function findColonNoLinks($str, &$before, &$after) {
	# I wonder if we should make this count all tags, not just <a>
	# and <span>. That would prevent us from matching a ':' that
	# comes in the middle of italics other such formatting....
	# -- Wil
	$fname = 'Parser::findColonNoLinks';
	wfProfileIn( $fname );

	$colon = strpos( $str, ':', 0 );
	while ( $colon !== false ) {
		$before = substr( $str, 0, $colon );
		$after = substr( $str, $colon + 1 );

		# Skip any ':' within <a> or <span> pairs
		$openAnchors = substr_count( $before, '<a' );
		$openSpans = substr_count( $before, '<span' );
		$closedAnchors = substr_count( $before, '</a>' );
		$closedSpans = substr_count( $before, '</span>' );

		if ( $openAnchors <= $closedAnchors && $openSpans <= $closedSpans ) {
			# Tags are balanced before ':'; ok
			break;
		}

		# Unbalanced: look for the next colon after this one
		$colon = strpos( $str, ':', $colon + 1 );
	}

	wfProfileOut( $fname );
	return $colon;
}
/**
 * Return value of a magic variable (like PAGENAME)
 *
 * Date, time and article-count values are kept in a static per-request
 * cache. Values taken from the current title or from site configuration
 * are returned directly and never cached.
 *
 * @param int $index one of the MAG_* variable IDs
 * @return mixed the value, or NULL for an unknown ID
 * @access private
 */
function getVariableValue( $index ) {
	global $wgContLang, $wgSitename, $wgServer;

	/**
	 * Some of these require message or data lookups and can be
	 * expensive to check many times.
	 */
	static $varCache = array();
	if ( isset( $varCache[$index] ) ) {
		return $varCache[$index];
	}

	$cacheable = true;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = $wgContLang->formatNum( date( 'm' ) );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgContLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgContLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = $wgContLang->formatNum( date('j') );
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			$cacheable = false;
			break;
		case MAG_PAGENAMEE:
			$value = $this->mTitle->getPartialURL();
			$cacheable = false;
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one (patch by Dori)
			$value = $wgContLang->getNsText($this->mTitle->getNamespace());
			$cacheable = false;
			break;
		case MAG_CURRENTDAYNAME:
			$value = $wgContLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = $wgContLang->formatNum( date( 'Y' ) );
			break;
		case MAG_CURRENTTIME:
			$value = $wgContLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = $wgContLang->formatNum( wfNumberOfArticles() );
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			$cacheable = false;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			$cacheable = false;
			break;
		default:
			return NULL;
	}

	if ( $cacheable ) {
		$varCache[$index] = $value;
	}
	return $value;
}
/**
 * initialise the magic variables (like CURRENTMONTHNAME)
 *
 * Rebuilds $this->mVariables from scratch: every ID listed in
 * $wgVariableIDs has its magic word add itself to the array, together with
 * the value reported by getVariableValue().
 *
 * @access private
 */
function initialiseVariables() {
	global $wgVariableIDs;

	$fname = 'Parser::initialiseVariables';
	wfProfileIn( $fname );

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
	}

	wfProfileOut( $fname );
}
/**
 * Replace magic variables, templates, and template arguments
 * with the appropriate text. Templates are substituted recursively,
 * taking care to avoid infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  OT_WIKI: only {{subst:}} templates
 *  OT_MSG: only magic variables
 *  OT_HTML: all templates and magic variables
 *
 * Text longer than MAX_INCLUDE_SIZE bytes is returned unchanged.
 *
 * @param string $text The text to transform
 * @param array $args Key-value pairs representing template parameters to substitute
 * @return string the transformed text
 * @access private
 */
function replaceVariables( $text, $args = array() ) {
	# Prevent too big inclusions
	if( strlen( $text ) > MAX_INCLUDE_SIZE ) {
		return $text;
	}

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$titleChars = Title::legalChars();

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# Variable substitution
	$text = preg_replace_callback( "/{{([$titleChars]*?)}}/", array( &$this, 'variableSubstitution' ), $text );

	if ( $this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI ) {
		# Argument substitution
		$text = preg_replace_callback( "/{{{([$titleChars]*?)}}}/", array( &$this, 'argSubstitution' ), $text );
	}
	# Template substitution
	$regex = '/(\\n|{)?{{(['.$titleChars.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $regex, array( &$this, 'braceSubstitution' ), $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
/**
 * Replace magic variables
 *
 * Callback for preg_replace_callback(). When the output type is OT_WIKI a
 * variable is only replaced if it carries the SUBST prefix.
 *
 * @param array $matches $matches[0] is the whole "{{...}}" match,
 *                       $matches[1] the text between the braces
 * @return string the variable's value, or the original match if it is not
 *                a known variable or must not be substituted
 * @access private
 */
function variableSubstitution( $matches ) {
	# Same 'Parser::' prefix as every other profiling section in this class
	$fname = 'Parser::variableSubstitution';
	wfProfileIn( $fname );
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$skip = false;
	if ( $this->mOutputType == OT_WIKI ) {
		# Do only magic variables prefixed by SUBST
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if (!$mwSubst->matchStartAndRemove( $matches[1] ))
			$skip = true;
		# Note that if we don't substitute the variable below,
		# we don't remove the {{subst:}} magic word, in case
		# it is a template rather than a magic variable.
	}
	if ( !$skip && array_key_exists( $matches[1], $this->mVariables ) ) {
		$text = $this->mVariables[$matches[1]];
		$this->mOutput->mContainsOldMagic = true;
	} else {
		$text = $matches[0];
	}
	wfProfileOut( $fname );
	return $text;
}
# Split template arguments
#
# $argsString is the raw parameter part of a template call including its
# leading '|', or '' when there are no parameters. Returns a 0-based list
# of argument strings.
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	$pieces = explode( '|', substr( $argsString, 1 ) );
	$total = count( $pieces );

	# If any of the arguments contains a '[[' but no ']]', it needs to be
	# merged with the next arg because the '|' character between belongs
	# to the link syntax and not the template parameter syntax.
	$merged = array();
	$pos = 0;
	while ( $pos < $total ) {
		$current = $pieces[$pos++];
		# Keep absorbing pieces until the brackets balance or nothing is left
		while ( $pos < $total
			&& substr_count( $current, '[[' ) != substr_count( $current, ']]' ) ) {
			$current .= '|' . $pieces[$pos++];
		}
		$merged[] = $current;
	}

	return $merged;
}
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * Callback for the template regex built in replaceVariables(). Handles, in
 * this order: literal {{{...}}}, SUBST, MSGNW/MSG, INT, NS, LOCALURL(E),
 * GRAMMAR, the per-parse template cache and finally a database lookup.
 *
 * @param array $matches The parts of the template
 *  $matches[0]: the complete match
 *  $matches[1]: "\n" or "{" if one directly precedes the opening braces
 *  $matches[2]: the title, i.e. the part before the |
 *  $matches[3]: the parameters (including a leading |), if any
 * @return string the text of the template, or $matches[0] unchanged if
 *                nothing could be substituted
 * @access private
 */
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgContLang;
	$fname = 'Parser::braceSubstitution';
	wfProfileIn( $fname );

	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# Need to know if the template comes at the start of a line,
	# to treat the beginning of the template like the beginning
	# of a line for tables and block-level elements.
	$linestart = $matches[1];

	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |

	$args = $this->getTemplateArgs($matches[3]);
	$argc = count( $args );

	# Don't parse {{{}}} because that's only for template arguments
	if ( $linestart === '{' ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
			# One of two possibilities is true:
			# 1) Found SUBST but not in the PST phase
			# 2) Didn't find SUBST and in the PST phase
			# In either case, return without further processing
			$text = $matches[0];
			$found = true;
			$noparse = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
				$text = $linestart . wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $linestart . $wgContLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $linestart . $title->$func( $args[0] );
				} else {
					$text = $linestart . $title->$func();
				}
				$found = true;
			}
		}
	}

	# GRAMMAR
	if ( !$found && $argc == 1 ) {
		$mwGrammar =& MagicWord::get( MAG_GRAMMAR );
		if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
			$text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
			$found = true;
		}
	}

	# Template table test

	# Did we encounter this template already? If yes, it is in the cache
	# and we need to check for loops.
	if ( !$found && isset( $this->mTemplates[$part1] ) ) {
		# set $text to cached message.
		$text = $linestart . $this->mTemplates[$part1];
		$found = true;

		# Infinite loop test
		if ( isset( $this->mTemplatePath[$part1] ) ) {
			$noparse = true;
			$found = true;
			$text .= '<!-- WARNING: template loop detected -->';
		}
	}

	# Load from database
	$itcamefromthedatabase = false;
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# $subpage must be a real variable: maybeDoSubpageLink() writes to
		# its second parameter by reference, and an inline "$subpage=''"
		# argument would never receive that write.
		$subpage = '';
		$part1 = $this->maybeDoSubpageLink( $part1, $subpage );
		if ($subpage !== '') {
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				# This should never be reached.
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $linestart . $articleContent;
					$itcamefromthedatabase = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = $linestart . '[['.$title->getPrefixedText().']]';
				$found = true;
			}

			# Template cache array insertion
			$this->mTemplates[$part1] = $text;
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
		# Clean up argument array
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				# Positional argument: numbered from 1
				$assocArgs[$index++] = $arg;
			} else {
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				if ( $value === false ) {
					$value = '';
				}
				if ( $name !== false ) {
					$assocArgs[$name] = $value;
				}
			}
		}

		# Add a new element to the templace recursion path
		$this->mTemplatePath[$part1] = 1;

		$text = $this->strip( $text, $this->mStripState );
		$text = $this->removeHTMLtags( $text );
		$text = $this->replaceVariables( $text, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
			$wgLinkCache->addLinkObj( $title );
		}

		# If the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# Double quotes are required: '\n' in single quotes is a literal
		# backslash followed by "n" and can never equal $linestart.
		if ($linestart !== "\n" && preg_match('/^({\\||:|;|#|\*)/', $text)) {
			$text = "\n" . $text;
		}
	}

	# Empties the template path
	$this->mTemplatePath = array();

	if ( !$found ) {
		wfProfileOut( $fname );
		return $matches[0];
	}

	# replace ==section headers==
	# XXX this needs to go away once we have a better parser.
	if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
		if( !is_null( $title ) )
			$encodedname = base64_encode($title->getPrefixedDBkey());
		else
			$encodedname = base64_encode("");
		# Odd indices of $m hold the heading lines, even indices the text between them
		$m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
			PREG_SPLIT_DELIM_CAPTURE);
		$text = '';
		$nsec = 0;
		for( $i = 0; $i < count($m); $i += 2 ) {
			$text .= $m[$i];
			if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
			$hl = $m[$i + 1];
			# Headings that already carry a marker are passed through untouched
			if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
				$text .= $hl;
				continue;
			}
			preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
			$text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
				. $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];

			$nsec++;
		}
	}

	wfProfileOut( $fname );
	return $text;
}
/**
 * Triple brace replacement -- used for template arguments.
 *
 * The trimmed argument name is looked up in the argument set currently
 * on top of $this->mArgStack. Names that are not present there leave
 * the matched text unchanged.
 *
 * @param array $matches [0] is the full matched text, [1] the argument name
 * @return string the argument value, or the untouched matched text
 * @access private
 */
function argSubstitution( $matches ) {
	$currentArgs = end( $this->mArgStack );
	$name = trim( $matches[1] );

	if ( !array_key_exists( $name, $currentArgs ) ) {
		# Unknown argument: hand the original text back
		return $matches[0];
	}
	return $currentArgs[$name];
}
/**
 * Count one more inclusion of the given entity and report whether the
 * caller is still allowed to include it, i.e. whether the running count
 * has not exceeded MAX_INCLUDE_REPEAT.
 *
 * @param string $dbk key under which inclusions are counted
 * @return bool true while the count is within the limit
 * @access private
 */
function incrementIncludeCount( $dbk ) {
	$soFar = array_key_exists( $dbk, $this->mIncludeCount )
		? $this->mIncludeCount[$dbk]
		: 0;
	$this->mIncludeCount[$dbk] = $soFar + 1;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
/**
 * Cleans up HTML, removes dangerous tags and attributes, and
 * removes HTML comments.
 *
 * The text is split on '<'. Each piece that parses as a whitelisted tag
 * is re-emitted with its attributes passed through fixTagAttributes();
 * every other piece has its '<' and '>' escaped. Unless $wgUseTidy is
 * set, a tag stack is also maintained so that mismatched closing tags
 * are escaped and tags still open at the end are closed.
 *
 * @param string $text
 * @return string the cleaned text
 * @access private
 */
function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	# Whitelists stay empty when user HTML is switched off
	$htmlpairs = array();
	$htmlsingle = array();
	$htmlnest = array();
	$tabletags = array();
	if ( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	}

	# Table cells and rows are treated like the other non-paired tags
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	$htmlattrs = $this->getHTMLattrs();

	# Remove HTML comments
	$text = $this->removeHTMLcomments( $text );

	# slash, tag name, attributes, closing bracket, trailing text
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

	$bits = explode( '<', $text );
	$text = array_shift( $bits );

	if ( $wgUseTidy ) {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			preg_match( $tagPattern, $x, $regs );
			@list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			$t = strtolower( $t );
			if ( in_array( $t, $htmlelements ) ) {
				$newparams = $this->fixTagAttributes( $params );
				$rest = str_replace( '>', '&gt;', $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= '&lt;' . str_replace( '>', '&gt;', $x );
			}
		}
	} else {
		$tagstack = array();
		$tablestack = array();
		foreach ( $bits as $x ) {
			# Pieces that are not tags leave $regs empty; silence the
			# notices list() raises in that case.
			$prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
			preg_match( $tagPattern, $x, $regs );
			list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			error_reporting( $prev );

			$t = strtolower( $t );
			$badtag = 0;
			if ( in_array( $t, $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( !in_array( $t, $htmlsingle )
						&& ( $ot = @array_pop( $tagstack ) ) != $t
					) {
						# Not the tag on top of the stack: put it back
						@array_push( $tagstack, $ot );
						$badtag = 1;
					} else {
						if ( $t == 'table' ) {
							# Back to the stack that was active outside the table
							$tagstack = array_pop( $tablestack );
						}
						$newparams = '';
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) && !in_array( 'table', $tagstack ) ) {
						$badtag = 1;
					} elseif ( in_array( $t, $tagstack ) && !in_array( $t, $htmlnest ) ) {
						$badtag = 1;
					} elseif ( !in_array( $t, $htmlsingle ) ) {
						if ( $t == 'table' ) {
							# A table starts a fresh stack of its own
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes( $params );
				}
				if ( !$badtag ) {
					$rest = str_replace( '>', '&gt;', $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= '&lt;' . str_replace( '>', '&gt;', $x );
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) {
				$tagstack = array_pop( $tablestack );
			}
		}
	}

	wfProfileOut( $fname );
	return $text;
}
/**
 * Remove '<!--', '-->', and everything between.
 * To avoid leaving blank lines, when a comment is both preceded
 * and followed by a newline (ignoring spaces), trim leading and
 * trailing spaces and one of the newlines.
 *
 * An unterminated comment stops processing; it and everything after it
 * are left in the text.
 *
 * @param string $text
 * @return string the text without HTML comments
 * @access private
 */
function removeHTMLcomments( $text ) {
	$fname = 'Parser::removeHTMLcomments';
	wfProfileIn( $fname );

	for ( $open = strpos( $text, '<!--' ); $open !== false; $open = strpos( $text, '<!--' ) ) {
		$close = strpos( $text, '-->', $open + 4 );
		if ( $close === false ) {
			# Unterminated comment; bail out
			break;
		}
		$close += 3;

		# Widen the span over the spaces on both sides of the comment
		$from = max( $open - 1, 0 );
		$span = $close - $from;
		while ( $from > 0 && substr( $text, $from, 1 ) === ' ' ) {
			$from--;
			$span++;
		}
		while ( substr( $text, $from + $span, 1 ) === ' ' ) {
			$span++;
		}

		$newlineBefore = ( substr( $text, $from, 1 ) === "\n" );
		$newlineAfter = ( substr( $text, $from + $span, 1 ) === "\n" );
		if ( $newlineBefore && $newlineAfter ) {
			# The comment sits on a line of its own: drop the comment,
			# the surrounding spaces and one of the two newlines.
			$text = substr_replace( $text, "\n", $from, $span + 1 );
		} else {
			# Remove just the comment.
			$text = substr_replace( $text, '', $open, $close - $open );
		}
	}

	wfProfileOut( $fname );
	return $text;
}
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * @param string $text HTML containing <h1>..<h6> headlines
 * @param bool $isMain when false, the table of contents is not inserted
 *        after the first block of text
 * @return string the text with rebuilt headlines (and table of contents, if shown)
 * @access private
 */
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, [2] = rest of the opening tag, [3] = headline text
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;
	$sectionCount = 0; # headlineCount excluding template sections

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$istemplate = 0;
		$templatetitle = "";
		$templatesection = 0;

		# Headlines carrying a MWTEMPLATESECTION marker hold a base64-encoded
		# title and section number; decode them and drop the marker.
		if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
			$istemplate = 1;
			$templatetitle = base64_decode($mat[1]);
			$templatesection = 1 + (int)base64_decode($mat[2]);
			$headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
		}

		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		@$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build the dotted number from the non-empty counters up to this level
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		# Feed the unstripped text on; passing $headline again here would
		# discard the result of unstrip() above.
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		$canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
			"\$wgLinkHolders['texts'][\$1]",
			$canonized_headline );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
		$refer[$headlineCount] = $canonized_headline;

		# count how many in assoc. array so we can track dupes in anchors
		@$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			# Repeated headline text: make the anchor unique with a counter
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
			if ( empty( $head[$headlineCount] ) ) {
				$head[$headlineCount] = '';
			}
			if( $istemplate )
				$head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
			else
				$head[$headlineCount] .= $sk->editSectionLink($this->mTitle, $sectionCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			if( $istemplate )
				$headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
			else
				$headline = $sk->editSectionScript($this->mTitle, $sectionCount+1,$headline);
		}

		# give headline the correct <h#> tag
		@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';

		$headlineCount++;
		if( !$istemplate )
			$sectionCount++;
	}

	if( $doShowToc ) {
		$toclines = $headlineCount;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		# __TOC__ is still in the text; put the table of contents in its place
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
/**
 * Return an HTML link for the "ISBN 123456" text
 *
 * Every "ISBN " keyword followed by a run of digits, dashes or capital
 * letters is turned into a link to Special:Booksources. Text that does
 * not contain a usable number is put back unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with ISBN links inserted
 * @access private
 */
function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# The keyword is literal text, so explode() is sufficient. The leading
	# space guarantees a non-empty first piece and is removed again below.
	$a = explode( 'ISBN ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Split off the leading blanks; strspn() copes with an empty or
		# exhausted string, unlike reading $x{0} in a loop.
		$n = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $n );
		$x = (string)substr( $x, $n );
		if ( $x == '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Split off the run of characters allowed in an ISBN
		$n = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $n );
		$x = (string)substr( $x, $n );

		$num = str_replace( '-', '', $isbn );
		$num = str_replace( ' ', '', $num );

		if ( '' == $num ) {
			# Nothing usable (at most dashes): restore the text exactly,
			# including whatever was consumed into $isbn.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
/**
 * Return an HTML link for the "GEO ..." text
 *
 * Every "GEO " keyword followed by colon-separated coordinates is
 * turned into a link to Special:Geo. Text after the keyword that does
 * not form a coordinate pair is put back unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with GEO links inserted
 * @access private
 */
function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# These next five lines are only for the ~35000 U.S. Census Rambot pages...
	# NOTE(review): the two 'South' patterns emit a '+' latitude just like the
	# 'North' ones -- confirm whether that is intended.
	$directions = array ( 'N' => 'North' , 'S' => 'South' , 'E' => 'East' , 'W' => 'West' ) ;
	$text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['N']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['W']}/" , "(GEO +\$1.\$2.\$3:-\$4.\$5.\$6)" , $text ) ;
	$text = preg_replace ( "/(\d+)&deg;(\d+)'(\d+)\" {$directions['S']}, (\d+)&deg;(\d+)'(\d+)\" {$directions['E']}/" , "(GEO +\$1.\$2.\$3:+\$4.\$5.\$6)" , $text ) ;

	# The keyword is literal text, so explode() is sufficient. The leading
	# space guarantees a non-empty first piece and is removed again below.
	$a = explode( 'GEO ', ' '.$text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Split off the leading blanks; strspn() copes with an empty or
		# exhausted string, unlike reading $x{0} in a loop.
		$n = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $n );
		$x = (string)substr( $x, $n );

		# Split off the run of characters allowed in coordinates
		$n = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $n );
		$x = (string)substr( $x, $n );

		$num = str_replace( '+', '', $geo );
		$num = str_replace( ' ', '', $num );

		if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
			# Not a coordinate pair: restore the text exactly, including
			# the characters that were consumed into $geo.
			$text .= "GEO $blank$geo$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
/**
 * Return an HTML link for the "RFC 1234" text
 *
 * A keyword directly preceded by "[[" is taken to be inside an internal
 * link and is not converted to an external link.
 *
 * @access private
 * @param string $text text to be processed
 * @param string $keyword literal keyword (including its trailing space)
 *        that introduces the number
 * @param string $urlmsg name of the message holding the URL pattern;
 *        '$1' in it is replaced by the number
 * @return string the text with links inserted
 */
function magicRFC( $text, $keyword='RFC ', $urlmsg='rfcurl' ) {
	$valid = '0123456789';
	$internal = false;

	# The keyword is literal text, so explode() is sufficient. The leading
	# space guarantees a non-empty first piece and is removed again below.
	$a = explode( $keyword, ' '.$text );
	if ( count ( $a ) < 2 ) {
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );

	/* Check if keyword is preceed by [[.
	 * This test is made here cause of the array_shift above
	 * that prevent the test to be done in the foreach.
	 */
	if ( substr( $text, -2 ) == '[[' ) {
		$internal = true;
	}

	foreach ( $a as $x ) {
		/* token might be empty if we have RFC RFC 1234 */
		if ( $x=='' ) {
			$text.=$keyword;
			continue;
		}

		/** remove and save whitespaces in $blank; strspn() copes with
		 *  a string that is used up, unlike reading $x{0} in a loop */
		$n = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $n );
		$x = (string)substr( $x, $n );

		/** remove and save the rfc number in $id */
		$n = strspn( $x, $valid );
		$id = (string)substr( $x, 0, $n );
		$x = (string)substr( $x, $n );

		if ( $id == '' ) {
			/* call back stripped spaces*/
			$text .= $keyword.$blank.$x;
		} elseif( $internal ) {
			/* normal link */
			$text .= $keyword.$id.$x;
		} else {
			/* build the external link*/
			$url = wfmsg( $urlmsg );
			$url = str_replace( '$1', $id, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
			$text .= "<a href='{$url}'{$la}>{$keyword}{$id}</a>{$x}";
		}

		/* Check if the next RFC keyword is preceed by [[ */
		$internal = ( substr($x,-2) == '[[' );
	}
	return $text;
}
/**
 * Transform wiki markup when saving a page: converts \r\n to \n, then
 * runs the pre-save pass (signatures, {{subst:}} templates, etc.) on the
 * text while stripped sections are set aside.
 *
 * @param string $text the text to transform
 * @param Title &$title the Title object for the current article
 * @param User &$user the User object describing the current user
 * @param ParserOptions $options parsing options
 * @param bool $clearState whether to clear the parser state first
 * @return string the altered wiki markup
 * @access public
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise line endings
	$text = str_replace( "\r\n", "\n", $text );

	# Take the stripped sections out, transform the rest, put them back
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $this->unstripNoWiki( $text, $stripState );
}
/**
 * Pre-save transform helper function
 *
 * Substitutes variables, expands tilde signatures, completes the
 * "pipe trick" link forms ([[|name]], [[name (context)|]], ...) and
 * trims trailing whitespace.
 *
 * @param string $text the text to transform
 * @param User &$user the user whose name and nickname are used for signatures
 * @return string the transformed text
 * @access private
 */
function pstPass2( $text, &$user ) {
	global $wgContLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ='.$wgLocaltimezone );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
	  ' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		# Restore the timezone saved above
		putenv( 'TZ='.$oldtz );
	}

	# The tilde patterns are plain text, so use str_replace(): with
	# preg_replace() a '$0' or '\1' in the nickname would be interpreted
	# as a backreference in the replacement string.
	$userLink = '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
/**
 * Set up some variables which are usually set up in parse()
 * so that an external function can call some class members with confidence
 *
 * @param Title &$title stored by reference as the current title
 * @param ParserOptions $options
 * @param int $outputType value stored in $mOutputType
 * @param bool $clearState whether to reset the parser state as well
 * @access public
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
/**
 * Transform a MediaWiki message by replacing magic variables.
 *
 * A call made while another call is still in progress returns the text
 * unchanged, which guards against infinite recursion.
 *
 * @param string $text the text to transform
 * @param ParserOptions $options  options
 * @return string the text with variables substituted
 * @access public
 */
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}
	return $text;
}
/**
 * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
 * Callback will be called with the text within
 * Transform and return the text within
 *
 * @param string $tag tag name the callback is registered under
 * @param mixed $callback the callback to store for that tag
 * @return mixed the callback previously stored for the tag, or null
 * @access public
 */
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
/**
 * Replace <!--LINK--> link placeholders with actual links, in the buffer
 * Placeholders created in Skin::makeLinkObj()
 * Returns an array of links found, indexed by PDBK:
 *  0 - broken
 *  1 - normal link
 *  2 - stub
 * $options is a bit field, RLH_FOR_UPDATE to select for update
 *
 * @param string &$text buffer holding the placeholders; modified in place
 * @param int $options bit field, see above
 * @return array link state (0, 1 or 2) indexed by prefixed DB key; empty
 *         when $wgUseOldExistenceCheck is set
 */
function replaceLinkHolders( &$text, $options = 0 ) {
	global $wgUser, $wgLinkCache, $wgUseOldExistenceCheck, $wgLinkHolders;
	global $wgInterwikiLinkHolders;
	global $outputReplace;

	if ( $wgUseOldExistenceCheck ) {
		return array();
	}

	$fname = 'Parser::replaceLinkHolders';
	wfProfileIn( $fname );

	$pdbks = array();
	$colours = array();

	#if ( !empty( $tmpLinks[0] ) ) { #TODO
	if ( !empty( $wgLinkHolders['namespaces'] ) ) {
		wfProfileIn( $fname.'-check' );
		$dbr =& wfGetDB( DB_SLAVE );
		$cur = $dbr->tableName( 'cur' );
		$sk = $wgUser->getSkin();
		$threshold = $wgUser->getOption('stubthreshold');

		# Sort by namespace
		asort( $wgLinkHolders['namespaces'] );

		# Generate query
		# One "(cur_namespace=N AND cur_title IN(...))" group per namespace,
		# joined with OR; the sort above keeps each namespace contiguous.
		$query = false;
		foreach ( $wgLinkHolders['namespaces'] as $key => $val ) {
			# Make title object
			$title = $wgLinkHolders['titles'][$key];

			# Skip invalid entries.
			# Result will be ugly, but prevents crash.
			if ( is_null( $title ) ) {
				continue;
			}
			$pdbk = $pdbks[$key] = $title->getPrefixedDBkey();

			# Check if it's in the link cache already
			if ( $wgLinkCache->getGoodLinkID( $pdbk ) ) {
				$colours[$pdbk] = 1;
			} elseif ( $wgLinkCache->isBadLink( $pdbk ) ) {
				$colours[$pdbk] = 0;
			} else {
				# Not in the link cache, add it to the query
				if ( !isset( $current ) ) {
					$current = $val;
					$query =  "SELECT cur_id, cur_namespace, cur_title";
					if ( $threshold > 0 ) {
						$query .= ", LENGTH(cur_text) AS cur_len, cur_is_redirect";
					}
					$query .= " FROM $cur WHERE (cur_namespace=$val AND cur_title IN(";
				} elseif ( $current != $val ) {
					$current = $val;
					$query .= ")) OR (cur_namespace=$val AND cur_title IN(";
				} else {
					$query .= ', ';
				}

				$query .= $dbr->addQuotes( $wgLinkHolders['dbkeys'][$key] );
			}
		}
		if ( $query ) {
			$query .= '))';
			if ( $options & RLH_FOR_UPDATE ) {
				$query .= ' FOR UPDATE';
			}

			$res = $dbr->query( $query, $fname );

			# Fetch data and form into an associative array
			# non-existent = broken
			# 1 = known
			# 2 = stub
			while ( $s = $dbr->fetchObject($res) ) {
				$title = Title::makeTitle( $s->cur_namespace, $s->cur_title );
				$pdbk = $title->getPrefixedDBkey();
				$wgLinkCache->addGoodLink( $s->cur_id, $pdbk );

				if ( $threshold > 0 ) {
					$size = $s->cur_len;
					# Only a short, non-redirect page in namespace 0 is a stub.
					# (This used to test an undefined $length with the comparison
					# reversed, so no link was ever marked as a stub.)
					if ( $s->cur_is_redirect || $s->cur_namespace != 0 || $size >= $threshold ) {
						$colours[$pdbk] = 1;
					} else {
						$colours[$pdbk] = 2;
					}
				} else {
					$colours[$pdbk] = 1;
				}
			}
		}
		wfProfileOut( $fname.'-check' );

		# Construct search and replace arrays
		wfProfileIn( $fname.'-construct' );
		$outputReplace = array();
		foreach ( $wgLinkHolders['namespaces'] as $key => $ns ) {
			$pdbk = $pdbks[$key];
			$searchkey = '<!--LINK '.$key.'-->';
			$title = $wgLinkHolders['titles'][$key];
			if ( empty( $colours[$pdbk] ) ) {
				# Neither cached as good nor returned by the query: broken link
				$wgLinkCache->addBadLink( $pdbk );
				$colours[$pdbk] = 0;
				$outputReplace[$searchkey] = $sk->makeBrokenLinkObj( $title,
								$wgLinkHolders['texts'][$key],
								$wgLinkHolders['queries'][$key] );
			} elseif ( $colours[$pdbk] == 1 ) {
				$outputReplace[$searchkey] = $sk->makeKnownLinkObj( $title,
								$wgLinkHolders['texts'][$key],
								$wgLinkHolders['queries'][$key] );
			} elseif ( $colours[$pdbk] == 2 ) {
				$outputReplace[$searchkey] = $sk->makeStubLinkObj( $title,
								$wgLinkHolders['texts'][$key],
								$wgLinkHolders['queries'][$key] );
			}
		}
		wfProfileOut( $fname.'-construct' );

		# Do the thing
		wfProfileIn( $fname.'-replace' );

		# The callback is handed the replacements through the global $outputReplace
		$text = preg_replace_callback(
			'/(<!--LINK .*?-->)/',
			"outputReplaceMatches",
			$text);
		wfProfileOut( $fname.'-replace' );
	}

	if ( !empty( $wgInterwikiLinkHolders ) ) {
		wfProfileIn( $fname.'-interwiki' );
		$outputReplace = $wgInterwikiLinkHolders;
		$text = preg_replace_callback(
			'/<!--IWLINK (.*?)-->/',
			"outputReplaceMatches",
			$text );
		wfProfileOut( $fname.'-interwiki' );
	}

	wfProfileOut( $fname );
	return $colours;
}
/**
 * Renders an image gallery from a text with one line per image.
 * text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and
 * 'A tree'.
 *
 * Lines that are empty or do not give a valid title are ignored. Every
 * image added is also registered with the link cache.
 *
 * @param string $text gallery description, one image per line
 * @return string HTML of the gallery
 */
function renderImageGallery( $text ) {
	global $wgLinkCache;

	$gallery = new ImageGallery();
	$gallery->setShowBytes( false );
	$gallery->setShowFilename( false );

	foreach ( explode( "\n", $text ) as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $parts );
		if ( count( $parts ) == 0 ) {
			# Skip empty lines
			continue;
		}

		$nt = Title::newFromURL( $parts[1] );
		if ( is_null( $nt ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		$label = isset( $parts[3] ) ? $parts[3] : '';

		# FIXME: Use the full wiki parser and add its links
		# to the page's links.
		$html = $this->mOptions->mSkin->formatComment( $label );

		$gallery->add( Image::newFromTitle( $nt ), $html );
		$wgLinkCache->addImageLinkObj( $nt );
	}
	return $gallery->toHTML();
}
/**
 * Holds the result of a parse: the output text plus the language links,
 * category links and "contains old magic" flag collected with it, and
 * the cache time and parser version used to decide whether a cached
 * copy has expired.
 *
 * @package MediaWiki
 */
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache
	var $mVersion;   # Compatibility check

	/**
	 * @param string $text
	 * @param array $languageLinks list of language links
	 * @param array $categoryLinks category links; the category names are the array keys
	 * @param bool $containsOldMagic
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
		$this->mVersion = MW_PARSER_VERSION;
	}

	function getText()                   { return $this->mText; }
	function getLanguageLinks()          { return $this->mLanguageLinks; }
	# Category names are kept as array keys, see addCategoryLink()
	function getCategoryLinks()          { return array_keys( $this->mCategoryLinks ); }
	function getCacheTime()              { return $this->mCacheTime; }
	function containsOldMagic()          { return $this->mContainsOldMagic; }
	function setText( $text )            { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll )     { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl )     { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t )          { return wfSetVar( $this->mCacheTime, $t ); }
	function addCategoryLink( $c )       { $this->mCategoryLinks[$c] = 1; }

	/**
	 * Merge the links and flags of another ParserOutput into this one.
	 * The text, cache time and version of this object are left alone.
	 *
	 * @param ParserOutput $other
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category names are array keys. The union operator keeps them as they
		# are, whereas array_merge() would renumber all-digit names such as "2004".
		$this->mCategoryLinks = $this->mCategoryLinks + $other->mCategoryLinks;
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

	/**
	 * Return true if this cached output object predates the global or
	 * per-article cache invalidation timestamps, or if it comes from
	 * an incompatible older version.
	 *
	 * @param string $touched the affected article's last touched timestamp
	 * @return bool
	 * @access public
	 */
	function expired( $touched ) {
		global $wgCacheEpoch;
		return $this->getCacheTime() <= $touched ||
		       $this->getCacheTime() <= $wgCacheEpoch ||
		       !isset( $this->mVersion ) ||
		       version_compare( $this->mVersion, MW_PARSER_VERSION, "lt" );
	}
}
/**
 * Set options of the Parser
 *
 * Holds the per-render settings: four values copied from site
 * configuration globals, plus the skin and five preferences read from a
 * User object (see initialiseFromUser()).
 *
 * @todo document
 * @package MediaWiki
 */
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors, grouped per option. Each setter hands the member and the
	# new value to wfSetVar() and returns its result.

	function getUseTeX() { return $this->mUseTeX; }
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

	function getDateFormat() { return $this->mDateFormat; }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

	function getEditSection() { return $this->mEditSection; }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	function getNumberHeadings() { return $this->mNumberHeadings; }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

	function getShowToc() { return $this->mShowToc; }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# The skin is held by reference, so it is bound directly rather than
	# going through wfSetVar(); unlike the other setters this returns nothing.
	function getSkin() { return $this->mSkin; }
	function setSkin( &$x ) { $this->mSkin =& $x; }

	# Get parser options
	# Builds a new ParserOptions and fills it in from the given user.
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Get user options
	# Fills every member: site-wide switches from globals, the skin and the
	# preferences from $userInput. A false-y $userInput is replaced by a
	# fresh User object marked as loaded.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		$profileSection = 'ParserOptions::initialiseFromUser';
		$skinSection = $profileSection.'-skin';
		wfProfileIn( $profileSection );

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Skin lookup is profiled as its own section
		wfProfileIn( $skinSection );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( $skinSection );

		# Per-user preferences: member name => user option name
		$preferenceMap = array(
			'mDateFormat' => 'date',
			'mEditSection' => 'editsection',
			'mEditSectionOnRightClick' => 'editsectiononrightclick',
			'mNumberHeadings' => 'numberheadings',
			'mShowToc' => 'showtoc'
		);
		foreach ( $preferenceMap as $member => $optionName ) {
			$this->$member = $user->getOption( $optionName );
		}

		wfProfileOut( $profileSection );
	}
}
/**
 * Callback function used by Parser::replaceLinkHolders()
 * to substitute link placeholders.
 *
 * @param array $matches match array; element 1 is the key looked up in
 *                       the global $outputReplace table
 * @return mixed the $outputReplace entry for that key, returned by reference
 */
function &outputReplaceMatches( $matches ) {
	global $outputReplace;

	$placeholderKey = $matches[1];
	return $outputReplace[$placeholderKey];
}
/**
 * Return the total number of articles
 *
 * Calls wfLoadSiteStats() first, then returns the global
 * $wgNumberOfArticles as it stands afterwards.
 *
 * @return mixed current value of $wgNumberOfArticles
 */
function wfNumberOfArticles() {
	wfLoadSiteStats();

	global $wgNumberOfArticles;
	return $wgNumberOfArticles;
}
/**
 * Get various statistics from the database
 *
 * Fills the globals $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles
 * from row 1 of the site_stats table. Does nothing when
 * $wgNumberOfArticles already differs from -1, and leaves the globals
 * alone when the row cannot be fetched.
 *
 * @private
 */
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

	# -1 is the "not loaded yet" marker; anything else means there is nothing to do
	if ( -1 != $wgNumberOfArticles ) {
		return;
	}

	$db =& wfGetDB( DB_SLAVE );
	$columns = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
	$conditions = array( 'ss_row_id' => 1 );
	$row = $db->selectRow( 'site_stats', $columns, $conditions, 'wfLoadSiteStats' );

	if ( $row !== false ) {
		$wgTotalViews = $row->ss_total_views;
		$wgTotalEdits = $row->ss_total_edits;
		$wgNumberOfArticles = $row->ss_good_articles;
	}
}
/**
 * Escape only the characters that can open, close or quote an HTML tag.
 *
 * Double quotes and angle brackets become entities; everything else,
 * including ampersands, passes through unchanged.
 *
 * @param mixed $in subject handed to str_replace()
 * @return mixed $in with ", > and < replaced by &quot;, &gt; and &lt;
 */
function wfEscapeHTMLTagsOnly( $in ) {
	# character => entity, applied in this order
	$entities = array(
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;'
	);

	return str_replace( array_keys( $entities ), array_values( $entities ), $in );
}