ShortPages and LongPages are almost identical; remove some duplication.
[mediawiki.git] / includes / Parser.php
blob8e3e8a4263a0f1b0a6d2b753d865f136d78b2378
1 <?php
3 /**
4 * File for Parser and related classes
6 * @package MediaWiki
7 * @version $Id$
8 */
/**
 * Variable substitution O(N^2) attack
 *
 * Without countermeasures, it would be possible to attack the parser by saving
 * a page filled with a large number of inclusions of large pages. The size of
 * the generated page would be proportional to the square of the input size.
 * Hence, we limit the number of inclusions of any given page, thus bringing any
 * attack back to O(N).
 */
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );

# String parameter for extractTags which will cause it to strip HTML comments
# in addition to regular <XML>-style tags. This should not be anything we may
# want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix for the unique strip markers; used in at least two functions
define( 'UNIQ_PREFIX', 'NaodW29');

# Constants needed for external link processing
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# URL characters: everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]<>\\x00-\\x20\\x7F]' );
# Link text characters: as above, but a space is allowed
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Bracketed external link: [url optional-text]
define(
	'EXT_LINK_BRACKETED',
	'/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S'
);
# Bare URL that points at an image file
define(
	'EXT_IMAGE_REGEX',
	'/^('.HTTP_PROTOCOLS.':)'.  # Protocol
	'('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S'  # Filename
);
52 /**
53 * PHP Parser
55 * Processes wiki markup
57 * <pre>
58 * There are three main entry points into the Parser class:
59 * parse()
60 * produces HTML output
61 * preSaveTransform().
62 * produces altered wiki markup.
63 * transformMsg()
64 * performs brace substitution on MediaWiki messages
66 * Globals used:
67 * objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
69 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
71 * settings:
72 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
73 * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
74 * $wgLocaltimezone
76 * * only within ParserOptions
77 * </pre>
79 * @package MediaWiki
81 class Parser
83 /**#@+
84 * @access private
86 # Persistent:
87 var $mTagHooks;
89 # Cleared with clearState():
90 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
91 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
93 # Temporary:
94 var $mOptions, $mTitle, $mOutputType,
95 $mTemplates, // cache of already loaded templates, avoids
96 // multiple SQL queries for the same string
97 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
98 // in this path. Used for loop detection.
100 /**#@-*/
/**
 * Constructor
 *
 * Starts with empty template caches and no registered tag hooks, then
 * resets all per-parse state via clearState().
 *
 * @access public
 */
function Parser() {
	# Persistent and temporary members all start out as empty arrays
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();

	$this->clearState();
}
/**
 * Clear Parser state
 *
 * Resets every member listed under "Cleared with clearState()" to its
 * initial value, including a fresh ParserOutput object.
 *
 * @access private
 */
function clearState() {
	$this->mOutput = new ParserOutput();

	# Array-valued state
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();

	# Flags and counters
	$this->mVariables = false;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mAutonumber = 0;
	$this->mLastSection = "";
}
/**
 * First pass--just handle <nowiki> sections, pass the rest off
 * to internalParse() which does all the real work.
 *
 * Order of operations: strip() removes protected sections, internalParse()
 * renders the wiki markup, unstrip() restores everything except nowiki/html,
 * a set of regex clean-ups is applied, doBlockLevels() runs, and finally
 * unstripNoWiki() restores the nowiki/html sections.
 *
 * @access public
 * @param string $text Wikitext to render
 * @param Title &$title Title object, stored in $this->mTitle
 * @param ParserOptions $options Stored in $this->mOptions
 * @param bool $linestart Passed through to internalParse() and doBlockLevels()
 * @param bool $clearState When true, clearState() is called first
 * @return ParserOutput a ParserOutput
 */
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = 'Parser::parse';
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			# (the lookahead captures nothing, so only \1 is re-emitted)
			'/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# Hexadecimal references may only contain [0-9A-Fa-f].
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
/**
 * Get a random string
 *
 * Concatenates the hexadecimal form of two mt_rand() values, each drawn
 * from 0..0x7fffffff. The parts are not zero-padded, so the length varies.
 *
 * @access private
 * @static
 * @return string Lower-case hexadecimal digits
 */
function getRandomString() {
	$parts = array();
	for ( $n = 0; $n < 2; $n++ ) {
		$parts[] = dechex( mt_rand( 0, 0x7fffffff ) );
	}
	return implode( '', $parts );
}
/**
 * Replaces all occurrences of <$tag>content</$tag> in the text
 * with a random marker and returns the new text. The output parameter
 * $content will be an associative array filled with data of the form
 * $unique_marker => content.
 *
 * If $content is already set, the additional entries will be appended.
 * If $tag is set to STRIP_COMMENTS, the function will extract
 * <!-- HTML comments -->
 *
 * An opening tag with no matching closing tag swallows the rest of the text
 * as its content.
 *
 * @access private
 * @static
 * @param string $tag Tag name, or STRIP_COMMENTS
 * @param string $text Text to search
 * @param array &$content Receives marker => content pairs
 * @param string $uniq_prefix Prefix for the generated markers
 * @return string $text with every matched element replaced by its marker
 */
function extractTags($tag, $text, &$content, $uniq_prefix = ''){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# The delimiters do not change while scanning, so build them once.
	if($tag==STRIP_COMMENTS) {
		$startRegex = '/<!--/i';
		$endRegex = '/-->/i';
	} else {
		# Quote the tag name so it is always matched literally
		$quotedTag = preg_quote( $tag, '/' );
		$startRegex = "/<\\s*$quotedTag\\s*>/i";
		$endRegex = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $startRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			$text = '';
		} else {
			$q = preg_split( $endRegex, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# No closing tag: nothing is left to scan
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
/**
 * Strips and renders html, nowiki, math, pre, comments and extension tags.
 * When the output type is OT_HTML the stripped content is rendered;
 * otherwise it is stored wrapped in its original tags.
 * Returns the text, and fills an array with data needed in unstrip().
 * If the $state is already a valid strip state, it adds to the state.
 *
 * @param string $text Text to strip
 * @param array &$state Strip state; created here when empty, merged otherwise
 * @param bool $stripcomments when set, HTML comments <!-- like this -->
 *  will be stripped in addition to other tags. This is important
 *  for section editing, where these comments cause confusion when
 *  counting the sections in the wikisource
 * @return string Text with the stripped sections replaced by markers
 *
 * @access private
 */
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# html
	global $wgRawHtml, $wgWhitelistEdit;
	if( $wgRawHtml && $wgWhitelistEdit ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		foreach( $html_content as $marker => $content ) {
			if ($render ) {
				# Raw and unchecked for validity.
				$html_content[$marker] = $content;
			} else {
				$html_content[$marker] = '<html>'.$content.'</html>';
			}
		}
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the escaped source with a proper closing tag
				$math_content[$marker] = '&lt;math&gt;'.$content.'&lt;/math&gt;';
			}
		} else {
			$math_content[$marker] = '<math>'.$content.'</math>';
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = '<pre>'.$content.'</pre>';
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = '<!--'.$content.'-->';
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	# Built-in sections first; with "+" they win over an extension tag of the same name
	$stripped = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $stripped as $type => $items ) {
			if ( isset( $state[$type] ) && is_array( $state[$type] ) ) {
				$state[$type] = $state[$type] + $items;
			} else {
				# Section type not seen before (e.g. a state created by
				# insertStripItem(), or a new extension tag): keep its content
				$state[$type] = $items;
			}
		}
	} else {
		$state = $stripped;
	}
	return $text;
}
/**
 * Restores every section removed by strip() except nowiki and html,
 * which are left for unstripNoWiki().
 *
 * always call unstripNoWiki() after this one
 *
 * @access private
 * @param string $text Text containing strip markers
 * @param array &$state Strip state: section type => ( marker => content )
 * @return string Text with the markers replaced by their content
 */
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		# Nothing was stripped
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state, true ) as $type => $contentDict ) {
		if ( $type == 'nowiki' || $type == 'html' ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}
	return $text;
}
/**
 * Restores the nowiki sections and, when $wgRawHtml is enabled, the html
 * sections that unstrip() leaves in place.
 *
 * always call this after unstrip() to preserve the order
 *
 * @access private
 * @param string $text Text containing strip markers
 * @param array &$state Strip state with 'nowiki' and 'html' entries
 * @return string Text with the markers replaced by their content
 */
function unstripNoWiki( $text, &$state ) {
	global $wgRawHtml;

	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state['nowiki'], true ) as $marker => $replacement ) {
		$text = str_replace( $marker, $replacement, $text );
	}

	if ( !$wgRawHtml ) {
		return $text;
	}

	foreach ( array_reverse( $state['html'], true ) as $marker => $replacement ) {
		$text = str_replace( $marker, $replacement, $text );
	}
	return $text;
}
/**
 * Add an item to the strip state
 * Returns the unique tag which must be inserted into the stripped text
 * The tag will be replaced with the original text in unstrip()
 *
 * @access private
 * @param string $text Content to protect
 * @param array &$state Strip state; initialised here when empty
 * @return string The marker standing in for $text
 */
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Create every section strip() later merges into, so a state
		# that starts here is a valid strip state
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'item' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
/**
 * Return allowed HTML attributes
 *
 * @access private
 * @return array List of lower-case attribute names (no scripting attributes)
 */
function getHTMLattrs () {
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		'title', 'align', 'lang', 'dir', 'width', 'height',
		'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
		'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
		/* FONT */ 'type', 'start', 'value', 'compact',
		/* For various lists, mostly deprecated but safe */
		'summary', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan', /* Tables */
		'id', 'class', 'name', 'style' /* For CSS */
	);
	return $htmlattrs ;
}
/**
 * Remove non approved attributes and javascript in css
 *
 * Attribute names not returned by getHTMLattrs() are dropped together with
 * their value. If the remaining text contains a style attribute that looks
 * like script (see the pattern below), all attributes are dropped.
 *
 * @access private
 * @param string $t Attribute part of a tag
 * @return string Filtered, trimmed attribute text
 */
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier so that attribute
	# text is never evaluated as PHP code.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s">]+|"[^">]*"))?/',
		array( &$this, 'fixTagAttributesCallback' ),
		$t);

	$t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

/**
 * Helper for fixTagAttributes(): decide what to keep of one matched attribute.
 *
 * @access private
 * @param array $m Match: [1] attribute name, [3] value (absent when none)
 * @return string "name" or "name=value" when allowed, otherwise ''
 */
function fixTagAttributesCallback ( $m ) {
	static $allowed = false;
	if ( $allowed === false ) {
		$allowed = $this->getHTMLattrs() ;
	}
	if ( !in_array( strtolower( $m[1] ), $allowed ) ) {
		return '' ;
	}
	$value = isset( $m[3] ) ? $m[3] : '' ;
	return ( $value != '' ) ? $m[1] . '=' . $value : $m[1] ;
}
/**
 * interface with html tidy, used if $wgUseTidy = true
 *
 * Wraps the fragment in a minimal XHTML document, pipes it through the
 * binary named by $wgTidyBin and returns whatever tidy writes to stdout.
 * If tidy produces no output for non-empty input, the original text is
 * returned followed by an HTML comment noting the failure.
 *
 * @access public
 * @static
 * @param string $text HTML fragment to clean
 * @return string Tidy's output, or $text plus a warning comment
 */
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	# Pick the encoding flag; anything not recognised is passed through raw
	$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
	$encoding = strtoupper( $wgOutputEncoding );
	if ( $encoding == 'ISO-8859-1' && $sameEncoding ) {
		$opts = ' -latin1';
	} elseif ( $encoding == 'UTF-8' && $sameEncoding ) {
		$opts = ' -utf8';
	} else {
		$opts = ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';

	# stdin and stdout are pipes; stderr is discarded
	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$command = "$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts";

	$cleansource = '';
	$process = proc_open( $command, $descriptorspec, $pipes );
	if ( is_resource( $process ) ) {
		fwrite( $pipes[0], $wrappedtext );
		fclose( $pipes[0] );
		while ( !feof( $pipes[1] ) ) {
			$cleansource .= fgets( $pipes[1], 1024 );
		}
		fclose( $pipes[1] );
		proc_close( $process );
	}

	wfProfileOut( $fname );

	if ( $cleansource != '' || $text == '' ) {
		return $cleansource;
	}
	wfDebug( "Tidy error detected!\n" );
	return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
}
/**
 * parse the wiki syntax used to render tables
 *
 * Works line by line. Four parallel stacks (one entry per currently open
 * table) record whether a cell and a row are open, which cell tag was last
 * opened, and the attributes for the next row.
 *
 * @access private
 * @param string $t Text possibly containing {| ... |} table markup
 * @return string Text with the table markup replaced by HTML
 */
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
			# Table start, optionally indented with leading colons
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( '<dl><dd>', $indent_level ) .
				'<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) ) {
			# Table end: close any open cell and row first
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			# What follows the dashes becomes the attributes of the next <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
			# $x is a table row
			if ( '|+' == substr ( $x , 0 , 2 ) ) {
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;

			# Loop through each table cell
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Open a row if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;

				# Cell parameters
				$y = explode ( '|' , $theline , 2 ) ;
				# Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $y[0], '[[' ) !== false ) {
					$y = array ($theline);
				}
				if ( count ( $y ) == 1 )
					$y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that was actually opened (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	wfProfileOut( $fname );
	return $t ;
}
/**
 * Helper function for parse() that transforms wiki markup into
 * HTML. Only called for $mOutputType == OT_HTML.
 *
 * @access private
 * @param string $text Wikitext, already passed through strip()
 * @param bool $linestart Not used within this method
 * @param array $args Passed to replaceVariables()
 * @param bool $isMain Passed to formatHeadings()
 * @return string The transformed text
 */
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	global $wgContLang;

	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	$text = $wgContLang->convert($text);

	# Horizontal rules: four or more dashes at the start of a line
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	# Single-argument stages, in order. replaceInternalLinks appears twice:
	# the second call replaces links and images inside captions of images.
	$stages = array(
		'doAllQuotes',
		'replaceInternalLinks',
		'replaceInternalLinks',
		'replaceExternalLinks',
		'doMagicLinks',
		'doTableStuff'
	);
	foreach ( $stages as $stage ) {
		$text = $this->$stage( $text );
	}

	$text = $this->formatHeadings( $text, $isMain );
	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	wfProfileOut( $fname );
	return $text;
}
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * Runs magicISBN(), then magicGEO() when $wgUseGeoMode is set and true,
 * then magicRFC().
 *
 * @access private
 * @param string &$text Text to process; also updated in place
 * @return string The processed text
 */
function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );

	if ( !empty( $wgUseGeoMode ) ) {
		$text = $this->magicGEO( $text );
	}

	$text = $this->magicRFC( $text );

	return $text;
}
/**
 * Parse ^^ tokens and return html
 *
 * Each ^^text^^ pair becomes <small><sup>text</sup></small>. The match is
 * non-greedy so that several pairs on one line are converted separately.
 *
 * @access private
 * @param string $text Text to process
 * @return string Text with the ^^ pairs replaced
 */
function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	$text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
	wfProfileOut( $fname);
	return $text;
}
/**
 * Parse headers and return html
 *
 * Lines wrapped in N equals signs on each side become <hN> elements.
 * Levels are tried from 6 down to 1.
 *
 * @access private
 * @param string $text Text to process
 * @return string Text with heading markup replaced
 */
function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );

	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( '=', $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		$level--;
	}

	wfProfileOut( $fname );
	return $text;
}
/**
 * Replace single quotes with HTML markup
 *
 * Applies doQuotes() to each line separately and joins the results
 * with newlines again.
 *
 * @access private
 * @param string $text Text to process
 * @return string the altered text
 */
function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );

	$converted = array_map(
		array( &$this, 'doQuotes' ),
		explode( "\n", $text )
	);
	$outtext = implode( "\n", $converted );

	wfProfileOut( $fname );
	return $outtext;
}
/**
 * Helper function for doAllQuotes()
 *
 * Converts the apostrophe markup of a single line: '' toggles italics,
 * ''' toggles bold and ''''' toggles both. Tags still open at the end of
 * the line are closed.
 *
 * @access private
 * @param string $text One line of text
 * @return string The line with apostrophe markup replaced by <i>/<b> tags
 */
function doQuotes( $text ) {
	# Odd indexes hold the apostrophe runs, even indexes the text between them
	$arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
	$total = count ($arr);
	if ($total == 1) {
		return $text;
	}

	# First, do some preliminary work. This may shift some apostrophes from
	# being mark-up to being text. It also counts the number of occurrences
	# of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ($i = 1; $i < $total; $i += 2) {
		$len = strlen ($arr[$i]);
		if ($len == 4) {
			# Four apostrophes: the first is text, the remaining three
			# constitute mark-up for bold text.
			$arr[$i-1] .= "'";
			$arr[$i] = "'''";
		} else if ($len > 5) {
			# More than five in a row: all are text except for the last 5.
			$arr[$i-1] .= str_repeat ("'", $len - 5);
			$arr[$i] = "'''''";
		}
		# Count the mark-ups; a run of five counts as one of each.
		switch (strlen ($arr[$i])) {
			case 2: $numitalics++; break;
			case 3: $numbold++; break;
			case 5: $numitalics++; $numbold++; break;
		}
	}

	# If there is an odd number of both bold and italics, it is likely
	# that one of the bold ones was meant to be an apostrophe followed
	# by italics. Which one we cannot know for certain, but it is more
	# likely to be one that has a single-letter word before it.
	if (($numbold % 2 == 1) && ($numitalics % 2 == 1)) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ($i = 1; $i < $total; $i += 2) {
			if (strlen ($arr[$i]) != 3) {
				continue;
			}
			$x1 = substr ($arr[$i-1], -1);
			$x2 = substr ($arr[$i-1], -2, 1);
			if ($x1 == ' ') {
				if ($firstspace == -1) $firstspace = $i;
			} else if ($x2 == ' ') {
				if ($firstsingleletterword == -1) $firstsingleletterword = $i;
			} else {
				if ($firstmultiletterword == -1) $firstmultiletterword = $i;
			}
		}

		# Preference: a single-letter word, then a multi-letter word, then
		# the first one that has neither. All three can be -1 if, for
		# example, there is only one pentuple-apostrophe in the line.
		$target = -1;
		if ($firstsingleletterword > -1) {
			$target = $firstsingleletterword;
		} else if ($firstmultiletterword > -1) {
			$target = $firstmultiletterword;
		} else if ($firstspace > -1) {
			$target = $firstspace;
		}
		if ($target > -1) {
			$arr [ $target ] = "''";
			$arr [ $target-1 ] .= "'";
		}
	}

	# Transition tables: run length => current state => ( output, next state ).
	# The states 'both', 'b'/'i' when opening, and '' are handled in code below.
	$transitions = array(
		2 => array(
			'i'  => array( '</i>', '' ),
			'bi' => array( '</i>', 'b' ),
			'ib' => array( '</b></i><b>', 'b' )
		),
		3 => array(
			'b'  => array( '</b>', '' ),
			'bi' => array( '</i></b><i>', 'i' ),
			'ib' => array( '</b>', 'i' )
		),
		5 => array(
			'b'  => array( '</b><i>', 'i' ),
			'i'  => array( '</i><b>', 'b' ),
			'bi' => array( '</i></b>', '' ),
			'ib' => array( '</b></i>', '' )
		)
	);

	# Now let's actually convert our apostrophic mush to HTML!
	$output = '';
	$buffer = '';
	$state = '';
	for ($i = 0; $i < $total; $i++) {
		$r = $arr[$i];

		if (($i % 2) == 0) {
			# Plain text; held back while we do not yet know the tag order
			if ($state == 'both') {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$len = strlen ($r);
		if (!isset ($transitions[$len])) {
			continue;
		}

		if (isset ($transitions[$len][$state])) {
			$output .= $transitions[$len][$state][0];
			$state = $transitions[$len][$state][1];
		} else if ($len == 2) {
			if ($state == 'both') {
				$output .= '<b><i>'.$buffer.'</i>'; $state = 'b';
			} else { # $state can be 'b' or ''
				$output .= '<i>'; $state .= 'i';
			}
		} else if ($len == 3) {
			if ($state == 'both') {
				$output .= '<i><b>'.$buffer.'</b>'; $state = 'i';
			} else { # $state can be 'i' or ''
				$output .= '<b>'; $state .= 'b';
			}
		} else { # $len == 5
			if ($state == 'both') {
				$output .= '<i><b>'.$buffer.'</b></i>'; $state = '';
			} else { # ($state == '')
				$buffer = ''; $state = 'both';
			}
		}
	}

	# Now close all remaining tags. Notice that the order is important.
	switch ($state) {
		case 'b':    $output .= '</b>'; break;
		case 'i':    $output .= '</i>'; break;
		case 'ib':   $output .= '</b></i>'; break;
		case 'bi':   $output .= '</i></b>'; break;
		case 'both': $output .= '<b><i>'.$buffer.'</i></b>'; break;
	}
	return $output;
}
/**
 * Replace external links
 *
 * Handles bracketed links of the form [url] and [url text]; the text between
 * them is passed to replaceFreeExternalLinks() for bare URLs.
 *
 * Note: we have to do external links before the internal ones,
 * and otherwise take great care in the order of things here, so
 * that we don't end up interpreting some URLs twice.
 *
 * @access private
 * @param string $text Text to process
 * @return string Text with external links replaced by HTML
 */
function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	$linktrail = wfMsgForContent('linktrail');
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	# Each link yields four pieces: URL, protocol, link text, following text
	$i = 0;
	while ( $i<count( $bits ) ) {
		$url = $bits[$i++];
		$protocol = $bits[$i++];
		$text = $bits[$i++];
		$trail = $bits[$i++];

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';

		# No link text, e.g. [http://domain.tld/some.link]
		if ( $text == '' ) {
			# Autonumber if allowed
			if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
				$text = '[' . ++$this->mAutonumber . ']';
			} else {
				# Otherwise just use the URL
				$text = htmlspecialchars( $url );
			}
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			if ( preg_match( $linktrail, $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		}

		$encUrl = htmlspecialchars( $url );
		# Bit in parentheses showing the URL for the printable version.
		# The pattern is delimited by "!", so quote for that delimiter.
		$sameAsText = preg_match(
			'!' . preg_quote( $protocol, '!' ) . '://' . preg_quote( $text, '!' ) . '/?$!',
			$url );
		if( $url == $text || $sameAsText ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			if ( ! $sk->suppressUrlExpansion() ) {
				# $encUrl is already escaped; escaping again would show "&amp;"
				$paren = "<span class='urlexpansion'> (<i>" . $encUrl . "</i>)</span>";
			} else {
				$paren = '';
			}
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		$la = $sk->getExternalLinkAttributes( $url, $text );

		# The URL is used as typed, so users can paste URLs directly into
		# the text (changed in August 2004). EXT_LINK_URL_CLASS still allows
		# a double quote, which must not be able to end the href attribute.
		$hrefUrl = str_replace( '"', '&quot;', $url );
		$s .= "<a href=\"{$hrefUrl}\"{$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
	}

	wfProfileOut( $fname );
	return $s;
}
/**
 * Replace anything that looks like a URL with a link
 *
 * Splits the text at every "protocol:" from URL_PROTOCOLS. When valid URL
 * characters follow, the URL becomes an external link, or an image when
 * maybeMakeImageLink() accepts it. Otherwise the text is left as it was.
 *
 * @access private
 * @param string $text Text to process
 * @return string Text with bare URLs replaced by HTML
 */
function replaceFreeExternalLinks( $text ) {
	$bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$s = array_shift( $bits );
	$i = 0;

	$sk =& $this->mOptions->getSkin();

	while ( $i < count( $bits ) ){
		$protocol = $bits[$i++];
		$remainder = $bits[$i++];

		if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Found some characters after the protocol that look promising
			$url = $protocol . $m[1];
			$trail = $m[2];

			# Move trailing punctuation to $trail.
			# This is a strspn() character list, not a regex: no escaping.
			$sep = ',;.:!?';
			# If there is no left bracket, then consider right brackets fair game too
			if ( strpos( $url, '(' ) === false ) {
				$sep .= ')';
			}

			$numSepChars = strspn( strrev( $url ), $sep );
			if ( $numSepChars ) {
				$trail = substr( $url, -$numSepChars ) . $trail;
				$url = substr( $url, 0, -$numSepChars );
			}

			# Replace &amp; from obsolete syntax with &
			$url = str_replace( '&amp;', '&', $url );

			# Is this an external image?
			$text = $this->maybeMakeImageLink( $url );
			if ( $text === false ) {
				# Not an image, make a link
				$text = $sk->makeExternalLink( $url, $url );
			}
			$s .= $text . $trail;
		} else {
			$s .= $protocol . $remainder;
		}
	}
	return $s;
}
/**
 * make an image if it's allowed
 *
 * @access private
 * @param string $url Candidate image URL
 * @return mixed The skin's image HTML when external images are allowed and
 *  $url matches EXT_IMAGE_REGEX, otherwise false
 */
function maybeMakeImageLink( $url ) {
	$skin =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}

	# Image found
	return $skin->makeImage( htmlspecialchars( $url ) );
}
/**
 * Process [[ ]] wikilinks
 *
 * The text is split on '[[' and every following piece is matched
 * against the form "target|alternate]]trail". Pieces that do not match,
 * that contain a URL protocol, or whose target is not a valid title are
 * copied to the output unchanged. Unless the target is escaped with a
 * leading ':', interlanguage, image and category links get special
 * treatment; self links, media links and special-page links are handled
 * separately, and everything else goes through the skin's makeLinkObj().
 *
 * @param string $s the wikitext to process
 * @return string the text with wikilinks replaced
 * @access private
 */
function replaceInternalLinks( $s ) {
    global $wgContLang, $wgLinkCache;
    static $fname = 'Parser::replaceInternalLinks' ;
    wfProfileIn( $fname );

    wfProfileIn( $fname.'-setup' );
    static $tc = FALSE;
    # the % is needed to support urlencoded titles as well
    if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
    $sk =& $this->mOptions->getSkin();

    # A space is prepended before splitting and stripped again from the
    # first piece, which is the text in front of the first '[['.
    $a = explode( '[[', ' ' . $s );
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

    $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();

    $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

    if ( $useLinkPrefixExtension ) {
        # Peel the word directly in front of the first link off the
        # leading text; it becomes the prefix of that first link.
        if ( preg_match( $e2, $s, $m ) ) {
            $first_prefix = $m[2];
            $s = $m[1];
        } else {
            $first_prefix = false;
        }
    } else {
        $prefix = '';
    }

    wfProfileOut( $fname.'-setup' );

    # Process each piece that followed a '[['
    foreach ( $a as $line ) {
        wfProfileIn( $fname.'-prefixhandling' );
        if ( $useLinkPrefixExtension ) {
            if ( preg_match( $e2, $s, $m ) ) {
                $prefix = $m[2];
                $s = $m[1];
            } else {
                $prefix='';
            }
            # first link
            if($first_prefix) {
                $prefix = $first_prefix;
                $first_prefix = false;
            }
        }
        wfProfileOut( $fname.'-prefixhandling' );

        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
            $text = $m[2];
            # fix up urlencoded title texts
            if ( strpos( $m[1], '%' ) !== false ) {
                $m[1] = urldecode( $m[1] );
            }
            $trail = $m[3];
        } else { # Invalid form; output directly
            $s .= $prefix . '[[' . $line ;
            continue;
        }

        # Don't allow internal links to pages containing
        # PROTO: where PROTO is a valid URL protocol; these
        # should be external links.
        if (preg_match('/((?:'.URL_PROTOCOLS.'):)/', $m[1])) {
            $s .= $prefix . '[[' . $line ;
            continue;
        }

        # Make subpage if necessary
        $link = $this->maybeDoSubpageLink( $m[1], $text );

        # A leading ':' forces an ordinary link
        $noforce = (substr($m[1], 0, 1) != ':');
        if (!$noforce) {
            # Strip off leading ':'
            $link = substr($link, 1);
        }

        $wasblank = ( '' == $text );
        if( $wasblank ) $text = $link;

        $nt = Title::newFromText( $link );
        if( !$nt ) {
            # Not a valid title; output directly
            $s .= $prefix . '[[' . $line;
            continue;
        }

        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();

        # Link not escaped by : , create the various objects
        if( $noforce ) {

            # Interwikis
            if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
                array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
                # Keep prefix and trail only if they are more than whitespace
                $tmp = $prefix . $trail ;
                $s .= (trim($tmp) == '')? '': $tmp;
                continue;
            }

            if ( $ns == NS_IMAGE ) {
                $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                $wgLinkCache->addImageLinkObj( $nt );
                continue;
            }

            if ( $ns == NS_CATEGORY ) {
                $t = $nt->getText() ;
                $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$t ) ;

                $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                $pPLC=$sk->postParseLinkColour();
                $sk->postParseLinkColour( false );
                $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
                $sk->postParseLinkColour( $pPLC );
                $wgLinkCache->resume();

                # Without explicit link text, sort under the current page's name
                if ( $wasblank ) {
                    if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
                        $sortkey = $this->mTitle->getText();
                    } else {
                        $sortkey = $this->mTitle->getPrefixedText();
                    }
                } else {
                    $sortkey = $text;
                }
                $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                $this->mOutput->mCategoryLinks[] = $t ;
                $s .= $prefix . $trail ;
                continue;
            }
        }

        if( ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() ) &&
            ( strpos( $link, '#' ) === FALSE ) ) {
            # Self-links are handled specially; generally de-link and change to bold.
            $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
            continue;
        }

        if( $ns == NS_MEDIA ) {
            $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            continue;
        } elseif( $ns == NS_SPECIAL ) {
            $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
            continue;
        }
        $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
    }
    wfProfileOut( $fname );
    return $s;
}
/**
 * Handle link to subpage if necessary
 *
 * Valid link forms:
 *   Foobar    -- normal
 *   :Foobar   -- override special treatment of prefix (images, language links)
 *   /Foobar   -- convert to CurrentPage/Foobar
 *   /Foobar/  -- convert to CurrentPage/Foobar, strip the initial / from text
 *
 * The target is returned unchanged when it does not start with '/' or
 * when the current page's namespace is not listed in
 * $wgNamespacesWithSubpages.
 *
 * @param $target string the source of the link
 * @param &$text the link text, modified as necessary
 * @return string the full name of the link
 * @access private
 */
function maybeDoSubpageLink($target, &$text) {
    global $wgNamespacesWithSubpages;

    $fname = 'Parser::maybeDoSubpageLink';
    wfProfileIn( $fname );

    # Default: hand the target back exactly as it came in
    $ret = $target;

    # substr() instead of an offset read, so an empty target is harmless.
    # The namespace is only consulted for targets starting with '/'.
    if ( substr( $target, 0, 1 ) == '/'
        && !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) {
        # / at end means we don't want the slash to be shown
        if ( substr( $target, -1, 1 ) == '/' ) {
            $target = substr( $target, 1, -1 );
            $noslash = $target;
        } else {
            $noslash = substr( $target, 1 );
        }

        $ret = $this->mTitle->getPrefixedText() . '/' . trim( $noslash );
        if ( '' === $text ) {
            $text = $target;
        } # this might be changed for ugliness reasons
    }

    wfProfileOut( $fname );
    return $ret;
}
/**#@+
 * Used by doBlockLevels()
 * @access private
 */
# closeParagraph() returns the closing tag (plus a newline) for the
# section named in mLastSection, or '' when none is open, and resets
# the mInPre and mLastSection state.
/* private */ function closeParagraph() {
    $closing = ( '' != $this->mLastSection )
        ? '</' . $this->mLastSection . ">\n"
        : '';

    $this->mInPre = false;
    $this->mLastSection = '';

    return $closing;
}
# getCommon() returns the length of the longest common prefix of
# both arguments, i.e. the number of leading characters they share.
# Square-bracket offsets are used because the curly-brace form is not
# accepted by current PHP versions.
/* private */ function getCommon( $st1, $st2 ) {
    $shorter = min( strlen( $st1 ), strlen( $st2 ) );

    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] !== $st2[$i] ) { break; }
    }
    return $i;
}
# openList(), nextItem() and closeList() open, continue, and close the
# list element appropriate to the prefix character passed into them.
#
# openList() first closes any open paragraph, then appends the opening
# tags for the list type. An unknown character yields only an error
# comment; the paragraph-closing output is discarded in that case.
/* private */ function openList( $char ) {
    $result = $this->closeParagraph();

    switch ( $char ) {
        case '*':
            $result .= '<ul><li>';
            break;
        case '#':
            $result .= '<ol><li>';
            break;
        case ':':
            $result .= '<dl><dd>';
            break;
        case ';':
            $result .= '<dl><dt>';
            $this->mDTopen = true;
            break;
        default:
            $result = '<!-- ERR 1 -->';
    }

    return $result;
}
# nextItem() closes the current list item and opens the next one.
# For definition lists the closing tag depends on whether a <dt> is
# currently open (mDTopen), and mDTopen is updated to match the item
# being opened. An unknown character yields an error comment.
/* private */ function nextItem( $char ) {
    switch ( $char ) {
        case '*':
        case '#':
            return '</li><li>';

        case ':':
        case ';':
            $close = $this->mDTopen ? '</dt>' : '</dd>';
            if ( ';' == $char ) {
                $this->mDTopen = true;
                return $close . '<dt>';
            }
            $this->mDTopen = false;
            return $close . '<dd>';
    }
    return '<!-- ERR 2 -->';
}
# closeList() returns the closing tags, followed by a newline, for the
# list type denoted by $char. For ':' the closing tag depends on whether
# a <dt> is still open (mDTopen), which is then cleared. Any other
# character, including ';', yields an error comment without a newline.
/* private */ function closeList( $char ) {
    switch ( $char ) {
        case '*':
            $closing = '</li></ul>';
            break;
        case '#':
            $closing = '</li></ol>';
            break;
        case ':':
            if ( $this->mDTopen ) {
                $this->mDTopen = false;
                $closing = '</dt></dl>';
            } else {
                $closing = '</dd></dl>';
            }
            break;
        default:
            return '<!-- ERR 3 -->';
    }
    return $closing . "\n";
}
1353 /**#@-*/
/**
 * Make lists from lines starting with ':', '*', '#', etc.
 *
 * The text is processed line by line. Besides lists this also handles
 * the block-level elements p and pre: lines starting with a space become
 * preformatted text and blank lines separate paragraphs.
 *
 * @param string $text the text to process
 * @param bool $linestart if false, the first line is copied to the
 *   output as-is and processing starts at the second line
 * @access private
 * @return string the lists rendered as HTML
 */
function doBlockLevels( $text, $linestart ) {
    $fname = 'Parser::doBlockLevels';
    wfProfileIn( $fname );

    # Parsing through the text line by line.  The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # Either false, or paragraph markup held back until the next line
    # shows whether it is needed
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
        $preOpenMatch = preg_match('/<pre/i', $oLine );
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, '*#:;' );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 treats ';' like ':' so that a term line and a
            # definition line compare as the same list
            $pref2 = str_replace( ';', ':', $pref );
            $t = substr( $oLine, $prefixLength );
            $this->mInPre = !empty($preOpenMatch);
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( substr( $pref, -1 ) == ';') {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                if ($this->findColonNoLinks($t, $term, $t2) !== false) {
                    $t = $t2;
                    $output .= $term . $this->nextItem( ':' );
                }
            }
        } elseif( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            # Close the levels of the previous line that are not shared
            while( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            # Open the levels that are new on this line
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ';' == $char ) {
                    # FIXME: This is dupe of code above
                    if ($this->findColonNoLinks($t, $term, $t2) !== false) {
                        $t = $t2;
                        $output .= $term . $this->nextItem( ':' );
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
            $closematch = preg_match(
                '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                # closeParagraph() cleared mInPre; restore it if a <pre>
                # was opened on this line and not closed again
                if($preOpenMatch and !$preCloseMatch) {
                    $this->mInPre = true;
                }
                if ( $closematch ) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # substr() rather than an offset read: $t may be empty here
                if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
                    // pre
                    if ($this->mLastSection != 'pre') {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                    $t = substr( $t, 1 );
                } else {
                    // paragraph
                    if ( '' == trim($t) ) {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack.'<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            if ($this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = '<p>';
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ($this->mLastSection != 'p') {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        # The line itself is only emitted when no paragraph markup is pending
        if ($paragraphStack === false) {
            $output .= $t."\n";
        }
    }
    # Close whatever list levels the last line left open
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( '' != $this->mLastSection ) {
        $output .= '</' . $this->mLastSection . '>';
        $this->mLastSection = '';
    }

    wfProfileOut( $fname );
    return $output;
}
/**
 * Split up a string on ':', ignoring any occurrences that come after an
 * unclosed "<a" or "<span" tag.
 *
 * Only "<a"/"</a>" and "<span"/"</span>" are counted; other tags are
 * not taken into account. $before and $after are overwritten for every
 * candidate ':' that is examined, so they may hold the last rejected
 * split when false is returned.
 *
 * @param $str string the string to split
 * @param &$before string set to everything before the ':'
 * @param &$after string set to everything after the ':'
 * @return mixed the position of the ':', or false if none found
 * @access private
 */
function findColonNoLinks($str, &$before, &$after) {
    $fname = 'Parser::findColonNoLinks';
    wfProfileIn( $fname );

    $colon = strpos( $str, ':' );
    while ( $colon !== false ) {
        $before = substr( $str, 0, $colon );
        $after = substr( $str, $colon + 1 );

        # Number of tags opened but not yet closed in front of this ':'
        $openAnchors = substr_count( $before, '<a' ) - substr_count( $before, '</a>' );
        $openSpans = substr_count( $before, '<span' ) - substr_count( $before, '</span>' );

        if ( $openAnchors <= 0 && $openSpans <= 0 ) {
            # Tags are balanced before ':'; ok
            break;
        }

        # Inside a tag pair; try the next ':'
        $colon = strpos( $str, ':', $colon + 1 );
    }

    wfProfileOut( $fname );
    return $colon;
}
/**
 * Return value of a magic variable (like PAGENAME)
 *
 * @param $index the MAG_* identifier of the variable
 * @return mixed the variable's current value, or NULL for an
 *   identifier that is not handled here
 * @access private
 */
function getVariableValue( $index ) {
    global $wgContLang, $wgSitename, $wgServer;

    $value = NULL;

    switch ( $index ) {
        case MAG_CURRENTMONTH:
            $value = $wgContLang->formatNum( date( 'm' ) );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgContLang->getMonthName( date('n') );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgContLang->getMonthNameGen( date('n') );
            break;
        case MAG_CURRENTDAY:
            $value = $wgContLang->formatNum( date('j') );
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_PAGENAMEE:
            $value = $this->mTitle->getPartialURL();
            break;
        case MAG_NAMESPACE:
            # Namespace name as given by the content language
            $value = $wgContLang->getNsText( $this->mTitle->getNamespace() );
            break;
        case MAG_CURRENTDAYNAME:
            # date('w') is 0-based; getWeekdayName() is called with 1..7
            $value = $wgContLang->getWeekdayName( date('w')+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = $wgContLang->formatNum( date( 'Y' ) );
            break;
        case MAG_CURRENTTIME:
            $value = $wgContLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = $wgContLang->formatNum( wfNumberOfArticles() );
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }

    return $value;
}
/**
 * initialise the magic variables (like CURRENTMONTHNAME)
 *
 * Rebuilds $this->mVariables from scratch: for every id in
 * $wgVariableIDs the magic word adds itself to the array together with
 * the value reported by getVariableValue().
 *
 * @access private
 */
function initialiseVariables() {
    global $wgVariableIDs;

    $fname = 'Parser::initialiseVariables';
    wfProfileIn( $fname );

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
    }

    wfProfileOut( $fname );
}
/**
 * Replace magic variables, templates, and template arguments
 * with the appropriate text. Templates are substituted recursively,
 * taking care to avoid infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  OT_WIKI: only {{subst:}} templates
 *  OT_MSG: only magic variables
 *  OT_HTML: all templates and magic variables
 *
 * Text longer than MAX_INCLUDE_SIZE is returned untouched.
 *
 * @param string $text The text to transform
 * @param array $args Key-value pairs representing template parameters to substitute
 * @return string the transformed text
 * @access private
 */
function replaceVariables( $text, $args = array() ) {
    # Prevent too big inclusions
    if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
        return $text;
    }

    $fname = 'Parser::replaceVariables';
    wfProfileIn( $fname );

    $legal = Title::legalChars();

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    # Variable substitution
    $variablePattern = '/{{([' . $legal . ']*?)}}/';
    $text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

    # Argument substitution
    $outputType = $this->mOutputType;
    if ( $outputType == OT_HTML || $outputType == OT_WIKI ) {
        $argumentPattern = '/{{{([' . $legal . ']*?)}}}/';
        $text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
    }

    # Template substitution
    $templatePattern = '/(\\n|{)?{{(['.$legal.']*)(\\|.*?|)}}/s';
    $text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
/**
 * Replace magic variables
 *
 * @param array $matches $matches[0] is the whole {{...}} expression,
 *   $matches[1] the text between the braces
 * @return string the variable's value, or the unchanged expression if
 *   it is not a known variable (or must not be substituted)
 * @access private
 */
function variableSubstitution( $matches ) {
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }

    $substitute = true;
    if ( $this->mOutputType == OT_WIKI ) {
        # Do only magic variables prefixed by SUBST
        $mwSubst =& MagicWord::get( MAG_SUBST );
        $substitute = $mwSubst->matchStartAndRemove( $matches[1] );
        # Note that if we don't substitute the variable below,
        # we don't remove the {{subst:}} magic word, in case
        # it is a template rather than a magic variable.
    }

    if ( !$substitute || !array_key_exists( $matches[1], $this->mVariables ) ) {
        return $matches[0];
    }

    $value = $this->mVariables[$matches[1]];
    $this->mOutput->mContainsOldMagic = true;
    return $value;
}
# Split template arguments
#
# $argsString is the parameter part of a template call including its
# leading '|', or '' if there are no parameters. Returns the list of
# arguments, indexed from 0.
function getTemplateArgs( $argsString ) {
    if ( $argsString === '' ) {
        return array();
    }

    $args = explode( '|', substr( $argsString, 1 ) );

    # If any of the arguments contains a '[[' that is not closed by a
    # ']]', it needs to be merged with the next arg because the '|'
    # character between belongs to the link syntax and not the template
    # parameter syntax. Only an excess of '[[' triggers a merge; a stray
    # ']]' must not glue unrelated arguments together.
    $argc = count( $args );
    for ( $i = 0; $i < $argc - 1; $i++ ) {
        if ( substr_count( $args[$i], '[[' ) > substr_count( $args[$i], ']]' ) ) {
            $args[$i] .= '|' . $args[$i+1];
            array_splice( $args, $i+1, 1 );
            # Re-examine the merged argument on the next pass
            $i--;
            $argc--;
        }
    }

    return $args;
}
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * Besides real templates this also handles the subst:, msgnw:, msg:,
 * int:, ns:, localurl:, localurle: and grammar: prefixes.
 *
 * @param array $matches The parts of the template
 *  $matches[0]: the complete expression as found in the text
 *  $matches[1]: "\n" or "{" if one directly precedes the braces, else ''
 *  $matches[2]: the title, i.e. the part before the |
 *  $matches[3]: the parameters (including a leading |), if any
 * @return string the text of the template, or $matches[0] unchanged
 *  if nothing could be substituted
 * @access private
 */
function braceSubstitution( $matches ) {
    global $wgLinkCache, $wgContLang;
    $found = false;
    $nowiki = false;
    $noparse = false;

    $title = NULL;

    # Need to know if the template comes at the start of a line,
    # to treat the beginning of the template like the beginning
    # of a line for tables and block-level elements.
    $linestart = $matches[1];

    # $part1 is the bit before the first |, and must contain only title characters
    # $args is a list of arguments, starting from index 0, not including $part1
    $part1 = $matches[2];
    # If the third subpattern matched anything, it will start with |
    $args = $this->getTemplateArgs($matches[3]);
    $argc = count( $args );

    # Don't parse {{{}}} because that's only for template arguments
    if ( $linestart === '{' ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
            # One of two possibilities is true:
            # 1) Found SUBST but not in the PST phase
            # 2) Didn't find SUBST and in the PST phase
            # In either case, return without further processing
            $text = $matches[0];
            $found = true;
            $noparse = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
                $text = $linestart . wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs = MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            if ( intval( $part1 ) ) {
                $text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $linestart . $wgContLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal = MagicWord::get( MAG_LOCALURL );
        $mwLocalE = MagicWord::get( MAG_LOCALURLE );

        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                if ( $argc > 0 ) {
                    $text = $linestart . $title->$func( $args[0] );
                } else {
                    $text = $linestart . $title->$func();
                }
                $found = true;
            }
        }
    }

    # GRAMMAR
    if ( !$found && $argc == 1 ) {
        $mwGrammar =& MagicWord::get( MAG_GRAMMAR );
        if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
            $text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
            $found = true;
        }
    }

    # Template table test

    # Did we encounter this template already? If yes, it is in the cache
    # and we need to check for loops.
    if ( !$found && isset( $this->mTemplates[$part1] ) ) {
        # set $text to cached message.
        $text = $linestart . $this->mTemplates[$part1];
        $found = true;

        # Infinite loop test
        if ( isset( $this->mTemplatePath[$part1] ) ) {
            $noparse = true;
            $found = true;
            $text .= '<!-- WARNING: template loop detected -->';
        }
    }

    # Load from database
    $itcamefromthedatabase = false;
    if ( !$found ) {
        $ns = NS_TEMPLATE;
        # $subpage must be a real variable: it is filled in by reference
        $subpage = '';
        $part1 = $this->maybeDoSubpageLink( $part1, $subpage );
        if ($subpage !== '') {
            $ns = $this->mTitle->getNamespace();
        }
        $title = Title::newFromText( $part1, $ns );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Template text without the line-start marker, or NULL if
            # nothing usable was found
            $rawText = NULL;

            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $rawText = $articleContent;
                    $itcamefromthedatabase = true;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( is_null( $rawText ) && $this->mOutputType == OT_HTML ) {
                $rawText = '[['.$title->getPrefixedText().']]';
            }

            if ( !is_null( $rawText ) ) {
                $found = true;
                # Template cache array insertion. The cache holds the
                # text without $linestart, because a cache hit prepends
                # the $linestart of the call being expanded.
                $this->mTemplates[$part1] = $rawText;
                $text = $linestart . $rawText;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
        # Clean up argument array: "name=value" arguments are stored
        # under their name, all others are numbered starting from 1
        $assocArgs = array();
        $index = 1;
        foreach( $args as $arg ) {
            $eqpos = strpos( $arg, '=' );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Add a new element to the template recursion path
        $this->mTemplatePath[$part1] = 1;

        $text = $this->strip( $text, $this->mStripState );
        $text = $this->removeHTMLtags( $text );
        $text = $this->replaceVariables( $text, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
            $wgLinkCache->addLinkObj( $title );
        }

        # If the template begins with a table or block-level
        # element, it should be treated as beginning a new line.
        # (Double quotes: the comparison is against a real newline.)
        if ($linestart !== "\n" && preg_match('/^({\\||:|;|#|\*)/', $text)) {
            $text = "\n" . $text;
        }
    }

    # Empties the template path
    $this->mTemplatePath = array();
    if ( !$found ) {
        return $matches[0];
    }

    # replace ==section headers==
    # XXX this needs to go away once we have a better parser.
    if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
        if( !is_null( $title ) )
            $encodedname = base64_encode($title->getPrefixedDBkey());
        else
            $encodedname = base64_encode("");
        # With PREG_SPLIT_DELIM_CAPTURE the even entries are the text
        # between headlines and the odd entries the headlines themselves
        $m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
            PREG_SPLIT_DELIM_CAPTURE);
        $text = '';
        $nsec = 0;
        for( $i = 0; $i < count($m); $i += 2 ) {
            $text .= $m[$i];
            if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
            $hl = $m[$i + 1];
            # Headlines that already carry a marker are left alone
            if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
                $text .= $hl;
                continue;
            }
            preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
            $text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
                . $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];

            $nsec++;
        }
    }

    return $text;
}
/**
 * Triple brace replacement -- used for template arguments
 *
 * Looks the trimmed argument name up in the argument set on top of
 * $this->mArgStack.
 *
 * @param array $matches $matches[0] is the whole {{{...}}} expression,
 *   $matches[1] the argument name
 * @return string the argument's value, or the unchanged expression if
 *   no such argument was passed
 * @access private
 */
function argSubstitution( $matches ) {
    $name = trim( $matches[1] );
    $currentArgs = end( $this->mArgStack );

    if ( !array_key_exists( $name, $currentArgs ) ) {
        return $matches[0];
    }
    return $currentArgs[$name];
}
/**
 * Returns true if the function is allowed to include this entity
 *
 * Every call counts as one inclusion of $dbk; the result is false once
 * the count exceeds MAX_INCLUDE_REPEAT.
 *
 * @param string $dbk key identifying the included entity
 * @return bool
 * @access private
 */
function incrementIncludeCount( $dbk ) {
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }
    $this->mIncludeCount[$dbk]++;

    return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
/**
 * Cleans up HTML, removes dangerous tags and attributes, and
 * removes HTML comments
 *
 * The text is split on '<'. A piece that starts with a permitted tag is
 * kept, with its attributes passed through fixTagAttributes(); any
 * other piece has its '<' and '>' escaped. Unless $wgUseTidy is set,
 * tag nesting is tracked as well: badly nested tags are escaped and
 * tags left open at the end are closed. When $wgUserHtml is off no tag
 * is permitted at all.
 *
 * @param string $text the text to clean
 * @return string the cleaned text
 * @access private
 */
function removeHTMLtags( $text ) {
    global $wgUseTidy, $wgUserHtml;
    $fname = 'Parser::removeHTMLtags';
    wfProfileIn( $fname );

    if( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
            'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
            'strike', 'strong', 'tt', 'var', 'div', 'center',
            'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
            'ruby', 'rt' , 'rb' , 'rp', 'p'
        );
        $htmlsingle = array(
            'br', 'hr', 'li', 'dt', 'dd'
        );
        $htmlnest = array( # Tags that can be nested--??
            'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
            'dl', 'font', 'big', 'small', 'sub', 'sup'
        );
        $tabletags = array( # Can only appear inside table
            'td', 'th', 'tr'
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    # Table tags need no matching close tag either
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not referenced again in this function;
    # the call is kept as it was.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments
    $text = $this->removeHTMLcomments( $text );

    # Groups: 1 = '/' of a closing tag, 2 = tag name, 3 = attributes,
    # 4 = '>' or '/>', 5 = text following the tag
    $tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

    $bits = explode( '<', $text );
    $text = array_shift( $bits );
    if(!$wgUseTidy) {
        $tagstack = array(); $tablestack = array();
        foreach ( $bits as $x ) {
            # Check the match result explicitly instead of silencing the
            # notices that destructuring a failed match would raise
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                continue;
            }
            $slash  = $regs[1];
            $t      = strtolower( $regs[2] );
            $params = $regs[3];
            $brace  = $regs[4];
            $rest   = $regs[5];

            $badtag = 0 ;
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    if ( ! in_array( $t, $htmlsingle ) &&
                        ( $ot = @array_pop( $tagstack ) ) != $t ) {
                        # Not the tag that was opened last; put it back
                        @array_push( $tagstack, $ot );
                        $badtag = 1;
                    } else {
                        if ( $t == 'table' ) {
                            # Back to the stack of the enclosing context
                            $tagstack = array_pop( $tablestack );
                        }
                        $newparams = '';
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                        ! in_array( 'table', $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                        ! in_array ( $t , $htmlnest ) ) {
                        $badtag = 1 ;
                    } else if ( ! in_array( $t, $htmlsingle ) ) {
                        if ( $t == 'table' ) {
                            # A table starts a fresh nesting context
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes($params);
                }

                if ( ! $badtag ) {
                    $rest = str_replace( '>', '&gt;', $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            $text .= '&lt;' . str_replace( '>', '&gt;', $x);
        }
        # Close off any remaining tags
        while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
            $text .= "</$t>\n";
            if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            if ( preg_match( $tagPattern, $x, $regs )
                && in_array( strtolower( $regs[2] ), $htmlelements ) ) {
                $slash = $regs[1];
                $t = strtolower( $regs[2] );
                $brace = $regs[4];
                $newparams = $this->fixTagAttributes( $regs[3] );
                $rest = str_replace( '>', '&gt;', $regs[5] );
                $text .= "<$slash$t $newparams$brace$rest";
            } else {
                $text .= '&lt;' . str_replace( '>', '&gt;', $x);
            }
        }
    }
    wfProfileOut( $fname );
    return $text;
}
/**
 * Remove '<!--', '-->', and everything between.
 * To avoid leaving blank lines, when a comment is both preceded
 * and followed by a newline (ignoring spaces), trim leading and
 * trailing spaces and one of the newlines.
 *
 * Processing stops at the first comment that is never terminated;
 * that comment and everything after it stay in the text.
 *
 * @param string $text the text to clean
 * @return string the text without HTML comments
 * @access private
 */
function removeHTMLcomments( $text ) {
    $fname='Parser::removeHTMLcomments';
    wfProfileIn( $fname );

    while ( true ) {
        $start = strpos( $text, '<!--' );
        if ( $start === false ) {
            break;
        }
        $end = strpos( $text, '-->', $start + 4 );
        if ( $end === false ) {
            # Unterminated comment; bail out
            break;
        }
        $end += 3;

        # $left walks back over spaces in front of the comment (never
        # past offset 0); $right walks forward over spaces behind it.
        $left = max( $start - 1, 0 );
        while ( substr( $text, $left, 1 ) === ' ' && $left > 0 ) {
            $left--;
        }
        $right = $end;
        while ( substr( $text, $right, 1 ) === ' ' ) {
            $right++;
        }

        $onOwnLine = substr( $text, $left, 1 ) === "\n"
            && substr( $text, $right, 1 ) === "\n";

        if ( $onOwnLine ) {
            # Remove the comment, leading and trailing
            # spaces, and leave only one newline.
            $text = substr_replace( $text, "\n", $left, $right - $left + 1 );
        } else {
            # Remove just the comment.
            $text = substr_replace( $text, '', $start, $end - $start );
        }
    }

    wfProfileOut( $fname );
    return $text;
}
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * @param string $text   HTML containing <h1>..<h6> headlines
 * @param bool   $isMain when true (and no __TOC__ position is forced) the TOC
 *                       is appended right after the first block of text
 * @return string the text with the rebuilt headlines (and TOC, if shown)
 * @access private
 */
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;
	$sectionCount = 0; # headlineCount excluding template sections

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$istemplate = 0;
		$templatetitle = "";
		$templatesection = 0;

		# Headlines that came from a template carry a marker comment holding
		# the base64-encoded template title and section index
		if (preg_match("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", $headline, $mat)) {
			$istemplate = 1;
			$templatetitle = base64_decode($mat[1]);
			$templatesection = 1 + (int)base64_decode($mat[2]);
			$headline = preg_replace("/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/", "", $headline);
		}

		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		@$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections.
		# The second pass must work on the result of the first one; feeding it
		# $headline again would throw the unstrip() result away.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# NOTE(review): relies on the /e (eval) modifier, which newer PHP
		# releases no longer support; replace with a callback when the
		# minimum PHP version allows it.
		$canonized_headline = preg_replace( '/<!--LINK ([0-9]*)-->/e',
			"\$wgLinkHolders['texts'][\$1]",
			$canonized_headline );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
		$refer[$headlineCount] = $canonized_headline;

		# count how many in assoc. array so we can track dupes in anchors
		@$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
			if ( empty( $head[$headlineCount] ) ) {
				$head[$headlineCount] = '';
			}
			if( $istemplate )
				$head[$headlineCount] .= $sk->editSectionLinkForOther($templatetitle, $templatesection);
			else
				$head[$headlineCount] .= $sk->editSectionLink($sectionCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			if( $istemplate )
				$headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
			else
				$headline = $sk->editSectionScript($sectionCount+1,$headline);
		}

		# give headline the correct <h#> tag
		@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';

		$headlineCount++;
		if( !$istemplate )
			$sectionCount++;
	}

	if( $doShowToc ) {
		$toclines = $headlineCount;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			#
			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		# __TOC__ present: put the TOC where the magic word stands
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
/**
 * Return an HTML link for the "ISBN 123456" text
 *
 * Every "ISBN " keyword followed by a run of digits, dashes or capital
 * letters is turned into a link to Special:Booksources; anything else is
 * copied through unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with ISBN references linked
 * @access private
 */
function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# The delimiter is a literal string, so explode() does the job of the
	# regex-based split().
	$a = explode( 'ISBN ', $text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = array_shift( $a );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Peel off the spaces following the keyword. strspn() is safe on an
		# empty string, unlike reading the first character in a loop.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		if ( $x === '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Peel off the leading run of characters allowed in an ISBN
		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( array( '-', ' ' ), '', $isbn );

		if ( '' == $num ) {
			# Nothing usable after the keyword: restore the original text
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
/**
 * Return an HTML link for the "GEO ..." text
 *
 * Coordinates written as  D&deg;M'S" North|South, D&deg;M'S" East|West  are
 * first rewritten into the "(GEO lat:lon)" form; then every "GEO " keyword
 * followed by a coordinate pair is turned into a link to Special:Geo.
 *
 * @param string $text text to be processed
 * @return string the text with GEO references linked
 * @access private
 */
function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# This conversion is only for the ~35000 U.S. Census Rambot pages...
	# North/East are positive, South/West negative. (South used to be emitted
	# with a '+' sign, which made it indistinguishable from North.)
	$latitudes = array( 'North' => '+', 'South' => '-' );
	$longitudes = array( 'West' => '-', 'East' => '+' );
	foreach ( $latitudes as $ns => $latSign ) {
		foreach ( $longitudes as $ew => $lonSign ) {
			$text = preg_replace(
				"/(\d+)&deg;(\d+)'(\d+)\" {$ns}, (\d+)&deg;(\d+)'(\d+)\" {$ew}/",
				"(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
				$text );
		}
	}

	# The delimiter is a literal string, so explode() does the job of the
	# regex-based split().
	$a = explode( 'GEO ', $text );
	if ( count ( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = array_shift( $a );
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Peel off the spaces following the keyword. strspn() is safe on an
		# empty string, unlike reading the first character in a loop.
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Peel off the leading run of coordinate characters
		$geoLen = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $geoLen );
		$x = (string)substr( $x, $geoLen );

		$num = str_replace( array( '+', ' ' ), '', $geo );

		# A usable coordinate needs both a latitude and a longitude part
		if ( '' == $num || count ( explode ( ':' , $num , 3 ) ) < 2 ) {
			$text .= "GEO $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
/**
 * Return an HTML link for the "RFC 1234" text
 *
 * An "RFC <number>" reference becomes an external link built from the
 * 'rfcurl' message, unless the keyword directly follows '[[' (it is then
 * part of an internal link and left as plain text).
 *
 * @access private
 * @param string $text text to be processed
 * @return string the text with RFC references linked
 */
function magicRFC( $text ) {
	$valid = '0123456789';

	# The delimiter is a literal string, so explode() does the job of the
	# regex-based split().
	$a = explode( 'RFC ', $text );
	if ( count ( $a ) < 2 ) return $text;
	$text = array_shift( $a );

	/* Check if RFC keyword is preceded by [[.
	 * This test is made here because of the array_shift above,
	 * which prevents the test from being done in the foreach.
	 */
	$internal = ( substr( $text, -2 ) == '[[' );

	foreach ( $a as $x ) {
		/* token might be empty if we have RFC RFC 1234 */
		if ( $x == '' ) {
			$text .= 'RFC ';
			continue;
		}

		/* remove and save whitespace in $blank; strspn() is safe when the
		 * token consists of spaces only */
		$blankLen = strspn( $x, ' ' );
		$blank = str_repeat( ' ', $blankLen );
		$x = (string)substr( $x, $blankLen );

		/* remove and save the rfc number in $rfc */
		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( $rfc == '' ) {
			/* call back stripped spaces */
			$text .= "RFC $blank$x";
		} elseif ( $internal ) {
			/* normal link */
			$text .= "RFC $rfc$x";
		} else {
			/* build the external link */
			$url = wfMsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}

		/* Check if the next RFC keyword is preceded by [[ */
		$internal = ( substr( $x, -2 ) == '[[' );
	}
	return $text;
}
/**
 * Transform wiki markup when saving a page: \r\n -> \n conversion,
 * <br> normalisation, signature substitution, {{subst:}} templates, etc.
 *
 * @param string $text the text to transform
 * @param Title &$title the Title object for the current article
 * @param User &$user the User object describing the current user
 * @param ParserOptions $options parsing options
 * @param bool $clearState whether to clear the parser state first
 * @return string the altered wiki markup
 * @access public
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOutputType = OT_WIKI;
	$this->mTitle =& $title;
	$this->mOptions = $options;

	if ( $clearState ) {
		$this->clearState();
	}

	# Plain string replacement: normalise line endings
	$text = str_replace( "\r\n", "\n", $text );

	# Regex replacements: normalise the <br> variants
	$patterns = array(
		"/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i",
		"/<br *?>/i",
	);
	$replacements = array(
		'<br style="clear:both;"/>',
		"<br />",
	);
	$text = preg_replace( $patterns, $replacements, $text );

	# Protect stripped sections while the second pass runs, then restore them
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $this->unstripNoWiki( $text, $stripState );
}
/**
 * Pre-save transform helper function
 *
 * Substitutes variables, expands the signature shortcuts (~~~, ~~~~ and
 * ~~~~~), completes "pipe trick" context links and trims trailing
 * whitespace.
 *
 * @param string $text the wiki text to transform
 * @param User &$user the user whose name and nickname are used for signatures
 * @return string the transformed wiki text
 * @access private
 */
function pstPass2( $text, &$user ) {
	global $wgLang, $wgContLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
	  ' (' . date( 'T' ) . ')';
	# Put back the timezone saved above (this used to read the undefined
	# $oldtzs, which reset TZ to an empty value instead of restoring it)
	if(isset($wgLocaltimezone)) putenv('TZ='.$oldtz);

	# Longest run of tildes first, so that ~~~~~ is not eaten by ~~~~ or ~~~
	$text = preg_replace( '/~~~~~/', $d, $text );
	$text = preg_replace( '/~~~~/', '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]] $d", $text );
	$text = preg_replace( '/~~~/', '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]", $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# The context is the parenthesised part of the current page title, if any
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
/**
 * Prepare the members that parse() normally sets up, so that external
 * code can call individual parser methods with confidence.
 *
 * @param Title &$title title to parse for
 * @param ParserOptions $options parsing options
 * @param int $outputType one of the OT_* output types
 * @param bool $clearState whether to reset the parser state as well
 * @access public
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
/**
 * Transform a MediaWiki message by replacing magic variables.
 *
 * A call made while another one is still running gets its text back
 * unchanged; this guards against infinite recursion.
 *
 * @param string $text the text to transform
 * @param ParserOptions $options options
 * @return string the text with variables substituted
 * @access public
 */
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	if ( !$executing ) {
		# Flag the transformation as running for the duration of the call
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
/**
 * Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
 * The callback will be called with the text within; it should
 * transform and return that text.
 *
 * @param string $tag tag name to register the hook for
 * @param mixed $callback callback to register for the tag
 * @return mixed the callback previously registered for the tag, or null
 * @access public
 */
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
/**
 * Result of a parse: the generated text together with the language links,
 * category links and flags collected while parsing.
 *
 * @package MediaWiki
 */
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	/**
	 * @param string $text the parsed text
	 * @param array $languageLinks interlanguage links found in the text
	 * @param array $categoryLinks category links found in the text
	 * @param bool $containsOldMagic value for the "contains old magic" flag
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
	}

	function getText()                   { return $this->mText; }
	function getLanguageLinks()          { return $this->mLanguageLinks; }
	function getCategoryLinks()          { return $this->mCategoryLinks; }
	function getCacheTime()              { return $this->mCacheTime; }
	function containsOldMagic()          { return $this->mContainsOldMagic; }
	function setText( $text )            { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll )     { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl )     { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t )          { return wfSetVar( $this->mCacheTime, $t ); }

	/**
	 * Fold the links and flags of another ParserOutput into this one.
	 * The text and the cache time are left untouched.
	 *
	 * @param ParserOutput $other the output to merge in
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from $other's category links (this used to
		# append our own language links to the category list instead)
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}
}
/**
 * Set options of the Parser
 *
 * Plain holder for the settings that influence a parse, with one
 * getter/setter pair per setting.
 *
 * @package MediaWiki
 */
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors

	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	# Mutators; each one hands back whatever wfSetVar() returns

	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is kept by reference and nothing is returned
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	# Get parser options
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Get user options
	# A false/empty $userInput is replaced by a fresh User object
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		$profileKey = 'ParserOptions::initialiseFromUser';
		wfProfileIn( $profileKey );

		if ( $userInput ) {
			$effectiveUser =& $userInput;
		} else {
			$effectiveUser = new User;
			$effectiveUser->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# The skin lookup is profiled on its own
		wfProfileIn( $profileKey.'-skin' );
		$this->mSkin =& $effectiveUser->getSkin();
		wfProfileOut( $profileKey.'-skin' );

		# Per-user preferences
		$this->mDateFormat = $effectiveUser->getOption( 'date' );
		$this->mEditSection = $effectiveUser->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $effectiveUser->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $effectiveUser->getOption( 'numberheadings' );
		$this->mShowToc = $effectiveUser->getOption( 'showtoc' );

		wfProfileOut( $profileKey );
	}
}
# Regex callbacks, used in Parser::replaceVariables

/**
 * Forward the regex matches to braceSubstitution() of the current parser.
 *
 * @param array $matches matches handed over by the regex function
 * @return mixed whatever the parser's braceSubstitution() returns
 */
function wfBraceSubstitution( $matches ) {
	global $wgCurParser;

	$result = $wgCurParser->braceSubstitution( $matches );
	return $result;
}
/**
 * Forward the regex matches to argSubstitution() of the current parser.
 *
 * @param array $matches matches handed over by the regex function
 * @return mixed whatever the parser's argSubstitution() returns
 */
function wfArgSubstitution( $matches ) {
	global $wgCurParser;

	$result = $wgCurParser->argSubstitution( $matches );
	return $result;
}
/**
 * Forward the regex matches to variableSubstitution() of the current parser.
 *
 * @param array $matches matches handed over by the regex function
 * @return mixed whatever the parser's variableSubstitution() returns
 */
function wfVariableSubstitution( $matches ) {
	global $wgCurParser;

	$result = $wgCurParser->variableSubstitution( $matches );
	return $result;
}
/**
 * Return the total number of articles
 *
 * Calls wfLoadSiteStats() first, then returns the global counter.
 *
 * @return mixed the value of $wgNumberOfArticles
 */
function wfNumberOfArticles() {
	wfLoadSiteStats();

	global $wgNumberOfArticles;
	return $wgNumberOfArticles;
}
/**
 * Get various statistics from the database
 *
 * Fills $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles from row 1 of
 * the site_stats table. Does nothing when $wgNumberOfArticles is already
 * set to something other than -1, or when the row cannot be fetched.
 *
 * @private
 */
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

	# Anything but -1 means the statistics are already there
	if ( -1 != $wgNumberOfArticles ) {
		return;
	}

	$db =& wfGetDB( DB_SLAVE );
	$fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
	$conditions = array( 'ss_row_id' => 1 );
	$row = $db->getArray( 'site_stats', $fields, $conditions, 'wfLoadSiteStats' );

	if ( $row === false ) {
		return;
	}

	$wgTotalViews = $row->ss_total_views;
	$wgTotalEdits = $row->ss_total_edits;
	$wgNumberOfArticles = $row->ss_good_articles;
}
/**
 * Escape only the characters that can open, close or break out of an HTML
 * tag: double quote, '>' and '<'. Ampersands are left alone.
 *
 * @param string $in text to escape
 * @return string the escaped text
 */
function wfEscapeHTMLTagsOnly( $in ) {
	$escapes = array(
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;',
	);
	return strtr( $in, $escapes );
}