template arguments, various improvements to handling of recursive inclusion
[mediawiki.git] / includes / Parser.php
blob7927aa5268e0f6e5a4d6b71f8493e6ac7c2cbc62
1 <?php
3 include_once('Tokenizer.php');
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 include_once('wikihiero.php');
9 # PHP Parser
11 # Processes wiki markup
13 # There are two main entry points into the Parser class: parse() and preSaveTransform().
14 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
16 # Globals used:
17 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
19 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
21 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
22 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
23 # $wgLocaltimezone
25 # * only within ParserOptions
28 #----------------------------------------
29 # Variable substitution O(N^2) attack
30 #-----------------------------------------
31 # Without countermeasures, it would be possible to attack the parser by saving a page
32 # filled with a large number of inclusions of large pages. The size of the generated
33 # page would be proportional to the square of the input size. Hence, we limit the number
34 # of inclusions of any given page, thus bringing any attack back to O(N).
# Upper bound on how often any single page may be included during one parse
# (countermeasure against the O(N^2) inclusion attack described above).
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Prefix of the placeholder markers used for escaping; shared by several functions
define( 'UNIQ_PREFIX', 'NaodW29' );
47 class Parser
49 # Cleared with clearState():
50 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array();
51 var $mVariables, $mIncludeCount, $mArgStack;
53 # Temporary:
54 var $mOptions, $mTitle, $mOutputType;
# Constructor (PHP 4 style): brings every per-parse member into its
# initial state by delegating to clearState().
function Parser()
{
	$this->clearState();
}
# Resets all members that are documented as "cleared with clearState()":
# a fresh ParserOutput, zeroed counters/flags and empty bookkeeping arrays.
function clearState()
{
	# Bookkeeping containers
	$this->mStripState = array();
	$this->mIncludeCount = array();
	$this->mArgStack = array();

	# Scalars and flags
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	$this->mVariables = false;

	# Result object for the next parse
	$this->mOutput = new ParserOutput;
}
73 # First pass--just handle <nowiki> sections, pass the rest off
74 # to internalParse() which does all the real work.
76 # Returns a ParserOutput
# Main entry point producing HTML.
#
# Strips protected sections out of $text, runs the remaining markup through
# internalParse(), puts the protected sections back and stores the result
# in the ParserOutput object.
#
# $text       wiki markup to convert
# $title      title object of the page being parsed (kept by reference)
# $options    parser options object
# $linestart  passed through to internalParse()
# $clearState when true, clearState() is called before parsing
#
# Returns $this->mOutput (a ParserOutput).
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	# The strip state lives in $this->mStripState so that nested calls
	# (e.g. link handling) can unstrip as well.
	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
# Returns a lowercase hexadecimal string built from two independent
# mt_rand() draws in the range 0..0x7fffffff, concatenated.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
106 # Replaces all occurrences of <$tag>content</$tag> in the text
107 # with a random marker and returns the new text. the output parameter
108 # $content will be an associative array filled with data on the form
109 # $unique_marker => content.
111 # If $content is already set, the additional entries will be appended
# Replaces every <$tag>...</$tag> section of $text by a unique marker.
#
# $tag         tag name, matched case-insensitively; whitespace inside the
#              angle brackets is tolerated
# $text        input text
# $content     output: marker => raw section content; existing entries are kept
# $uniq_prefix string prepended to every generated marker
#
# Returns the text with the sections replaced by their markers. An opening
# tag without a closing tag swallows the rest of the text into its section.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = "";

	# Quote the tag name so that regex metacharacters in it are taken literally
	$quotedTag = preg_quote( $tag, "/" );
	$openRegex = "/<\\s*$quotedTag\\s*>/i";
	$closeRegex = "/<\\/\\s*$quotedTag\\s*>/i";

	while ( "" != $text ) {
		$p = preg_split( $openRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( $closeRegex, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# No closing tag found: nothing is left to scan
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
137 # Strips <nowiki>, <pre> and <math>
138 # Returns the text, and fills an array with data needed in unstrip()
139 # If the $state is already a valid strip state, it adds to the state
# Pulls <nowiki>, <hiero> (if $wgUseWikiHiero), <math> (if TeX is enabled in
# the options) and <pre> sections out of $text, in that order, and replaces
# them by markers.
#
# When the output type is OT_HTML the stored content is the rendered/escaped
# form; otherwise the original tag is stored around the raw content.
#
# $state receives the marker dictionaries. If it already holds a strip state
# the new entries are added to it, otherwise a new state is created.
# Returns the stripped text.
function strip( $text, &$state )
{
	$forHtml = ( $this->mOutputType == OT_HTML );
	$prefix = UNIQ_PREFIX;

	$nowikiItems = array();
	$hieroItems = array();
	$mathItems = array();
	$preItems = array();
	$plainItems = array();

	# <nowiki>
	$text = Parser::extractTags( "nowiki", $text, $nowikiItems, $prefix );
	foreach ( array_keys( $nowikiItems ) as $marker ) {
		$raw = $nowikiItems[$marker];
		if ( $forHtml ) {
			$nowikiItems[$marker] = wfEscapeHTMLTagsOnly( $raw );
		} else {
			$nowikiItems[$marker] = "<nowiki>$raw</nowiki>";
		}
	}

	# <hiero>
	if ( $GLOBALS['wgUseWikiHiero'] ) {
		$text = Parser::extractTags( "hiero", $text, $hieroItems, $prefix );
		foreach ( array_keys( $hieroItems ) as $marker ) {
			$raw = $hieroItems[$marker];
			if ( $forHtml ) {
				$hieroItems[$marker] = WikiHiero( $raw, WH_MODE_HTML);
			} else {
				$hieroItems[$marker] = "<hiero>$raw</hiero>";
			}
		}
	}

	# <math>
	if ( $this->mOptions->getUseTeX() ) {
		$text = Parser::extractTags( "math", $text, $mathItems, $prefix );
		foreach ( array_keys( $mathItems ) as $marker ) {
			$raw = $mathItems[$marker];
			if ( $forHtml ) {
				$mathItems[$marker] = renderMath( $raw );
			} else {
				$mathItems[$marker] = "<math>$raw</math>";
			}
		}
	}

	# <pre>
	$text = Parser::extractTags( "pre", $text, $preItems, $prefix );
	foreach ( array_keys( $preItems ) as $marker ) {
		$raw = $preItems[$marker];
		if ( $forHtml ) {
			$preItems[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $raw ) . "</pre>";
		} else {
			$preItems[$marker] = "<pre>$raw</pre>";
		}
	}

	if ( !$state ) {
		# No previous state: start a new one
		$state = array(
			'nowiki' => $nowikiItems,
			'hiero' => $hieroItems,
			'math' => $mathItems,
			'pre' => $preItems,
			'item' => $plainItems
		);
	} else {
		# Merge with the pre-existing state; 'item' entries are left untouched
		$state['nowiki'] = $state['nowiki'] + $nowikiItems;
		$state['hiero'] = $state['hiero'] + $hieroItems;
		$state['math'] = $state['math'] + $mathItems;
		$state['pre'] = $state['pre'] + $preItems;
	}
	return $text;
}
# Puts the content saved in a strip state back into $text.
#
# $state is an array of dictionaries (marker => content) as built by strip()
# and insertStripItem(). Both the dictionaries and their entries are expanded
# in reverse order, otherwise nested tags would be corrupted.
#
# Every entry is processed regardless of its value, and a state that is not
# an array leaves the text unchanged.
# Returns the text with all known markers replaced.
function unstrip( $text, &$state )
{
	if ( !is_array( $state ) ) {
		return $text;
	}
	foreach ( array_reverse( $state, true ) as $contentDict ) {
		if ( !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}
	return $text;
}
225 # Add an item to the strip state
226 # Returns the unique tag which must be inserted into the stripped text
227 # The tag will be replaced with the original text in unstrip()
# Stores $text in the 'item' dictionary of the strip state under a newly
# generated marker and returns that marker. An empty $state is initialised
# with empty 'nowiki', 'hiero', 'math', 'pre' and 'item' dictionaries first.
function insertStripItem( $text, &$state )
{
	$marker = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		$state = array();
		foreach ( array( 'nowiki', 'hiero', 'math', 'pre', 'item' ) as $kind ) {
			$state[$kind] = array();
		}
	}
	$state['item'][$marker] = $text;
	return $marker;
}
# Builds the HTML listing appended to category pages.
#
# Returns "" when category magic is disabled or when the first
# colon-separated part of the current title is not the localised "category"
# word. Otherwise collects all pages linking to the current page (from the
# links and brokenlinks tables) and returns a block containing a
# "subcategories" section and an articles section.
function categoryMagic ()
{
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
	# The id is interpolated into SQL below, so force it to an integer
	$id = intval ( $this->mTitle->getArticleID() ) ;
	$cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
	$ti = $this->mTitle->getText() ;
	$ti = explode ( ":" , $ti , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	$catName = isset ( $ti[1] ) ? $ti[1] : "" ;
	# Double quotes so that \n is a real newline
	$r = "<br style=\"clear:both;\"/>\n";

	$articles = array() ;
	$children = array() ;

	# $sk =& $this->mGetSkin();
	$sk =& $wgUser->getSkin() ;

	$data = array () ;
	$sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
	$sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;

	# Fetch all rows of both queries, releasing each result once it is read
	foreach ( array ( $sql1 , $sql2 ) as $sql )
	{
		$res = wfQuery ( $sql, DB_READ ) ;
		while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
		wfFreeResult ( $res ) ;
	}

	foreach ( $data AS $x )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		# Titles starting with the category word are listed as subcategories
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		$h = wfMsg( "category_header", $catName );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
# Returns the list of attribute names that are allowed on HTML tags in
# wiki text (no scripting attributes). Each name appears exactly once.
function getHTMLattrs ()
{
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", /* BR */ "noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
		/* FONT */ "type", "start", "value", "compact",
		/* For various lists, mostly deprecated but safe */
		"summary", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan", /* Tables */
		"id", "class", "name", "style" /* For CSS */
	) ;
	return $htmlattrs ;
}
# Sanitises the attribute part of an HTML tag.
#
# Attributes whose name is not in getHTMLattrs() are removed. If the
# remaining text contains a style attribute with an "expression", a URL
# scheme or "url(" the whole attribute string is dropped (brute force).
#
# $t  raw attribute text, e.g. ' border="1" onclick="x"'
# Returns the trimmed, filtered attribute text ("" if nothing is left).
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag. A callback is used instead
	# of the /e modifier so that attribute text is never evaluated as PHP.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'filterTagAttribute' ),
		$t);

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	# NOTE(review): "tps*:" presumably targets http/https/ftp(s) URLs -- confirm
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# Callback for fixTagAttributes(): receives one regex match
# ($m[1] = attribute name, $m[3] = value if present) and returns the
# attribute unchanged when its name is allowed, "" otherwise.
/* private */ function filterTagAttribute ( $m )
{
	$htmlattrs = $this->getHTMLattrs() ;
	if ( !in_array ( strtolower ( $m[1] ) , $htmlattrs ) ) return "" ;
	$value = isset ( $m[3] ) ? $m[3] : "" ;
	if ( $value !== "" ) return $m[1] . "=" . $value ;
	return $m[1] ;
}
# Converts wiki table markup to HTML, line by line.
#
# Recognised line starts: "{|" opens a table, "|}" closes it, "|-" starts a
# new row, "|" / "!" start data / header cells (several per line separated
# by "||" resp. "!!"), "|+" is a caption. Tables may be nested; the four
# stacks below hold one entry per currently open table. Tables that are
# still open at the end of the text are closed automatically.
#
# $t  text to process
# Returns the text with table markup replaced by HTML tags.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after "{|" is attribute text; fixTagAttributes() trims it
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is emitted lazily by the first cell of the row
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row if this is its first cell
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;

				# "attributes|content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that was actually opened (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
# Runs the individual conversion passes over already stripped text.
#
# Order of the passes: HTML tag clean-up, variable replacement, headings,
# optional date reformatting, external links, tokenized parsing (internal
# links, quotes, ...), tables, heading formatting, skin transformation,
# some tag/ampersand fix-ups, category listing and finally block levels.
#
# $text      stripped wiki text
# $linestart passed to doBlockLevels()
# $args      passed to replaceVariables()
# Returns the converted text.
function internalParse( $text, $linestart, $args = array() )
{
	$fname = "Parser::internalParse";
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );

	$text = $this->doHeadings( $text );
	if($this->mOptions->getUseDynamicDates()) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
	}
	$text = $this->replaceExternalLinks( $text );
	$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->formatHeadings( $text );
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	$fixtags = array(
		"/<hr *>/i" => '<hr/>',
		"/<br *>/i" => '<br/>',
		"/<center *>/i"=>'<span style="text-align:center;">',
		"/<\\/center *>/i" => '</span>'
	);
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	// another round, but without regex
	// Escape bare ampersands; the character following the "&" is kept
	$fixtags = array(
		'& ' => '&amp; ',
		'&<' => '&amp;<',
	);
	$text = str_replace( array_keys($fixtags), array_values($fixtags), $text );

	$text .= $this->categoryMagic () ;

	# needs to be called last
	$text = $this->doBlockLevels( $text, $linestart );

	wfProfileOut( $fname );
	return $text;
}
# Turns "== Title ==" style lines into <hN> elements. Levels are handled
# from six equal signs down to one, so longer markers win over shorter ones.
# A heading must start at the beginning of a line; the closing marker must
# be followed by whitespace or the end of the line.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
507 # Note: we have to do external links before the internal ones,
508 # and otherwise take great care in the order of things here, so
509 # that we don't end up interpreting some URLs twice.
# Converts external links of all supported protocols. The protocols are
# processed in a fixed order; only http and https links are auto-numbered.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => auto-number unlabelled bracketed links?
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
# Converts the external links of one protocol.
#
# First free-standing URLs are turned into images (only when $autonumber is
# set and external images are allowed) or into <a> elements. During this
# step the protocol is temporarily replaced by $unique in the output, and
# restored afterwards. Then bracketed links "[url]" and "[url text]" are
# converted; text that does not match either form is put back unchanged.
#
# $s          text to process
# $protocol   protocol name without the colon, e.g. "http"
# $autonumber when true, "[url]" links are shown as "[1]", "[2]", ...
#             using $this->mAutonumber; otherwise the escaped URL is shown
# Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
			"/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
	}
	$s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		"</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Split at "[protocol:"; the leading blank guarantees a first chunk
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $a as $line ) {
		if ( preg_match( $e1, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		} else if ( preg_match( $e2, $line, $m ) ) {
			# [url text]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a link after all
			$s .= "[{$protocol}:" . $line;
			continue;
		}
		# The pattern is delimited by "!", so quote for that delimiter
		$sameAsText = "!" . preg_quote( $protocol, "!" ) . "://" . preg_quote( $text, "!" ) . "/?$!";
		if( $link == $text || preg_match( $sameAsText, $link ) ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

	}
	return $s;
}
# Handles a ''' token: toggles the "strong" state and returns the HTML for it.
#
# $state["strong"] / $state["em"] are false when closed, otherwise the
# position where the element was opened (or true if the token has no "pos").
# If an <em> was opened after the <strong> that is being closed, the <em> is
# closed and reopened around the </strong> to keep the tags nested properly.
/* private */ function handle3Quotes( &$state, $token )
{
	if ( $state["strong"] === false ) {
		# Opening
		$state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
		return "<strong>";
	}

	# Closing; ''' lala ''lala ''' needs the em to be reopened
	$emOpenedInside = ( $state["em"] !== false && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
# Handles a '' token: toggles the "em" state and returns the HTML for it.
#
# $state["em"] / $state["strong"] are false when closed, otherwise the
# position where the element was opened (or true if the token has no "pos").
# If a <strong> was opened after the <em> that is being closed, the <strong>
# is closed and reopened around the </em> to keep the tags nested properly.
/* private */ function handle2Quotes( &$state, $token )
{
	if ( $state["em"] === false ) {
		# Opening
		$state["em"] = isset($token["pos"]) ? $token["pos"] : true;
		return "<em>";
	}

	# Closing; ''lala'''lala'' ....''' needs the strong to be reopened
	$strongOpenedInside = ( $state["strong"] !== false && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
# Handles a ''''' token (bold and italic at once) and returns the HTML.
#
# - both open:   close both, innermost first
# - only em:     close em, open strong
# - only strong: close strong, open em
# - none open:   open strong and em
#
# Newly opened elements are recorded with the token position, or with true
# when the token carries no "pos" (same convention as handle2Quotes() and
# handle3Quotes()).
/* private */ function handle5Quotes( &$state, $token )
{
	$pos = isset($token["pos"]) ? $token["pos"] : true;
	$s = "";
	if ( $state["em"] !== false && $state["strong"] !== false ) {
		if ( $state["em"] < $state["strong"] ) {
			$s .= "</strong></em>";
		} else {
			$s .= "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] !== false ) {
		$s .= "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $pos;
	} elseif ( $state["strong"] !== false ) {
		$s .= "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $pos;
	} else { # not $em and not $strong
		$s .= "<strong><em>";
		$state["strong"] = $state["em"] = $pos;
	}
	return $s;
}
# Token based pass handling internal links, quotes (bold/italic), "----"
# rules and the RFC / ISBN magic words.
#
# While a link is open ($tagIsOpen) everything is collected on $tokenStack
# instead of being appended to the output; when the closing "]]" arrives the
# collected text is glued together and given to handleInternalLink().
# Links that are never closed are written out unchanged at the end.
#
# $str  text to process
# Returns the processed text.
/* private */ function doTokenizedParser( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s="";
	$state = array( "em" => FALSE, "strong" => FALSE );
	$tagIsOpen = FALSE;
	$threeopen = false;

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;
			case "[[[":
				# remember the tag opened with 3 [
				$threeopen = true;
				# intentional fall-through
			case "[[":
				# link opening tag.
				# FIXME : Treat orphaned open tags (stack not empty when text is over)
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt="";
				break;

			case "]]]":
			case "]]":
				# link close tag.
				# get text from stack, glue it together, and call the code to handle a
				# link

				if ( count( $tokenStack ) == 0 )
				{
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					$linkText = "";
					$lastToken = array_pop( $tokenStack );
					while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
					{
						# Compare against "" explicitly: empty() would also
						# throw away a text of "0"
						if( isset( $lastToken["text"] ) && $lastToken["text"] !== "" ) {
							$linkText = $lastToken["text"] . $linkText;
						}
						$lastToken = array_pop( $tokenStack );
					}

					$txt = $linkText ."]]";

					if( isset( $lastToken["text"] ) ) {
						$prefix = $lastToken["text"];
					} else {
						$prefix = "";
					}
					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" )
					{
						# Preview just looks at it. Now we have to fetch it.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );

					# did the tag start with 3 [ ?
					if($threeopen) {
						# show the first as text
						$txt = "[".$txt;
						$threeopen=false;
					}

				}
				$tagIsOpen = (count( $tokenStack ) != 0);
				break;
			case "----":
				$txt = "\n<hr />\n";
				break;
			case "'''":
				# This and the three next ones handle quotes
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$txt="";
				break;
			case "RFC ":
				if ( $tagIsOpen ) {
					$txt = "RFC ";
				} else {
					$txt = $this->doMagicRFC( $tokenizer );
				}
				break;
			case "ISBN ":
				if ( $tagIsOpen ) {
					$txt = "ISBN ";
				} else {
					$txt = $this->doMagicISBN( $tokenizer );
				}
				break;
			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}
		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while
	if ( count( $tokenStack ) != 0 )
	{
		# still objects on stack. opened [[ tag without closing ]] tag.
		$txt = "";
		while ( $lastToken = array_pop( $tokenStack ) )
		{
			if ( $lastToken["type"] == "text" )
			{
				$txt = $lastToken["text"] . $txt;
			} else {
				$txt = $lastToken["type"] . $txt;
			}
		}
		$s .= $txt;
	}
	return $s;
}
# Converts one internal link.
#
# $line   the link target, optional "|text", the closing "]]" and the text
#         following it (everything after the opening "[[")
# $prefix text that belongs in front of the link
#
# Depending on the target this produces: the unchanged markup (invalid
# links), nothing but a registered language link, an image, a bold
# self-link, a registered category link, a media link, a special page link
# or a normal page link. Returns the resulting HTML/text.
# The profiling section opened at the top is closed on every return path.
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::handleInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );
	$s = "";

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		wfProfileOut( $fname );
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		# !empty() also covers namespaces without an entry in the setting
		if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		wfProfileOut( $fname );
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			# Language link: only registered, not shown in the text
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			wfProfileOut( $fname );
			return (trim($s) == '')? '': $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# Link to the page itself (without fragment): shown in bold, not linked.
	# strpos() must be compared strictly, a "#" at offset 0 is a match too.
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
	( strpos( $link, "#" ) === false ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		wfProfileOut( $fname );
		return $s;
	}

	# Category feature
	$catns = strtoupper ( $nt->getDBkey () ) ;
	$catns = explode ( ":" , $catns ) ;
	if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
	else $catns = "" ;
	if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		$this->mOutput->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		wfProfileOut( $fname );
		return $s ;
	}

	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
936 # Some functions here used by doBlockLevels()
# Returns the closing tag (followed by a newline) for the section named in
# $this->mLastSection, or "" if no section is open, and marks the section
# as closed.
/* private */ function closeParagraph()
{
	$closing = "";
	if ( '' != $this->mLastSection ) {
		$closing = "</" . $this->mLastSection . ">\n";
	}
	$this->mLastSection = "";
	return $closing;
}
947 # getCommon() returns the length of the longest common substring
948 # of both arguments, starting at the beginning of both.
# Returns the length of the longest common prefix of $st1 and $st2,
# i.e. the number of leading characters both strings share.
/* private */ function getCommon( $st1, $st2 )
{
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets: the curly-brace form no longer exists in PHP 8
	for ( $i = 0; $i < $limit; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
961 # These next three functions open, continue, and close the list
962 # element appropriate to the prefix character passed into them.
# Returns the HTML that opens a list and its first item for the given
# prefix character, preceded by the closing tag of any open paragraph:
# "*" => <ul><li>, "#" => <ol><li>, ":" => <dl><dd>, ";" => <dl><dt>
# (the latter also sets $this->mDTopen). Any other character yields only
# an error comment.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			# Replaces (not extends) whatever closeParagraph() returned
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
# Returns the HTML that ends the current list item and starts the next one
# for the given prefix character. For definition lists the closing tag
# depends on whether a <dt> is currently open ($this->mDTopen), and the flag
# is updated to reflect the newly opened element.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$close = "</dd>";
	if ( $this->mDTopen ) { $close = "</dt>"; }

	$isTerm = ( ";" == $char );
	$this->mDTopen = $isTerm;
	return $close . ( $isTerm ? "<dt>" : "<dd>" );
}
# Returns the HTML (plus a newline) that closes the last item and the list
# for the given prefix character. Only "*", "#" and ":" are accepted; any
# other character yields an error comment without a newline.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$text = "</li></ul>";
			break;
		case "#":
			$text = "</li></ol>";
			break;
		case ":":
			# A definition list ends with whichever element is open
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$text = "</dt></dl>";
			} else {
				$text = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $text."\n";
}
# Line based pass creating the block level structure: lists from lines
# starting with * # : ; and paragraphs / preformatted sections for the rest.
#
# $text      text to process
# $linestart when false, the first line is copied to the output unprocessed
#
# Local state:
#   $lastPref    list prefix of the previous line (";" stored as ":")
#   $inBlockElem true after a line opening a block element such as <table>,
#                until a line containing a closing block tag is seen
#   $pstack      paragraph tag held back until it is known whether more
#                text follows (false if nothing is pending)
# Returns the processed text.
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.

	$a = explode( "\n", $text );

	$lastPref = $text = $lastLine = '';
	$this->mDTopen = $inBlockElem = false;
	$npl = 0;
	$pstack = false;

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		$opl = strlen( $lastPref );
		$npl = strspn( $t, "*#:;" );
		$pref = substr( $t, 0, $npl );
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );
		// list generation
		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same prefix as the previous line: next item of the same list
			$text .= $this->nextItem( substr( $pref, -1 ) );
			if ( $pstack ) { $pstack = false; }

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on a single line
				$cpos = strpos( $t, ":" );
				if ( false !== $cpos ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Prefix changed: close what is no longer common, open what is new
			$cpl = $this->getCommon( $pref, $lastPref );
			if ( $pstack ) { $pstack = false; }

			while ( $cpl < $opl ) {
				$text .= $this->closeList( substr( $lastPref, $opl - 1, 1 ) );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( substr( $pref, $cpl - 1, 1 ) );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				if ( $pstack ) { $pstack = false; }
				$text .= $this->closeParagraph();
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem ) {
				# substr() instead of a string offset: $t may be empty here
				if ( " " == substr( $t, 0, 1 ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$pstack = false;
						$text .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						# Blank line: hold the paragraph tag back in $pstack
						if ( $pstack ) {
							$text .= $pstack.'<br/>';
							$pstack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$text .= $this->closeParagraph();
								$this->mLastSection = '';
								$pstack = "<p>";
							} else {
								$pstack = '</p><p>';
							}
						}
					} else {
						if ( $pstack ) {
							$text .= $pstack;
							$pstack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$text .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While a paragraph tag is pending the (blank) line itself is not output
		if ($pstack === false) {
			$text .= $t."\n";
		}
	}
	# Close the lists still open after the last line
	while ( $npl ) {
		$text .= $this->closeList( substr( $pref2, $npl - 1, 1 ) );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		$text .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $text;
}
# Looks up the current value of a built-in variable.
#
# $index is one of the MAG_* magic word IDs handled in the switch below.
# Returns the value for that variable, or NULL when $index is not one of
# the IDs listed here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") is 0-based; the weekday name lookup is called with 1..7
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
# Rebuilds $this->mVariables from scratch: for every ID listed in
# $wgVariableIDs, the matching magic word registers itself in the array
# together with the value reported by getVariableValue().
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
# Replaces every {{...}} construct in $text by running it through
# wfBraceSubstitution() (which forwards to braceSubstitution() on the
# parser stored in $GLOBALS['wgCurParser']).
#
# $text  the text to process
# $args  arguments made available to the substitutions; pushed on
#        $this->mArgStack for the duration of the call
#
# Returns the text with all matches replaced by the callback's result.
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();
	# 1: optional leading newline, 2: title part, 3: "|..." argument part (may be empty)
	$regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;
	$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	# Close the profiling section opened above; it was previously left open
	wfProfileOut( $fname );
	return $text;
}
# Computes the replacement for one {{...}} match found by the regex in
# replaceVariables() (reached through the wfBraceSubstitution() callback).
#
# $matches: [0] the whole match, [1] optional newline before the braces,
#           [2] the part before the first "|", [3] the "|..." argument part
#           (empty string when there are no arguments).
#
# The handlers below are tried in order and each one is skipped once $found
# is set: SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE, internal variables,
# arguments supplied by the caller, and finally loading a page from the
# database.
#
# Returns $matches[0] unchanged when nothing was found, otherwise the
# leading newline followed by the replacement text.
1210 function braceSubstitution( $matches )
1212 global $wgLinkCache, $wgLang;
1213 $fname = "Parser::braceSubstitution";
1214 $found = false;
1215 $nowiki = false;
1216 $title = NULL;
1218 # $newline is an optional newline character before the braces
1219 # $part1 is the bit before the first |, and must contain only title characters
1220 # $args is a list of arguments, starting from index 0, not including $part1
1222 $newline = $matches[1];
1223 $part1 = $matches[2];
1224 # If the third subpattern matched anything, it will start with |
1225 if ( $matches[3] !== "" ) {
1226 $args = explode( "|", substr( $matches[3], 1 ) );
1227 } else {
1228 $args = array();
1230 $argc = count( $args );
1232 # SUBST
# In both branches below the original text is kept as-is ($text = $matches[0])
# and $found is set so that none of the later handlers run.
1233 $mwSubst =& MagicWord::get( MAG_SUBST );
1234 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1235 if ( $this->mOutputType != OT_WIKI ) {
1236 # Invalid SUBST not replaced at PST time
1237 # Return without further processing
1238 $text = $matches[0];
1239 $found = true;
1241 } elseif ( $this->mOutputType == OT_WIKI ) {
1242 # SUBST not found in PST pass, do nothing
1243 $text = $matches[0];
1244 $found = true;
1247 # MSG, MSGNW and INT
1248 if ( !$found ) {
1249 # Check for MSGNW:
1250 $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1251 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1252 $nowiki = true;
1253 } else {
1254 # Remove obsolete MSG:
1255 $mwMsg =& MagicWord::get( MAG_MSG );
1256 $mwMsg->matchStartAndRemove( $part1 );
1259 # Check if it is an internal message
# Internal messages share the include limit, keyed as "int:<name>"
1260 $mwInt =& MagicWord::get( MAG_INT );
1261 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1262 if ( $this->incrementIncludeCount( "int:$part1" ) ) {
1263 $text = wfMsgReal( $part1, $args, true );
1264 $found = true;
1269 # NS
1270 if ( !$found ) {
1271 # Check for NS: (namespace expansion)
1272 $mwNs = MagicWord::get( MAG_NS );
1273 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero numeric value is used directly as a namespace index; anything
# else (including "0") is looked up as a canonical namespace name.
1274 if ( intval( $part1 ) ) {
1275 $text = $wgLang->getNsText( intval( $part1 ) );
1276 $found = true;
1277 } else {
1278 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1279 if ( !is_null( $index ) ) {
1280 $text = $wgLang->getNsText( $index );
1281 $found = true;
1287 # LOCALURL and LOCALURLE
1288 if ( !$found ) {
1289 $mwLocal = MagicWord::get( MAG_LOCALURL );
1290 $mwLocalE = MagicWord::get( MAG_LOCALURLE );
# $func ends up as the name of the Title method to call, or '' when neither
# magic word matched
1292 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1293 $func = 'getLocalURL';
1294 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1295 $func = 'escapeLocalURL';
1296 } else {
1297 $func = '';
1300 if ( $func !== '' ) {
1301 $title = Title::newFromText( $part1 );
1302 if ( !is_null( $title ) ) {
# Only the first argument, if any, is passed on to the Title method
1303 if ( $argc > 0 ) {
1304 $text = $title->$func( $args[0] );
1305 } else {
1306 $text = $title->$func();
1308 $found = true;
1313 # Internal variables
1314 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1315 $text = $this->mVariables[$part1];
1316 $found = true;
1317 $this->mOutput->mContainsOldMagic = true;
1320 # Arguments input from the caller
# The top of mArgStack is the $args array pushed by the enclosing
# replaceVariables() call
1321 $inputArgs = end( $this->mArgStack );
1322 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1323 $text = $inputArgs[$part1];
1324 $found = true;
1327 # Load from database
1328 if ( !$found ) {
1329 $title = Title::newFromText( $part1, NS_TEMPLATE );
1330 if ( !is_null( $title ) && !$title->isExternal() ) {
1331 # Check for excessive inclusion
1332 $dbk = $title->getPrefixedDBkey();
1333 if ( $this->incrementIncludeCount( $dbk ) ) {
1334 $article = new Article( $title );
1335 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1336 if ( $articleContent !== false ) {
1337 $found = true;
1338 $text = $articleContent;
1343 # If the title is valid but undisplayable, make a link to it
1344 if ( $this->mOutputType == OT_HTML && !$found ) {
1345 $text = "[[" . $title->getPrefixedText() . "]]";
1346 $found = true;
1351 # Recursive parsing, escaping and link table handling
1352 # Only for HTML output
1353 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1354 $text = wfEscapeWikiText( $text );
1355 } elseif ( $this->mOutputType == OT_HTML && $found ) {
1356 # Clean up argument array
# Arguments without "=" are numbered from 1; "name=value" arguments are
# stored under their trimmed name.
1357 $assocArgs = array();
1358 $index = 1;
1359 foreach( $args as $arg ) {
1360 $eqpos = strpos( $arg, "=" );
1361 if ( $eqpos === false ) {
1362 $assocArgs[$index++] = $arg;
1363 } else {
1364 $name = trim( substr( $arg, 0, $eqpos ) );
1365 $value = trim( substr( $arg, $eqpos+1 ) );
1366 if ( $value === false ) {
1367 $value = "";
1369 if ( $name !== false ) {
1370 $assocArgs[$name] = $value;
1375 # Do not enter included links in link table
1376 if ( !is_null( $title ) ) {
1377 $wgLinkCache->suspend();
1380 # Run full parser on the included text
1381 $text = $this->strip( $text, $this->mStripState );
1382 $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
1384 # Add the result to the strip state for re-inclusion after
1385 # the rest of the processing
1386 $text = $this->insertStripItem( $text, $this->mStripState );
1388 # Resume the link cache and register the inclusion as a link
1389 if ( !is_null( $title ) ) {
1390 $wgLinkCache->resume();
1391 $wgLinkCache->addLinkObj( $title );
1395 if ( !$found ) {
1396 return $matches[0];
1397 } else {
1398 return $newline . $text;
# Counts one more inclusion of the entity identified by $dbk and reports
# whether that inclusion is still allowed, i.e. whether the running count
# for $dbk has not exceeded MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk )
{
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;

	return ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
1416 # Cleans up HTML, removes dangerous tags and attributes
#
# The text is split on "<" and each piece is examined as a possible tag.
# Tags whose name is in the whitelists below and that pass the nesting
# checks are re-emitted with their attributes run through
# fixTagAttributes(); every other piece is emitted with its "<" and ">"
# escaped as &lt; / &gt;. Tags still open at the end are closed.
#
# Returns the cleaned text.
1417 /* private */ function removeHTMLtags( $text )
1419 $fname = "Parser::removeHTMLtags";
1420 wfProfileIn( $fname );
1421 $htmlpairs = array( # Tags that must be closed
1422 "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1423 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1424 "strike", "strong", "tt", "var", "div", "center",
1425 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1426 "ruby", "rt" , "rb" , "rp", "p"
1428 $htmlsingle = array(
1429 "br", "hr", "li", "dt", "dd"
1431 $htmlnest = array( # Tags that can be nested--??
1432 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1433 "dl", "font", "big", "small", "sub", "sup"
1435 $tabletags = array( # Can only appear inside table
1436 "td", "th", "tr"
# Table tags are treated like single tags: they are never pushed on the
# tag stack and a closing one is not checked against it
1439 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1440 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1442 $htmlattrs = $this->getHTMLattrs () ;
1444 # Remove HTML comments
1445 $text = preg_replace( "/<!--.*-->/sU", "", $text );
# Everything before the first "<" is kept verbatim; each following piece
# starts right after a "<"
1447 $bits = explode( "<", $text );
1448 $text = array_shift( $bits );
1449 $tagstack = array(); $tablestack = array();
1451 foreach ( $bits as $x ) {
# Notices and warnings are silenced around the match/list() pair, which
# runs even when the pattern does not match and $regs is empty
1452 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1453 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1454 $x, $regs );
1455 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1456 error_reporting( $prev );
1458 $badtag = 0 ;
1459 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1460 # Check our stack
1461 if ( $slash ) {
1462 # Closing a tag...
# A closing paired tag must match the top of the stack; on mismatch the
# popped entry is pushed back and the tag is marked bad
1463 if ( ! in_array( $t, $htmlsingle ) &&
1464 ( $ot = array_pop( $tagstack ) ) != $t ) {
1465 array_push( $tagstack, $ot );
1466 $badtag = 1;
1467 } else {
# Leaving a table restores the tag stack saved when the table was opened
1468 if ( $t == "table" ) {
1469 $tagstack = array_pop( $tablestack );
1471 $newparams = "";
1473 } else {
1474 # Keep track for later
1475 if ( in_array( $t, $tabletags ) &&
1476 ! in_array( "table", $tagstack ) ) {
1477 $badtag = 1;
1478 } else if ( in_array( $t, $tagstack ) &&
1479 ! in_array ( $t , $htmlnest ) ) {
1480 $badtag = 1 ;
1481 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table saves the current tag stack and starts a fresh one
1482 if ( $t == "table" ) {
1483 array_push( $tablestack, $tagstack );
1484 $tagstack = array();
1486 array_push( $tagstack, $t );
1488 # Strip non-approved attributes from the tag
1489 $newparams = $this->fixTagAttributes($params);
1492 if ( ! $badtag ) {
1493 $rest = str_replace( ">", "&gt;", $rest );
1494 $text .= "<$slash$t $newparams$brace$rest";
1495 continue;
# Not an accepted tag: emit the piece as literal text
1498 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1500 # Close off any remaining tags
1501 while ( $t = array_pop( $tagstack ) ) {
1502 $text .= "</$t>\n";
1503 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1505 wfProfileOut( $fname );
1506 return $text;
1511 * This function accomplishes several tasks:
1512 * 1) Auto-number headings if that option is enabled
1513 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1514 * 3) Add a Table of contents on the top for users who have enabled the option
1515 * 4) Auto-anchor headings
1517 * It loops through all headlines, collects the necessary data, then splits up the
1518 * string and re-inserts the newly formatted headlines.
#
# $text is HTML that already contains <h1>..<h6> elements. Returns the text
# with every headline replaced by its rebuilt version and, when enabled,
# the table of contents appended after the first block of text.
1522 /* private */ function formatHeadings( $text )
1524 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1525 $doShowToc = $this->mOptions->getShowToc();
1526 if( !$this->mTitle->userCanEdit() ) {
1527 $showEditLink = 0;
1528 $rightClickHack = 0;
1529 } else {
1530 $showEditLink = $this->mOptions->getEditSection();
1531 $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1534 # Inhibit editsection links if requested in the page
1535 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1536 if( $esw->matchAndRemove( $text ) ) {
1537 $showEditLink = 0;
1539 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1540 # do not add TOC
1541 $mw =& MagicWord::get( MAG_NOTOC );
1542 if( $mw->matchAndRemove( $text ) ) {
1543 $doShowToc = 0;
1546 # never add the TOC to the Main Page. This is an entry page that should not
1547 # be more than 1-2 screens large anyway
1548 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1549 $doShowToc = 0;
1552 # Get all headlines for numbering them and adding funky stuff like [edit]
1553 # links - this is for later, but we need the number of headlines right now
# $matches[1] = heading level, $matches[2] = rest of the opening tag up to
# and including ">", $matches[3] = headline contents
1554 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1556 # if there are fewer than 4 headlines in the article, do not show TOC
1557 if( $numMatches < 4 ) {
1558 $doShowToc = 0;
1561 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1562 # override above conditions and always show TOC
1563 $mw =& MagicWord::get( MAG_FORCETOC );
1564 if ($mw->matchAndRemove( $text ) ) {
1565 $doShowToc = 1;
1569 # We need this to perform operations on the HTML
1570 $sk =& $this->mOptions->getSkin();
1572 # headline counter
1573 $headlineCount = 0;
1575 # Ugh .. the TOC should have neat indentation levels which can be
1576 # passed to the skin functions. These are determined here
1577 $toclevel = 0;
1578 $toc = "";
1579 $full = "";
1580 $head = array();
1581 $sublevelCount = array();
1582 $level = 0;
1583 $prevlevel = 0;
1584 foreach( $matches[3] as $headline ) {
1585 $numbering = "";
1586 if( $level ) {
1587 $prevlevel = $level;
1589 $level = $matches[1][$headlineCount];
1590 if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1591 # reset when we enter a new level
1592 $sublevelCount[$level] = 0;
1593 $toc .= $sk->tocIndent( $level - $prevlevel );
1594 $toclevel += $level - $prevlevel;
1596 if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1597 # reset when we step back a level
1598 $sublevelCount[$level+1]=0;
1599 $toc .= $sk->tocUnindent( $prevlevel - $level );
1600 $toclevel -= $prevlevel - $level;
1602 # count number of headlines for each level
1603 @$sublevelCount[$level]++;
# Build the dotted section number from the non-empty counters of levels
# 1..$level
1604 if( $doNumberHeadings || $doShowToc ) {
1605 $dot = 0;
1606 for( $i = 1; $i <= $level; $i++ ) {
1607 if( !empty( $sublevelCount[$i] ) ) {
1608 if( $dot ) {
1609 $numbering .= ".";
1611 $numbering .= $sublevelCount[$i];
1612 $dot = 1;
1617 # The canonized header is a version of the header text safe to use for links
1618 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1619 $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1621 # strip out HTML
1622 $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1623 $tocline = trim( $canonized_headline );
# Runs of the listed punctuation/space characters collapse to a single "_"
1624 $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
1625 $refer[$headlineCount] = $canonized_headline;
1627 # count how many in assoc. array so we can track dupes in anchors
1628 @$refers[$canonized_headline]++;
1629 $refcount[$headlineCount]=$refers[$canonized_headline];
1631 # Prepend the number to the heading text
1633 if( $doNumberHeadings || $doShowToc ) {
1634 $tocline = $numbering . " " . $tocline;
1636 # Don't number the heading if it is the only one (looks silly)
1637 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1638 # the two are different if the line contains a link
1639 $headline=$numbering . " " . $headline;
1643 # Create the anchor for linking from the TOC to the section
# Repeated headline texts get "_<n>" appended from the second occurrence on
1644 $anchor = $canonized_headline;
1645 if($refcount[$headlineCount] > 1 ) {
1646 $anchor .= "_" . $refcount[$headlineCount];
1648 if( $doShowToc ) {
1649 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1651 if( $showEditLink ) {
1652 if ( empty( $head[$headlineCount] ) ) {
1653 $head[$headlineCount] = "";
1655 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1658 # Add the edit section span
1659 if( $rightClickHack ) {
1660 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1663 # give headline the correct <h#> tag
1664 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1666 $headlineCount++;
1669 if( $doShowToc ) {
1670 $toclines = $headlineCount;
1671 $toc .= $sk->tocUnindent( $toclevel );
1672 $toc = $sk->tocTable( $toc );
1675 # split up and insert constructed headlines
# $blocks[$i] is the text preceding headline $i; $head[$i] is re-inserted
# after it
1677 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1678 $i = 0;
1680 foreach( $blocks as $block ) {
1681 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1682 # This is the [edit] link that appears for the top block of text when
1683 # section editing is enabled
1685 # Disabled because it broke block formatting
1686 # For example, a bullet point in the top line
1687 # $full .= $sk->editSectionLink(0);
1689 $full .= $block;
1690 if( $doShowToc && !$i) {
1691 # Top anchor now in skin
1692 $full = $full.$toc;
1695 if( !empty( $head[$i] ) ) {
1696 $full .= $head[$i];
1698 $i++;
1701 return $full;
# Builds the output for an "ISBN" magic word from the token that follows it.
#
# $tokenizer  object providing previewToken() and nextToken(); tokens are
#             arrays with a "type" key and, for text tokens, a "text" key
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed: leading blanks are stripped, then the longest leading run
# of digits, "-" and upper-case letters is taken as the ISBN. With a
# non-empty number (dashes removed) a link to Special:Booksources is
# returned, followed by the rest of the token text; otherwise the plain
# text "ISBN " plus the token text is returned. If the next token is not a
# text token, "ISBN " is returned and the token is left unconsumed.
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# strspn() measures the leading run directly, so scanning stops cleanly
	# at the end of the string instead of relying on out-of-range offsets
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$isbnLen = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $isbnLen );
	$x = (string)substr( $x, $isbnLen );

	$num = str_replace( "-", "", $isbn );

	if ( "" === $num ) {
		$text = "ISBN $blank$x";
	} else {
		$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
		$text = "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
			"\" class=\"internal\">ISBN $isbn</a>";
		$text .= $x;
	}
	return $text;
}
# Builds the output for an "RFC" magic word from the token that follows it.
#
# $tokenizer  object providing previewToken() and nextToken(); tokens are
#             arrays with a "type" key and, for text tokens, a "text" key
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed: leading blanks are stripped, then the leading run of
# digits is taken as the RFC number. With a number, an external link built
# from the "rfcurl" message ("$1" replaced by the number) is returned,
# followed by the rest of the token text; without one, "RFC " plus the token
# text is returned. If the next token is not a text token, "RFC " is
# returned and the token is left unconsumed.
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789";

	# strspn() measures the leading run directly, so scanning stops cleanly
	# at the end of the string instead of relying on out-of-range offsets
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$rfcLen = strspn( $x, $valid );
	$rfc = (string)substr( $x, 0, $rfcLen );
	$x = (string)substr( $x, $rfcLen );

	if ( "" === $rfc ) {
		# Plain assignment: $text has no earlier value to append to
		$text = "RFC $blank$x";
	} else {
		$url = wfMsg( "rfcurl" );
		$url = str_replace( "$1", $rfc, $url);
		$sk =& $this->mOptions->getSkin();
		$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
		$text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
	}
	return $text;
}
# Entry point for the pre-save pass: returns altered wiki markup for $text.
#
# $text        wiki markup as submitted
# $title       stored by reference in $this->mTitle
# $user        passed on to pstPass2()
# $options     stored in $this->mOptions
# $clearState  when true, clearState() is called before processing
#
# Steps: normalise CRLF line ends, normalise <br> variants, strip protected
# sections, run pstPass2(), then put the stripped sections back.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	$stripState = false;
	$pairs = array(
		"\r\n" => "\n",
	);
	$text = str_replace(array_keys($pairs), array_values($pairs), $text);
	// now with regexes
	$pairs = array(
		# [^<>]+ keeps the match inside one tag; ".+" could run from one <br>
		# to a later tag on the same line and delete the text in between
		"/<br[^<>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
		"/<br *?>/i" => "<br/>",
	);
	$text = preg_replace(array_keys($pairs), array_values($pairs), $text);
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $text;
}
# Second stage of the pre-save transform, called from preSaveTransform()
# on text that has already been through strip().
#
# $text  wiki markup
# $user  object providing getName() and getOption( "nickname" )
#
# Performs, in order: variable replacement, expansion of ~~~ / ~~~~ / ~~~~~
# signatures, expansion of the [[|name]] / [[name (context)|]] link
# shortcuts, and trimming of trailing whitespace. Returns the result.
1820 /* private */ function pstPass2( $text, &$user )
1822 global $wgLang, $wgLocaltimezone, $wgCurParser;
1824 # Variable replacement
1825 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1826 $text = $this->replaceVariables( $text );
1828 # Signatures
# $n is the user name, $k the display text (nickname, or the name when no
# nickname is set)
1830 $n = $user->getName();
1831 $k = $user->getOption( "nickname" );
1832 if ( "" == $k ) { $k = $n; }
# The TZ environment variable is switched to $wgLocaltimezone only while
# the timestamp is formatted, then restored
1833 if(isset($wgLocaltimezone)) {
1834 $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1836 /* Note: this is an ugly timezone hack for the European wikis */
1837 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1838 " (" . date( "T" ) . ")";
1839 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
# Longest pattern first, so that ~~~~~ is not consumed by the shorter ones
1841 $text = preg_replace( "/~~~~~/", $d, $text );
1842 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1843 Namespace::getUser() ) . ":$n|$k]] $d", $text );
1844 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1845 Namespace::getUser() ) . ":$n|$k]]", $text );
1847 # Context links: [[|name]] and [[name (context)|]]
1849 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1850 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1851 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1852 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1854 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
1855 $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
1856 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
1857 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1858 # [[ns:page (cont)|]]
# $context is the parenthesised suffix of the current page title, if any
1859 $context = "";
1860 $t = $this->mTitle->getText();
1861 if ( preg_match( $conpat, $t, $m ) ) {
1862 $context = $m[2];
1864 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1865 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1866 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1868 if ( "" == $context ) {
1869 $text = preg_replace( $p2, "[[\\1]]", $text );
1870 } else {
1871 $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
# NOTE(review): the next four statements call fork() and merge(), which are
# not defined anywhere in the visible part of this class; they look like a
# block that is commented out in the real file -- confirm before relying
# on them.
1875 $mw =& MagicWord::get( MAG_SUBST );
1876 $wgCurParser = $this->fork();
1877 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1878 $this->merge( $wgCurParser );
1881 # Trim trailing whitespace
1882 # MAG_END (__END__) tag allows for trailing
1883 # whitespace to be deliberately included
1884 $text = rtrim( $text );
1885 $mw =& MagicWord::get( MAG_END );
1886 $mw->matchAndRemove( $text );
1888 return $text;
# Prepares the parser for use by an external function: stores the title
# (by reference), the options and the output type -- the members normally
# set up in parse() -- and, unless $clearState is false, resets the parser
# state.
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
# Runs variable replacement on a message text with output type OT_MSG,
# using $wgTitle as the title and a freshly cleared parser state.
#
# A static flag guards against infinite recursion: a call made while
# another call is still in progress returns $text untouched.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}
	return $text;
}
# Holds the result of a parse: the output text, the language links, the
# category links and a flag telling whether old-style magic variables were
# encountered.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
	}

	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function containsOldMagic() { return $this->mContainsOldMagic; }
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

	# Folds another ParserOutput into this one: both link lists are
	# appended and the old-magic flag is OR-ed. The text is not merged.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from $other's category links, not from the
		# language links merged on the line above
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}
}
# Bundle of settings that control a parse. Site-wide settings are copied
# from globals and per-user settings from a user object by
# initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a new ParserOptions object initialised from $user.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() declares its parameter by reference, so no
		# call-time "&" is needed (that form is a fatal error on PHP >= 5.4)
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills in all options: the site-wide ones from the $wg* globals, the
	# rest from the user's skin and preferences. When $userInput is empty
	# a new User object, marked as loaded, is used instead.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
			$user->setLoaded( true );
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}
}
# Regex callback used by Parser::replaceVariables(): hands the match array
# to braceSubstitution() on the parser currently stored in the global
# $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
	$activeParser =& $GLOBALS['wgCurParser'];
	$replacement = $activeParser->braceSubstitution( $matches );
	return $replacement;
}