$_REQUEST -> $wgRequest
[mediawiki.git] / includes / Parser.php
blob98bf5fe928265742be1183ace4390fe2af3d6780
1 <?php
3 // require_once('Tokenizer.php');
5 # PHP Parser
7 # Processes wiki markup
9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
12 # Globals used:
13 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
17 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
18 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
19 # $wgLocaltimezone
21 # * only within ParserOptions
#
# Variable substitution O(N^2) attack
#
# Without countermeasures the parser could be attacked by saving a page made
# up of a large number of inclusions of large pages: the generated page would
# grow with the square of the input size. Limiting how often any given page
# may be included brings such an attack back to O(N).
#
define( 'MAX_INCLUDE_REPEAT', 5 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); # one million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): makes it strip <!-- HTML comments -->
# instead of a regular <XML>-style tag. This should not be anything we may
# want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the placeholder markers used for escaping; used in at least two
# functions.
define( 'UNIQ_PREFIX', 'NaodW29' );
50 class Parser
# Persistent:
var $mTagHooks;

# Cleared with clearState():
var $mOutput;
var $mAutonumber;
var $mDTopen;
var $mStripState = array();
var $mVariables;
var $mIncludeCount;
var $mArgStack;
var $mLastSection;
var $mInPre;

# Temporary:
var $mOptions;
var $mTitle;
var $mOutputType;
# Cache of already loaded templates; avoids multiple SQL queries for the
# same string.
var $mTemplates;
# Unsorted hash of all the templates already loaded in this path. Used for
# loop detection.
var $mTemplatePath;
# Constructor: starts with an empty tag-hook table, template path and
# template cache, then resets the per-parse state via clearState().
function Parser() {
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();
	$this->clearState();
}
# Reset every member that is documented as "cleared with clearState()",
# including a brand new ParserOutput object.
function clearState() {
	$this->mOutput = new ParserOutput;

	# scalars
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mVariables = false;
	$this->mDTopen = false;
	$this->mInPre = false;

	# collections
	$this->mIncludeCount = array();
	$this->mArgStack = array();
	$this->mStripState = array();
}
# Convert wiki markup to HTML; one of the two main entry points.
#
# First pass: strip() takes out <nowiki> and the other protected sections,
# internalParse() does all the real work on the rest, and the protected
# sections are put back afterwards (everything but nowiki before the block
# level pass, nowiki after it).
#
# $text        wiki markup to render
# $title       title object; kept by reference in $this->mTitle
# $options     options object; kept in $this->mOptions
# $linestart   handed on to internalParse() and doBlockLevels()
# $clearState  when true, clearState() is called first
#
# Returns a ParserOutput ($this->mOutput) with the rendered text set on it
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters; run only once, right before doBlockLevels
	if ( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The hex class used to read [0-9A-fa-f], whose A-f range also
			# covers G-Z and some punctuation, and the lookahead started
			# with a stray ':' in front of "amp;".
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/([^> ]+(&#x30(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if ( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
# Returns two mt_rand() values in the range 0..0x7fffffff, each written in
# hexadecimal and glued together.
/* static */ function getRandomString() {
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
# Replaces all occurrences of <$tag>content</$tag> in the text with a random
# marker and returns the new text.
#
# $tag          tag name, or STRIP_COMMENTS to extract <!-- HTML comments -->
# $text         text to scan
# $content      output parameter: associative array $unique_marker => content.
#               If it is already set, the new entries are appended to it.
# $uniq_prefix  string put in front of every generated marker
#
# A tag that is opened but never closed swallows the rest of the text as its
# content.
#
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}

	# The delimiters do not change while scanning, so build them once.
	if ( $tag == STRIP_COMMENTS ) {
		$startRegex = '/<!--/i';
		$endRegex = '/-->/i';
	} else {
		# Quote the name so that a tag containing regex metacharacters
		# is matched literally.
		$quotedTag = preg_quote( $tag, '/' );
		$startRegex = "/<\\s*$quotedTag\\s*>/i";
		$endRegex = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	$n = 1;
	$stripped = '';

	while ( '' != $text ) {
		$p = preg_split( $startRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			$text = '';
		} else {
			$q = preg_split( $endRegex, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# No closing tag means preg_split() returned a single piece;
			# there is nothing left to scan then.
			$text = ( count( $q ) < 2 ) ? '' : $q[1];
		}
	}
	return $stripped;
}
# Strips <nowiki>, <math>, <pre>, optionally HTML comments, and every tag
# registered in $this->mTagHooks, replacing each with a marker.
#
# When the output type is OT_HTML the stripped content is rendered (escaped,
# passed to renderMath() or to the hook callback); otherwise it is stored
# wrapped in its original tags again.
#
# $text           text to process
# $state          strip state (by reference); filled with the data needed
#                 by unstrip(). If it already is a valid strip state, the
#                 new entries are added to it.
# $stripcomments  when set, HTML comments <!-- like this --> are stripped in
#                 addition to the other tags. This is important for section
#                 editing, where these comments cause confusion when
#                 counting the sections in the wikisource.
#
# Returns the text with markers in place of the stripped sections
#
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# Shown as literal text; the closing tag needs its slash
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	$builtin = array(
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		# A pre-existing state does not necessarily have every bucket
		# (insertStripItem() may have created it), so a missing bucket is
		# created instead of being added to, and extension content is
		# kept even if its tag was not in the state yet.
		foreach ( array( $builtin, $ext_content ) as $group ) {
			foreach ( $group as $tag => $array ) {
				if ( isset( $state[$tag] ) && is_array( $state[$tag] ) ) {
					$state[$tag] = $state[$tag] + $array;
				} else {
					$state[$tag] = $array;
				}
			}
		}
	} else {
		$state = $builtin + $ext_content;
	}
	return $text;
}
# Puts the stripped content back in place of its markers, for every bucket
# of the strip state except 'nowiki'.
# Always call unstripNoWiki() after this one.
#
# $text   text containing markers
# $state  strip state (by reference, not modified here)
#
# Returns the text with the markers expanded
#
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state, true ) as $tag => $contentDict ) {
		# Strict comparison: with != an integer key 0 compares equal to
		# 'nowiki' on PHP < 8 and would be skipped as well.
		if ( $tag === 'nowiki' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}
	return $text;
}
# Puts the content of the 'nowiki' bucket back in place of its markers.
# Always call this after unstrip() to preserve the order.
#
# $text   text containing markers
# $state  strip state (by reference, not modified here)
#
# Returns the text with the nowiki markers expanded; the text is returned
# unchanged when the state has no 'nowiki' bucket.
#
function unstripNoWiki( $text, &$state ) {
	if ( !isset( $state['nowiki'] ) || !is_array( $state['nowiki'] ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state['nowiki'], true ) as $marker => $content ) {
		$text = str_replace( $marker, $content, $text );
	}
	return $text;
}
# Add an item to the strip state.
#
# $text   content to store
# $state  strip state (by reference); created here if it is empty
#
# Returns the unique tag which must be inserted into the stripped text.
# The tag will be replaced with the original text in unstrip().
#
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Create every bucket strip() merges into, 'comment' included
		$state = array(
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
# categoryMagic
# Generates the list of subcategories and pages for a category by handing
# over to either the new or the old implementation, depending on
# wfMsg("usenewcategorypage"): a message starting with '0' selects the old
# code. The new code will not work properly for some languages due to
# sorting issues, so they might want to turn it off.
function categoryMagic() {
	$msg = wfMsg('usenewcategorypage');
	$useOldCode = ( '0' == @$msg[0] );
	if ( !$useOldCode ) {
		return $this->newCategoryMagic();
	}
	return $this->oldCategoryMagic();
}
# This method generates the list of subcategories and pages for a category
# (old style: each group is one comma separated run of links).
#
# Returns the HTML to append to the page, or '' when category magic is
# switched off or the current title is not in the category namespace.
#
function oldCategoryMagic () {
	global $wgLang , $wgUser ;
	$fname = 'Parser::oldCategoryMagic';

	# Always return a string, so both early exits behave the same
	if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;
	# NOTE(review): the value is not used below; the call is kept in case
	# the lookup has side effects -- TODO confirm and remove.
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$dbr =& wfGetDB( DB_SLAVE );
	$cur = $dbr->tableName( 'cur' );
	$categorylinks = $dbr->tableName( 'categorylinks' );

	$t = $dbr->strencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
		"WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = $dbr->query( $sql, $fname ) ;
	# All rows are collected before any link is built
	while ( $x = $dbr->fetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
		}
	}
	$dbr->freeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
		$r .= implode ( ', ' , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( 'category_header', $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ', ' , $articles ) ;
	}

	return $r ;
}
# New style category listing: subcategories and member pages are grouped
# under a heading per starting character; lists of more than six entries are
# laid out in three table columns.
#
# Returns the HTML to append to the page, or '' when category magic is
# switched off or the current title is not in the category namespace.
#
function newCategoryMagic () {
	global $wgLang , $wgUser ;
	# Always return a string, so both early exits behave the same
	if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$articles_start_char = array();
	$children = array() ;
	$children_start_char = array();
	# NOTE(review): the value is not used below; the call is kept in case
	# the lookup has side effects -- TODO confirm and remove.
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$dbr =& wfGetDB( DB_SLAVE );
	$cur = $dbr->tableName( 'cur' );
	$categorylinks = $dbr->tableName( 'categorylinks' );

	$t = $dbr->strencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
		"$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = $dbr->query ( $sql ) ;
	while ( $x = $dbr->fetchObject ( $res ) )
	{
		$t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != '' ) $t .= ':' ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			$ctitle = str_replace( '_',' ',$x->cur_title );
			array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory

			// If there's a link from Category:A to Category:B, the sortkey of the resulting
			// entry in the categorylinks table is Category:A, not A, which it SHOULD be.
			// Workaround: If sortkey == "Category:".$title, then use $title for sorting,
			// else use sortkey...
			if ( ($ns.":".$ctitle) == $x->cl_sortkey ) {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
			} else {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
			}
		} else {
			array_push ( $articles , $sk->makeKnownLink ( $t ) ) ; # Page in this category
			array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
		}
	}
	$dbr->freeResult ( $res ) ;

	$ti = $this->mTitle->getText() ;

	# Don't show subcategories section if there are none.
	if ( count ( $children ) > 0 )
	{
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n"
			. wfMsg( 'subcategorycount', count( $children ) );
		$r .= $this->formatCategoryList( $children, $children_start_char );
	}

	# The article heading and count are shown even for an empty category
	$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n" .
		wfMsg( 'categoryarticlecount', count( $articles ) );
	$r .= $this->formatCategoryList( $articles, $articles_start_char );

	return $r ;
}

# Helper for newCategoryMagic(): renders one list of links grouped by
# starting character.
#
# $links       HTML links, in display order
# $startChars  starting character of each link, same indexes as $links
#
# Returns '' for an empty list, a plain list for up to six entries, and a
# three column table for longer lists.
#
function formatCategoryList( $links, $startChars ) {
	$total = count( $links );
	if ( $total == 0 ) {
		return '';
	}

	if ( $total <= 6 ) {
		// for short lists
		$r = "<h3>{$startChars[0]}</h3>\n";
		$r .= '<ul><li>' . $links[0] . '</li>';
		for ( $index = 1; $index < $total; $index++ ) {
			if ( $startChars[$index] != $startChars[$index - 1] ) {
				$r .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
			}
			$r .= "<li>{$links[$index]}</li>";
		}
		$r .= '</ul>';
		return $r;
	}

	// divide list into three chunks: the first column takes (int)(total/3)
	// entries, each following column up to one more than that
	$chunk = (int) ( $total / 3 );

	$r = '<table width="100%"><tr valign="top">';

	// loop through the chunks
	for ( $startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
		$chunkIndex < 3;
		$chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1 )
	{
		$r .= '<td><ul>';
		for ( $index = $startChunk;
			$index < $endChunk && $index < $total;
			$index++ )
		{
			// check for beginning of chunk or change of starting letter;
			// the chunk test comes first so that index 0 never looks at
			// the non-existent entry -1
			if ( ( $index == $startChunk )
				|| ( $startChars[$index] != $startChars[$index - 1] ) )
			{
				$r .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
			}
			$r .= "<li>{$links[$index]}</li>";
		}
		$r .= '</ul></td>';
	}
	$r .= '</tr></table>';
	return $r;
}
# Return allowed HTML attributes (no scripting, etc.) as a flat list.
# 'width' appears twice in the list, as it always did.
function getHTMLattrs () {
	$general = array( 'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor' );
	$breaksAndRules = array( 'clear', /* BR */ 'noshade' /* HR */ );
	$quotesAndFonts = array( 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color' /* FONT */ );
	# For various lists, mostly deprecated but safe
	$lists = array( 'type', 'start', 'value', 'compact' );
	$tables = array(
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan'
	);
	$css = array( 'id', 'class', 'name', 'style' );

	return array_merge( $general, $breaksAndRules, $quotesAndFonts, $lists, $tables, $css );
}
# Remove non approved attributes and javascript in css.
#
# $t  the attribute part of a tag, e.g. 'border="1" onclick="..."'
#
# Returns the trimmed attribute string with every attribute that is not in
# getHTMLattrs() removed, or '' if anything suspicious is found in a style
# attribute.
#
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used to be a preg_replace() with the /e modifier, which runs the
	# matched (user supplied) text through the PHP evaluator; a callback
	# does the same filtering without evaluating anything.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
		array( $this, 'filterTagAttribute' ),
		$t);

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

# Callback for fixTagAttributes(): decides the fate of one attribute.
#
# $m  match array: [1] is the attribute name, [3] the value if there is one
#
# Returns 'name' or 'name=value' for an approved attribute, '' otherwise
#
function filterTagAttribute( $m ) {
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs(), true ) ) {
		return '';
	}
	$value = isset( $m[3] ) ? $m[3] : '';
	if ( $value === '' ) {
		return $m[1];
	}
	return $m[1] . '=' . $value;
}
# Interface with html tidy, used if $wgUseTidy = true.
#
# The text is wrapped in a minimal XHTML document, piped through the
# program named by $wgTidyBin and the program's output is returned.
#
# $text  HTML fragment to clean up
#
# Returns what tidy printed. If tidy printed nothing for a non-empty input
# (which includes the case where the process could not be started), the
# original text is returned with an HTML comment about the failure appended.
#
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy of the configured options: appending to the global
	# itself made the encoding switch pile up with every call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		# The exit status is not looked at; empty output is the error signal
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
# Parse the wiki syntax used to render tables.
#
# Works line by line. A line starting with "{|" opens a table, "|}" closes
# it, "|-" starts a new row, "|" / "!" start data / header cells ("||" and
# "!!" separate several cells on one line) and "|+" is a caption. Four
# stacks, one entry per currently open (possibly nested) table, remember
# what is open. Lines outside any table, and lines inside a table that
# start with none of the above, are left untouched.
#
# $t  text to process
#
# Returns the text with the table markup replaced by HTML
#
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( '{|' == substr ( $x , 0 , 2 ) )
		{
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			# What is left of the line becomes the attributes of the next <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( '|+' == substr ( $x , 0 , 2 ) )
			{
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Cells need a row; open one if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, if any, with its own tag name
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;
				# "attributes|content": split on the first pipe only
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that actually opened it (td, th or
		# caption) instead of always emitting </td>
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	#		$t = $this->removeHTMLtags( $t );
	wfProfileOut( $fname );
	return $t ;
}
# Runs the text through strip() and internalParse() (not as main text) and
# stores the result in the strip state.
# Returns $newline followed by the strip tag that stands for the result.
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
# Runs the individual transformation passes over text that has already been
# through strip(). The order of the passes matters.
#
# $text       text to transform
# $linestart  accepted for the callers' sake; not used inside this function
# $args       handed to replaceVariables()
# $isMain     handed to formatHeadings(); the category listing is appended
#             only when this is true, and only once per parser object
#
# Returns the transformed text
#
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	global $wgDateFormatter;
	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	# (doExponent() is not part of the pipeline)
	$text = $this->replaceExternalLinks( $text );
	# NOTE(review): the internal link pass runs twice in a row; kept as is,
	# reason not visible here
	$text = $this->replaceInternalLinks( $text );
	$text = $this->replaceInternalLinks( $text );
	# (doTokenizedParser() is not part of the pipeline)
	$text = $this->doTableStuff( $text );
	$text = $this->magicISBN( $text );
	$text = $this->magicGEO( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	if ( $isMain && !isset( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic();
		$this->categoryMagicDone = true;
	}

	wfProfileOut( $fname );
	return $text;
}
# Parse ^^ tokens and return html: text enclosed in ^^ ... ^^ is wrapped in
# <small><sup> ... </sup></small>.
/* private */ function doExponent ( $text )
{
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname );
	$pattern = '/\^\^(.*)\^\^/';
	$replacement = '<small><sup>\\1</sup></small>';
	$result = preg_replace( $pattern, $replacement, $text );
	wfProfileOut( $fname );
	return $result;
}
# Parse headers and return html: a line of the form "== text ==" becomes
# <h2>text</h2>, and likewise for one to six equals signs. The longest
# markers are handled first.
/* private */ function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	$level = 6;
	while ( $level >= 1 ) {
		$marker = str_repeat( '=', $level );
		$pattern = "/^{$marker}(.+){$marker}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		$level--;
	}
	wfProfileOut( $fname );
	return $text;
}
# Applies doQuotes() to every line of the text separately and joins the
# results with newlines again.
/* private */ function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( '', $line, '' );
	}
	$outtext = implode( "\n", $converted );
	wfProfileOut( $fname );
	return $outtext;
}
# Recursively converts runs of apostrophes in one line: '' toggles <em> and
# ''' toggles <strong>.
#
# $pre   already converted text that still belongs inside the construct
#        currently open
# $text  text still to be scanned
# $mode  what is currently open: '' (nothing), 'em', 'strong', 'emstrong'
#        (em opened first, strong inside it), 'strongem' (the other way
#        round) or 'both' (opened at the same position, nesting undecided)
#
# Returns the converted HTML
#
/* private */ function doQuotes( $pre, $text, $mode ) {
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No quote run left: close whatever is still open
		$text_strong = ($text == '') ? '' : "<strong>{$text}</strong>";
		$text_em = ($text == '') ? '' : "<em>{$text}</em>";
		switch ( $mode ) {
			case '':
				return $pre . $text;
			case 'em':
				return $pre . $text_em;
			case 'strong':
				return $pre . $text_strong;
			case 'strongem':
				return (($pre == '') && ($text == '')) ? '' : "<strong>{$pre}{$text_em}</strong>";
			default:
				return (($pre == '') && ($text == '')) ? '' : "<em>{$pre}{$text_strong}</em>";
		}
	}

	$m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
	$m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";

	if ( substr ($m[2], 0, 1) == '\'' ) {
		# Three apostrophes: a strong toggle
		$m[2] = substr ($m[2], 1);
		switch ( $mode ) {
			case 'em':
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'emstrong' );
			case 'strong':
				return $m1_strong . $this->doQuotes ( '', $m[2], '' );
			case 'emstrong':
			case 'both':
				return $this->doQuotes ( '', $pre.$m1_strong.$m[2], 'em' );
			case 'strongem':
				return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( '', $m[2], 'em' );
			default:
				return $m[1] . $this->doQuotes ( '', $m[2], 'strong' );
		}
	}

	# Two apostrophes: an em toggle
	switch ( $mode ) {
		case 'strong':
			return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'strongem' );
		case 'em':
			return $m1_em . $this->doQuotes ( '', $m[2], '' );
		case 'emstrong':
			return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( '', $m[2], 'strong' );
		case 'strongem':
		case 'both':
			return $this->doQuotes ( '', $pre.$m1_em.$m[2], 'strong' );
		default:
			return $m[1] . $this->doQuotes ( '', $m[2], 'em' );
	}
}
# Converts external links, one protocol after the other.
#
# Note: we have to do external links before the internal ones, and
# otherwise take great care in the order of things here, so that we don't
# end up interpreting some URLs twice.
#
/* private */ function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	# protocol => value of the $autonumber argument
	$protocols = array(
		'http' => true,
		'https' => true,
		'ftp' => false,
		'irc' => false,
		'gopher' => false,
		'news' => false,
		'mailto' => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
# Converts the external links of one protocol.
#
# Free-standing URLs are linked first (or, when $autonumber is set and
# external images are allowed, turned into images if they end in an image
# extension); after that the bracketed forms [url] and [url text] are
# handled.
#
# $s           text to process
# $protocol    protocol name without the colon, e.g. 'http'
# $autonumber  when true, a bare [url] is shown as a running number "[n]"
#              instead of the URL itself; it also enables the image handling
#
# Returns the processed text
#
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) {
	# Stand-in for the protocol while the free URLs are being replaced
	$unique = '4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3';
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = 'A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF';
	$images = 'gif|png|jpg|jpeg';

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, '\\1' . $sk->makeImage( "{$unique}:\\3" .
			'/\\4.\\5', '\\4.\\5' ) . '\\6', $s );
	}
	$s = preg_replace( $e2, '\\1' . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		'</a>\\5', $s );
	$s = str_replace( $unique, $protocol, $s );

	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Regexp for URL in square brackets
	$e1 = "/^([{$uc}{$sep}]+)\\](.*)\$/sD";
	# Regexp for URL with link text in square brackets
	$e2 = "/^([{$uc}{$sep}]+)\\s+([^\\]]+)\\](.*)\$/sD";

	foreach ( $a as $line ) {
		# Only a labelled link can have a link trail; start empty for every
		# link so that a bare [url] neither reads an unset variable nor
		# repeats the trail of an earlier labelled link.
		$dtrail = '';

		# CASE 1: Link in square brackets, e.g.
		# some text [http://domain.tld/some.link] more text
		if ( preg_match( $e1, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		}

		# CASE 2: Link with link text and text directly following it, e.g.
		# This is a collection of [http://domain.tld/some.link link]s
		else if ( preg_match( $e2, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
			if ( preg_match( wfMsg ('linktrail'), $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		}

		# CASE 3: Nothing matches, just output the source text
		else {
			$s .= "[{$protocol}:" . $line;
			continue;
		}

		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
	}
	return $s;
}
# Replace [[internal link]] markup in $s with HTML produced by the skin.
#
# The text is split on '[['; each following piece is matched against
# "target|text]]trail". Pieces that do not match, or whose target cannot be
# turned into a Title, are emitted unchanged (with the '[[' restored).
# Depending on the target, a piece is recorded as a language link, rendered
# as an image, recorded as a category link, rendered as a self link, a media
# link, a known (special page) link, or an ordinary link.
#
# Side effects visible here: appends to $this->mOutput->mLanguageLinks and
# $this->mOutput->mCategoryLinks, and registers image/category links with
# $wgLinkCache.
#
# $s: wikitext to process. Returns the processed text.
/* private */ function replaceInternalLinks( $s ) {
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = 'Parser::replaceInternalLinks' ;
	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
	$sk =& $this->mOptions->getSkin();

	# The leading space is added so the first piece is never empty; it is
	# stripped again right after the split.
	$a = explode( '[[', ' ' . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $useLinkPrefixExtension ) {
		# Peel the word that directly precedes the first link off the lead text
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	wfProfileOut( $fname.'-setup' );

	foreach ( $a as $line ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ':');
		if( $c == '/' ) { # subpage
			if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
				if( '' == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( '' == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a '#' in first position, so the check has to
		# be strict: a loose comparison with FALSE would treat "#anchor" as
		# containing no fragment at all.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, '#' ) === false ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
1176 # Some functions here used by doBlockLevels()
# Build the closing tag for the section recorded in $this->mLastSection
# (empty string when none is recorded), then reset both the section marker
# and the "inside <pre>" flag.
/* private */ function closeParagraph() {
	$openSection = $this->mLastSection;

	$this->mInPre = false;
	$this->mLastSection = '';

	return ( '' != $openSection ) ? '</' . $openSection . ">\n" : '';
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading bytes on which $st1 and $st2 agree.
# Returns 0 when either string is empty.
/* private */ function getCommon( $st1, $st2 ) {
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets are used instead of the {}-form, which newer
	# PHP versions no longer parse.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.

# Close any open paragraph and return the opening markup for a list of the
# kind selected by $char ('*', '#', ':' or ';'). Opening a ';' item also
# marks a definition term as open in $this->mDTopen.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$result .= '<ul><li>';
			break;
		case '#':
			$result .= '<ol><li>';
			break;
		case ':':
			$result .= '<dl><dd>';
			break;
		case ';':
			$result .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix: the error marker replaces whatever was built so far
			$result = '<!-- ERR 1 -->';
	}

	return $result;
}
# Return the markup that ends the current list item and starts the next one
# for prefix character $char. For definition lists (':' and ';') the closing
# tag depends on whether a term is currently open ($this->mDTopen), and the
# flag is updated to reflect the item being started.
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}
	if ( ':' != $char && ';' != $char ) {
		return '<!-- ERR 2 -->';
	}

	$close = $this->mDTopen ? '</dt>' : '</dd>';
	$startsTerm = ( ';' == $char );
	$this->mDTopen = $startsTerm;

	return $close . ( $startsTerm ? '<dt>' : '<dd>' );
}
# Return the closing markup (followed by a newline) for a list of the kind
# selected by $char. Only '*', '#' and ':' are recognised; anything else
# yields an error marker without a trailing newline. Closing a ':' list
# while a term is open emits </dt> and clears $this->mDTopen.
/* private */function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$text = '</li></ul>';
			break;
		case '#':
			$text = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$text = '</dt></dl>';
			} else {
				$text = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $text."\n";
}
# Turn line-oriented wiki markup into block-level HTML.
#
# The text is processed line by line. Lines starting with any run of the
# characters * # : ; open, continue or close (possibly nested) lists; other
# lines are wrapped into <p> paragraphs, or into <pre> when they start with
# a space. Lines containing one of a fixed set of block-level HTML tags
# switch paragraph handling off/on via $inBlockElem.
#
# $text:      the text to process
# $linestart: when false, the first line is copied to the output untouched
# Returns the generated HTML.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.

	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Pending paragraph markup; false means "nothing pending, emit the line"
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# ';' and ':' belong to the same kind of list (<dl>), so they are
			# unified before prefixes of consecutive lines are compared
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ':' );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				# closeParagraph() clears mInPre, so re-arm it for an unclosed <pre>
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() instead of a direct offset: $t is empty for blank
				# lines, and reading offset 0 of an empty string raises a
				# notice/warning.
				if ( " " == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							# Second blank line in a row: flush the pending tag plus a <br />
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							# First blank line: remember which tag to emit once
							# the next line is known
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While paragraph markup is pending the (blank) line itself is swallowed
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close every list level still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
# Return value of a magic variable (like PAGENAME)
# $index is one of the MAG_* identifiers handled below; any other value
# yields NULL.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		# Date and time
		case MAG_CURRENTMONTH:
			$value = date( 'm' );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = date('j');
			break;
		# Current page
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# return Namespace::getCanonicalName($this->mTitle->getNamespace());
			$value = $wgLang->getNsText($this->mTitle->getNamespace()); // Patch by Dori
			break;
		case MAG_CURRENTDAYNAME:
			$value = $wgLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( 'Y' );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		# Site-wide values
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
# initialise the magic variables (like CURRENTMONTHNAME)
# Rebuilds $this->mVariables from scratch: for every id in $wgVariableIDs the
# matching MagicWord adds its entries, paired with the variable's current value.
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray( $this->mVariables, $this->getVariableValue( $variableId ) );
	}
}
# Expand {{...}} and {{{...}}} constructs in $text.
# $text: text to process; returned unchanged when longer than MAX_INCLUDE_SIZE.
# $args: arguments for {{{...}}} substitution; pushed on $this->mArgStack for
#        the duration of the call.
# The actual replacements are done by the global callbacks named in the
# preg_replace_callback() calls below, which reach this object through
# $GLOBALS['wgCurParser'].
# NOTE(review): the brace that closes the OT_HTML block is not visible in this
# listing, so it is unclear which of the substitutions below are restricted to
# HTML output - confirm against the real file.
# NOTE(review): $bail and the three globals declared here are not used in the
# visible body.
1452 /* private */ function replaceVariables( $text, $args = array() ) {
1453 global $wgLang, $wgScript, $wgArticlePath;
1455 # Prevent too big inclusions
1456 if(strlen($text)> MAX_INCLUDE_SIZE)
1457 return $text;
1459 $fname = 'Parser::replaceVariables';
1460 wfProfileIn( $fname );
1462 $bail = false;
1463 $titleChars = Title::legalChars();
# Title characters minus the braces, so a match cannot run across brace pairs
1464 $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
1466 # This function is called recursively. To keep track of arguments we need a stack:
1467 array_push( $this->mArgStack, $args );
1469 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1470 $GLOBALS['wgCurParser'] =& $this;
1473 if ( $this->mOutputType == OT_HTML ) {
1474 # Variable substitution
1475 $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
1477 # Argument substitution
1478 $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1480 # Template substitution
1481 $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
1482 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
# Drop the argument frame pushed above
1484 array_pop( $this->mArgStack );
1486 wfProfileOut( $fname );
1487 return $text;
# Replacement for one {{NAME}} match: $matches[1] is the name between the
# braces, $matches[0] the whole match. Known variables are replaced by their
# value (and the output is flagged as containing old magic); anything else is
# returned unchanged.
function variableSubstitution( $matches ) {
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$value = $this->mVariables[$name];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
# Replacement for one {{...}} match.
# $matches[0] is the whole match, [1] an optional preceding newline, [2] the
# part before the first '|', [3] the remainder (empty, or starting with '|').
# The checks below run in order and set $found/$text when they apply:
# stray {{{ }}}, SUBST, MSGNW/MSG/INT, NS, LOCALURL/LOCALURLE, magic
# variables, the per-parser template cache, and finally a database load of
# the template page. Returns the replacement text, or $matches[0] when
# nothing applied.
# NOTE(review): brace-only lines are missing from this listing, so the exact
# nesting of some sections (marked below) cannot be read off here - confirm
# against the real file before restructuring.
1503 function braceSubstitution( $matches ) {
1504 global $wgLinkCache, $wgLang;
1505 $fname = 'Parser::braceSubstitution';
# $found:   a replacement has been determined
# $nowiki:  escape the result instead of parsing it (set for MSGNW)
# $noparse: return the text without running the parser over it
1506 $found = false;
1507 $nowiki = false;
1508 $noparse = false;
1510 $title = NULL;
1512 # $newline is an optional newline character before the braces
1513 # $part1 is the bit before the first |, and must contain only title characters
1514 # $args is a list of arguments, starting from index 0, not including $part1
1516 $newline = $matches[1];
1517 $part1 = $matches[2];
1518 # If the third subpattern matched anything, it will start with |
1519 if ( $matches[3] !== '' ) {
1520 $args = explode( '|', substr( $matches[3], 1 ) );
1521 } else {
1522 $args = array();
1524 $argc = count( $args );
1526 # {{{}}}
# A match containing a triple brace is passed through untouched
1527 if ( strpos( $matches[0], '{{{' ) !== false ) {
1528 $text = $matches[0];
1529 $found = true;
1530 $noparse = true;
1533 # SUBST
1534 if ( !$found ) {
1535 $mwSubst =& MagicWord::get( MAG_SUBST );
1536 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1537 if ( $this->mOutputType != OT_WIKI ) {
1538 # Invalid SUBST not replaced at PST time
1539 # Return without further processing
1540 $text = $matches[0];
1541 $found = true;
1542 $noparse= true;
1544 } elseif ( $this->mOutputType == OT_WIKI ) {
1545 # SUBST not found in PST pass, do nothing
1546 $text = $matches[0];
1547 $found = true;
1551 # MSG, MSGNW and INT
1552 if ( !$found ) {
1553 # Check for MSGNW:
1554 $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1555 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1556 $nowiki = true;
1557 } else {
1558 # Remove obsolete MSG:
1559 $mwMsg =& MagicWord::get( MAG_MSG );
1560 $mwMsg->matchStartAndRemove( $part1 );
1563 # Check if it is an internal message
1564 $mwInt =& MagicWord::get( MAG_INT );
1565 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1566 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1567 $text = wfMsgReal( $part1, $args, true );
1568 $found = true;
1573 # NS
1574 if ( !$found ) {
1575 # Check for NS: (namespace expansion)
1576 $mwNs = MagicWord::get( MAG_NS );
1577 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A non-zero number is used as namespace index directly; anything else is
# looked up as a canonical namespace name
1578 if ( intval( $part1 ) ) {
1579 $text = $wgLang->getNsText( intval( $part1 ) );
1580 $found = true;
1581 } else {
1582 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1583 if ( !is_null( $index ) ) {
1584 $text = $wgLang->getNsText( $index );
1585 $found = true;
1591 # LOCALURL and LOCALURLE
1592 if ( !$found ) {
1593 $mwLocal = MagicWord::get( MAG_LOCALURL );
1594 $mwLocalE = MagicWord::get( MAG_LOCALURLE );
# $func names the Title method that builds the URL; '' means neither matched
1596 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1597 $func = 'getLocalURL';
1598 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1599 $func = 'escapeLocalURL';
1600 } else {
1601 $func = '';
1604 if ( $func !== '' ) {
1605 $title = Title::newFromText( $part1 );
1606 if ( !is_null( $title ) ) {
1607 if ( $argc > 0 ) {
1608 $text = $title->$func( $args[0] );
1609 } else {
1610 $text = $title->$func();
1612 $found = true;
1617 # Internal variables
1618 if ( !$this->mVariables ) {
1619 $this->initialiseVariables();
1621 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1622 $text = $this->mVariables[$part1];
1623 $found = true;
1624 $this->mOutput->mContainsOldMagic = true;
1627 # Template table test
1629 # Did we encounter this template already? If yes, it is in the cache
1630 # and we need to check for loops.
# NOTE(review): this test is not guarded by !$found, so a cached entry for
# $part1 replaces any $text chosen by the sections above - confirm intended.
1631 if ( isset( $this->mTemplates[$part1] ) ) {
1632 # Infinite loop test
1633 if ( isset( $this->mTemplatePath[$part1] ) ) {
1634 $noparse = true;
1635 $found = true;
1637 # set $text to cached message.
1638 $text = $this->mTemplates[$part1];
1639 $found = true;
1642 # Load from database
1643 if ( !$found ) {
1644 $title = Title::newFromText( $part1, NS_TEMPLATE );
1645 if ( !is_null( $title ) && !$title->isExternal() ) {
1646 # Check for excessive inclusion
1647 $dbk = $title->getPrefixedDBkey();
1648 if ( $this->incrementIncludeCount( $dbk ) ) {
1649 # This should never be reached.
1650 $article = new Article( $title );
1651 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1652 if ( $articleContent !== false ) {
1653 $found = true;
1654 $text = $articleContent;
# NOTE(review): presumably the next two steps are still inside the
# "title is valid" check above ($title is dereferenced) - the closing braces
# are not visible here, verify.
1659 # If the title is valid but undisplayable, make a link to it
1660 if ( $this->mOutputType == OT_HTML && !$found ) {
1661 $text = '[[' . $title->getPrefixedText() . ']]';
1662 $found = true;
1665 # Template cache array insertion
1666 $this->mTemplates[$part1] = $text;
1670 # Recursive parsing, escaping and link table handling
1671 # Only for HTML output
1672 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1673 $text = wfEscapeWikiText( $text );
1674 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1675 # Clean up argument array
# Arguments without '=' are numbered from 1; "name=value" arguments are
# stored under their trimmed name
1676 $assocArgs = array();
1677 $index = 1;
1678 foreach( $args as $arg ) {
1679 $eqpos = strpos( $arg, '=' );
1680 if ( $eqpos === false ) {
1681 $assocArgs[$index++] = $arg;
1682 } else {
1683 $name = trim( substr( $arg, 0, $eqpos ) );
1684 $value = trim( substr( $arg, $eqpos+1 ) );
1685 if ( $value === false ) {
1686 $value = '';
1688 if ( $name !== false ) {
1689 $assocArgs[$name] = $value;
1694 # Do not enter included links in link table
1695 if ( !is_null( $title ) ) {
1696 $wgLinkCache->suspend();
1699 # Add a new element to the templace recursion path
1700 $this->mTemplatePath[$part1] = 1;
1702 # Run full parser on the included text
1703 $text = $this->stripParse( $text, $newline, $assocArgs );
1705 # Resume the link cache and register the inclusion as a link
1706 if ( !is_null( $title ) ) {
1707 $wgLinkCache->resume();
1708 $wgLinkCache->addLinkObj( $title );
# NOTE(review): whether the reset below sits inside the elseif block above or
# runs on every call cannot be told from this listing - verify.
1711 # Empties the template path
1712 $this->mTemplatePath = array();
1714 if ( !$found ) {
1715 return $matches[0];
1716 } else {
1717 return $text;
# Triple brace replacement -- used for template arguments
# $matches[1] is an optional preceding newline, $matches[2] the argument name.
# When the innermost frame of $this->mArgStack has a value for that name, the
# value is run through stripParse(); otherwise the match is returned as is.
function argSubstitution( $matches ) {
	$argName = trim( $matches[2] );
	$currentArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $currentArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
# Returns true if the function is allowed to include this entity
# Every call counts one more inclusion of $dbk in $this->mIncludeCount; the
# result is true as long as that count has not exceeded MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1748 # Cleans up HTML, removes dangerous tags and attributes
# $text is split on '<'. A piece that starts with a whitelisted tag is
# re-emitted with its attributes passed through fixTagAttributes(); every
# other piece is emitted with its '<' (and any '>') turned into entities.
# When $wgUserHtml is off the whitelist is empty. Without $wgUseTidy the
# function additionally tracks open tags on a stack, rejects mismatched or
# misplaced tags, and closes whatever is still open at the end.
# Returns the cleaned text.
1749 /* private */ function removeHTMLtags( $text ) {
1750 global $wgUseTidy, $wgUserHtml;
1751 $fname = 'Parser::removeHTMLtags';
1752 wfProfileIn( $fname );
1754 if( $wgUserHtml ) {
1755 $htmlpairs = array( # Tags that must be closed
1756 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1757 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1758 'strike', 'strong', 'tt', 'var', 'div', 'center',
1759 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1760 'ruby', 'rt' , 'rb' , 'rp', 'p'
# Tags that are never pushed on the tag stack
1762 $htmlsingle = array(
1763 'br', 'hr', 'li', 'dt', 'dd'
1765 $htmlnest = array( # Tags that can be nested--??
1766 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
1767 'dl', 'font', 'big', 'small', 'sub', 'sup'
1769 $tabletags = array( # Can only appear inside table
1770 'td', 'th', 'tr'
1772 } else {
1773 $htmlpairs = array();
1774 $htmlsingle = array();
1775 $htmlnest = array();
1776 $tabletags = array();
# Table cell/row tags are treated like single tags for stack purposes
1779 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1780 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1782 $htmlattrs = $this->getHTMLattrs () ;
1784 # Remove HTML comments
# NOTE(review): the pattern has a single capture group, so the '$2'
# replacement is always empty - i.e. matched comments are simply deleted.
1785 $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );
# Everything before the first '<' is kept as is; each further element of
# $bits starts right after a '<'
1787 $bits = explode( '<', $text );
1788 $text = array_shift( $bits );
1789 if(!$wgUseTidy) {
1790 $tagstack = array(); $tablestack = array();
1791 foreach ( $bits as $x ) {
# Notices/warnings are silenced because $regs is empty when $x does not
# look like a tag
1792 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1793 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1794 $x, $regs );
1795 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1796 error_reporting( $prev );
1798 $badtag = 0 ;
1799 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1800 # Check our stack
1801 if ( $slash ) {
1802 # Closing a tag...
# A closing tag must match the innermost open tag; otherwise the popped
# entry is pushed back and the tag is rejected
1803 if ( ! in_array( $t, $htmlsingle ) &&
1804 ( $ot = @array_pop( $tagstack ) ) != $t ) {
1805 @array_push( $tagstack, $ot );
1806 $badtag = 1;
1807 } else {
# Leaving a table restores the tag stack saved when it was opened
1808 if ( $t == 'table' ) {
1809 $tagstack = array_pop( $tablestack );
1811 $newparams = '';
1813 } else {
1814 # Keep track for later
1815 if ( in_array( $t, $tabletags ) &&
1816 ! in_array( 'table', $tagstack ) ) {
1817 $badtag = 1;
1818 } else if ( in_array( $t, $tagstack ) &&
1819 ! in_array ( $t , $htmlnest ) ) {
1820 $badtag = 1 ;
1821 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table saves the current stack and starts a fresh one
1822 if ( $t == 'table' ) {
1823 array_push( $tablestack, $tagstack );
1824 $tagstack = array();
1826 array_push( $tagstack, $t );
1828 # Strip non-approved attributes from the tag
1829 $newparams = $this->fixTagAttributes($params);
1832 if ( ! $badtag ) {
1833 $rest = str_replace( '>', '&gt;', $rest );
1834 $text .= "<$slash$t $newparams$brace$rest";
1835 continue;
# Not an accepted tag: output it as literal text
1838 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1840 # Close off any remaining tags
1841 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1842 $text .= "</$t>\n";
1843 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
1845 } else {
1846 # this might be possible using tidy itself
# With tidy enabled only the whitelist and attribute filter are applied;
# no tag balancing is done here
1847 foreach ( $bits as $x ) {
1848 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1849 $x, $regs );
1850 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1851 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1852 $newparams = $this->fixTagAttributes($params);
1853 $rest = str_replace( '>', '&gt;', $rest );
1854 $text .= "<$slash$t $newparams$brace$rest";
1855 } else {
1856 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1860 wfProfileOut( $fname );
1861 return $text;
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text is the HTML to process. When $isMain is false the table of contents
 * is not inserted after the first block of text. Returns the processed HTML;
 * when the TOC magic word (MAG_TOC) is present, the table of contents
 * replaces it instead.
 */
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ($mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ($mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	# anchor text => number of headlines seen with that anchor text
	$refers = array();
	# headline index => occurrence number of its anchor text
	$refcount = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		@$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		# Feed the result of unstrip() on, rather than starting again from
		# $headline and thereby throwing the first expansion away
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		@$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;
		}
		# Don't number the heading if it is the only one (looks silly)
		if( $doNumberHeadings && count( $matches[3] ) > 1) {
			# the two are different if the line contains a link
			$headline=$numbering . ' ' . $headline;
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink ) {
			if ( empty( $head[$headlineCount] ) ) {
				$head[$headlineCount] = '';
			}
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if($forceTocHere) {
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
# Return an HTML link for the "ISBN 123456" text
#
# Looks for the literal marker "ISBN " in $text. Wherever the marker is
# followed (after optional spaces) by a run of digits, hyphens or upper-case
# letters, marker and number are replaced by a link to Special:Booksources.
# Hyphens are removed from the value put into the query string but kept in
# the visible link label. Markers without a usable number stay in the text.
#
# @param string $text  text to scan
# @return string       text with the ISBN links inserted
/* private */ function magicISBN( $text ) {
    $fname = 'Parser::magicISBN';
    wfProfileIn( $fname );

    # The marker is a fixed string, so a plain explode() is enough. The space
    # put in front guarantees a leading chunk even when the text starts with
    # the marker; it is cut off again right below.
    $chunks = explode( 'ISBN ', " $text" );
    if ( count( $chunks ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    $text = (string)substr( array_shift( $chunks ), 1 );
    $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

    foreach ( $chunks as $x ) {
        # Measure the runs with strspn() instead of peeking at the first
        # character: that also works when the chunk is empty or fully
        # consumed (marker or number at the very end of the text).
        $blank = (string)substr( $x, 0, strspn( $x, ' ' ) );
        $x = (string)substr( $x, strlen( $blank ) );
        $isbn = (string)substr( $x, 0, strspn( $x, $valid ) );
        $x = (string)substr( $x, strlen( $isbn ) );

        $num = str_replace( array( '-', ' ' ), '', $isbn );

        if ( $num === '' ) {
            # Nothing, or nothing but hyphens, followed the marker: put back
            # everything that was consumed so the text is not altered.
            $text .= "ISBN $blank$isbn$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
# Return an HTML link for the "GEO ..." text
#
# Does nothing unless $wgUseGeoMode is set and true. Otherwise:
#  1. coordinate pairs written as  D&deg;M'S" North|South, D&deg;M'S" East|West
#     are rewritten to  (GEO <sign>D.M.S:<sign>D.M.S)  with '+' for North and
#     East and '-' for South and West;
#  2. every "GEO " marker followed by a run of  0-9 . + - :  that contains a
#     colon is replaced by a link to Special:Geo. '+' signs are removed from
#     the value put into the query string but kept in the visible label.
# Markers without a usable coordinate stay in the text.
#
# @param string $text  text to scan
# @return string       text with the GEO links inserted
/* private */ function magicGEO( $text ) {
    global $wgUseGeoMode;
    if ( empty( $wgUseGeoMode ) ) return $text;
    $fname = 'Parser::magicGEO';
    wfProfileIn( $fname );

    # This conversion is only for the ~35000 U.S. Census Rambot pages...
    # Southern latitudes get '-', mirroring the '-' used for western longitudes.
    $latitudes = array( 'North' => '+', 'South' => '-' );
    $longitudes = array( 'East' => '+', 'West' => '-' );
    foreach ( $latitudes as $latName => $latSign ) {
        foreach ( $longitudes as $lonName => $lonSign ) {
            $text = preg_replace(
                "/(\d+)&deg;(\d+)'(\d+)\" {$latName}, (\d+)&deg;(\d+)'(\d+)\" {$lonName}/",
                "(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
                $text );
        }
    }

    # The space put in front guarantees a leading chunk even when the text
    # starts with the marker; it is cut off again right below.
    $chunks = explode( 'GEO ', " $text" );
    if ( count( $chunks ) < 2 ) {
        wfProfileOut( $fname );
        return $text;
    }
    $text = (string)substr( array_shift( $chunks ), 1 );
    $valid = '0123456789.+-:';

    foreach ( $chunks as $x ) {
        # strspn() also copes with an empty or fully consumed chunk, where
        # peeking at the first character would raise notices.
        $blank = (string)substr( $x, 0, strspn( $x, ' ' ) );
        $x = (string)substr( $x, strlen( $blank ) );
        $geo = (string)substr( $x, 0, strspn( $x, $valid ) );
        $x = (string)substr( $x, strlen( $geo ) );

        $num = str_replace( array( '+', ' ' ), '', $geo );

        if ( $num === '' || count( explode( ":", $num, 3 ) ) < 2 ) {
            # Not a latitude:longitude pair. Put back everything that was
            # consumed so ordinary text such as "GEO 2000" is not truncated.
            $text .= "GEO $blank$geo$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
            $text .= '<a href="' .
                $titleObj->escapeLocalUrl( "coordinates={$num}" ) .
                "\" class=\"internal\">GEO $geo</a>";
            $text .= $x;
        }
    }
    wfProfileOut( $fname );
    return $text;
}
# Return an HTML link for the "RFC 1234" text
#
# Every "RFC " marker that is followed (after optional spaces) by a run of
# digits is replaced by an external link. The target is the 'rfcurl' message
# with '$1' replaced by the number; the extra link attributes come from the
# skin of the current parser options. Markers without a number are kept.
#
# @param string $text  text to scan
# @return string       text with the RFC links inserted
/* private */ function magicRFC( $text ) {
    # The space put in front guarantees a leading chunk even when the text
    # starts with the marker; it is cut off again right below.
    $chunks = explode( 'RFC ', ' ' . $text );
    if ( count( $chunks ) < 2 ) return $text;
    $text = (string)substr( array_shift( $chunks ), 1 );
    $valid = '0123456789';

    foreach ( $chunks as $x ) {
        # strspn() also copes with an empty or fully consumed chunk, where
        # peeking at the first character would raise notices.
        $blank = (string)substr( $x, 0, strspn( $x, ' ' ) );
        $x = (string)substr( $x, strlen( $blank ) );
        $rfc = (string)substr( $x, 0, strspn( $x, $valid ) );
        $x = (string)substr( $x, strlen( $rfc ) );

        if ( $rfc === '' ) {
            # No number after the marker: leave the text as it was.
            $text .= "RFC $blank$x";
        } else {
            $url = wfmsg( 'rfcurl' );
            $url = str_replace( '$1', $rfc, $url );
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
# Transform wiki markup before it is saved.
#
# Stores the options and title on the parser, switches the output type to
# OT_WIKI, optionally clears the parser state, normalises line endings and
# <br> variants, and then runs pstPass2() on the text while the parts taken
# out by strip() are set aside. They are put back by unstrip() and
# unstripNoWiki() before the text is returned.
#
# @param string $text        wikitext as submitted
# @param object $title       title being saved (kept by reference)
# @param object $user        user performing the save, handed to pstPass2()
# @param object $options     parser options to use
# @param bool   $clearState  call clearState() first (default true)
# @return string             altered wikitext
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_WIKI;

    if ( $clearState ) {
        $this->clearState();
    }

    # Plain string cleanup: CRLF becomes LF
    $text = str_replace( "\r\n", "\n", $text );

    # now with regexes, applied one after the other over the whole text
    $text = preg_replace(
        "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i",
        '<br style="clear:both;"/>',
        $text );
    $text = preg_replace( "/<br *?>/i", "<br />", $text );

    $stripState = false;
    $stripped = $this->strip( $text, $stripState, false );
    $transformed = $this->pstPass2( $stripped, $user );
    $restored = $this->unstrip( $transformed, $stripState );
    return $this->unstripNoWiki( $restored, $stripState );
}
# Second pass of the pre-save transform.
#
# In order: runs replaceVariables(), expands ~~~ / ~~~~ / ~~~~~ signatures,
# completes context links such as [[|name]] and [[name (context)|]], runs the
# MAG_SUBST substitution callback, and trims trailing whitespace (MAG_END is
# removed afterwards).
#
# @param string $text  wikitext being saved
# @param object $user  user whose name and 'nickname' option form the signature
# @return string       transformed wikitext
/* private */ function pstPass2( $text, &$user ) {
    global $wgLang, $wgLocaltimezone, $wgCurParser;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( 'nickname' );
    if ( '' == $k ) { $k = $n; }

    /* Note: this is an ugly timezone hack for the European wikis */
    $overrideTz = isset( $wgLocaltimezone );
    if ( $overrideTz ) {
        $oldtz = getenv( 'TZ' );
        putenv( 'TZ=' . $wgLocaltimezone );
    }
    $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
        ' (' . date( 'T' ) . ')';
    if ( $overrideTz ) {
        # Put back the value saved above, so the override does not leak
        # into the rest of the request.
        putenv( 'TZ=' . $oldtz );
    }

    # The markers are fixed strings, and the inserted text contains the
    # user-chosen nickname. str_replace() inserts it literally; as a regex
    # replacement, "$1" or "\1" in a nickname would be read as a backreference.
    # Longest marker first, so ~~~~~ is not eaten by the shorter ones.
    $userLink = '[[' . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace( '~~~~~', $d, $text );
    $text = str_replace( '~~~~', "$userLink $d", $text );
    $text = str_replace( '~~~', $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
    # [[ns:page (cont)|]]

    # The context is the parenthesised suffix of the current page title, if any
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
    $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
    $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

    if ( '' == $context ) {
        $text = preg_replace( $p2, '[[\\1]]', $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # NOTE(review): the numbering embedded in the listing skips lines directly
    # before and after the next four statements - confirm they are live code
    # and not a commented-out block before relying on them.
    $mw =& MagicWord::get( MAG_SUBST );
    $wgCurParser = $this->fork();
    $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
    $this->merge( $wgCurParser );

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
# Set up some variables which are usually set up in parse()
# so that an external function can call some class members with confidence
#
# @param object $title       title to parse against (kept by reference)
# @param object $options     parser options to use
# @param int    $outputType  value stored in mOutputType
# @param bool   $clearState  call clearState() afterwards (default true)
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
# Run replaceVariables() over a message text in OT_MSG mode.
#
# The parser is pointed at the global $wgTitle and the given options, and its
# state is cleared before the replacement runs. A static flag guards against
# infinite recursion: a call made while another one is still executing
# returns its text untouched.
#
# @param string $text     message text
# @param object $options  parser options to use
# @return string          text after variable replacement
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $executing = false;

    if ( !$executing ) {
        $executing = true;

        $this->mOutputType = OT_MSG;
        $this->mOptions = $options;
        $this->mTitle = $wgTitle;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $executing = false;
    }
    return $text;
}
# Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
# Callback will be called with the text within
# Transform and return the text within
#
# @param string $tag       tag name the callback is registered under
# @param mixed  $callback  callback stored for that tag
# @return mixed            callback registered before, or null if there was none
function setHook( $tag, $callback ) {
    $previous = null;
    if ( isset( $this->mTagHooks[$tag] ) ) {
        $previous = $this->mTagHooks[$tag];
    }
    $this->mTagHooks[$tag] = $callback;
    return $previous;
}
# Value object carrying the result of a parse: the output text plus the
# language links, category links and "contains old magic" flag collected
# along the way, and a cache timestamp.
class ParserOutput
{
    var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
    var $mCacheTime; # Used in ParserCache

    # @param string $text              output text
    # @param array  $languageLinks     language links
    # @param array  $categoryLinks     category links
    # @param bool   $containsOldMagic  initial value of the old-magic flag
    function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mText = $text;
        $this->mLanguageLinks = $languageLinks;
        $this->mCategoryLinks = $categoryLinks;
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCacheTime = "";
    }

    # Accessors
    function getText() { return $this->mText; }
    function getLanguageLinks() { return $this->mLanguageLinks; }
    function getCategoryLinks() { return $this->mCategoryLinks; }
    function getCacheTime() { return $this->mCacheTime; }
    function containsOldMagic() { return $this->mContainsOldMagic; }

    # Mutators; each one hands back whatever wfSetVar() returns
    function setText( $text ) { return wfSetVar( $this->mText, $text ); }
    function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
    function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
    function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
    function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

    # Fold another ParserOutput into this one: its language links and its
    # category links are appended to the respective lists here, and the
    # old-magic flag becomes true if it is set on either object. Text and
    # cache time are not touched.
    #
    # @param object $other  ParserOutput to merge in
    function merge( $other ) {
        $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
        # Category links must come from $other's category list, not from
        # the language links merged just above.
        $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
        $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
    }
}
# Bundle of settings that steer a parse. The values are taken from site-wide
# globals and from the preferences of a user; see initialiseFromUser().
class ParserOptions
{
    # All variables are private
    var $mUseTeX;                   # Use texvc to expand <math> tags
    var $mUseCategoryMagic;         # Treat [[Category:xxxx]] tags specially
    var $mUseDynamicDates;          # Use $wgDateFormatter to format dates
    var $mInterwikiMagic;           # Interlanguage links are removed and returned in an array
    var $mAllowExternalImages;      # Allow external images inline
    var $mSkin;                     # Reference to the preferred skin
    var $mDateFormat;               # Date format index
    var $mEditSection;              # Create "edit section" links
    var $mEditSectionOnRightClick;  # Generate JavaScript to edit section on right click
    var $mNumberHeadings;           # Automatically number headings
    var $mShowToc;                  # Show table of contents

    # --- Read access -----------------------------------------------------

    function getUseTeX() {
        return $this->mUseTeX;
    }

    function getUseCategoryMagic() {
        return $this->mUseCategoryMagic;
    }

    function getUseDynamicDates() {
        return $this->mUseDynamicDates;
    }

    function getInterwikiMagic() {
        return $this->mInterwikiMagic;
    }

    function getAllowExternalImages() {
        return $this->mAllowExternalImages;
    }

    function getSkin() {
        return $this->mSkin;
    }

    function getDateFormat() {
        return $this->mDateFormat;
    }

    function getEditSection() {
        return $this->mEditSection;
    }

    function getEditSectionOnRightClick() {
        return $this->mEditSectionOnRightClick;
    }

    function getNumberHeadings() {
        return $this->mNumberHeadings;
    }

    function getShowToc() {
        return $this->mShowToc;
    }

    # --- Write access; each setter hands back what wfSetVar() returns ------

    function setUseTeX( $x ) {
        return wfSetVar( $this->mUseTeX, $x );
    }

    function setUseCategoryMagic( $x ) {
        return wfSetVar( $this->mUseCategoryMagic, $x );
    }

    function setUseDynamicDates( $x ) {
        return wfSetVar( $this->mUseDynamicDates, $x );
    }

    function setInterwikiMagic( $x ) {
        return wfSetVar( $this->mInterwikiMagic, $x );
    }

    function setAllowExternalImages( $x ) {
        return wfSetVar( $this->mAllowExternalImages, $x );
    }

    function setDateFormat( $x ) {
        return wfSetVar( $this->mDateFormat, $x );
    }

    function setEditSection( $x ) {
        return wfSetVar( $this->mEditSection, $x );
    }

    function setEditSectionOnRightClick( $x ) {
        return wfSetVar( $this->mEditSectionOnRightClick, $x );
    }

    function setNumberHeadings( $x ) {
        return wfSetVar( $this->mNumberHeadings, $x );
    }

    function setShowToc( $x ) {
        return wfSetVar( $this->mShowToc, $x );
    }

    # The skin is bound by reference and nothing is returned
    function setSkin( &$x ) {
        $this->mSkin =& $x;
    }

    # Build a new ParserOptions object and fill it from the given user
    /* static */ function newFromUser( &$user ) {
        $popts = new ParserOptions;
        $popts->initialiseFromUser( $user );
        return $popts;
    }

    # Fill every option: the site-wide switches come from the $wg* globals,
    # the skin and the remaining settings from the user object. When
    # $userInput is empty a fresh User, flagged as loaded, stands in for it.
    function initialiseFromUser( &$userInput ) {
        global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

        if ( $userInput ) {
            $user =& $userInput;
        } else {
            $user = new User;
            $user->setLoaded( true );
        }

        # Site configuration
        $this->mUseTeX = $wgUseTeX;
        $this->mUseCategoryMagic = $wgUseCategoryMagic;
        $this->mUseDynamicDates = $wgUseDynamicDates;
        $this->mInterwikiMagic = $wgInterwikiMagic;
        $this->mAllowExternalImages = $wgAllowExternalImages;

        # Per-user settings
        $this->mSkin =& $user->getSkin();
        $this->mDateFormat = $user->getOption( 'date' );
        $this->mEditSection = $user->getOption( 'editsection' );
        $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
        $this->mNumberHeadings = $user->getOption( 'numberheadings' );
        $this->mShowToc = $user->getOption( 'showtoc' );
    }
}
# Regex callbacks, used in Parser::replaceVariables
#
# Hands the match array to braceSubstitution() of the parser held in the
# global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
    $parser =& $GLOBALS['wgCurParser'];
    return $parser->braceSubstitution( $matches );
}
# Hands the match array to argSubstitution() of the parser held in the
# global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
    $parser =& $GLOBALS['wgCurParser'];
    return $parser->argSubstitution( $matches );
}
# Hands the match array to variableSubstitution() of the parser held in the
# global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
    $parser =& $GLOBALS['wgCurParser'];
    return $parser->variableSubstitution( $matches );
}