select less fields from sql.
[mediawiki.git] / includes / Parser.php
blob15a272d0828cde4692da769f64a9573760e11e89
1 <?php
3 // require_once('Tokenizer.php');
# PHP Parser
#
# Processes wiki markup
#
# There are two main entry points into the Parser class: parse() and preSaveTransform().
# The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
#
# Globals used:
#    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
#
# NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
#
#    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
#               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
#               $wgLocaltimezone
#
#    * only within ParserOptions
#----------------------------------------
# Variable substitution O(N^2) attack
#----------------------------------------
# Without countermeasures, a page filled with a large number of inclusions of
# large pages would generate output whose size is proportional to the square
# of the input size. Limiting the number of inclusions of any given page
# brings such an attack back to O(N).
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Tag name to pass to extractTags() to make it strip <!-- HTML comments -->
# rather than an <XML>-style tag. This should not be anything we may want to
# use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the placeholder markers; used in at least two functions
define( 'UNIQ_PREFIX', 'NaodW29' );

/* private */ $wgParserHooks = array();
51 class Parser
53 # Cleared with clearState():
54 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
55 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
57 # Temporary:
58 var $mOptions, $mTitle, $mOutputType;
# Constructor (PHP 4 style): a new parser starts from a freshly cleared state.
function Parser()
{
	$this->clearState();
}
# Resets every member listed under "Cleared with clearState()" to its
# initial value, including a brand-new ParserOutput.
function clearState()
{
	# Counters, flags and scalar markers
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;

	# Collections
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();

	# Output accumulator
	$this->mOutput = new ParserOutput;
}
# First pass--just handle <nowiki> sections, pass the rest off
# to internalParse() which does all the real work.
#
# $text       wiki markup to convert
# $title      title object, stored by reference in $this->mTitle
# $options    options object, stored in $this->mOptions
# $linestart  passed through to internalParse() and doBlockLevels()
# $clearState when true, clearState() is called before parsing
#
# Returns a ParserOutput ($this->mOutput) with its text set to the result.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if ( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (\\?|:|!|\\302\\273)/i' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The lookahead used to read "(?!:amp;" (a stray colon) and the hex
			# class "[0-9A-fa-f]" (which also accepts G-Z and some punctuation);
			# both are corrected here.
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# With tidy enabled, only the french spaces and <center> need handling here
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	# <nowiki> content is put back after block-level processing
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if ( $wgUseTidy ) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
# Returns two mt_rand() values (each in 0..0x7fffffff) as concatenated
# lowercase hex; used to make strip markers hard to guess.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
# Replaces all occurrences of <$tag>content</$tag> in the text
# with a random marker and returns the new text. The output parameter
# $content will be an associative array filled with data of the form
# $unique_marker => content.
#
# If $content is already set, the additional entries will be appended.
#
# If $tag is set to STRIP_COMMENTS, the function will extract
# <!-- HTML comments --> instead.
#
# An opening tag with no closing tag captures everything up to the end of
# the text. $tag is interpolated into the patterns unescaped, so it must
# not contain regex metacharacters.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# The delimiters do not change while scanning, so build them once
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = '/<!--/i';
		$closePattern = '/-->/i';
	} else {
		$openPattern = "/<\\s*$tag\\s*>/i";
		$closePattern = "/<\\/\\s*$tag\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No (further) opening tag, or nothing follows it
			$text = '';
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag there is no remainder; previously this
			# read the undefined $q[1]
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
# Strips and renders <nowiki>, <pre>, <math> and the tags registered in
# $wgParserHooks.
# When the output type is OT_HTML the content is rendered; otherwise the
# original tag is re-wrapped around the content unchanged.
# Returns the text, and fills an array with data needed in unstrip()
# If the $state is already a valid strip state, it adds to the state
#
# When $stripcomments is set, HTML comments <!-- like this -->
# will be stripped in addition to other tags. This is important
# for section editing, where these comments cause confusion when
# counting the sections in the wikisource
function strip( $text, &$state, $stripcomments = false )
{
	global $wgParserHooks;

	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source escaped. The closing tag
				# used to be emitted as a second "&lt;math&gt;".
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Extensions
	foreach ( $wgParserHooks as $tag => $callback ) {
		# (was misspelt $ext_contents, leaving the real slot uninitialised)
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	$stripped = array(
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $stripped as $key => $markers ) {
			if ( isset( $state[$key] ) ) {
				$state[$key] = $state[$key] + $markers;
			} else {
				# A key missing from the old state (e.g. an extension tag
				# seen for the first time) used to be dropped, or, for the
				# built-in keys, added to NULL.
				$state[$key] = $markers;
			}
		}
	} else {
		$state = $stripped;
	}
	return $text;
}
# Puts back everything recorded in the strip state except the 'nowiki'
# entries. Always call unstripNoWiki() after this one.
function unstrip( $text, &$state )
{
	# Must expand in reverse order, otherwise nested tags will be corrupted
	$markerGroup = end( $state );
	while ( $markerGroup !== false ) {
		if ( key( $state ) != 'nowiki' ) {
			$replacement = end( $markerGroup );
			while ( $replacement !== false ) {
				$text = str_replace( key( $markerGroup ), $replacement, $text );
				$replacement = prev( $markerGroup );
			}
		}
		$markerGroup = prev( $state );
	}
	return $text;
}
# Puts back the 'nowiki' entries of the strip state.
# Always call this after unstrip() to preserve the order.
function unstripNoWiki( $text, &$state )
{
	# Must expand in reverse order, otherwise nested tags will be corrupted
	$replacement = end( $state['nowiki'] );
	while ( $replacement !== false ) {
		$text = str_replace( key( $state['nowiki'] ), $replacement, $text );
		$replacement = prev( $state['nowiki'] );
	}
	return $text;
}
# Add an item to the strip state
# Returns the unique tag which must be inserted into the stripped text
# The tag will be replaced with the original text in unstrip()
function insertStripItem( $text, &$state )
{
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Start from the same skeleton strip() builds. 'comment' was missing
		# here, although strip() reads $state['comment'] when it merges into
		# an existing state.
		$state = array(
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
# categoryMagic
# Generates the list of subcategories and pages for a category.
# Dispatches on wfMsg("usenewcategorypage"): when that message starts with
# the character '0' the old code is used, otherwise the new code. The new
# code will not work properly for some languages due to sorting issues, so
# they might want to turn it off.
function categoryMagic()
{
	$msg = wfMsg('usenewcategorypage');
	# @ silences the notice raised when the message is an empty string
	$useOldCode = ( '0' == @$msg[0] );
	if ( !$useOldCode ) {
		return $this->newCategoryMagic();
	}
	return $this->oldCategoryMagic();
}
# Generates the list of subcategories and pages for a category as two
# comma-separated runs of links.
# Returns nothing when category magic is disabled in the options, '' when
# the current title is not in the category namespace, and the HTML otherwise.
function oldCategoryMagic ()
{
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) {
		return ; # Doesn't use categories at all
	}

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) {
		return "" ; # This ain't a category page
	}

	$html = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$pageLinks = array() ;
	$subcatLinks = array() ;
	$rows = array () ;
	# NOTE(review): the result is never used; the call is kept in case
	# getArticleID() has side effects - confirm before removing
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	# Buffer every row before any link is built
	while ( $row = wfFetchObject ( $res ) ) {
		$rows[] = $row ;
	}

	# For all pages that link to this category
	foreach ( $rows as $row ) {
		$name = $wgLang->getNsText ( $row->cur_namespace ) ;
		if ( $name != "" ) {
			$name .= ":" ;
		}
		$name .= $row->cur_title ;

		if ( $row->cur_namespace == $cns ) {
			$subcatLinks[] = $sk->makeLink ( $name ) ; # Subcategory
		} else {
			$pageLinks[] = $sk->makeLink ( $name ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $subcatLinks ) > 0 ) {
		$html .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
		$html .= implode ( ', ' , $subcatLinks ) ;
	}

	# Showing pages in this category
	if ( count ( $pageLinks ) > 0 ) {
		$heading = wfMsg( 'category_header', $this->mTitle->getText() );
		$html .= "<h2>{$heading}</h2>\n" ;
		$html .= implode ( ', ' , $pageLinks ) ;
	}

	return $html ;
}
# Generates the list of subcategories and pages for a category, grouped
# under a heading for the first character of each sort key. Lists longer
# than a threshold (20 subcategories, 6 articles) are laid out in three
# table columns, shorter ones as a single run of lists.
# Returns nothing when category magic is disabled in the options, '' when
# the current title is not in the category namespace, and the HTML otherwise.
function newCategoryMagic ()
{
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$articles_start_char = array();
	$children = array() ;
	$children_start_char = array();
	# NOTE(review): the result is never used; the call is kept in case
	# getArticleID() has side effects - confirm before removing
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM
		cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY
		cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != '' ) $t .= ':' ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			# Subcategory: linked with underscores shown as spaces
			array_push ( $children, $sk->makeKnownLink ( $t, str_replace( '_',' ',$x->cur_title) ) ) ;
			array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
		} else {
			# Page in this category
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
			array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	$ti = $this->mTitle->getText() ;

	# Don't show subcategories section if there are none.
	if ( count ( $children ) > 0 )
	{
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n"
			. wfMsg( 'subcategorycount', count( $children ) );
		$r .= $this->categoryMagicItemList( $children, $children_start_char, 20 );
	}

	# The article heading and count are shown even when there are no articles
	$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n" .
		wfMsg( 'categoryarticlecount', count( $articles ) );
	$r .= $this->categoryMagicItemList( $articles, $articles_start_char, 6 );

	return $r ;
}

# Renders $items (HTML links) grouped by $startChars (parallel array of
# heading characters): three columns when there are more than
# $columnThreshold items, a single run of lists otherwise, '' when empty.
/* private */ function categoryMagicItemList( $items, $startChars, $columnThreshold )
{
	$total = count( $items );
	if ( $total == 0 ) {
		return '';
	}
	if ( $total > $columnThreshold ) {
		return $this->categoryMagicColumnList( $items, $startChars );
	}
	return $this->categoryMagicShortList( $items, $startChars );
}

# Three-column table layout. The first column receives (int)(count/3) items
# and the other two one more each, which always covers the whole list.
/* private */ function categoryMagicColumnList( $items, $startChars )
{
	$total = count( $items );
	$chunk = (int) ( $total / 3 );

	$html = '<table width="100%"><tr valign="top">';

	$startChunk = 0;
	$endChunk = $chunk;
	for ( $chunkIndex = 0; $chunkIndex < 3; $chunkIndex++ ) {
		$html .= '<td><ul>';
		for ( $index = $startChunk; $index < $endChunk && $index < $total; $index++ ) {
			# New heading at the beginning of a chunk or on a change of starting
			# letter. The chunk test comes first so that index 0 never looks at
			# the nonexistent element -1.
			if ( $index == $startChunk
				|| $startChars[$index] != $startChars[$index - 1] )
			{
				$html .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
			}
			$html .= "<li>{$items[$index]}</li>";
		}
		$html .= '</ul></td>';

		$startChunk = $endChunk;
		$endChunk += $chunk + 1;
	}

	$html .= '</tr></table>';
	return $html;
}

# Single-column layout for short lists; expects at least one item.
/* private */ function categoryMagicShortList( $items, $startChars )
{
	$total = count( $items );
	$html = "<h3>{$startChars[0]}</h3>\n";
	$html .= '<ul><li>' . $items[0] . '</li>';
	for ( $index = 1; $index < $total; $index++ ) {
		if ( $startChars[$index] != $startChars[$index - 1] ) {
			$html .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
		}
		$html .= "<li>{$items[$index]}</li>";
	}
	$html .= '</ul>';
	return $html;
}
# Return the list of allowed HTML attribute names (no scripting, etc.).
# 'width' appears twice; the list is returned exactly as written.
function getHTMLattrs ()
{
	return array(
		'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor',
		/* BR */
		'clear',
		/* HR */
		'noshade',
		/* BLOCKQUOTE, Q */
		'cite',
		/* FONT */
		'size', 'face', 'color',
		/* For various lists, mostly deprecated but safe */
		'type', 'start', 'value', 'compact',
		/* Tables */
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan',
		/* For CSS */
		'id', 'class', 'name', 'style'
	);
}
# Remove non approved attributes and javascript in css
#
# $t is the attribute text of one HTML tag. Attributes whose name is not in
# getHTMLattrs() are removed; if the remaining text has a style attribute
# containing "expression", "tp://" / "tps://" or "url(", all attributes are
# dropped. Returns the trimmed result.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used preg_replace() with the /e modifier, which evaluates the
	# attribute text as PHP code: values containing "$" or "{" could be
	# interpolated or executed, and single quotes came out backslash-escaped.
	# A callback does the same filtering without evaluating anything.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (when present) its value including any
# double quotes. Returns "name", "name=value", or '' for a disallowed name.
/* private */ function fixTagAttributesCallback( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	# Unmatched trailing groups are left out of $m
	$value = isset( $m[3] ) ? $m[3] : '';
	if ( $value !== '' ) {
		return $m[1] . '=' . $value;
	}
	return $m[1];
}
# Interface with html tidy, used if $wgUseTidy = true
#
# Wraps $text in a minimal XHTML document, pipes it through the external
# $wgTidyBin process and returns what that process writes to stdout.
# If nothing comes back for non-empty input, the original text is returned
# followed by an HTML comment reporting the failure.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Pick the encoding switch. It is built into a local copy: appending to
	# the global $wgTidyOpts made the switch pile up on every call.
	$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$encodingOpt = $sameEncoding ? ' -latin1' : ' -raw';
			break;
		case 'UTF-8':
			$encodingOpt = $sameEncoding ? ' -utf8' : ' -raw';
			break;
		default:
			$encodingOpt = ' -raw';
	}
	$opts = $wgTidyOpts . $encodingOpt;

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array('pipe', 'r'),              # child's stdin: we write the document
		1 => array('pipe', 'w'),              # child's stdout: we read the result
		2 => array('file', '/dev/null', 'a')  # child's stderr is discarded
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		# NOTE(review): all input is written before any output is read
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
# Parse the wiki syntax used to render tables.
#
# Works line by line on lines beginning (after trimming) with:
#   {|   table start, rest of line is the table's attributes
#   |}   table end
#   |-   new row, rest of line (after all leading dashes) is the row's attributes
#   |+   caption
#   |    data cells, separated by ||
#   !    header cells, separated by !! or ||
# Inside a cell, text before the first single | is taken as its attributes.
# Four parallel stacks carry one entry per currently open (nested) table.
# Lines outside any table are left alone. Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( '{|' == substr ( $x , 0 , 2 ) )
		{
			# Attributes start right after "{|". This used substr( $x, 3 ),
			# which swallowed the first character of "{|border=1";
			# fixTagAttributes() trims any leading space itself.
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		/* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
		{
			$z = trim ( substr ( $x , 2 ) ) ;
			$t[$k] = "<caption>{$z}</caption>\n" ;
		}*/
		else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is only emitted with the first cell of the row
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( '|+' == substr ( $x , 0 , 2 ) )
			{
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Open the row if it is not open yet; captions live outside rows
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, if any, with its own tag name
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that was opened; this used to emit
		# </td> even for an open <th> or <caption>
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
# Parses the text and adds the result to the strip state.
# Returns $newline followed by the strip tag that stands for the result.
# $newline doubles as the "line start" flag (cast to bool) for internalParse().
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$atLineStart = (bool)$newline;
	$parsed = $this->internalParse( $stripped, $atLineStart, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
# Runs the main chain of markup transformations over already-stripped text
# and returns the result. The order of the steps matters.
# $args is handed to replaceVariables(), $isMain to formatHeadings();
# $linestart is accepted but not used here.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	global $wgDateFormatter;
	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	# NOTE(review): run twice on purpose or by accident? Presumably the second
	# pass picks up links the first pass left behind - confirm before changing.
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# The category listing is appended by the first call only; the flag is
	# a dynamic member that nothing in this method resets.
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
# Parse headers and return html.
# A line of the form ==text== (one to six equals signs on both sides)
# becomes <hN>text</hN>. Level 6 is handled first so that longer runs of
# equals signs are consumed before shorter ones.
/* private */ function doHeadings( $text )
{
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	$level = 6;
	while ( $level >= 1 ) {
		$equals = str_repeat( '=', $level );
		$pattern = "/^{$equals}(.+){$equals}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	wfProfileOut( $fname );
	return $text;
}
# Applies doQuotes() to every line of the text separately, so quote markup
# never spans a line break, and joins the lines again with "\n".
/* private */ function doAllQuotes( $text )
{
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( '', $line, '' );
	}
	$outtext = implode( "\n", $converted );
	wfProfileOut( $fname );
	return $outtext;
}
# Converts '' (emphasis) and ''' (strong) quote markup in one line to
# <em> / <strong>, recursing on the text after each quote run.
#
# $pre   text collected so far that still belongs inside the outer tag
# $text  remaining text to scan
# $mode  what is currently open: '', 'em', 'strong', 'emstrong' (em outside,
#        strong inside), 'strongem' (strong outside, em inside) or 'both'
#        (both opened together, nesting order not decided yet)
/* private */ function doQuotes( $pre, $text, $mode )
{
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No quote run left: close whatever is still open around the rest
		$text_strong = ($text == '') ? '' : "<strong>{$text}</strong>";
		$text_em = ($text == '') ? '' : "<em>{$text}</em>";
		$nothing = ( ($pre == '') && ($text == '') );
		switch ( $mode ) {
			case '':
				return $pre . $text;
			case 'em':
				return $pre . $text_em;
			case 'strong':
				return $pre . $text_strong;
			case 'strongem':
				return $nothing ? '' : "<strong>{$pre}{$text_em}</strong>";
			default:
				return $nothing ? '' : "<em>{$pre}{$text_strong}</em>";
		}
	}

	$m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
	$m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";

	if ( substr ($m[2], 0, 1) == '\'' ) {
		# Three quotes: toggles strong
		$m[2] = substr ($m[2], 1);
		switch ( $mode ) {
			case 'em':
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'emstrong' );
			case 'strong':
				return $m1_strong . $this->doQuotes ( '', $m[2], '' );
			case 'emstrong':
			case 'both':
				return $this->doQuotes ( '', $pre.$m1_strong.$m[2], 'em' );
			case 'strongem':
				return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( '', $m[2], 'em' );
			default:
				return $m[1] . $this->doQuotes ( '', $m[2], 'strong' );
		}
	}

	# Two quotes: toggles em
	switch ( $mode ) {
		case 'strong':
			return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'strongem' );
		case 'em':
			return $m1_em . $this->doQuotes ( '', $m[2], '' );
		case 'emstrong':
			return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( '', $m[2], 'strong' );
		case 'strongem':
		case 'both':
			return $this->doQuotes ( '', $pre.$m1_em.$m[2], 'strong' );
		default:
			return $m[1] . $this->doQuotes ( '', $m[2], 'em' );
	}
}
# Note: we have to do external links before the internal ones,
# and otherwise take great care in the order of things here, so
# that we don't end up interpreting some URLs twice.
#
# Runs subReplaceExternalLinks() once per supported protocol, in a fixed
# order. The flag says whether unlabelled bracketed links of that protocol
# are autonumbered.
/* private */ function replaceExternalLinks( $text )
{
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );
	$protocols = array(
		'http' => true,
		'https' => true,
		'ftp' => false,
		'irc' => false,
		'gopher' => false,
		'news' => false,
		'mailto' => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}
	wfProfileOut( $fname );
	return $text;
}
# Replaces the external links of one protocol in $s:
#  1. (only when $autonumber is set and external images are allowed)
#     bare URLs ending in an image extension become images,
#  2. remaining bare URLs become links showing the URL itself,
#  3. bracketed links, [url] and [url text], become links; [url] is shown
#     as an automatic number when $autonumber is set, else as the URL.
# Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stands in for the protocol inside the output of steps 1 and 2 and is
	# swapped back before step 3
	$unique = '4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3';
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = 'A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF';
	$images = 'gif|png|jpg|jpeg';

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$bareUrlPattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$unique}:\\3" . '/\\4.\\5', '\\4.\\5' );
		$s = preg_replace( $imagePattern, '\\1' . $imageHtml . '\\6', $s );
	}

	$linkAttributes = $sk->getExternalLinkAttributes( "{$unique}:\\3",
		wfEscapeHTML( "{$unique}:\\3" ) );
	$bareUrlReplacement = '\\1' . "<a href=\"{$unique}:\\3\"" .
		$linkAttributes . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		'</a>\\5';
	$s = preg_replace( $bareUrlPattern, $bareUrlReplacement, $s );
	$s = str_replace( $unique, $protocol, $s );

	# Split at every "[protocol:"; the leading space guarantees a first piece
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $pieces );
	$s = substr( $s, 1 );

	$unlabelledPattern = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$labelledPattern = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $line ) {
		if ( preg_match( $unlabelledPattern, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} elseif ( preg_match( $labelledPattern, $line, $m ) ) {
			# [url text]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a valid bracketed link: put the piece back untouched
			$s .= "[{$protocol}:" . $line;
			continue;
		}

		$paren = '';
		if ( $link != $text && !preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
# Replace [[internal link]] markup in $s with HTML produced by the skin.
#
# The text is split on '[[' and every following piece is matched against $e1
# (link target, optional |alternate text, the closing ]] and the trailing text).
# Pieces that do not match $e1, or whose target is rejected by
# Title::newFromText(), are written back unchanged behind their '[['.
# Depending on the resulting title, a link is either recorded on
# $this->mOutput (language links, category links) or rendered through the
# skin as an image, media, special, self or regular link.
#
# @param string $s wiki text
# @return string the text with internal links replaced
977 /* private */ function replaceInternalLinks( $s )
979 global $wgLang, $wgLinkCache;
980 global $wgNamespacesWithSubpages, $wgLanguageCode;
981 static $fname = 'Parser::replaceInternalLinks' ;
982 wfProfileIn( $fname );
984 wfProfileIn( $fname.'-setup' );
# $tc, $e1, $e2 and the namespace numbers below are function statics, so they
# are only computed on the first call.
985 static $tc = FALSE;
986 # the % is needed to support urlencoded titles as well
987 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
988 $sk =& $this->mOptions->getSkin();
# A space is prepended before splitting and removed again from the first
# piece; the first piece is the text in front of the first '[['.
990 $a = explode( '[[', ' ' . $s );
991 $s = array_shift( $a );
992 $s = substr( $s, 1 );
994 # Match a link having the form [[namespace:link|alternate]]trail
995 static $e1 = FALSE;
996 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
997 # Match the end of a line for a word that's not followed by whitespace,
998 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
999 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1001 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
1002 # Special and Media are pseudo-namespaces; no pages actually exist in them
1003 static $image = FALSE;
1004 static $special = FALSE;
1005 static $media = FALSE;
1006 static $category = FALSE;
1007 if ( !$image ) { $image = Namespace::getImage(); }
1008 if ( !$special ) { $special = Namespace::getSpecial(); }
1009 if ( !$media ) { $media = Namespace::getMedia(); }
1010 if ( !$category ) { $category = Namespace::getCategory(); }
1012 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
# With the link prefix extension, the word directly in front of the first
# link is cut off the leading text here and kept for the first loop iteration.
1014 if ( $useLinkPrefixExtension ) {
1015 if ( preg_match( $e2, $s, $m ) ) {
1016 $first_prefix = $m[2];
1017 $s = $m[1];
1018 } else {
1019 $first_prefix = false;
1021 } else {
1022 $prefix = '';
1025 wfProfileOut( $fname.'-setup' );
1027 foreach ( $a as $line ) {
1028 wfProfileIn( $fname.'-prefixhandling' );
1029 if ( $useLinkPrefixExtension ) {
1030 if ( preg_match( $e2, $s, $m ) ) {
1031 $prefix = $m[2];
1032 $s = $m[1];
1033 } else {
1034 $prefix='';
1036 # first link
1037 if($first_prefix) {
1038 $prefix = $first_prefix;
1039 $first_prefix = false;
1042 wfProfileOut( $fname.'-prefixhandling' );
# $m[1] = link target, $m[2] = alternate text (may be empty), $m[3] = trail
1044 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1045 $text = $m[2];
1046 # fix up urlencoded title texts
1047 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1048 $trail = $m[3];
1049 } else { # Invalid form; output directly
1050 $s .= $prefix . '[[' . $line ;
1051 continue;
1054 /* Valid link forms:
1055 Foobar -- normal
1056 :Foobar -- override special treatment of prefix (images, language links)
1057 /Foobar -- convert to CurrentPage/Foobar
1058 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
# $noforce is false only for targets with a leading ':'
1060 $c = substr($m[1],0,1);
1061 $noforce = ($c != ':');
1062 if( $c == '/' ) { # subpage
1063 if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown
1064 $m[1]=substr($m[1],1,strlen($m[1])-2);
1065 $noslash=$m[1];
1066 } else {
1067 $noslash=substr($m[1],1);
1069 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
1070 $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1071 if( '' == $text ) {
1072 $text= $m[1];
1073 } # this might be changed for ugliness reasons
1074 } else {
1075 $link = $noslash; # no subpage allowed, use standard link
1077 } elseif( $noforce ) { # no subpage
1078 $link = $m[1];
1079 } else {
1080 $link = substr( $m[1], 1 );
# Links without alternate text display their own target
1082 $wasblank = ( '' == $text );
1083 if( $wasblank )
1084 $text = $link;
1086 $nt = Title::newFromText( $link );
1087 if( !$nt ) {
1088 $s .= $prefix . '[[' . $line;
1089 continue;
1091 $ns = $nt->getNamespace();
1092 $iw = $nt->getInterWiki();
# The special cases in this branch are skipped for ':'-prefixed targets
1093 if( $noforce ) {
# Language link: recorded on the output object instead of being rendered inline
1094 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1095 array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
1096 $tmp = $prefix . $trail ;
1097 $s .= (trim($tmp) == '')? '': $tmp;
1098 continue;
1100 if ( $ns == $image ) {
1101 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1102 $wgLinkCache->addImageLinkObj( $nt );
1103 continue;
# Category link: the rendered link goes to mCategoryLinks, only prefix and
# trail stay in the running text
1105 if ( $ns == $category ) {
1106 $t = $nt->getText() ;
1107 $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
1109 $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1110 $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1111 $wgLinkCache->resume();
1113 $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
1114 $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1115 $this->mOutput->mCategoryLinks[] = $t ;
1116 $s .= $prefix . $trail ;
1117 continue;
# NOTE(review): strpos() is compared loosely with FALSE, so a '#' at offset 0
# of $link (strpos() returning 0) is treated the same as "no '#' present".
1120 if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
1121 ( strpos( $link, '#' ) == FALSE ) ) {
1122 # Self-links are handled specially; generally de-link and change to bold.
1123 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1124 continue;
1127 if( $ns == $media ) {
1128 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1129 $wgLinkCache->addImageLinkObj( $nt );
1130 continue;
1131 } elseif( $ns == $special ) {
1132 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1133 continue;
# Everything else is an ordinary link; the skin receives trail and prefix
1135 $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1137 wfProfileOut( $fname );
1138 return $s;
1141 # Some functions here used by doBlockLevels()
# Emit the closing tag for the block section that is currently open (if any)
# and reset the section / preformatted state.
#
# @return string closing tag followed by a newline, or '' when nothing is open
/* private */ function closeParagraph()
{
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mInPre = false;
	$this->mLastSection = '';

	return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading bytes the two strings share.
#
# @param string $st1
# @param string $st2
# @return int 0 when either string is empty or the first bytes differ
/* private */ function getCommon( $st1, $st2 )
{
	# Only the overlap of the two strings can match
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $limit; ++$i ) {
		# Square-bracket offsets; the $str{$i} form no longer exists in PHP 8
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
1167 # These next three functions open, continue, and close the list
1168 # element appropriate to the prefix character passed into them.
# Open a new list for the given prefix character. Any open paragraph is
# closed first; ';' additionally marks a definition term as open.
#
# @param string $char one of * # : ;
# @return string HTML, or an error comment for an unknown character
/* private */ function openList( $char )
{
	$html = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			return $html . '<ul><li>';
		case '#':
			return $html . '<ol><li>';
		case ':':
			return $html . '<dl><dd>';
		case ';':
			$this->mDTopen = true;
			return $html . '<dl><dt>';
		default:
			# The paragraph-closing markup is deliberately not part of the error output
			return '<!-- ERR 1 -->';
	}
}
# Close the current list item and open the next one for the given prefix
# character. For definition lists the closing tag depends on whether a
# term (<dt>) is currently open, and the open-term flag is updated.
#
# @param string $char one of * # : ;
# @return string HTML, or an error comment for an unknown character
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';
		case ':':
		case ';':
			$closeTag = $this->mDTopen ? '</dt>' : '</dd>';
			$this->mDTopen = ( ';' == $char );
			return $closeTag . ( $this->mDTopen ? '<dt>' : '<dd>' );
	}
	return '<!-- ERR 2 -->';
}
# Close the list belonging to the given prefix character.
#
# @param string $char one of * # :
# @return string closing tags followed by a newline, or an error comment
#                (without newline) for any other character
/* private */function closeList( $char )
{
	switch ( $char ) {
		case '*':
			$closing = '</li></ul>';
			break;
		case '#':
			$closing = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				# A definition term is still open: close it instead of a <dd>
				$this->mDTopen = false;
				$closing = '</dt></dl>';
			} else {
				$closing = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $closing . "\n";
}
# Build block-level HTML (lists, paragraphs, indent-<pre>) from text.
#
# The text is processed line by line. A run of * # : ; at the start of a
# line is the list prefix; comparing it with the previous line's prefix
# decides whether to continue an item, open nested lists or close lists.
# Lines without a prefix go through paragraph handling: lines containing
# one of the listed block tags close the current paragraph, lines starting
# with a space open a <pre> section, other lines open <p> sections.
# Lists and the last section still open are closed after the last line.
#
# @param string $text
# @param bool $linestart when false, the first line is copied to the output
#                        without any processing
# @return string
1219 /* private */ function doBlockLevels( $text, $linestart ) {
1220 $fname = 'Parser::doBlockLevels';
1221 wfProfileIn( $fname );
1223 # Parsing through the text line by line. The main thing
1224 # happening here is handling of block-level elements p, pre,
1225 # and making lists from lines starting with * # : etc.
1227 $textLines = explode( "\n", $text );
1229 $lastPrefix = $output = $lastLine = '';
1230 $this->mDTopen = $inBlockElem = false;
1231 $prefixLength = 0;
# $paragraphStack is false, or a pending paragraph tag string that is only
# written out once the following line has been seen
1232 $paragraphStack = false;
1234 if ( !$linestart ) {
1235 $output .= array_shift( $textLines );
1237 foreach ( $textLines as $oLine ) {
1238 $lastPrefixLength = strlen( $lastPrefix );
1239 $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1240 $preOpenMatch = preg_match("/<pre/i", $oLine );
1241 if (!$this->mInPre) {
1242 $this->mInPre = !empty($preOpenMatch);
1244 if ( !$this->mInPre ) {
1245 # Multiple prefixes may abut each other for nested lists.
1246 $prefixLength = strspn( $oLine, '*#:;' );
1247 $pref = substr( $oLine, 0, $prefixLength );
1249 # eh?
# $pref2 is $pref with ';' replaced by ':'; it is what gets stored in
# $lastPrefix, so ';' and ':' lines compare as the same list below
1250 $pref2 = str_replace( ';', ':', $pref );
1251 $t = substr( $oLine, $prefixLength );
1252 } else {
1253 # Don't interpret any other prefixes in preformatted text
1254 $prefixLength = 0;
1255 $pref = $pref2 = '';
1256 $t = $oLine;
1259 # List generation
1260 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1261 # Same as the last item, so no need to deal with nesting or opening stuff
1262 $output .= $this->nextItem( substr( $pref, -1 ) );
1263 $paragraphStack = false;
1265 if ( ";" == substr( $pref, -1 ) ) {
1266 # The one nasty exception: definition lists work like this:
1267 # ; title : definition text
1268 # So we check for : in the remainder text to split up the
1269 # title and definition, without b0rking links.
1270 # FIXME: This is not foolproof. Something better in Tokenizer might help.
1271 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1272 $term = $match[1];
1273 $output .= $term . $this->nextItem( ':' );
1274 $t = $match[2];
1277 } elseif( $prefixLength || $lastPrefixLength ) {
1278 # Either open or close a level...
1279 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1280 $paragraphStack = false;
# Close the levels of the previous prefix that are not shared, innermost first
1282 while( $commonPrefixLength < $lastPrefixLength ) {
1283 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1284 --$lastPrefixLength;
1286 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1287 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
# Open the levels of the new prefix that are not shared, outermost first
1289 while ( $prefixLength > $commonPrefixLength ) {
1290 $char = substr( $pref, $commonPrefixLength, 1 );
1291 $output .= $this->openList( $char );
1293 if ( ';' == $char ) {
1294 # FIXME: This is dupe of code above
1295 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1296 $term = $match[1];
1297 $output .= $term . $this->nextItem( ":" );
1298 $t = $match[2];
1301 ++$commonPrefixLength;
1303 $lastPrefix = $pref2;
1305 if( 0 == $prefixLength ) {
1306 # No prefix (not in list)--go to paragraph mode
1307 $uniq_prefix = UNIQ_PREFIX;
1308 // XXX: use a stack for nestable elements like span, table and div
1309 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1310 $closematch = preg_match(
1311 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1312 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1313 if ( $openmatch or $closematch ) {
1314 $paragraphStack = false;
1315 $output .= $this->closeParagraph();
# closeParagraph() resets mInPre, so it is set again for a <pre> that is
# opened but not closed on this line
1316 if($preOpenMatch and !$preCloseMatch) {
1317 $this->mInPre = true;
1319 if ( $closematch ) {
1320 $inBlockElem = false;
1321 } else {
1322 $inBlockElem = true;
1324 } else if ( !$inBlockElem && !$this->mInPre ) {
1325 if ( " " == $t{0} and trim($t) != '' ) {
1326 // pre
1327 if ($this->mLastSection != 'pre') {
1328 $paragraphStack = false;
1329 $output .= $this->closeParagraph().'<pre>';
1330 $this->mLastSection = 'pre';
1332 } else {
1333 // paragraph
1334 if ( '' == trim($t) ) {
# Blank line: a second blank line in a row writes out the pending tag plus
# <br />; a first one only records which tag to write once text follows
1335 if ( $paragraphStack ) {
1336 $output .= $paragraphStack.'<br />';
1337 $paragraphStack = false;
1338 $this->mLastSection = 'p';
1339 } else {
1340 if ($this->mLastSection != 'p' ) {
1341 $output .= $this->closeParagraph();
1342 $this->mLastSection = '';
1343 $paragraphStack = '<p>';
1344 } else {
1345 $paragraphStack = '</p><p>';
1348 } else {
1349 if ( $paragraphStack ) {
1350 $output .= $paragraphStack;
1351 $paragraphStack = false;
1352 $this->mLastSection = 'p';
1353 } else if ($this->mLastSection != 'p') {
1354 $output .= $this->closeParagraph().'<p>';
1355 $this->mLastSection = 'p';
# The line itself is only written while no paragraph tag is pending
1361 if ($paragraphStack === false) {
1362 $output .= $t."\n";
# End of input: close the lists that are still open, innermost first
1365 while ( $prefixLength ) {
1366 $output .= $this->closeList( $pref2{$prefixLength-1} );
1367 --$prefixLength;
1369 if ( '' != $this->mLastSection ) {
1370 $output .= '</' . $this->mLastSection . '>';
1371 $this->mLastSection = '';
1374 wfProfileOut( $fname );
1375 return $output;
# Return the value of a magic variable (like PAGENAME).
#
# @param mixed $index one of the MAG_* identifiers handled below
# @return mixed the current value, or NULL for any other identifier
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( 'm' );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = date('j');
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one (patch by Dori)
			$value = $wgLang->getNsText( $this->mTitle->getNamespace() );
			break;
		case MAG_CURRENTDAYNAME:
			# date('w') is 0-based, hence the +1 passed to getWeekdayName()
			$value = $wgLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( 'Y' );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
# Initialise the magic variables (like CURRENTMONTHNAME): $this->mVariables
# is rebuilt from scratch with one entry set per ID in $wgVariableIDs.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
# Expand {{...}} and {{{...}}} constructs in $text via the wf*Substitution
# callbacks. Variable and argument substitution only run for HTML output;
# the template (brace) substitution always runs.
#
# @param string $text
# @param array $args arguments of the enclosing template, made available to
#                    the callbacks through $this->mArgStack
# @return string
/* private */ function replaceVariables( $text, $args = array() )
{
	global $wgLang, $wgScript, $wgArticlePath;

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	# Character classes and patterns used below
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
	$variablePattern = '/{{([' . $nonBraceChars . ']*?)}}/';
	$argumentPattern = '/(\\n?){{{([' . $titleChars . ']*?)}}}/';
	$templatePattern = '/(\\n?){{([' . $nonBraceChars . ']*)(\\|.*?|)}}/s';

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# The callbacks are plain functions; they find this parser through the
	# global. PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

		# Argument substitution
		$text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
	}

	# Template substitution
	$text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
# preg_replace_callback handler for {{VARIABLE}}.
#
# @param array $matches [0] the whole match, [1] the text between the braces
# @return string the variable's value, or the unchanged match when the name
#                is not a known variable
function variableSubstitution( $matches )
{
	$name = $matches[1];

	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$value = $this->mVariables[$name];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
# preg_replace_callback handler for {{...}} constructs.
#
# The text before the first | ($part1) is tried against a series of
# interpretations, in this order, until one sets $found: a {{{ inside the
# match, SUBST handling, MSGNW / MSG / INT prefixes, NS:, LOCALURL /
# LOCALURLE, internal variables, arguments passed in by the caller, and
# finally loading a page in the template namespace. For HTML output the
# result is then either escaped (MSGNW) or run through stripParse() with
# the parsed arguments.
#
# @param array $matches [1] optional newline before the braces, [2] text
#                       before the first |, [3] the rest, starting with |
# @return string replacement text; the unchanged match if nothing applied
1473 function braceSubstitution( $matches )
1475 global $wgLinkCache, $wgLang;
1476 $fname = 'Parser::braceSubstitution';
1477 $found = false;
1478 $nowiki = false;
1479 $noparse = false;
1481 $title = NULL;
1483 # $newline is an optional newline character before the braces
1484 # $part1 is the bit before the first |, and must contain only title characters
1485 # $args is a list of arguments, starting from index 0, not including $part1
1487 $newline = $matches[1];
1488 $part1 = $matches[2];
1489 # If the third subpattern matched anything, it will start with |
1490 if ( $matches[3] !== '' ) {
1491 $args = explode( '|', substr( $matches[3], 1 ) );
1492 } else {
1493 $args = array();
1495 $argc = count( $args );
1497 # {{{}}}
# A match containing a triple brace is returned unchanged and unparsed
1498 if ( strpos( $matches[0], '{{{' ) !== false ) {
1499 $text = $matches[0];
1500 $found = true;
1501 $noparse = true;
1504 # SUBST
# A SUBST prefix is only acted upon in the OT_WIKI pass; conversely, in the
# OT_WIKI pass anything without a SUBST prefix is returned unchanged
1505 if ( !$found ) {
1506 $mwSubst =& MagicWord::get( MAG_SUBST );
1507 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1508 if ( $this->mOutputType != OT_WIKI ) {
1509 # Invalid SUBST not replaced at PST time
1510 # Return without further processing
1511 $text = $matches[0];
1512 $found = true;
1513 $noparse= true;
1515 } elseif ( $this->mOutputType == OT_WIKI ) {
1516 # SUBST not found in PST pass, do nothing
1517 $text = $matches[0];
1518 $found = true;
1522 # MSG, MSGNW and INT
1523 if ( !$found ) {
1524 # Check for MSGNW:
1525 $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1526 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1527 $nowiki = true;
1528 } else {
1529 # Remove obsolete MSG:
1530 $mwMsg =& MagicWord::get( MAG_MSG );
1531 $mwMsg->matchStartAndRemove( $part1 );
1534 # Check if it is an internal message
1535 $mwInt =& MagicWord::get( MAG_INT );
1536 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1537 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1538 $text = wfMsgReal( $part1, $args, true );
1539 $found = true;
1544 # NS
1545 if ( !$found ) {
1546 # Check for NS: (namespace expansion)
1547 $mwNs = MagicWord::get( MAG_NS );
1548 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A value with a non-zero intval() is used as namespace number; anything
# else is looked up as a lower-cased canonical namespace name
1549 if ( intval( $part1 ) ) {
1550 $text = $wgLang->getNsText( intval( $part1 ) );
1551 $found = true;
1552 } else {
1553 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1554 if ( !is_null( $index ) ) {
1555 $text = $wgLang->getNsText( $index );
1556 $found = true;
1562 # LOCALURL and LOCALURLE
1563 if ( !$found ) {
1564 $mwLocal = MagicWord::get( MAG_LOCALURL );
1565 $mwLocalE = MagicWord::get( MAG_LOCALURLE );
# $func holds the name of the Title method to call, or '' if neither matched
1567 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1568 $func = 'getLocalURL';
1569 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1570 $func = 'escapeLocalURL';
1571 } else {
1572 $func = '';
1575 if ( $func !== '' ) {
1576 $title = Title::newFromText( $part1 );
1577 if ( !is_null( $title ) ) {
1578 if ( $argc > 0 ) {
1579 $text = $title->$func( $args[0] );
1580 } else {
1581 $text = $title->$func();
1583 $found = true;
1588 # Internal variables
1589 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1590 $text = $this->mVariables[$part1];
1591 $found = true;
1592 $this->mOutput->mContainsOldMagic = true;
1595 # Arguments input from the caller
1596 $inputArgs = end( $this->mArgStack );
1597 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1598 $text = $inputArgs[$part1];
1599 $found = true;
1602 # Load from database
1603 if ( !$found ) {
1604 $title = Title::newFromText( $part1, NS_TEMPLATE );
1605 if ( !is_null( $title ) && !$title->isExternal() ) {
1606 # Check for excessive inclusion
1607 $dbk = $title->getPrefixedDBkey();
1608 if ( $this->incrementIncludeCount( $dbk ) ) {
1609 $article = new Article( $title );
1610 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1611 if ( $articleContent !== false ) {
1612 $found = true;
1613 $text = $articleContent;
1618 # If the title is valid but undisplayable, make a link to it
1619 if ( $this->mOutputType == OT_HTML && !$found ) {
1620 $text = '[[' . $title->getPrefixedText() . ']]';
1621 $found = true;
1626 # Recursive parsing, escaping and link table handling
1627 # Only for HTML output
1628 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1629 $text = wfEscapeWikiText( $text );
1630 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1631 # Clean up argument array
# Arguments without '=' are numbered from 1; "name=value" arguments are
# stored under their trimmed name
1632 $assocArgs = array();
1633 $index = 1;
1634 foreach( $args as $arg ) {
1635 $eqpos = strpos( $arg, '=' );
1636 if ( $eqpos === false ) {
1637 $assocArgs[$index++] = $arg;
1638 } else {
1639 $name = trim( substr( $arg, 0, $eqpos ) );
1640 $value = trim( substr( $arg, $eqpos+1 ) );
1641 if ( $value === false ) {
1642 $value = '';
1644 if ( $name !== false ) {
1645 $assocArgs[$name] = $value;
1650 # Do not enter included links in link table
1651 if ( !is_null( $title ) ) {
1652 $wgLinkCache->suspend();
1655 # Run full parser on the included text
1656 $text = $this->stripParse( $text, $newline, $assocArgs );
1658 # Resume the link cache and register the inclusion as a link
1659 if ( !is_null( $title ) ) {
1660 $wgLinkCache->resume();
1661 $wgLinkCache->addLinkObj( $title );
1665 if ( !$found ) {
1666 return $matches[0];
1667 } else {
1668 return $text;
# Triple brace replacement -- used for template arguments.
#
# @param array $matches [0] the whole match, [1] optional leading newline,
#                       [2] the argument name
# @return string the parsed argument value, or the unchanged match when the
#                innermost argument set has no argument of that name
function argSubstitution( $matches )
{
	$argName = trim( $matches[2] );
	# Arguments of the template currently being expanded (top of the stack)
	$currentArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $currentArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $currentArgs[$argName], $matches[1], array() );
}
# Count one more inclusion of the entity identified by $dbk.
# Returns true if the function is allowed to include this entity, i.e. as
# long as its count has not gone beyond MAX_INCLUDE_REPEAT.
#
# @param string $dbk key identifying the included entity
# @return bool
function incrementIncludeCount( $dbk )
{
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1701 # Cleans up HTML, removes dangerous tags and attributes
#
# HTML comments are removed first. The text is then split on '<' and each
# piece is matched as "tag, attributes, closing bracket, following text".
# Tags on the whitelist are re-emitted with their attributes passed through
# fixTagAttributes(); every other piece gets its '<' and '>' escaped.
# The whitelist is empty unless $wgUserHtml is set. Without $wgUseTidy a
# tag stack additionally rejects mismatched closing tags, repeated
# non-nestable tags and table cells/rows outside a table, and the tags
# still open at the end are closed.
#
# @param string $text
# @return string
1702 /* private */ function removeHTMLtags( $text )
1704 global $wgUseTidy, $wgUserHtml;
1705 $fname = 'Parser::removeHTMLtags';
1706 wfProfileIn( $fname );
1708 if( $wgUserHtml ) {
1709 $htmlpairs = array( # Tags that must be closed
1710 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1711 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1712 'strike', 'strong', 'tt', 'var', 'div', 'center',
1713 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1714 'ruby', 'rt' , 'rb' , 'rp', 'p'
1716 $htmlsingle = array(
1717 'br', 'hr', 'li', 'dt', 'dd'
1719 $htmlnest = array( # Tags that can be nested--??
1720 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
1721 'dl', 'font', 'big', 'small', 'sub', 'sup'
1723 $tabletags = array( # Can only appear inside table
1724 'td', 'th', 'tr'
1726 } else {
1727 $htmlpairs = array();
1728 $htmlsingle = array();
1729 $htmlnest = array();
1730 $tabletags = array();
# From here on the table tags also count as "single" tags, i.e. they are
# not pushed on the tag stack
1733 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1734 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1736 $htmlattrs = $this->getHTMLattrs () ;
1738 # Remove HTML comments
# NOTE(review): the pattern has a single capturing group, so the '$2' in the
# replacement refers to a group that does not exist and expands to nothing.
1739 $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );
1741 $bits = explode( '<', $text );
1742 $text = array_shift( $bits );
1743 if(!$wgUseTidy) {
1744 $tagstack = array(); $tablestack = array();
1745 foreach ( $bits as $x ) {
# Notices and warnings are switched off around the match and list() because
# $regs is not fully populated when the piece does not look like a tag
1746 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1747 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1748 $x, $regs );
1749 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1750 error_reporting( $prev );
1752 $badtag = 0 ;
1753 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1754 # Check our stack
1755 if ( $slash ) {
1756 # Closing a tag...
# A closing tag must match the top of the stack; on mismatch the popped
# entry is pushed back and the tag is escaped
1757 if ( ! in_array( $t, $htmlsingle ) &&
1758 ( $ot = @array_pop( $tagstack ) ) != $t ) {
1759 @array_push( $tagstack, $ot );
1760 $badtag = 1;
1761 } else {
# Leaving a table restores the tag stack saved when the table was opened
1762 if ( $t == 'table' ) {
1763 $tagstack = array_pop( $tablestack );
1765 $newparams = '';
1767 } else {
1768 # Keep track for later
1769 if ( in_array( $t, $tabletags ) &&
1770 ! in_array( 'table', $tagstack ) ) {
1771 $badtag = 1;
1772 } else if ( in_array( $t, $tagstack ) &&
1773 ! in_array ( $t , $htmlnest ) ) {
1774 $badtag = 1 ;
1775 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table saves the current stack and starts a fresh one
1776 if ( $t == 'table' ) {
1777 array_push( $tablestack, $tagstack );
1778 $tagstack = array();
1780 array_push( $tagstack, $t );
1782 # Strip non-approved attributes from the tag
1783 $newparams = $this->fixTagAttributes($params);
1786 if ( ! $badtag ) {
1787 $rest = str_replace( '>', '&gt;', $rest );
1788 $text .= "<$slash$t $newparams$brace$rest";
1789 continue;
# Not an accepted tag: emit the piece as literal text
1792 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1794 # Close off any remaining tags
1795 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1796 $text .= "</$t>\n";
1797 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
1799 } else {
1800 # this might be possible using tidy itself
1801 foreach ( $bits as $x ) {
1802 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1803 $x, $regs );
1804 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1805 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1806 $newparams = $this->fixTagAttributes($params);
1807 $rest = str_replace( '>', '&gt;', $rest );
1808 $text .= "<$slash$t $newparams$brace$rest";
1809 } else {
1810 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1814 wfProfileOut( $fname );
1815 return $text;
/**
 * Post-process the <h1>..<h6> headings of rendered page HTML.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections if the parser options ask for it and
 *    the title may be edited
 * 3) Add a table of contents after the first block of text if enabled
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * @param string $text   HTML to process
 * @param bool   $isMain when false, the table of contents is never inserted
 * @return string
 */
/* private */ function formatHeadings( $text, $isMain=true )
{
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, [2] = rest of the opening tag incl. '>', [3] = content
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();          # replacement HTML per headline, by position
	$sublevelCount = array(); # number of headlines seen so far, by level
	$refers = array();        # number of uses of each canonized headline
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Join the counters of all levels up to this one with dots,
			# skipping levels that have no headline yet
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections.
		# The second call works on the result of the first one; passing $headline
		# again would throw the first expansion away.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		# Keep ':' readable and use '.' instead of '%' for all other escapes
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refcount = ++$refers[$canonized_headline];

		# Prepend the number to the heading text
		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section;
		# repeated headlines get a _2, _3, ... suffix
		$anchor = $canonized_headline;
		if( $refcount > 1 ) {
			$anchor .= '_' . $refcount;
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		$head[$headlineCount] = '';
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );

	foreach( $blocks as $i => $block ) {
		# An [edit] link for the top block of text ($i == 0) used to be added
		# here when section editing is enabled. It is disabled because it
		# broke block formatting, for example a bullet point in the top line.
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		# Block $i is followed by headline $i
		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
	}

	return $full;
}
# Return an HTML link for the "ISBN 123456" text.
#
# Every "ISBN " followed by optional spaces and a run of digits, dashes or
# upper-case letters is turned into a link to Special:Booksources; the
# dashes are removed from the number passed in the link's query string.
# Occurrences that are not followed by a usable number are left as they are.
#
# @param string $text
# @return string
/* private */ function magicISBN( $text )
{
	# explode() rather than split(): the separator is a literal string and
	# split() no longer exists in current PHP versions
	$pieces = explode( 'ISBN ', ' ' . $text );
	if ( count( $pieces ) < 2 ) return $text;
	$text = (string)substr( array_shift( $pieces ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $pieces as $rest ) {
		# strspn() measures the leading run without ever indexing past the
		# end of the string, so a piece ending in the number is safe
		$blankLength = strspn( $rest, ' ' );
		$blank = str_repeat( ' ', $blankLength );
		$rest = (string)substr( $rest, $blankLength );

		$isbnLength = strspn( $rest, $valid );
		$isbn = (string)substr( $rest, 0, $isbnLength );
		$rest = (string)substr( $rest, $isbnLength );

		$num = str_replace( '-', '', $isbn );

		if ( '' === $num ) {
			# Nothing to link to. $isbn can only consist of dashes here; it
			# is put back so that no input text gets lost
			$text .= "ISBN $blank$isbn$rest";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $rest;
		}
	}
	return $text;
}
# Return an HTML link for the "RFC 1234" text.
#
# Every "RFC " followed by optional spaces and a run of digits is turned
# into an external link; the URL is the 'rfcurl' message with $1 replaced
# by the number. Occurrences without a number are left as they are.
#
# @param string $text
# @return string
/* private */ function magicRFC( $text )
{
	# explode() rather than split(): the separator is a literal string and
	# split() no longer exists in current PHP versions
	$pieces = explode( 'RFC ', ' ' . $text );
	if ( count( $pieces ) < 2 ) return $text;
	$text = (string)substr( array_shift( $pieces ), 1 );
	$valid = '0123456789';

	foreach ( $pieces as $rest ) {
		# strspn() measures the leading run without ever indexing past the
		# end of the string, so a piece ending in the number is safe
		$blankLength = strspn( $rest, ' ' );
		$blank = str_repeat( ' ', $blankLength );
		$rest = (string)substr( $rest, $blankLength );

		$rfcLength = strspn( $rest, $valid );
		$rfc = (string)substr( $rest, 0, $rfcLength );
		$rest = (string)substr( $rest, $rfcLength );

		if ( '' === $rfc ) {
			$text .= "RFC $blank$rest";
		} else {
			$url = wfMsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$rest}";
		}
	}
	return $text;
}
# Transform wiki text before it is saved: the second documented entry point
# of the parser, producing altered wiki markup rather than HTML.
#
# Sets the parser up for OT_WIKI output, normalises "\r\n" line ends to
# "\n", then runs strip(), pstPass2() and the two unstrip passes on the text.
#
# @param string $text wiki text as submitted
# @param Title $title stored (by reference) as $this->mTitle
# @param User $user passed on to pstPass2()
# @param ParserOptions $options stored as $this->mOptions
# @param bool $clearState when true, clearState() is called first
# @return string the transformed wiki text
2092 function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
2094 $this->mOptions = $options;
2095 $this->mTitle =& $title;
2096 $this->mOutputType = OT_WIKI;
2098 if ( $clearState ) {
2099 $this->clearState();
2102 $stripState = false;
2103 $pairs = array(
2104 "\r\n" => "\n",
2106 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
2107 // now with regexes
# NOTE(review): this listing has lost the lines directly before and after the
# following regex block (original lines 2108 and 2114). Check the real file
# to see whether they are comment delimiters that disable the block.
2109 $pairs = array(
2110 "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
2111 "/<br *?>/i" => "<br />",
2113 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
# $stripState is filled by strip() and handed to the unstrip passes below
2115 $text = $this->strip( $text, $stripState, false );
2116 $text = $this->pstPass2( $text, $user );
2117 $text = $this->unstrip( $text, $stripState );
2118 $text = $this->unstripNoWiki( $text, $stripState );
2119 return $text;
# Pre-save transformation of wiki markup. In order, it:
#   1. runs replaceVariables() on the text,
#   2. expands the ~~~, ~~~~ and ~~~~~ signature markers,
#   3. completes "pipe trick" links such as [[page (context)|]] and [[|page]],
#   4. runs the MAG_SUBST callback substitution,
#   5. trims trailing whitespace and removes the MAG_END magic word.
#
# @param string $text   wiki markup
# @param object &$user  user whose name and nickname are used for signatures
# @return string        the transformed wiki markup
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) {
		# No nickname set: sign with the plain user name
		$k = $n;
	}

	# Note: this is an ugly timezone hack for the European wikis.
	# The TZ environment variable is pointed at $wgLocaltimezone while the
	# timestamp is formatted and restored right afterwards.
	$useLocalTz = isset( $wgLocaltimezone );
	if ( $useLocalTz ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( $useLocalTz ) {
		# Restore the saved value. This used to read the undefined variable
		# $oldtzs, which always reset TZ to an empty string.
		putenv( 'TZ=' . $oldtz );
	}

	# The markers are fixed strings, so replace them literally: a pattern
	# replacement would treat "$1" or "\1" inside a nickname as a
	# backreference and silently drop it. Longest marker first.
	$userLink = '[[' . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
											# [[ns:page (cont)|]]

	# If the current title looks like "Name (context)", remember the context
	# so that [[|page]] can be completed with it.
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}

	# Most specific pattern first
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# NOTE(review): the listing this was taken from skips two lines directly
	# before and after the next four statements, and replaceVariables() above
	# is already described as handling {{subst:xxx}}. Confirm against the real
	# file that this second substitution pass is live code and still wanted.
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
# Prime the member variables that parse() would normally initialise, so that
# code outside the class can call individual parser methods directly.
#
# @param object &$title      title to parse against (stored by reference)
# @param object $options     parser options to use
# @param int    $outputType  value to store in mOutputType
# @param bool   $clearState  call clearState() afterwards (default true)
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
# Run variable replacement over a message text with output type OT_MSG.
# The parser is pointed at the global $wgTitle and its state is cleared first.
#
# A static flag marks a call in progress; a nested call made while it is set
# gets its text back unchanged, which guards against infinite recursion.
#
# @param string $text     message text
# @param object $options  parser options to use
# @return string          the text after replaceVariables(), or the input
#                         unchanged for a nested call
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	if ( !$executing ) {
		$executing = true;

		$this->mOutputType = OT_MSG;
		$this->mOptions = $options;
		$this->mTitle = $wgTitle;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}
	return $text;
}
# Register a callback for an HTML-style tag, e.g. <yourtag>special text</yourtag>.
# As originally noted, the callback is called with the text inside the tag and
# should transform and return it.
#
# The callback is stored in the global $wgParserHooks under the tag name.
#
# @param string $tag       tag name used as the key
# @param mixed  $callback  callback to store for that tag
# @return mixed            the callback previously stored for $tag, or null
/* static */ function setHook( $tag, $callback ) {
	global $wgParserHooks;

	$previous = isset( $wgParserHooks[$tag] ) ? $wgParserHooks[$tag] : null;
	$wgParserHooks[$tag] = $callback;

	return $previous;
}
# Holds the result of a parse: the output text, the language links and
# category links that go with it, an "old magic" flag and a cache timestamp.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor (PHP 4 style: named after the class).
	#
	# @param string $text              output text
	# @param array  $languageLinks     language links
	# @param array  $categoryLinks     category links
	# @param bool   $containsOldMagic  initial value of the old-magic flag
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Plain getters
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Setters; each passes the member and the new value to wfSetVar() and
	# returns whatever that helper returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold another ParserOutput's links and old-magic flag into this one.
	# The text and the cache time are left untouched.
	#
	# @param object $other  the ParserOutput to merge in
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Take the OTHER object's categories; this used to append this
		# object's own language links to the category list instead.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}
}
# Bundle of settings a parse runs with. The values are copied from site-wide
# globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors, one get/set pair per option. Setters hand the member and the
	# new value to wfSetVar() (wfSetRef() for the skin) and return its result.
	function getUseTeX() { return $this->mUseTeX; }
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

	function getSkin() { return $this->mSkin; }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

	function getDateFormat() { return $this->mDateFormat; }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

	function getEditSection() { return $this->mEditSection; }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	function getNumberHeadings() { return $this->mNumberHeadings; }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

	function getShowToc() { return $this->mShowToc; }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Build a new ParserOptions and initialise it from $user.
	#
	# @param object &$user  user to take preferences from
	# @return object        the initialised ParserOptions
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Fill every option: site-wide switches come from the $wg* globals, the
	# rest from the user's skin and preferences. When $userInput is empty a
	# fresh User object, marked as loaded, is used instead.
	#
	# @param object &$userInput  user to read preferences from, or a false value
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user settings
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );
	}
}
# Regex callbacks, used in Parser::replaceVariables
#
# Passes a regex match array to braceSubstitution() on the parser currently
# held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
# Regex callback: passes the match array to argSubstitution() on the parser
# currently held in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
# Regex callback: passes the match array to variableSubstitution() on the
# parser currently held in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}