fixed error handling (at least partially)
[mediawiki.git] / includes / Parser.php
blobff8a94f272539213e919f9ce0e9d7513937e5006
1 <?php
3 // require_once('Tokenizer.php');
5 # PHP Parser
7 # Processes wiki markup
9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
12 # Globals used:
13 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
17 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
18 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
19 # $wgLocaltimezone
21 # * only within ParserOptions
24 #----------------------------------------
25 # Variable substitution O(N^2) attack
26 #-----------------------------------------
27 # Without countermeasures, it would be possible to attack the parser by saving a page
28 # filled with a large number of inclusions of large pages. The size of the generated
29 # page would be proportional to the square of the input size. Hence, we limit the number
30 # of inclusions of any given page, thus bringing any attack back to O(N).
# Upper bound on how many times any single page may be included,
# see the O(N^2) note above.
define( 'MAX_INCLUDE_REPEAT', 5 );

# Values that $mOutputType may take
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): when passed as $tag, HTML
# comments are extracted instead of <XML>-style tags. Must not be
# anything that could be wanted as real wiki syntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the placeholder markers, shared by at least two functions
define( 'UNIQ_PREFIX', 'NaodW29' );
49 class Parser
51 # Persistent:
52 var $mTagHooks;
54 # Cleared with clearState():
55 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
56 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
58 # Temporary:
59 var $mOptions, $mTitle, $mOutputType;
# Constructor: resets the per-parse state and starts with an empty
# tag hook table.
function Parser() {
	$this->clearState();
	$this->mTagHooks = array();
}
# Reset every member listed under "Cleared with clearState()" to its
# initial value. mTagHooks and the temporary members are left alone.
function clearState() {
	# Containers
	$this->mStripState = array();
	$this->mIncludeCount = array();
	$this->mArgStack = array();

	# Flags and counters
	$this->mAutonumber = 0;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mLastSection = "";

	# Fresh output object
	$this->mOutput = new ParserOutput;
}
78 # First pass--just handle <nowiki> sections, pass the rest off
79 # to internalParse() which does all the real work.
81 # Returns a ParserOutput
# Convert wiki markup to HTML.
#
# $text       wiki markup to render
# $title      title object of the page being rendered (stored by reference)
# $options    ParserOptions-style object, stored in $this->mOptions
# $linestart  passed through to internalParse() and doBlockLevels()
# $clearState when true, clearState() is called before parsing
#
# Returns $this->mOutput with the rendered text set on it.
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if ( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (\\?|:|!|\\302\\273)/i' => '\\1&nbsp;\\2',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The lookahead used to start with a stray ':' ("(?!:amp;") and the
			# hex class was "A-fa-f", which also accepted G-Z and punctuation.
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		# With tidy enabled only the french spaces and <center> are handled here
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	# <nowiki> content goes back in only after block-level processing
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if ( $wgUseTidy ) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
# Returns two random 31-bit integers rendered as lowercase hex and
# concatenated (no zero padding, so the length varies).
/* static */ function getRandomString() {
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
143 # Replaces all occurrences of <$tag>content</$tag> in the text
144 # with a random marker and returns the new text. the output parameter
145 # $content will be an associative array filled with data on the form
146 # $unique_marker => content.
148 # If $content is already set, the additional entries will be appended
150 # If $tag is set to STRIP_COMMENTS, the function will extract
151 # <!-- HTML comments -->
# $tag          tag name without angle brackets, or STRIP_COMMENTS
# $text         text to scan
# $content      in/out: receives marker => inner content; initialised to an
#               empty array when passed in empty
# $uniq_prefix  string prepended to every generated marker
#
# Returns $text with each <$tag>...</$tag> (or <!-- ... -->) replaced by
# its marker. An opening tag with no closing tag swallows the rest of
# the text as its content.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# The patterns do not change between iterations, so build them once.
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = '/<!--/i';
		$closePattern = '/-->/i';
	} else {
		# Quote the tag name so regex metacharacters in it are taken literally
		$quotedTag = preg_quote( $tag, '/' );
		$openPattern = "/<\\s*{$quotedTag}\\s*>/i";
		$closePattern = "/<\\/\\s*{$quotedTag}\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No opening tag left, or nothing follows it
			$text = '';
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# $q[1] is absent when the tag is never closed; the loop then ends
			# instead of reading an undefined offset.
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
185 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
186 # If $render is set, performs necessary rendering operations on plugins
187 # Returns the text, and fills an array with data needed in unstrip()
188 # If the $state is already a valid strip state, it adds to the state
190 # When $stripcomments is set, HTML comments <!-- like this -->
191 # will be stripped in addition to other tags. This is important
192 # for section editing, where these comments cause confusion when
193 # counting the sections in the wikisource
# $text           text to strip
# $state          in/out strip state: array of tag name => (marker => replacement).
#                 Created when empty, otherwise the new entries are merged in.
# $stripcomments  also strip <!-- HTML comments -->
#
# Returns the text with every stripped section replaced by its marker.
# Replacement content is rendered for output when $this->mOutputType is
# OT_HTML, otherwise the original tag is restored around the content.
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	$uniq_prefix = UNIQ_PREFIX;

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source, escaped. The closing tag
				# used to be emitted as a second opening "&lt;math&gt;".
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		$builtin = array(
			'nowiki' => $nowiki_content,
			'math' => $math_content,
			'pre' => $pre_content,
			'comment' => $comment_content,
		);
		# A pre-existing state may lack some buckets, so create
		# missing ones instead of adding an array to NULL.
		foreach ( $builtin as $key => $array ) {
			if ( isset( $state[$key] ) && is_array( $state[$key] ) ) {
				$state[$key] = $state[$key] + $array;
			} else {
				$state[$key] = $array;
			}
		}
		# Extension content for a tag not yet present in the state used
		# to be dropped, leaving its markers in the output.
		foreach( $ext_content as $tag => $array ) {
			if ( isset( $state[$tag] ) && is_array( $state[$tag] ) ) {
				$state[$tag] = $state[$tag] + $array;
			} else {
				$state[$tag] = $array;
			}
		}
	} else {
		$state = array(
			'nowiki' => $nowiki_content,
			'math' => $math_content,
			'pre' => $pre_content,
			'comment' => $comment_content,
		) + $ext_content;
	}
	return $text;
}
285 # always call unstripNoWiki() after this one
# Put back everything held in the strip state except the 'nowiki'
# bucket, which unstripNoWiki() restores later.
#
# $text   text containing markers
# $state  strip state: array of bucket name => (marker => replacement)
#
# Returns the text with the markers replaced. A non-array state leaves
# the text untouched.
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state, true ) as $bucket => $contentDict ) {
		if ( $bucket === 'nowiki' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}
	return $text;
}
299 # always call this after unstrip() to preserve the order
# Replace the markers of the 'nowiki' bucket of the strip state with
# their stored content.
#
# $text   text containing markers
# $state  strip state; only $state['nowiki'] is read
#
# Returns the text unchanged when there is no 'nowiki' bucket.
function unstripNoWiki( $text, &$state ) {
	if ( !isset( $state['nowiki'] ) || !is_array( $state['nowiki'] ) ) {
		return $text;
	}
	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state['nowiki'], true ) as $marker => $content ) {
		$text = str_replace( $marker, $content, $text );
	}
	return $text;
}
309 # Add an item to the strip state
310 # Returns the unique tag which must be inserted into the stripped text
311 # The tag will be replaced with the original text in unstrip()
# $text   content to store
# $state  in/out strip state; created with empty built-in buckets when empty
#
# Returns the generated marker, under which $text is stored in
# $state['item'].
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same built-in buckets as strip() creates; strip()'s merge
		# branch reads 'comment', which used to be missing here.
		$state = array(
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
326 # categoryMagic
327 # generate a list of subcategories and pages for a category
328 # depending on wfMsg("usenewcategorypage") it either calls the new
329 # or the old code. The new code will not work properly for some
330 # languages due to sorting issues, so they might want to turn it
331 # off.
# Dispatch to the old or new category listing depending on whether the
# 'usenewcategorypage' message starts with '0'.
function categoryMagic() {
	$setting = wfMsg('usenewcategorypage');
	# @: the message may be empty, in which case offset 0 does not exist
	$useOldCode = ( '0' == @$setting[0] );
	if ( !$useOldCode ) {
		return $this->newCategoryMagic();
	}
	return $this->oldCategoryMagic();
}
342 # This method generates the list of subcategories and pages for a category
# Returns HTML: a comma separated list of subcategories followed by a
# comma separated list of pages, or '' when category magic is disabled
# or the current title is not in the category namespace.
function oldCategoryMagic () {
	global $wgLang , $wgUser ;
	# Both early exits return '' (the first one used to return NULL)
	if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;
	# NOTE(review): $id is never read; the call is kept in case
	# getArticleID() has side effects -- confirm before removing.
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	# Buffer all rows before building any links
	while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
		$r .= implode ( ', ' , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( 'category_header', $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ', ' , $articles ) ;
	}

	return $r ;
}
# Generates the list of subcategories and pages for a category, grouped
# under a heading per starting character. Lists of more than six
# entries are laid out in three table columns.
#
# Returns HTML, or '' when category magic is disabled or the current
# title is not in the category namespace.
function newCategoryMagic () {
	global $wgLang , $wgUser ;
	# Both early exits return '' (the first one used to return NULL)
	if ( !$this->mOptions->getUseCategoryMagic() ) return '' ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$articles_start_char = array();
	$children = array() ;
	$children_start_char = array();
	# NOTE(review): $id is never read; the call is kept in case
	# getArticleID() has side effects -- confirm before removing.
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
		"cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY " .
		"cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		$t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != '' ) $t .= ':' ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			$ctitle = str_replace( '_',' ',$x->cur_title );
			array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory

			// If there's a link from Category:A to Category:B, the sortkey of the resulting
			// entry in the categorylinks table is Category:A, not A, which it SHOULD be.
			// Workaround: If sortkey == "Category:".$title, then use $title for sorting,
			// else use sortkey...
			if ( ($ns.":".$ctitle) == $x->cl_sortkey ) {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
			} else {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
			}
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
			array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	$ti = $this->mTitle->getText() ;

	# Don't show subcategories section if there are none.
	if ( count ( $children ) > 0 ) {
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n"
			. wfMsg( 'subcategorycount', count( $children ) );
		$r .= $this->formatCategoryEntries( $children, $children_start_char );
	}

	# The articles header and count are shown even when there are no articles
	$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n" .
		wfMsg( 'categoryarticlecount', count( $articles ) );
	$r .= $this->formatCategoryEntries( $articles, $articles_start_char );

	return $r ;
}

# Helper for newCategoryMagic(): render already-built links as lists
# with an <h3> per starting character.
#
# $items       list of link HTML, in display order
# $startChars  list parallel to $items holding each entry's starting character
#
# Returns '' for an empty list, a single list for up to six entries and
# a three column table otherwise.
/* private */ function formatCategoryEntries( $items, $startChars ) {
	$total = count( $items );
	if ( $total == 0 ) {
		return '';
	}

	$r = '';
	if ( $total > 6 ) {
		// divide list into three roughly equal chunks
		$chunk = (int) ( $total / 3 );

		$r .= '<table width="100%"><tr valign="top">';

		// loop through the chunks; later chunks are one entry longer so
		// that the remainder of the division is covered
		for ( $startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
			$chunkIndex < 3;
			$chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1 )
		{
			$r .= '<td><ul>';
			for ( $index = $startChunk;
				$index < $endChunk && $index < $total;
				$index++ )
			{
				// New heading at the beginning of a chunk or when the starting
				// letter changes. The chunk test comes first so that index 0
				// never reads the non-existent entry -1.
				if ( ( $index == $startChunk )
					|| ( $startChars[$index] != $startChars[$index - 1] ) )
				{
					$r .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
				}
				$r .= "<li>{$items[$index]}</li>";
			}
			$r .= '</ul></td>';
		}
		$r .= '</tr></table>';
	} else {
		// short lists: one plain list, heading on every change of letter
		$r .= "<h3>{$startChars[0]}</h3>\n";
		$r .= '<ul><li>' . $items[0] . '</li>';
		for ( $index = 1; $index < $total; $index++ ) {
			if ( $startChars[$index] != $startChars[$index - 1] ) {
				$r .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
			}
			$r .= "<li>{$items[$index]}</li>";
		}
		$r .= '</ul>';
	}
	return $r;
}
577 # Return allowed HTML attributes
# Returns the list of attribute names that may be kept on HTML tags
# (no scripting attributes). 'width' is listed twice, as before.
function getHTMLattrs () {
	$general = array( 'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor' );
	# BR, HR, BLOCKQUOTE / Q
	$misc = array( 'clear', 'noshade', 'cite' );
	# FONT
	$font = array( 'size', 'face', 'color' );
	# For various lists, mostly deprecated but safe
	$lists = array( 'type', 'start', 'value', 'compact' );
	# Tables
	$tables = array(
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan'
	);
	# For CSS
	$css = array( 'id', 'class', 'name', 'style' );

	return array_merge( $general, $misc, $font, $lists, $tables, $css );
}
594 # Remove non approved attributes and javascript in css
# $t  the attribute part of an HTML tag
#
# Returns the attributes whose names are listed by getHTMLattrs(),
# trimmed, or '' when the remaining text contains a suspicious style
# attribute (or when $t is blank).
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used preg_replace() with the /e modifier, which pasted the
	# matched attribute value into PHP code and evaluated it; a value
	# such as {${...}} would have been executed. A callback keeps the
	# value as plain data.
	$t = preg_replace_callback(
		'/(\w+)(\s*=\s*([^\s">]+|"[^">]*"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] the value (possibly quoted) when
# one was given. Returns the attribute unchanged if its name is
# allowed, otherwise ''.
/* private */ function fixTagAttributesCallback( $m ) {
	static $allowed = false;
	if ( $allowed === false ) {
		$allowed = $this->getHTMLattrs();
	}
	if ( !in_array( strtolower( $m[1] ), $allowed ) ) {
		return '';
	}
	if ( isset( $m[3] ) && $m[3] != '' ) {
		return $m[1] . '=' . $m[3];
	}
	return $m[1];
}
617 # interface with html tidy, used if $wgUseTidy = true
# Pipe $text, wrapped in a minimal XHTML document, through the external
# tidy binary ($wgTidyBin, configured by $wgTidyConf and $wgTidyOpts).
#
# Returns tidy's output. If tidy produced nothing for non-empty input,
# the original text is returned followed by an HTML comment reporting
# the failure.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy: appending to the global itself made the encoding
	# flag pile up with every call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array('pipe', 'r'),             # tidy's stdin
		1 => array('pipe', 'w'),             # tidy's stdout
		2 => array('file', '/dev/null', 'a') # stderr is discarded
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
665 # parse the wiki syntax used to render tables
# Works line by line on the table markup:
#   {|  opens a table        |}  closes it
#   |-  starts a new row     |+  caption
#   |   data cell(s)         !   header cell(s)
# Several cells may share a line, separated by || (or !! for headers).
# The four arrays are stacks with one entry per currently open table,
# so tables can be nested. Tables still open at the end of the text
# are closed.
#
# $t  wiki text
# Returns the text with the table markup replaced by HTML.
function doTableStuff ( $t ) {
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( '{|' == substr ( $x , 0 , 2 ) )
		{
			# Attributes start right after "{|". Offset 3 was used before, which
			# dropped the first attribute character when no space followed.
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			# What follows the dashes becomes the attributes of the next <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( '|+' == substr ( $x , 0 , 2 ) )
			{
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Cells need a row; open one if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell of this table, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;
				# "attributes|content": a single | separates cell attributes
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close with the tag that was actually opened (td, th or caption);
		# '</td>' was emitted unconditionally before.
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		array_pop ( $ltr ) ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	return $t ;
}
765 # Parses the text and adds the result to the strip state
766 # Returns the strip tag
# $text     wiki text to parse
# $newline  prepended to the returned marker; its boolean value is
#           passed to internalParse() as $linestart
# $args     passed through to internalParse()
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
# Runs the individual markup passes over already stripped text.
# The order of the passes matters.
#
# $text       stripped wiki text
# $linestart  accepted for the callers; not used by the visible body
# $args       passed to replaceVariables()
# $isMain     passed to formatHeadings()
#
# Returns the processed text. On the first call on this object the
# category listing from categoryMagic() is appended.
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	$profileName = 'Parser::internalParse';
	wfProfileIn( $profileName );

	# HTML clean-up and variable expansion
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );

	# External links first, then internal links (run twice, as before)
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );

	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# NOTE(review): categoryMagicDone is not among the members reset by
	# clearState(), so this runs once per Parser object -- confirm intended.
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $profileName );
	return $text;
}
810 # Parse ^^ tokens and return html
# Wraps text enclosed in ^^ ... ^^ in <small><sup>. The match is
# greedy and does not cross line breaks.
/* private */ function doExponant ( $text )
{
	$profileName = 'Parser::doExponant';
	wfProfileIn( $profileName );
	$converted = preg_replace(
		'/\^\^(.*)\^\^/',
		'<small><sup>\\1</sup></small>',
		$text
	);
	wfProfileOut( $profileName );
	return $converted;
}
820 # Parse headers and return html
# Turns "== heading ==" lines into <hN> elements. Level 6 is handled
# first and level 1 last, so longer runs of '=' win.
/* private */ function doHeadings( $text ) {
	$profileName = 'Parser::doHeadings';
	wfProfileIn( $profileName );

	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( '=', $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}

	wfProfileOut( $profileName );
	return $text;
}
# Applies doQuotes() to every line of $text separately, so that quote
# markup never spans a line break, and joins the lines again.
/* private */ function doAllQuotes( $text ) {
	$profileName = 'Parser::doAllQuotes';
	wfProfileIn( $profileName );

	$pieces = explode( "\n", $text );
	$total = count( $pieces );
	$joined = '';
	for ( $i = 0; $i < $total; $i++ ) {
		$joined .= $this->doQuotes ( '', $pieces[$i], '' ) . "\n";
	}
	# drop the newline that was appended after the final line
	$joined = substr($joined, 0,-1);

	wfProfileOut( $profileName );
	return $joined;
}
# Recursive handling of '' (emphasis) and ''' (strong) on one line.
#
# $pre   text already consumed that still has to be wrapped
# $text  remaining text
# $mode  open markup: '', 'em', 'strong', 'emstrong', 'strongem' or 'both'
#
# Returns the HTML for $pre and $text.
/* private */ function doQuotes( $pre, $text, $mode ) {
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $parts ) ) {
		# No quote run left: close whatever is still open
		$strongText = ($text == '') ? '' : "<strong>{$text}</strong>";
		$emText = ($text == '') ? '' : "<em>{$text}</em>";
		$nothing = (($pre == '') && ($text == ''));
		switch ( $mode ) {
			case '':
				return $pre . $text;
			case 'em':
				return $pre . $emText;
			case 'strong':
				return $pre . $strongText;
			case 'strongem':
				return $nothing ? '' : "<strong>{$pre}{$emText}</strong>";
			default:
				return $nothing ? '' : "<em>{$pre}{$strongText}</em>";
		}
	}

	# $head: text before the first ''; $rest: everything after it
	$head = $parts[1];
	$rest = $parts[2];
	$headStrong = ($head == "") ? "" : "<strong>{$head}</strong>";
	$headEm = ($head == "") ? "" : "<em>{$head}</em>";

	if ( substr ($rest, 0, 1) == '\'' ) {
		# Three quotes: strong toggles
		$rest = substr ($rest, 1);
		switch ( $mode ) {
			case 'em':
				return $this->doQuotes ( $head, $rest, ($head == '') ? 'both' : 'emstrong' );
			case 'strong':
				return $headStrong . $this->doQuotes ( '', $rest, '' );
			case 'emstrong':
			case 'both':
				return $this->doQuotes ( '', $pre.$headStrong.$rest, 'em' );
			case 'strongem':
				return "<strong>{$pre}{$headEm}</strong>" . $this->doQuotes ( '', $rest, 'em' );
			default:
				return $head . $this->doQuotes ( '', $rest, 'strong' );
		}
	}

	# Two quotes: emphasis toggles
	switch ( $mode ) {
		case 'strong':
			return $this->doQuotes ( $head, $rest, ($head == '') ? 'both' : 'strongem' );
		case 'em':
			return $headEm . $this->doQuotes ( '', $rest, '' );
		case 'emstrong':
			return "<em>{$pre}{$headStrong}</em>" . $this->doQuotes ( '', $rest, 'strong' );
		case 'strongem':
		case 'both':
			return $this->doQuotes ( '', $pre.$headEm.$rest, 'strong' );
		default:
			return $head . $this->doQuotes ( '', $rest, 'em' );
	}
}
893 # Note: we have to do external links before the internal ones,
894 # and otherwise take great care in the order of things here, so
895 # that we don't end up interpreting some URLs twice.
# Runs subReplaceExternalLinks() once per supported protocol, in a
# fixed order. The flag is passed as its $autonumber argument and is
# true only for http and https.
/* private */ function replaceExternalLinks( $text ) {
	$profileName = 'Parser::replaceExternalLinks';
	wfProfileIn( $profileName );

	# protocol => autonumber
	$protocols = array(
		'http' => true,
		'https' => true,
		'ftp' => false,
		'irc' => false,
		'gopher' => false,
		'news' => false,
		'mailto' => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $profileName );
	return $text;
}
# Replace the external links of one protocol, both bare URLs and the
# bracketed forms [url] and [url text].
#
# $s           text to process
# $protocol    protocol name without the colon, e.g. 'http'
# $autonumber  when true, [url] links are labelled [1], [2], ... and bare
#              image URLs may become <img> tags if the parser options
#              allow external images
#
# Returns the text with the links replaced by HTML.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) {
	# Stands in for the protocol inside generated links so that the
	# replacements below do not match their own output
	$unique = '4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3';
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = 'A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF';
	$images = 'gif|png|jpg|jpeg';

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, '\\1' . $sk->makeImage( "{$unique}:\\3" .
			'/\\4.\\5', '\\4.\\5' ) . '\\6', $s );
	}
	$s = preg_replace( $e2, '\\1' . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		'</a>\\5', $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split at "[protocol:"; the leading space keeps the
	# first piece non-empty and is removed again
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";                 # [url]
	$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";    # [url text]

	foreach ( $a as $line ) {
		if ( preg_match( $e1, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		} else if ( preg_match( $e2, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed link; put the text back untouched
			$s .= "[{$protocol}:" . $line;
			continue;
		}
		# The pattern is delimited by '!', so the text must be quoted for
		# that delimiter; it was quoted for '/' before, which broke the
		# pattern for link texts containing '!'.
		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "!" ) . "/?$!", $link ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
# Replace [[internal link]] markup in $s with its rendered form.
#
# The text is split on '[[' and every piece is matched against
# "target|alternate text]]trail". Pieces that do not match, or whose
# target is rejected by Title::newFromText(), are put back unchanged.
# Depending on the target the link is recorded on $this->mOutput as a
# language or category link, or rendered through the skin as an image,
# media, special-page, self or ordinary link.
#
# $s: text to process
# Returns the text with the link markup replaced.
/* private */ function replaceInternalLinks( $s ) {
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = 'Parser::replaceInternalLinks' ;
	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
	$sk =& $this->mOptions->getSkin();

	# A blank is prepended so that text starting with '[[' still yields a
	# (then empty) leading piece; it is stripped again right away.
	$a = explode( '[[', ' ' . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	# Both are given a value on every path so that neither is ever read
	# while undefined further down.
	$prefix = '';
	$first_prefix = false;
	if ( $useLinkPrefixExtension ) {
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		}
	}

	wfProfileOut( $fname.'-setup' );

	foreach ( $a as $line ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ':');
		if( $c == '/' ) { # subpage
			if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
				if( '' == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( '' == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a '#' in first position, which a loose
		# comparison cannot tell apart from "not found"; compare strictly so
		# that only links without any fragment count as self-links.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		    ( strpos( $link, '#' ) === false ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
1141 # Some functions here used by doBlockLevels()
# Produce the closing tag for the section named in $this->mLastSection
# (empty string when no section is open) and reset the section state.
/* private */ function closeParagraph() {
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';

	# State is reset unconditionally, even when nothing had to be closed
	$this->mLastSection = '';
	$this->mInPre = false;

	return $closing;
}
1152 # getCommon() returns the length of the longest common prefix
1153 # of both arguments, i.e. the number of leading characters they share.
# Count how many leading characters $st1 and $st2 have in common.
#
# $st1, $st2: strings to compare
# Returns the length of the shared prefix (0 when either string is empty
# or the first characters already differ).
/* private */ function getCommon( $st1, $st2 ) {
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square-bracket offsets are used because the curly-brace form
	# ($st1{$i}) is no longer accepted by current PHP versions.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
1165 # These next three functions open, continue, and close the list
1166 # element appropriate to the prefix character passed into them.
# Close any open paragraph and open the list element that belongs to the
# prefix character $char ('*', '#', ':' or ';').
# Returns the generated markup; for any other character only an error
# comment is returned.
/* private */ function openList( $char )
{
	$markup = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$markup .= '<ul><li>';
			break;
		case '#':
			$markup .= '<ol><li>';
			break;
		case ':':
			$markup .= '<dl><dd>';
			break;
		case ';':
			$markup .= '<dl><dt>';
			# Remember that a <dt> is open so it gets closed correctly later
			$this->mDTopen = true;
			break;
		default:
			# Replaces, rather than extends, what closeParagraph() returned
			$markup = '<!-- ERR 1 -->';
	}

	return $markup;
}
# Return the markup that ends the current list item and starts the next
# one for the prefix character $char. For definition lists the element
# being closed depends on $this->mDTopen, which is updated to reflect the
# element being opened.
/* private */ function nextItem( $char ) {
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';

		case ':':
		case ';':
			$closing = $this->mDTopen ? '</dt>' : '</dd>';
			$opensTerm = ( ';' == $char );
			$this->mDTopen = $opensTerm;
			return $closing . ( $opensTerm ? '<dt>' : '<dd>' );
	}
	return '<!-- ERR 2 -->';
}
# Return the markup closing the list that belongs to the prefix character
# $char, followed by a newline. Only '*', '#' and ':' are handled; any
# other character yields an error comment without a trailing newline.
/* private */function closeList( $char ) {
	switch ( $char ) {
		case '*':
			$closing = '</li></ul>';
			break;
		case '#':
			$closing = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				# A term is still open: close it instead of a definition
				$this->mDTopen = false;
				$closing = '</dt></dl>';
			} else {
				$closing = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $closing."\n";
}
# Turn line-oriented text into block-level HTML.
#
# $text is processed one line at a time. Lines starting with any of
# * # : ; open, continue or close list elements; all other lines are
# grouped into <p> or (when starting with a blank) <pre> sections unless
# they contain one of the block-level tags matched below.
#
# $text: text to process
# $linestart: when false, the first line is copied to the output as is
#             and not treated as the start of a line
# Returns the generated text.
1215 /* private */ function doBlockLevels( $text, $linestart ) {
1216 $fname = 'Parser::doBlockLevels';
1217 wfProfileIn( $fname );
1219 # Parsing through the text line by line. The main thing
1220 # happening here is handling of block-level elements p, pre,
1221 # and making lists from lines starting with * # : etc.
1223 $textLines = explode( "\n", $text );
1225 $lastPrefix = $output = $lastLine = '';
1226 $this->mDTopen = $inBlockElem = false;
1227 $prefixLength = 0;
# $paragraphStack holds a pending paragraph tag that is only written out
# once the next line shows whether it is needed; false means "nothing pending"
1228 $paragraphStack = false;
1230 if ( !$linestart ) {
1231 $output .= array_shift( $textLines );
1233 foreach ( $textLines as $oLine ) {
1234 $lastPrefixLength = strlen( $lastPrefix );
1235 $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1236 $preOpenMatch = preg_match("/<pre/i", $oLine );
1237 if (!$this->mInPre) {
1238 $this->mInPre = !empty($preOpenMatch);
1240 if ( !$this->mInPre ) {
1241 # Multiple prefixes may abut each other for nested lists.
1242 $prefixLength = strspn( $oLine, '*#:;' );
1243 $pref = substr( $oLine, 0, $prefixLength );
1245 # eh?
# $pref2 is the prefix with ';' rewritten to ':'; it is what gets compared
# with, and stored in, $lastPrefix, so a ';' line and a ':' line at the same
# depth count as the same list level.
1246 $pref2 = str_replace( ';', ':', $pref );
1247 $t = substr( $oLine, $prefixLength );
1248 } else {
1249 # Don't interpret any other prefixes in preformatted text
1250 $prefixLength = 0;
1251 $pref = $pref2 = '';
1252 $t = $oLine;
1255 # List generation
1256 if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1257 # Same as the last item, so no need to deal with nesting or opening stuff
1258 $output .= $this->nextItem( substr( $pref, -1 ) );
1259 $paragraphStack = false;
1261 if ( ";" == substr( $pref, -1 ) ) {
1262 # The one nasty exception: definition lists work like this:
1263 # ; title : definition text
1264 # So we check for : in the remainder text to split up the
1265 # title and definition, without b0rking links.
1266 # FIXME: This is not foolproof. Something better in Tokenizer might help.
1267 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1268 $term = $match[1];
1269 $output .= $term . $this->nextItem( ':' );
1270 $t = $match[2];
1273 } elseif( $prefixLength || $lastPrefixLength ) {
1274 # Either open or close a level...
1275 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1276 $paragraphStack = false;
# Close every level of the previous line that is not shared with this one
1278 while( $commonPrefixLength < $lastPrefixLength ) {
1279 $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1280 --$lastPrefixLength;
1282 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1283 $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
# Open every level of this line that goes beyond the shared part
1285 while ( $prefixLength > $commonPrefixLength ) {
1286 $char = substr( $pref, $commonPrefixLength, 1 );
1287 $output .= $this->openList( $char );
1289 if ( ';' == $char ) {
1290 # FIXME: This is dupe of code above
1291 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1292 $term = $match[1];
1293 $output .= $term . $this->nextItem( ":" );
1294 $t = $match[2];
1297 ++$commonPrefixLength;
1299 $lastPrefix = $pref2;
1301 if( 0 == $prefixLength ) {
1302 # No prefix (not in list)--go to paragraph mode
1303 $uniq_prefix = UNIQ_PREFIX;
1304 // XXX: use a stack for nestable elements like span, table and div
1305 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1306 $closematch = preg_match(
1307 '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1308 '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1309 if ( $openmatch or $closematch ) {
1310 $paragraphStack = false;
1311 $output .= $this->closeParagraph();
# closeParagraph() resets mInPre, so it is set again when this line opens a
# <pre> without closing it
1312 if($preOpenMatch and !$preCloseMatch) {
1313 $this->mInPre = true;
1315 if ( $closematch ) {
1316 $inBlockElem = false;
1317 } else {
1318 $inBlockElem = true;
1320 } else if ( !$inBlockElem && !$this->mInPre ) {
# NOTE(review): $t{0} is read without checking that $t is non-empty, and the
# curly-brace offset form is rejected by current PHP versions -- confirm which
# PHP versions this file has to run on.
1321 if ( " " == $t{0} and trim($t) != '' ) {
1322 // pre
1323 if ($this->mLastSection != 'pre') {
1324 $paragraphStack = false;
1325 $output .= $this->closeParagraph().'<pre>';
1326 $this->mLastSection = 'pre';
1328 } else {
1329 // paragraph
1330 if ( '' == trim($t) ) {
1331 if ( $paragraphStack ) {
1332 $output .= $paragraphStack.'<br />';
1333 $paragraphStack = false;
1334 $this->mLastSection = 'p';
1335 } else {
1336 if ($this->mLastSection != 'p' ) {
1337 $output .= $this->closeParagraph();
1338 $this->mLastSection = '';
1339 $paragraphStack = '<p>';
1340 } else {
1341 $paragraphStack = '</p><p>';
1344 } else {
1345 if ( $paragraphStack ) {
1346 $output .= $paragraphStack;
1347 $paragraphStack = false;
1348 $this->mLastSection = 'p';
1349 } else if ($this->mLastSection != 'p') {
1350 $output .= $this->closeParagraph().'<p>';
1351 $this->mLastSection = 'p';
# The line itself is only written when no paragraph tag is pending
1357 if ($paragraphStack === false) {
1358 $output .= $t."\n";
# After the last line: close whatever lists and section are still open
1361 while ( $prefixLength ) {
1362 $output .= $this->closeList( $pref2{$prefixLength-1} );
1363 --$prefixLength;
1365 if ( '' != $this->mLastSection ) {
1366 $output .= '</' . $this->mLastSection . '>';
1367 $this->mLastSection = '';
1370 wfProfileOut( $fname );
1371 return $output;
# Look up the current value of the magic variable identified by $index
# (one of the MAG_* constants handled below, e.g. MAG_PAGENAME).
# Returns the value, or NULL for an identifier that is not handled here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		# Date and time
		case MAG_CURRENTMONTH:
			$value = date( 'm' );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = date('j');
			break;
		case MAG_CURRENTDAYNAME:
			# date('w') is 0-based, hence the +1
			$value = $wgLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( 'Y' );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;

		# Page being parsed
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Namespace name as provided by the language object
			$value = $wgLang->getNsText($this->mTitle->getNamespace());
			break;

		# Site-wide values
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
# (Re)build $this->mVariables: for every id in $wgVariableIDs the matching
# MagicWord adds itself to the array together with its current value.
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
# Expand {{...}} and {{{...}}} constructs in $text.
#
# $text: text to process
# $args: arguments available to {{{...}}} while this text is processed;
#        they are pushed on $this->mArgStack for the duration of the call
# Returns the expanded text.
/* private */ function replaceVariables( $text, $args = array() ) {
	global $wgLang, $wgScript, $wgArticlePath;

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$bail = false;
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	# Character classes used by the patterns below
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );

	# The callbacks reach this object through the global wgCurParser, and
	# since calls can nest, the arguments are kept on a stack.
	$this->mArgStack[] = $args;
	$GLOBALS['wgCurParser'] =& $this;

	# Plain variables and template arguments are only expanded for HTML output
	if ( $this->mOutputType == OT_HTML ) {
		$variablePattern = "/{{([$nonBraceChars]*?)}}/";
		$text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

		$argumentPattern = "/(\\n?){{{([$titleChars]*?)}}}/";
		$text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
	}

	# Templates are handled for every output type
	$templatePattern = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
# Replacement for one {{name}} match: $matches[1] is the name, $matches[0]
# the whole match. Known variables are replaced by their value and flag
# the output as containing old-style magic; anything else is left as is.
function variableSubstitution( $matches ) {
	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}
	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
# Work out the replacement for one match of the template pattern that
# replaceVariables() builds (registered there under the callback name
# 'wfBraceSubstitution'; presumably that function forwards to this method --
# it is not visible in this part of the file).
#
# $matches: [0] the whole match, [1] an optional newline before the braces,
#           [2] the text before the first '|', [3] the '|'-prefixed argument
#           string, or '' when there are no arguments
#
# The candidates are tried in this order, the first hit wins: {{{...}}}
# passthrough, SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE, magic
# variables, arguments passed by the caller, and finally the page
# Title::newFromText( $part1, NS_TEMPLATE ) refers to.
#
# Returns the replacement text, or $matches[0] unchanged when nothing applied.
1466 function braceSubstitution( $matches ) {
1467 global $wgLinkCache, $wgLang;
1468 $fname = 'Parser::braceSubstitution';
# $found: a replacement has been determined
# $nowiki: escape the result instead of parsing it (set for MSGNW)
# $noparse: return the result without running the parser on it
1469 $found = false;
1470 $nowiki = false;
1471 $noparse = false;
1473 $title = NULL;
1475 # $newline is an optional newline character before the braces
1476 # $part1 is the bit before the first |, and must contain only title characters
1477 # $args is a list of arguments, starting from index 0, not including $part1
1479 $newline = $matches[1];
1480 $part1 = $matches[2];
1481 # If the third subpattern matched anything, it will start with |
1482 if ( $matches[3] !== '' ) {
1483 $args = explode( '|', substr( $matches[3], 1 ) );
1484 } else {
1485 $args = array();
1487 $argc = count( $args );
1489 # {{{}}}
# A match containing a triple brace is handed back untouched
1490 if ( strpos( $matches[0], '{{{' ) !== false ) {
1491 $text = $matches[0];
1492 $found = true;
1493 $noparse = true;
1496 # SUBST
1497 if ( !$found ) {
1498 $mwSubst =& MagicWord::get( MAG_SUBST );
1499 if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1500 if ( $this->mOutputType != OT_WIKI ) {
1501 # Invalid SUBST not replaced at PST time
1502 # Return without further processing
1503 $text = $matches[0];
1504 $found = true;
1505 $noparse= true;
1507 } elseif ( $this->mOutputType == OT_WIKI ) {
1508 # SUBST not found in PST pass, do nothing
1509 $text = $matches[0];
1510 $found = true;
1514 # MSG, MSGNW and INT
1515 if ( !$found ) {
1516 # Check for MSGNW:
1517 $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1518 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1519 $nowiki = true;
1520 } else {
1521 # Remove obsolete MSG:
1522 $mwMsg =& MagicWord::get( MAG_MSG );
1523 $mwMsg->matchStartAndRemove( $part1 );
1526 # Check if it is an internal message
1527 $mwInt =& MagicWord::get( MAG_INT );
1528 if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1529 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1530 $text = wfMsgReal( $part1, $args, true );
1531 $found = true;
1536 # NS
1537 if ( !$found ) {
1538 # Check for NS: (namespace expansion)
1539 $mwNs = MagicWord::get( MAG_NS );
1540 if ( $mwNs->matchStartAndRemove( $part1 ) ) {
# A value with a non-zero intval() is used as a namespace number, anything
# else is looked up as a canonical namespace name
1541 if ( intval( $part1 ) ) {
1542 $text = $wgLang->getNsText( intval( $part1 ) );
1543 $found = true;
1544 } else {
1545 $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1546 if ( !is_null( $index ) ) {
1547 $text = $wgLang->getNsText( $index );
1548 $found = true;
1554 # LOCALURL and LOCALURLE
1555 if ( !$found ) {
1556 $mwLocal = MagicWord::get( MAG_LOCALURL );
1557 $mwLocalE = MagicWord::get( MAG_LOCALURLE );
# $func names the Title method to call; '' means neither keyword matched
1559 if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1560 $func = 'getLocalURL';
1561 } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1562 $func = 'escapeLocalURL';
1563 } else {
1564 $func = '';
1567 if ( $func !== '' ) {
1568 $title = Title::newFromText( $part1 );
1569 if ( !is_null( $title ) ) {
1570 if ( $argc > 0 ) {
1571 $text = $title->$func( $args[0] );
1572 } else {
1573 $text = $title->$func();
1575 $found = true;
1580 # Internal variables
1581 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1582 $text = $this->mVariables[$part1];
1583 $found = true;
1584 $this->mOutput->mContainsOldMagic = true;
1587 # Arguments input from the caller
1588 $inputArgs = end( $this->mArgStack );
1589 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
1590 $text = $inputArgs[$part1];
1591 $found = true;
1594 # Load from database
1595 if ( !$found ) {
1596 $title = Title::newFromText( $part1, NS_TEMPLATE );
1597 if ( !is_null( $title ) && !$title->isExternal() ) {
1598 # Check for excessive inclusion
1599 $dbk = $title->getPrefixedDBkey();
1600 if ( $this->incrementIncludeCount( $dbk ) ) {
1601 $article = new Article( $title );
1602 $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1603 if ( $articleContent !== false ) {
1604 $found = true;
1605 $text = $articleContent;
1610 # If the title is valid but undisplayable, make a link to it
1611 if ( $this->mOutputType == OT_HTML && !$found ) {
1612 $text = '[[' . $title->getPrefixedText() . ']]';
1613 $found = true;
1618 # Recursive parsing, escaping and link table handling
1619 # Only for HTML output
1620 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1621 $text = wfEscapeWikiText( $text );
1622 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1623 # Clean up argument array
# Arguments containing '=' become named entries, all others are numbered
# from 1 in order of appearance
1624 $assocArgs = array();
1625 $index = 1;
1626 foreach( $args as $arg ) {
1627 $eqpos = strpos( $arg, '=' );
1628 if ( $eqpos === false ) {
1629 $assocArgs[$index++] = $arg;
1630 } else {
1631 $name = trim( substr( $arg, 0, $eqpos ) );
1632 $value = trim( substr( $arg, $eqpos+1 ) );
1633 if ( $value === false ) {
1634 $value = '';
1636 if ( $name !== false ) {
1637 $assocArgs[$name] = $value;
1642 # Do not enter included links in link table
1643 if ( !is_null( $title ) ) {
1644 $wgLinkCache->suspend();
1647 # Run full parser on the included text
1648 $text = $this->stripParse( $text, $newline, $assocArgs );
1650 # Resume the link cache and register the inclusion as a link
1651 if ( !is_null( $title ) ) {
1652 $wgLinkCache->resume();
1653 $wgLinkCache->addLinkObj( $title );
1657 if ( !$found ) {
1658 return $matches[0];
1659 } else {
1660 return $text;
# Replacement for one {{{name}}} match -- used for template arguments.
# $matches[1] is the optional leading newline, $matches[2] the argument
# name. When the innermost entry of $this->mArgStack has that argument its
# value is run through stripParse(); otherwise the match is returned as is.
function argSubstitution( $matches ) {
	$leadingNewline = $matches[1];
	$argName = trim( $matches[2] );
	$available = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $available ) ) {
		return $matches[0];
	}
	return $this->stripParse( $available[$argName], $leadingNewline, array() );
}
# Count one more inclusion of the entity keyed by $dbk.
# Returns true while the entity has been included no more than
# MAX_INCLUDE_REPEAT times (this inclusion counted), false afterwards.
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;
	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1691 # Cleans up HTML, removes dangerous tags and attributes
#
# Only the tags listed below are let through, and only when $wgUserHtml is
# set; with it unset every tag list is empty. The '<' of any other tag is
# turned into &lt;, and every '>' in the text following a tag into &gt;.
# Attributes of accepted opening tags are passed through fixTagAttributes().
#
# With $wgUseTidy unset, nesting is tracked as well: a closing tag has to
# match the most recently opened one, the tags in $tabletags are only
# accepted inside a table, and tags still open at the end are closed.
# With $wgUseTidy set only the tag list and the attributes are checked.
#
# $text: text to clean
# Returns the cleaned text.
1692 /* private */ function removeHTMLtags( $text ) {
1693 global $wgUseTidy, $wgUserHtml;
1694 $fname = 'Parser::removeHTMLtags';
1695 wfProfileIn( $fname );
1697 if( $wgUserHtml ) {
1698 $htmlpairs = array( # Tags that must be closed
1699 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1700 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1701 'strike', 'strong', 'tt', 'var', 'div', 'center',
1702 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1703 'ruby', 'rt' , 'rb' , 'rp', 'p'
1705 $htmlsingle = array(
1706 'br', 'hr', 'li', 'dt', 'dd'
1708 $htmlnest = array( # Tags that can be nested--??
1709 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
1710 'dl', 'font', 'big', 'small', 'sub', 'sup'
1712 $tabletags = array( # Can only appear inside table
1713 'td', 'th', 'tr'
1715 } else {
1716 $htmlpairs = array();
1717 $htmlsingle = array();
1718 $htmlnest = array();
1719 $tabletags = array();
# Table tags are treated like single tags, i.e. they are never pushed on the
# tag stack and need no matching close
1722 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1723 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1725 $htmlattrs = $this->getHTMLattrs () ;
1727 # Remove HTML comments
# NOTE(review): the pattern has a single capture group, so the replacement
# '$2' looks like it always expands to an empty string -- confirm that plain
# removal is what is intended.
1728 $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );
# Everything before the first '<' is kept as is; each further piece starts
# right after a '<'
1730 $bits = explode( '<', $text );
1731 $text = array_shift( $bits );
1732 if(!$wgUseTidy) {
1733 $tagstack = array(); $tablestack = array();
1734 foreach ( $bits as $x ) {
# Notices and warnings are switched off around the match because list() is
# fed an incomplete array when the piece is not a tag
1735 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1736 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1737 $x, $regs );
1738 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1739 error_reporting( $prev );
1741 $badtag = 0 ;
1742 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1743 # Check our stack
1744 if ( $slash ) {
1745 # Closing a tag...
1746 if ( ! in_array( $t, $htmlsingle ) &&
1747 ( $ot = @array_pop( $tagstack ) ) != $t ) {
# Not the tag that was opened last: put it back and reject the closing tag
1748 @array_push( $tagstack, $ot );
1749 $badtag = 1;
1750 } else {
1751 if ( $t == 'table' ) {
# Leaving a table: continue with the stack that was saved when it was opened
1752 $tagstack = array_pop( $tablestack );
1754 $newparams = '';
1756 } else {
1757 # Keep track for later
1758 if ( in_array( $t, $tabletags ) &&
1759 ! in_array( 'table', $tagstack ) ) {
1760 $badtag = 1;
1761 } else if ( in_array( $t, $tagstack ) &&
1762 ! in_array ( $t , $htmlnest ) ) {
1763 $badtag = 1 ;
1764 } else if ( ! in_array( $t, $htmlsingle ) ) {
1765 if ( $t == 'table' ) {
# Entering a table: save the current stack and start a fresh one
1766 array_push( $tablestack, $tagstack );
1767 $tagstack = array();
1769 array_push( $tagstack, $t );
1771 # Strip non-approved attributes from the tag
1772 $newparams = $this->fixTagAttributes($params);
1775 if ( ! $badtag ) {
1776 $rest = str_replace( '>', '&gt;', $rest );
1777 $text .= "<$slash$t $newparams$brace$rest";
1778 continue;
# Rejected or unknown tag: output it as text
1781 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1783 # Close off any remaining tags
1784 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1785 $text .= "</$t>\n";
1786 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
1788 } else {
1789 # this might be possible using tidy itself
1790 foreach ( $bits as $x ) {
1791 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
1792 $x, $regs );
1793 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1794 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1795 $newparams = $this->fixTagAttributes($params);
1796 $rest = str_replace( '>', '&gt;', $rest );
1797 $text .= "<$slash$t $newparams$brace$rest";
1798 } else {
1799 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
1803 wfProfileOut( $fname );
1804 return $text;
1810 * This function accomplishes several tasks:
1811 * 1) Auto-number headings if that option is enabled
1812 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1813 * 3) Add a Table of contents on the top for users who have enabled the option
1814 * 4) Auto-anchor headings
1816 * It loops through all headlines, collects the necessary data, then splits up the
1817 * string and re-inserts the newly formatted headlines.
# Post-process the <h1>..<h6> headings found in $text: number them, give
# each an anchor, add section edit links and build the table of contents,
# each depending on the parser options and on the magic words found in
# the text.
#
# $text: HTML to process
# $isMain: the table of contents is only inserted when this is true
# Returns the text with the headings replaced by their formatted versions
# and, if applicable, the table of contents inserted after the first block.
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$refers = array();   # anchor text => number of headings using it so far
	$refcount = array(); # heading index => how many times its anchor text was seen
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Join the counters of all levels up to this one with dots,
			# skipping levels that have no heading yet
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		# Feed the result of unstrip() on, not the raw headline, so that
		# the first expansion is not thrown away
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( empty( $head[$headlineCount] ) ) {
			$head[$headlineCount] = '';
		}
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
2010 # Return an HTML link for the "ISBN 123456" text
# Turn every "ISBN <number>" in $text into a link to the Booksources
# special page.
#
# The number may consist of digits, hyphens and capital letters; hyphens
# are dropped for the link target. An "ISBN " that is not followed by a
# usable number is left exactly as it was.
#
# $text: text to process
# Returns the text with the ISBN references replaced by links.
/* private */ function magicISBN( $text ) {
	# The separator is a literal string, so explode() does the job; the
	# regex-based split() is not available in current PHP versions.
	$a = explode( 'ISBN ', " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Measure the leading blanks and the run of valid characters
		# instead of peeling characters off one at a time; this is also
		# safe when $x is, or becomes, the empty string.
		$blank = (string)substr( $x, 0, strspn( $x, ' ' ) );
		$x = (string)substr( $x, strlen( $blank ) );
		$isbn = (string)substr( $x, 0, strspn( $x, $valid ) );
		$x = (string)substr( $x, strlen( $isbn ) );

		$num = str_replace( '-', '', $isbn );
		$num = str_replace( ' ', '', $num );

		if ( '' == $num ) {
			# Nothing usable: restore the original text, including any
			# hyphens that were read as part of the number
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
2045 # Return an HTML link for the "RFC 1234" text
# Turn every "RFC <digits>" in $text into an external link. The target is
# the 'rfcurl' message with $1 replaced by the number. An "RFC " that is
# not followed by digits is left exactly as it was.
#
# $text: text to process
# Returns the text with the RFC references replaced by links.
/* private */ function magicRFC( $text ) {
	# The separator is a literal string, so explode() does the job; the
	# regex-based split() is not available in current PHP versions.
	$a = explode( 'RFC ', ' '.$text );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = '0123456789';

	foreach ( $a as $x ) {
		# Measure the leading blanks and the run of digits instead of
		# peeling characters off one at a time; this is also safe when
		# $x is, or becomes, the empty string.
		$blank = (string)substr( $x, 0, strspn( $x, ' ' ) );
		$x = (string)substr( $x, strlen( $blank ) );
		$rfc = (string)substr( $x, 0, strspn( $x, $valid ) );
		$x = (string)substr( $x, strlen( $rfc ) );

		if ( '' == $rfc ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
# Transform wiki markup before it is saved.
#
# Sets the parser up for OT_WIKI output, normalises line endings and <br>
# tags, then runs pstPass2() on the text with strip()/unstrip() wrapped
# around it.
#
# $text:       wiki markup to transform
# $title:      title object, stored by reference in mTitle
# $user:       user object, handed on to pstPass2()
# $options:    parser options, stored in mOptions
# $clearState: when true (the default) clearState() is called first
# Returns the altered wiki markup.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_WIKI;

    if ( $clearState ) {
        $this->clearState();
    }

    $stripState = false;
    $pairs = array(
        "\r\n" => "\n",
    );
    $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
    // now with regexes

    # The first pattern must stay inside a single tag. A greedy ".+" would
    # run from the first "<br" on a line to the last "clear=all>" on that
    # line and delete everything in between, so the attribute scan is
    # limited to characters other than ">".
    $pairs = array(
        "/<br[^>]+?(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
        "/<br *?>/i" => "<br />",
    );
    $text = preg_replace( array_keys( $pairs ), array_values( $pairs ), $text );

    $text = $this->strip( $text, $stripState, false );
    $text = $this->pstPass2( $text, $user );
    $text = $this->unstrip( $text, $stripState );
    $text = $this->unstripNoWiki( $text, $stripState );
    return $text;
}
# Second pass of the pre-save transform.
#
# In order: variable replacement, signature expansion (~~~, ~~~~, ~~~~~),
# the "pipe trick" for context links, MAG_SUBST substitution, and trimming
# of trailing whitespace together with removal of the MAG_END marker.
#
# $text: wiki markup, already stripped by the caller
# $user: user object; its name and 'nickname' option form the signature
# Returns the transformed wiki markup.
/* private */ function pstPass2( $text, &$user ) {
    global $wgLang, $wgLocaltimezone, $wgCurParser;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( 'nickname' );
    if ( '' == $k ) { $k = $n; }
    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv( 'TZ' );
        putenv( 'TZ=' . $wgLocaltimezone );
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
        ' (' . date( 'T' ) . ')';
    # Put back the timezone saved above (the variable is $oldtz).
    if ( isset( $wgLocaltimezone ) ) putenv( 'TZ=' . $oldtz );

    # Plain string replacement: the signature text is user-controlled and
    # must not be read as a preg replacement template, where "$1" or "\1"
    # would be taken as backreferences. Longest marker first.
    $userLink = '[[' . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace( '~~~~~', $d, $text );
    $text = str_replace( '~~~~', "$userLink $d", $text );
    $text = str_replace( '~~~', $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";           # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/";                          # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";        # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                          # [[ns:page (cont)|]]
    # The context is the parenthesised suffix of the current title, if any.
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
    $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
    $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

    if ( '' == $context ) {
        $text = preg_replace( $p2, '[[\\1]]', $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # NOTE(review): the lines directly before and after this block are
    # missing from the extracted source; they may have been comment
    # delimiters that disabled it. Confirm against the repository.
    $mw =& MagicWord::get( MAG_SUBST );
    $wgCurParser = $this->fork();
    $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
    $this->merge( $wgCurParser );

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
# Prime the members that parse() would normally initialise, so that code
# outside the class can call individual parser methods directly.
#
# $title:      title object, stored by reference in mTitle
# $options:    parser options, stored in mOptions
# $outputType: one of the OT_* constants, stored in mOutputType
# $clearState: when true (the default) clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
# Run variable replacement over a message text in OT_MSG mode.
#
# A static flag makes the method non-reentrant: a call that arrives while
# another one is still running gets its text back unchanged, which guards
# against infinite recursion.
#
# $text:    message text
# $options: parser options, stored in mOptions
# Returns the text after replaceVariables(), or the input on a nested call.
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $executing = false;

    if ( !$executing ) {
        $executing = true;

        $this->mTitle = $wgTitle;
        $this->mOptions = $options;
        $this->mOutputType = OT_MSG;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $executing = false;
    }

    return $text;
}
# Create an HTML-style tag, e.g. <yourtag>special text</yourtag>
# Callback will be called with the text within
# Transform and return the text within
#
# $tag:      tag name used as the key in mTagHooks
# $callback: callback to register for that tag
# Returns the callback previously registered for $tag, or null if none.
function setHook( $tag, $callback ) {
    # An explicit isset() test replaces the error-suppression operator, so
    # a missing entry is handled on purpose rather than silenced.
    $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
    $this->mTagHooks[$tag] = $callback;
    return $oldVal;
}
# Holds what a parse produces: the output text, the language links, the
# category links, a "contains old magic" flag, and a cache timestamp.
class ParserOutput
{
    var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
    var $mCacheTime; # Used in ParserCache

    # $text:             output text
    # $languageLinks:    array of language links
    # $categoryLinks:    array of category links
    # $containsOldMagic: initial value of the "contains old magic" flag
    # The cache time starts out as an empty string.
    function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mText = $text;
        $this->mLanguageLinks = $languageLinks;
        $this->mCategoryLinks = $categoryLinks;
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCacheTime = "";
    }

    # Accessors
    function getText() { return $this->mText; }
    function getLanguageLinks() { return $this->mLanguageLinks; }
    function getCategoryLinks() { return $this->mCategoryLinks; }
    function getCacheTime() { return $this->mCacheTime; }
    function containsOldMagic() { return $this->mContainsOldMagic; }

    # Mutators; each one returns whatever wfSetVar() returns.
    function setText( $text ) { return wfSetVar( $this->mText, $text ); }
    function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
    function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
    function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
    function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

    # Fold another ParserOutput into this one: both link lists get the
    # other object's entries appended and the flag is OR-ed. The text and
    # the cache time are left alone.
    function merge( $other ) {
        $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
        # Append the other object's *category* links, not this object's
        # language links.
        $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
        $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
    }
}
# Bundle of settings that steer a parse. Values are read through the get*()
# methods and changed through the set*() methods; initialiseFromUser()
# fills every field from the site globals and a user's preferences.
class ParserOptions
{
    # All variables are private
    var $mUseTeX;                  # Use texvc to expand <math> tags
    var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
    var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
    var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
    var $mAllowExternalImages;     # Allow external images inline
    var $mSkin;                    # Reference to the preferred skin
    var $mDateFormat;              # Date format index
    var $mEditSection;             # Create "edit section" links
    var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
    var $mNumberHeadings;          # Automatically number headings
    var $mShowToc;                 # Show table of contents

    # --- Readers -----------------------------------------------------

    function getUseTeX() {
        return $this->mUseTeX;
    }

    function getUseCategoryMagic() {
        return $this->mUseCategoryMagic;
    }

    function getUseDynamicDates() {
        return $this->mUseDynamicDates;
    }

    function getInterwikiMagic() {
        return $this->mInterwikiMagic;
    }

    function getAllowExternalImages() {
        return $this->mAllowExternalImages;
    }

    function getSkin() {
        return $this->mSkin;
    }

    function getDateFormat() {
        return $this->mDateFormat;
    }

    function getEditSection() {
        return $this->mEditSection;
    }

    function getEditSectionOnRightClick() {
        return $this->mEditSectionOnRightClick;
    }

    function getNumberHeadings() {
        return $this->mNumberHeadings;
    }

    function getShowToc() {
        return $this->mShowToc;
    }

    # --- Writers; each hands back the result of wfSetVar() -----------

    function setUseTeX( $x ) {
        return wfSetVar( $this->mUseTeX, $x );
    }

    function setUseCategoryMagic( $x ) {
        return wfSetVar( $this->mUseCategoryMagic, $x );
    }

    function setUseDynamicDates( $x ) {
        return wfSetVar( $this->mUseDynamicDates, $x );
    }

    function setInterwikiMagic( $x ) {
        return wfSetVar( $this->mInterwikiMagic, $x );
    }

    function setAllowExternalImages( $x ) {
        return wfSetVar( $this->mAllowExternalImages, $x );
    }

    function setDateFormat( $x ) {
        return wfSetVar( $this->mDateFormat, $x );
    }

    function setEditSection( $x ) {
        return wfSetVar( $this->mEditSection, $x );
    }

    function setEditSectionOnRightClick( $x ) {
        return wfSetVar( $this->mEditSectionOnRightClick, $x );
    }

    function setNumberHeadings( $x ) {
        return wfSetVar( $this->mNumberHeadings, $x );
    }

    function setShowToc( $x ) {
        return wfSetVar( $this->mShowToc, $x );
    }

    # The skin is bound by reference and nothing is returned.
    function setSkin( &$x ) {
        $this->mSkin =& $x;
    }

    # --- Construction helpers -----------------------------------------

    # Build a fresh options object initialised from $user.
    /* static */ function newFromUser( &$user ) {
        $options = new ParserOptions;
        $options->initialiseFromUser( $user );
        return $options;
    }

    # Fill every field: site-wide switches come from the $wg* globals, the
    # skin and the remaining fields from the user. A false-y $userInput is
    # replaced by a new User object on which setLoaded( true ) is called.
    function initialiseFromUser( &$userInput ) {
        global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

        if ( $userInput ) {
            $user =& $userInput;
        } else {
            $user = new User;
            $user->setLoaded( true );
        }

        # Site configuration
        $this->mUseTeX = $wgUseTeX;
        $this->mUseCategoryMagic = $wgUseCategoryMagic;
        $this->mUseDynamicDates = $wgUseDynamicDates;
        $this->mInterwikiMagic = $wgInterwikiMagic;
        $this->mAllowExternalImages = $wgAllowExternalImages;

        # User-specific values
        $this->mSkin =& $user->getSkin();
        $this->mDateFormat = $user->getOption( 'date' );
        $this->mEditSection = $user->getOption( 'editsection' );
        $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
        $this->mNumberHeadings = $user->getOption( 'numberheadings' );
        $this->mShowToc = $user->getOption( 'showtoc' );
    }
}
# Regex callbacks, used in Parser::replaceVariables
#
# Forwards the match array to braceSubstitution() on the parser object held
# in the global $wgCurParser and hands back its result.
function wfBraceSubstitution( $matches )
{
    return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
# Regex callback: forwards the match array to argSubstitution() on the
# parser object held in the global $wgCurParser and hands back its result.
function wfArgSubstitution( $matches )
{
    return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
# Regex callback: forwards the match array to variableSubstitution() on the
# parser object held in the global $wgCurParser and hands back its result.
function wfVariableSubstitution( $matches )
{
    return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}