fixed section anchors
[mediawiki.git] / includes / Parser.php
blob186a3b21674d7096ccd76e9559a463ead816285c
1 <?php
3 include_once('Tokenizer.php');
5 # PHP Parser
6 #
7 # Processes wiki markup
9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
12 # Globals used:
13 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
17 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
18 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
19 # $wgLocaltimezone
21 # * only within ParserOptions
24 #----------------------------------------
25 # Variable substitution O(N^2) attack
26 #-----------------------------------------
27 # Without countermeasures, it would be possible to attack the parser by saving a page
28 # filled with a large number of inclusions of large pages. The size of the generated
29 # page would be proportional to the square of the input size. Hence, we limit the number
30 # of inclusions of any given page, thus bringing any attack back to O(N).
# Upper bound on how often any single page may be included during one parse;
# this is the countermeasure to the O(N^2) attack described above.
# NOTE(review): replaceVariables() also uses this constant as its pass limit -- confirm that is intended.
32 define( "MAX_INCLUDE_REPEAT", 5 );
34 # Recursion depth of variable/inclusion evaluation
35 define( "MAX_INCLUDE_PASSES", 3 );
37 # Allowed values for $mOutputType
# parse() selects OT_HTML; OT_WIKI is presumably selected by preSaveTransform() (not visible here -- confirm).
38 define( "OT_HTML", 1 );
39 define( "OT_WIKI", 2 );
41 class Parser
43 # Cleared with clearState():
# mOutput: the ParserOutput being built. mAutonumber: counter for numbered external links.
# mLastSection / mDTopen: block-level state shared by doBlockLevels() and the list helpers.
# mStripState: placeholder table filled by strip() and consumed by unstrip().
44 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
# mVariables: variable table built on demand by initialiseVariables().
# mIncludeCount: include-throttling counters, keyed per page (see merge()).
45 var $mVariables, $mIncludeCount;
47 # Temporary:
# Assigned at the start of parse(): the options object, the title being parsed and the OT_* output mode.
48 var $mOptions, $mTitle, $mOutputType;
# Constructor: a new Parser always begins with freshly reset per-parse state.
function Parser()
{
	$this->clearState();
}
# Resets every member that is scoped to a single parse run:
# the output object, the link autonumber counter, the block-level state,
# the strip placeholder table, the variable table and the include counters.
function clearState()
{
	$this->mIncludeCount = array();
	$this->mVariables = false;
	$this->mStripState = false;
	$this->mDTopen = false;
	$this->mLastSection = "";
	$this->mAutonumber = 0;
	$this->mOutput = new ParserOutput;
}
66 # First pass--just handle <nowiki> sections, pass the rest off
67 # to doWikiPass2() which does all the real work.
69 # Returns a ParserOutput
# Main HTML entry point.
#
# $text:       wiki markup to convert
# $title:      Title object of the page being parsed (stored by reference)
# $options:    options object consulted by the later passes
# $linestart:  passed through to doWikiPass2(); whether $text starts at the beginning of a line
# $clearState: when true, all per-parse state is reset before parsing
#
# The text is stripped of <nowiki>/<math>/<pre> sections, run through
# doWikiPass2(), and the stripped sections are put back afterwards.
# Returns $this->mOutput with the generated text set on it.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	# The strip state lives in $this->mStripState; no local copy is needed.
	$text = $this->strip( $text, $this->mStripState );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
# Returns a string of lowercase hex digits built from two mt_rand() draws;
# strip() uses it to build placeholder markers.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
99 # Strips <nowiki>, <pre> and <math>
100 # Returns the text, and fills an array with data needed in unstrip()
# $text:  wiki markup
# $state: (out) receives, for each of nowiki/math/pre, the list of removed
#         sections ('*list'), their count ('*secs') and the random marker
#         prefix ('*unq') that unstrip() needs to put them back.
#
# Every removed section is replaced by its marker followed by the section
# number as eight hex digits. <math> is only handled when TeX is enabled
# in the options.
function strip( $text, &$state )
{
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);
	$render = ($this->mOutputType == OT_HTML);

	# Replace any instances of the placeholders
	foreach ( array( 'nwunq', 'mathunq', 'preunq' ) as $key ) {
		$text = str_replace( $state[$key], wfHtmlEscapeFirst( $state[$key] ), $text );
	}

	$text = $this->stripTagSections( $text, "nowiki", "nw", $state, $render );
	if( $this->mOptions->getUseTeX() ) {
		$text = $this->stripTagSections( $text, "math", "math", $state, $render );
	}
	return $this->stripTagSections( $text, "pre", "pre", $state, $render );
}

# Removes every <$tag>...</$tag> section from $text, storing its replacement
# in $state under the "{$prefix}list"/"{$prefix}secs" keys and leaving the
# "{$prefix}unq" marker plus section number in its place.
# With $render true the stored replacement is the HTML rendering of the
# section; otherwise it is the section re-wrapped in its original tag.
/* private */ function stripTagSections( $text, $tag, $prefix, &$state, $render )
{
	$openRegex = "/<\\s*{$tag}\\s*>/i";
	$closeRegex = "/<\\/\\s*{$tag}\\s*>/i";
	$listKey = $prefix . "list";
	$countKey = $prefix . "secs";
	$markerKey = $prefix . "unq";

	$stripped = "";
	while ( "" != $text ) {
		$p = preg_split( $openRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No (further) opening tag, or nothing after it
			break;
		}
		$q = preg_split( $closeRegex, $p[1], 2 );
		$content = $q[0];
		++$state[$countKey];

		if ( !$render ) {
			$replacement = "<{$tag}>{$content}</{$tag}>";
		} else if ( "math" == $tag ) {
			$replacement = renderMath( $content );
		} else if ( "pre" == $tag ) {
			$replacement = "<pre>". wfEscapeHTMLTagsOnly( $content ). "</pre>\n";
		} else {
			$replacement = wfEscapeHTMLTagsOnly( $content );
		}
		$state[$listKey][$state[$countKey]] = $replacement;

		$stripped .= $state[$markerKey] . sprintf("%08X", $state[$countKey]);
		# An unclosed tag swallows the rest of the text; there is no remainder then
		$text = isset( $q[1] ) ? $q[1] : "";
	}
	return $stripped;
}
# Reverses strip(): every placeholder (marker prefix + eight-digit hex
# section number) is replaced by the stored section text.
# Sections are restored in the order pre, math, nowiki.
#
# $text:  text containing placeholders
# $state: the state array filled by strip()
function unstrip( $text, &$state )
{
	foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
		for ( $n = 1; $n <= $state[$kind . 'secs']; ++$n ) {
			$placeholder = $state[$kind . 'unq'] . sprintf("%08X", $n);
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
		}
	}
	return $text;
}
# Builds the HTML listing appended to category pages.
#
# Returns "" unless category magic is enabled in the options and the text
# of the current title starts with the localised category word followed by
# a colon. Otherwise returns a block with the linking pages split into
# "subcategories" (pages that are themselves categories) and ordinary
# articles, each list sorted and comma separated.
function categoryMagic ()
{
	global $wgLang ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
	$id = $this->mTitle->getArticleID() ;
	$cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
	$ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	# "clear" is the BR attribute that ends floating; "break" is not an attribute
	$r = "<br clear='all'/>\n" ;

	$articles = array() ;
	$children = array() ;

	# The skin comes from the parser options, like everywhere else in this
	# class; $wgUser must not be used here (see the list of globals at the
	# top of the file).
	$sk =& $this->mOptions->getSkin() ;

	# NOTE(review): $doesexist is hard-wired to false, so only the
	# brokenlinks query ever runs -- confirm whether it should depend on $id.
	$id = intval( $id ) ;
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		# Rebuild the prefixed title text of the linking page
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		# The title text may consist of the category word alone, without a name part
		$catName = isset ( $ti[1] ) ? $ti[1] : "" ;
		$h = wfMsg( "category_header", $catName );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
# Returns the whitelist of attribute names that may appear on HTML tags in
# wiki text -- no scripting, etc. The list is grouped by the elements the
# attributes mainly belong to.
function getHTMLattrs ()
{
	$general = array( "title", "align", "lang", "dir", "width", "height" ) ;
	$rules   = array( "bgcolor", "clear" /* BR */, "noshade" /* HR */ ) ;
	$quotes  = array( "cite" /* BLOCKQUOTE, Q */ ) ;
	$font    = array( "size", "face", "color" ) ;
	# For various lists, mostly deprecated but safe
	$lists   = array( "type", "start", "value", "compact" ) ;
	$tables  = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	) ;
	# For CSS
	$css     = array( "id", "class", "name", "style" ) ;

	return array_merge ( $general, $rules, $quotes, $font, $lists, $tables, $css ) ;
}
# Sanitises the attribute part of an HTML or wiki-table tag.
#
# $t: the raw attribute text (everything between the tag name and ">")
# Returns the text with every attribute not listed by getHTMLattrs()
# removed, trimmed; returns "" if a style attribute contains something
# that looks like script or an external reference.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier: with /e the attribute
	# value taken from wiki text was evaluated inside a double-quoted PHP
	# string, so "${...}" in a value was interpreted as PHP.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, "fixTagAttributesCallback" ),
		$t);

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() handler for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (if present) its value including any quotes.
# Returns the attribute unchanged when its name is whitelisted, "" otherwise.
/* private */ function fixTagAttributesCallback ( $m )
{
	if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) {
		return "" ;
	}
	$value = isset ( $m[3] ) ? $m[3] : "" ;
	if ( $value !== "" ) {
		return $m[1] . "=" . $value ;
	}
	return $m[1] ;
}
# Converts wiki table markup into HTML table tags, one line at a time.
#
# Line prefixes handled: "{|" opens a table, "|}" closes it, "|-" starts a
# new row, "|+" is a caption, "|" a data cell and "!" a header cell.
# "||" (or "!!" on header lines) separates several cells on one line.
# Tables may be nested: each of the four stacks below holds one entry per
# currently open table. Tables still open at the end of the text are closed.
#
# $t: text to convert
# Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# The attributes start right after the two-character marker;
			# fixTagAttributes() trims any separating whitespace itself.
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The row itself is only opened when its first cell arrives;
			# remember its attributes until then.
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Cells need an open row; captions do not
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell of this table, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				# "attributes|content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that opened it (TD, TH or CAPTION)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		array_pop ( $ltr ) ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	return $t ;
}
413 # Well, OK, it's actually about 14 passes. But since all the
414 # hard lifting is done inside PHP's regex code, it probably
415 # wouldn't speed things up much to add a real parser.
# Runs the individual conversion passes over already-stripped text, in a
# fixed order, and returns the resulting HTML.
#
# $text:      stripped wiki markup
# $linestart: handed to doBlockLevels()
function doWikiPass2( $text, $linestart )
{
	$fname = "Parser::doWikiPass2";
	wfProfileIn( $fname );

	# Tag cleanup and variable expansion come first
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text );
	$text = str_replace ( "<HR>", "<hr/>", $text );

	# Headings and block-level structure
	$text = $this->doHeadings( $text );
	$text = $this->doBlockLevels( $text, $linestart );

	# Optional date reformatting
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	# Links (external before internal), then tables
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->formatHeadings( $text );

	# Skin-specific post-processing, then the category listing
	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );
	$text .= $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
# Converts "== Heading ==" style lines into <hN> elements.
# Levels are tried from six "=" down to one so that longer runs win.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
461 # Note: we have to do external links before the internal ones,
462 # and otherwise take great care in the order of things here, so
463 # that we don't end up interpreting some URLs twice.
# Converts external links for every supported protocol, one protocol at a
# time. The flag passed along is the $autonumber argument of
# subReplaceExternalLinks(); it is set for http and https only.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => autonumber flag, in processing order
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
# Converts the external links of one protocol.
#
# $s:          text to process
# $protocol:   protocol name without the colon, e.g. "http"
# $autonumber: when true, bracketed links without a description are shown
#              as "[n]" using the running mAutonumber counter, and (if the
#              options allow external images) bare image URLs become images
#
# Two kinds of links are handled: bare URLs in running text, and bracketed
# links of the form [url] or [url description].
# Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the protocol name while bare URLs are being converted,
	# so that generated markup is not matched again; swapped back below.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
	  "((?i){$images})([^{$uc}]|$)/";

	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
		  "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
	}
	$s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
	  $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
	  "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
	  "</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split at every "[protocol:"; the first piece is
	# plain text (the padding space keeps it present even at offset 0).
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $a as $line ) {
		if ( preg_match( $e1, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		} else if ( preg_match( $e2, $line, $m ) ) {
			# [url description]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed bracketed link; put the text back untouched
			$s .= "[{$protocol}:" . $line;
			continue;
		}
		# The pattern is delimited by "!", so that is the delimiter
		# preg_quote() has to escape in the link text.
		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "!" ) . "/?$!", $link ) ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

	}
	return $s;
}
# Handles a ''' token: opens or closes <strong>.
#
# $state: (in/out) "strong" and "em" hold the token position at which the
#         element was opened, or FALSE when it is closed
# $token: the current token; its "pos" entry is recorded on opening
# Returns the HTML to emit. If <em> was opened after the <strong> being
# closed, <em> is closed and reopened around the </strong>.
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# ''' lala ''lala '''
	$emIsInner = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $emIsInner ? "</em></strong><em>" : "</strong>";
}
# Handles a '' token: opens or closes <em>.
#
# $state: (in/out) "strong" and "em" hold the token position at which the
#         element was opened, or FALSE when it is closed
# $token: the current token; its "pos" entry is recorded on opening
# Returns the HTML to emit. If <strong> was opened after the <em> being
# closed, <strong> is closed and reopened around the </em>.
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# ''lala'''lala'' ....'''
	$strongIsInner = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $strongIsInner ? "</strong></em><strong>" : "</em>";
}
# Handles a ''''' token, which toggles <strong> and <em> together.
#
# $state: (in/out) "strong" and "em" hold the token position at which the
#         element was opened, or FALSE when it is closed
# $token: the current token; its "pos" entry is recorded on opening
# Returns the HTML to emit:
#   both open    -> both are closed, innermost first
#   only em open -> em is closed, strong opened
#   only strong  -> strong is closed, em opened
#   neither open -> both are opened
/* private */ function handle5Quotes( &$state, $token )
{
	# Exactly one branch assigns the result; it was previously appended to
	# a variable that had never been initialised.
	$s = "";
	if ( $state["em"] && $state["strong"] ) {
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
# Token-driven pass that converts [[internal links]], quote markup
# ('' ''' '''''), "----" rules and the RFC / ISBN magic words.
#
# While a link is open, everything produced is pushed onto $tokenStack
# instead of being written out; when the closing ]] arrives the pieces are
# glued together and handed to handleInternalLink(). Links still open at
# the end of the text are written out literally.
#
# $str: text to process
# Returns the converted text.
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s="";
	$state["em"] = FALSE;
	$state["strong"] = FALSE;
	$tagIsOpen = FALSE;

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;
			case "[[[":
				# Opened with 3 [ ; the token type on the stack records
				# that for the matching close tag.
			case "[[":
				# link opening tag.
				# FIXME : Treat orphaned open tags (stack not empty when text is over)
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt="";
				break;

			case "]]]":
			case "]]":
				# link close tag.
				# get text from stack, glue it together, and call the code to handle a
				# link

				if ( count( $tokenStack ) == 0 )
				{
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					$linkText = "";
					$lastToken = array_pop( $tokenStack );
					while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
					{
						# Compare by length: empty() would also discard the text "0"
						if( isset( $lastToken["text"] ) && strlen( $lastToken["text"] ) > 0 ) {
							$linkText = $lastToken["text"] . $linkText;
						}
						$lastToken = array_pop( $tokenStack );
					}

					$txt = $linkText ."]]";

					if( isset( $lastToken["text"] ) ) {
						$prefix = $lastToken["text"];
					} else {
						$prefix = "";
					}
					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" )
					{
						# Preview just looks at it. Now we have to fetch it.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $txt, $prefix );

					# did the tag start with 3 [ ?
					# (Taken from the opening token itself; a flag set while
					# reading the opening token does not survive until here.)
					if ( $lastToken["type"] == "[[[" ) {
						# show the first as text
						$txt = "[".$txt;
					}

				}
				$tagIsOpen = (count( $tokenStack ) != 0);
				break;
			case "----":
				$txt = "\n<hr/>\n";
				break;
			case "'''":
				# This and the three next ones handle quotes
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$txt="";
				break;
			case "RFC ":
				if ( $tagIsOpen ) {
					$txt = "RFC ";
				} else {
					$txt = $this->doMagicRFC( $tokenizer );
				}
				break;
			case "ISBN ":
				if ( $tagIsOpen ) {
					$txt = "ISBN ";
				} else {
					$txt = $this->doMagicISBN( $tokenizer );
				}
				break;
			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}
		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while
	if ( count( $tokenStack ) != 0 )
	{
		# still objects on stack. opened [[ tag without closing ]] tag.
		$txt = "";
		while ( $lastToken = array_pop( $tokenStack ) )
		{
			if ( $lastToken["type"] == "text" )
			{
				$txt = $lastToken["text"] . $txt;
			} else {
				$txt = $lastToken["type"] . $txt;
			}
		}
		$s .= $txt;
	}
	return $s;
}
# Converts the inside of one [[...]] construct into HTML.
#
# $line:   everything after the opening "[[": target, optional "|text",
#          the closing "]]" and any text following it
# $prefix: text that belongs directly in front of the link
# Returns the HTML for prefix, link and trailing text. Input that does not
# have the form of a link is returned literally, with "[[" put back.
#
# This wrapper only brackets the work with the profiling calls, so that
# every return path of the worker ends the profiling section.
/* private */ function handleInternalLink( $line, $prefix )
{
	# NOTE(review): the profiling key still says "replaceInternalLinks";
	# it is kept as it was.
	$fname = "Parser::replaceInternalLinks" ;
	wfProfileIn( $fname );
	$s = $this->handleInternalLinkReal( $line, $prefix, $fname );
	wfProfileOut( $fname );
	return $s;
}

# Worker for handleInternalLink(); same parameters, plus the profiling key.
/* private */ function handleInternalLinkReal( $line, $prefix, $fname )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages;

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );
	$s = "";

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		# A namespace without an entry does not allow subpages
		if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		# Language links are collected in the output, not shown inline
		if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			$s .= $prefix . $trail;
			return $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			return $s;
		}
	}
	# A link to the page itself is shown in bold instead, unless it has an
	# anchor. strpos() returns 0 for a leading "#", hence the strict test.
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
	  ( strpos( $link, "#" ) === FALSE ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		return $s;
	}

	# Category feature
	$catns = strtoupper ( $nt->getDBkey () ) ;
	$catns = explode ( ":" , $catns ) ;
	if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
	else $catns = "" ;
	if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
		# Category links are collected in the output, not shown inline
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		$this->mOutput->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		return $s ;
	}
	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	return $s;
}
889 # Some functions here used by doBlockLevels()
# Closes the section recorded in mLastSection, if any, and forgets it.
# Returns the closing tag (or nothing) followed by a newline.
/* private */ function closeParagraph()
{
	$section = $this->mLastSection;
	$this->mLastSection = "";

	if ( 0 == strcmp( "", $section ) ) {
		return "\n";
	}
	return "</" . $section . ">" . "\n";
}
900 # getCommon() returns the length of the longest common prefix
901 # of both arguments, i.e. the number of leading characters they share.
# $st1, $st2: the two strings to compare
# Returns the number of leading characters the two strings have in common
# (0 if either is empty or the first characters differ).
/* private */ function getCommon( $st1, $st2 )
{
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets: the curly-brace form is gone from current PHP
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
914 # These next three functions open, continue, and close the list
915 # element appropriate to the prefix character passed into them.
# Opens the list that belongs to a prefix character, after closing any
# open paragraph: "*" -> <ul>, "#" -> <ol>, ":" / ";" -> <dl>.
# For ";" the definition term is marked as open in mDTopen.
# For any other character only an error comment is returned.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			return $result . "<ul><li>";
		case "#":
			return $result . "<ol><li>";
		case ":":
			return $result . "<dl><dd>";
		case ";":
			$this->mDTopen = true;
			return $result . "<dl><dt>";
	}
	return "<!-- ERR 1 -->";
}
# Moves on to the next item of the list that belongs to a prefix character.
# For definition lists the element that is currently open (term or
# definition, tracked in mDTopen) is closed and the new one opened.
# For any other character an error comment is returned.
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case "*":
		case "#":
			return "</li><li>";
		case ":":
		case ";":
			$close = $this->mDTopen ? "</dt>" : "</dd>";
			$this->mDTopen = ( ";" == $char );
			return $close . ( $this->mDTopen ? "<dt>" : "<dd>" );
	}
	return "<!-- ERR 2 -->";
}
# Closes the list that belongs to a prefix character and returns the
# closing tags followed by a newline. Only "*", "#" and ":" are known;
# any other character yields an error comment (without newline).
# For ":" an open definition term (mDTopen) is closed instead of a
# definition.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$text = "</li></ul>";
			break;
		case "#":
			$text = "</li></ol>";
			break;
		case ":":
			if ( !$this->mDTopen ) {
				$text = "</dd></dl>";
			} else {
				$this->mDTopen = false;
				$text = "</dt></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $text."\n";
}
# Builds the block-level structure of the text, line by line: lists from
# lines starting with * # : ; and paragraphs / preformatted sections from
# everything else.
#
# $text:      text to process
# $linestart: when false, the first line is copied through untouched
# Returns the converted text.
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.

	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;
	# Defined up front so that the list-closing loop after the foreach is
	# well-defined even when there is no line left to process.
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );
		$npl = strspn( $t, "*#:;" );
		$pref = substr( $t, 0, $npl );
		# ";" and ":" belong to the same kind of list
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list nesting as the previous line: just the next item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on a single line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Nesting changed: close what no longer applies, open what is new
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( $lastPref[$opl-1] );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( $pref[$cpl-1] );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
			  "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<p)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				# substr() instead of an offset: the line may be empty
				if ( " " === substr( $t, 0, 1 ) ) {
					$newSection = "pre";
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
				  $newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
			  preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|<\\/p)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists still open after the last line
	while ( $npl ) {
		$text .= $this->closeList( $pref2[$npl-1] );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
# Returns the current value of the variable identified by one of the
# MAG_* ids handled below, or NULL for any other id.
# Date parts come from date(); names and the time of day are formatted by
# the language object.
function getVariableValue( $index ) {
	global $wgLang;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") counts from 0; the language object is given that value plus one
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
	}
	return $value;
}
# (Re)builds $this->mVariables: for every id in $wgVariableIDs the magic
# word object adds itself, together with its current value, to the table.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $id ) {
		$value = $this->getVariableValue( $id );
		$magicWord =& MagicWord::get( $id );
		$magicWord->addToArray( $this->mVariables, $value );
	}
}
# Expands {{...}} constructs by running wfBraceSubstitution() over the
# text, repeating until a pass changes nothing or the pass limit is hit.
#
# $text: text to process
# Returns the expanded text.
/* private */ function replaceVariables( $text )
{
	global $wgCurParser;

	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	$bail = false;
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();
	$regex = "/{{([$titleChars]*?)}}/s";

	# "Recursive" variable expansion: run it through a couple of passes
	# NOTE(review): the limit used is MAX_INCLUDE_REPEAT, although
	# MAX_INCLUDE_PASSES is described as the recursion depth -- confirm.
	for ( $i=0; $i<MAX_INCLUDE_REPEAT && !$bail; $i++ ) {
		$oldText = $text;

		# It's impossible to rebind a global in PHP
		# Instead, we run the substitution on a copy, then merge the changed fields back in
		$wgCurParser = $this->fork();

		$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
		if ( $oldText == $text ) {
			$bail = true;
		}
		$this->merge( $wgCurParser );
	}

	# Balance the wfProfileIn() above
	wfProfileOut( $fname );
	return $text;
}
# Hands out a working copy of this parser whose mOutput has been replaced by
# a brand-new ParserOutput; every other member is carried over unchanged.
# The copy is meant to be folded back into its parent with merge().
# NOTE(review): this relies on plain assignment producing an independent
# copy of the object -- confirm for the PHP version in use.
function fork()
{
	$child = $this;
	$child->mOutput = new ParserOutput;
	return $child;
}
# Merges a copy split off with fork() back into this parser.
#
# The copy's ParserOutput is merged into ours, then the include throttling
# counters are brought up to date.
#
# fork() replaces only mOutput, so the copy's mIncludeCount starts out with
# this parser's counters and grows from there. Each of its entries is
# therefore already a running total; adding it on top of our own value would
# count every earlier inclusion twice. We take the larger of the two values
# instead.
function merge( &$copy )
{
	$this->mOutput->merge( $copy->mOutput );

	# Merge include throttling arrays
	foreach ( $copy->mIncludeCount as $dbk => $count ) {
		if ( !array_key_exists( $dbk, $this->mIncludeCount )
			|| $count > $this->mIncludeCount[$dbk] ) {
			$this->mIncludeCount[$dbk] = $count;
		}
	}
}
# Callback body for one {{...}} match.
#
# $matches[0] is the whole construct including braces, $matches[1] the text
# between them. Returns the replacement text, or $matches[0] unchanged when
# nothing applies. Steps, each skipped once a replacement has been found:
#   1. SUBST prefix handling, which depends on the output type
#   2. MSGNW: / MSG: / INT: prefixes
#   3. built-in variables from $this->mVariables
#   4. inclusion of a page from the database, throttled per page by
#      MAX_INCLUDE_REPEAT
function braceSubstitution( $matches )
{
	global $wgLinkCache;
	$found = false;
	$nowiki = false;

	$text = $matches[1];

	# SUBST
	$mwSubst =& MagicWord::get( MAG_SUBST );
	if ( $mwSubst->matchStartAndRemove( $text ) ) {
		if ( $this->mOutputType == OT_HTML ) {
			# Invalid SUBST not replaced at PST time
			# Return without further processing
			$text = $matches[0];
			$found = true;
		}
	} elseif ( $this->mOutputType == OT_WIKI ) {
		# SUBST not found in PST pass, do nothing
		$text = $matches[0];
		$found = true;
	}

	# Various prefixes
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $text );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $text ) ) {
			$text = wfMsg( $text );
			$found = true;
		}
	}

	# Check for a match against internal variables
	if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
		$text = $this->mVariables[$text];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $text, NS_TEMPLATE );

		# Guard on the title object itself: it is the value dereferenced
		# below, whereas $text is always set at this point
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
				$this->mIncludeCount[$dbk] = 0;
			}
			if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;

					# Escaping and link table handling
					# Not required for preSaveTransform()
					if ( $this->mOutputType == OT_HTML ) {
						if ( $nowiki ) {
							$text = wfEscapeWikiText( $text );
						} else {
							$text = $this->removeHTMLtags( $text );
						}
						$wgLinkCache->suspend();
						$text = $this->replaceInternalLinks( $text );
						$wgLinkCache->resume();
						$wgLinkCache->addLinkObj( $title );
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	if ( !$found ) {
		return $matches[0];
	}
	return $text;
}
# Cleans up HTML, removes dangerous tags and attributes
#
# The text is split at every "<". A fragment that parses as a tag from the
# whitelist and fits the current nesting state is re-emitted with its
# attributes passed through fixTagAttributes(); any other fragment is
# written out with its "<" and ">" escaped. Tags still open at the end are
# closed automatically.
/* private */ function removeHTMLtags( $text )
{
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	# Tags that must be closed
	$htmlpairs = array(
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp", "p"
	);
	$htmlsingle = array(
		"br", "hr", "li", "dt", "dd", "hr/"
	);
	# Tags that can be nested--??
	$htmlnest = array(
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	# Can only appear inside table
	$tabletags = array(
		"td", "th", "tr"
	);

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	$htmlattrs = $this->getHTMLattrs();

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$fragments = explode( "<", $text );
	$text = array_shift( $fragments );
	$tagstack = array();
	$tablestack = array();

	foreach ( $fragments as $fragment ) {
		# A fragment that is not a tag leaves the match array short; notices
		# and warnings are muted while it is unpacked
		$savedReporting = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
		preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
			$fragment, $pieces );
		list( $whole, $slash, $t, $params, $brace, $rest ) = $pieces;
		error_reporting( $savedReporting );

		$t = strtolower( $t );
		$rejected = false;

		if ( in_array( $t, $htmlelements ) ) {
			# Check our stack
			if ( $slash ) {
				# Closing a tag: anything that is not a "single" tag has to
				# match the tag on top of the stack
				if ( !in_array( $t, $htmlsingle ) ) {
					$top = array_pop( $tagstack );
					if ( $top != $t ) {
						# Mismatch, put the popped entry back
						array_push( $tagstack, $top );
						$rejected = true;
					}
				}
				if ( !$rejected ) {
					if ( $t == "table" ) {
						# Leaving a table restores the stack saved on entry
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				}
			} else {
				# Keep track for later
				$strayTableTag = in_array( $t, $tabletags )
					&& !in_array( "table", $tagstack );
				$illegalNesting = in_array( $t, $tagstack )
					&& !in_array( $t, $htmlnest );

				if ( $strayTableTag || $illegalNesting ) {
					$rejected = true;
				} else if ( !in_array( $t, $htmlsingle ) ) {
					if ( $t == "table" ) {
						# Entering a table starts a fresh stack
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes($params);
			}

			if ( !$rejected ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
				continue;
			}
		}

		# Not an acceptable tag: emit the fragment as escaped text
		$text .= "&lt;" . str_replace( ">", "&gt;", $fragment );
	}

	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) {
			$tagstack = array_pop( $tablestack );
		}
	}

	wfProfileOut( $fname );
	return $text;
}
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Takes the HTML in $text and returns it with the rebuilt headlines (and,
 * when enabled and there are more than three headlines, the table of
 * contents after the first block).
 *
 * All counters and accumulators are initialised explicitly before use so
 * that the function does not read undefined variables or array slots.
 */
/* private */ function formatHeadings( $text )
{
	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toclines = 0;
	$toc = "";
	$full = "";
	$head = array();
	$sublevelCount = array();

	# State carried from one headline to the next
	$level = 0;
	$prevlevel = 0;
	$dot = 0;
	$numbering = "";
	$refers = array();		# canonized headline => number of occurrences so far
	$refcount = array();	# headline index => occurrence number of its anchor

	foreach( $matches[3] as $headline ) {
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;

		if( $doNumberHeadings || $doShowToc ) {
			for( $i = 1; $i <= $level; $i++ ) {
				# levels that were never seen contribute nothing
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = Parser::unstrip( $headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );

		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace( '"', "", $canonized_headline );
		$canonized_headline = str_replace( " ", "_", trim( $canonized_headline) );

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount] = $refers[$canonized_headline];

		# Prepend the number to the heading text
		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= "_" . $refcount[$headlineCount];
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		# Start this headline's replacement text from a defined empty string
		$head[$headlineCount] = "";
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# the headline might have a link
		if( preg_match( "/(.*)<a(.*)/", $headline, $headlinematches ) ) {
			# if so give an anchor name to the already existent link
			$headline = $headlinematches[1]
				. "<a name=\"$anchor\" " . $headlinematches[2];
		} else {
			# else create an anchor link for the headline
			$headline = "<a name=\"$anchor\">$headline</a>";
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		# Add the edit section link
		if( $rightClickHack ) {
			$head[$headlineCount] = $sk->editSectionScript($headlineCount+1,$head[$headlineCount]);
		}

		$numbering = "";
		$headlineCount++;
		$dot = 0;
	}

	if( $doShowToc ) {
		$toclines = $headlineCount;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && $toclines>3 && !$i) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>".$full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
# Handles the text following an "ISBN" magic token.
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed: leading blanks are set aside, then the longest run of
# digits, dashes and capital letters is taken as the ISBN. With a non-empty
# ISBN the result is a link to Special:Booksources followed by the rest of
# the token text; otherwise the original text is returned behind "ISBN ".
# If the next token is not a text token, just "ISBN " is returned.
#
# The runs are measured with strspn() rather than by peeling characters off
# one at a time, so nothing is indexed past the end of an exhausted string.
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	# NOTE(review): this loop assumes the tokenizer eventually yields a token
	# with a non-empty type -- confirm its end-of-input behaviour.
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}

	if ( $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string) $token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Leading blanks (the casts turn a FALSE from substr() into "")
	$lead = strspn( $x, " " );
	$blank = (string) substr( $x, 0, $lead );
	$x = (string) substr( $x, $lead );

	# Longest prefix made of valid ISBN characters
	$len = strspn( $x, $valid );
	$isbn = (string) substr( $x, 0, $len );
	$x = (string) substr( $x, $len );

	# $isbn cannot contain blanks, so only the dashes need removing
	$num = str_replace( "-", "", $isbn );

	if ( "" == $num ) {
		$text = "ISBN $blank$x";
	} else {
		$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
		$text = "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
			"\" class=\"internal\">ISBN $isbn</a>";
		$text .= $x;
	}
	return $text;
}
# Handles the text following an "RFC" magic token.
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed: leading blanks are set aside, then the leading run of
# digits is taken as the RFC number. With a number present the result is an
# external link built from the "rfcurl" message (its "$1" replaced by the
# number) followed by the rest of the token text; otherwise the original
# text is returned behind "RFC ". If the next token is not a text token,
# just "RFC " is returned.
#
# The runs are measured with strspn() rather than by peeling characters off
# one at a time, so nothing is indexed past the end of an exhausted string.
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	# NOTE(review): this loop assumes the tokenizer eventually yields a token
	# with a non-empty type -- confirm its end-of-input behaviour.
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}

	if ( $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = (string) $token["text"];
	$valid = "0123456789";

	# Leading blanks (the casts turn a FALSE from substr() into "")
	$lead = strspn( $x, " " );
	$blank = (string) substr( $x, 0, $lead );
	$x = (string) substr( $x, $lead );

	# Leading digits form the RFC number
	$len = strspn( $x, $valid );
	$rfc = (string) substr( $x, 0, $len );
	$x = (string) substr( $x, $len );

	if ( "" == $rfc ) {
		# Plain assignment: there is no earlier $text to append to
		$text = "RFC $blank$x";
	} else {
		$url = wfMsg( "rfcurl" );
		$url = str_replace( "$1", $rfc, $url);
		$sk =& $this->mOptions->getSkin();
		$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
		$text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
	}
	return $text;
}
# Save-time entry point: transforms wiki markup and returns the result.
#
# $options and $title are stored on the parser and the output type is set
# to OT_WIKI; unless $clearState is false the parser state is reset first.
# The text then has CRLF line ends turned into LF and is run through
# strip(), pstPass2() (which receives $user) and unstrip(), sharing one
# local strip-state variable between strip() and unstrip().
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle = $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	$stripMarkers = false;
	$normalised = str_replace("\r\n", "\n", $text);
	$shielded = $this->strip( $normalised, $stripMarkers, false );
	$transformed = $this->pstPass2( $shielded, $user );
	return $this->unstrip( $transformed, $stripMarkers );
}
# Second stage of the pre-save transform; returns the altered wiki markup.
#
# In order, the statements below:
#   - run replaceVariables() over the text
#   - replace ~~~~~ with a timestamp, ~~~~ with a user link plus timestamp
#     and ~~~ with a user link alone
#   - complete "context" links such as [[|name]] and [[name (context)|]]
#   - trim trailing whitespace and remove the MAG_END marker
#
# $text is the markup to transform; $user supplies getName() and the
# "nickname" option used in signatures.
#
# NOTE(review): this listing still carries leftover line-number prefixes and
# has lost the lines that held only braces or other punctuation -- restore
# it from a pristine copy before editing the code itself.
1652 /* private */ function pstPass2( $text, &$user )
1654 global $wgLang, $wgLocaltimezone, $wgCurParser;
1656 # Variable replacement
1657 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1658 $text = $this->replaceVariables( $text );
1660 # Signatures
# $n is the user name, $k the nickname (falling back to the name when empty)
1662 $n = $user->getName();
1663 $k = $user->getOption( "nickname" );
1664 if ( "" == $k ) { $k = $n; }
# The TZ environment variable is switched to $wgLocaltimezone while the
# timestamp is produced and put back afterwards
1665 if(isset($wgLocaltimezone)) {
1666 $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1668 /* Note: this is an ugly timezone hack for the European wikis */
1669 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1670 " (" . date( "T" ) . ")";
1671 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
# Longest tilde run first, so shorter patterns only see what is left over.
# NOTE(review): $n, $k and $d are used inside preg_replace() replacement
# strings, where "$1" or "\1" would be read as a backreference -- confirm
# user-supplied nicknames cannot contain such sequences.
1673 $text = preg_replace( "/~~~~~/", $d, $text );
1674 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1675 Namespace::getUser() ) . ":$n|$k]] $d", $text );
1676 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1677 Namespace::getUser() ) . ":$n|$k]]", $text );
1679 # Context links: [[|name]] and [[name (context)|]]
# $tc / $np are character classes for title text with / without parentheses
1681 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1682 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1683 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1684 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1686 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
1687 $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
1688 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
1689 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1690 # [[ns:page (cont)|]]
# The context is the parenthesised suffix of the current page title, if any
1691 $context = "";
1692 $t = $this->mTitle->getText();
1693 if ( preg_match( $conpat, $t, $m ) ) {
1694 $context = $m[2];
1696 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1697 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1698 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1700 if ( "" == $context ) {
1701 $text = preg_replace( $p2, "[[\\1]]", $text );
1702 } else {
1703 $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
# NOTE(review): the four statements below sit between two gaps in the
# embedded line numbering; they may originally have been enclosed in a
# block comment whose delimiter lines were lost -- TODO confirm against a
# pristine copy before treating them as live code.
1707 $mw =& MagicWord::get( MAG_SUBST );
1708 $wgCurParser = $this->fork();
1709 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1710 $this->merge( $wgCurParser );
1713 # Trim trailing whitespace
1714 # MAG_END (__END__) tag allows for trailing
1715 # whitespace to be deliberately included
1716 $text = rtrim( $text );
1717 $mw =& MagicWord::get( MAG_END );
1718 $mw->matchAndRemove( $text );
1720 return $text;
# Result container for a parse: the produced text, the language and
# category links collected on the way, and a flag recording whether the
# page contained "old magic" (set by the parser when a built-in variable
# was substituted).
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each returns whatever wfSetVar() returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

	# Folds another ParserOutput into this one: $other's language links and
	# category links are appended to ours, and the old-magic flag becomes
	# true if it is set on either side. The text is left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from $other's category links, not from our
		# own language links
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}
}
# Bundle of settings that steer a parse. Values are either copied from
# site-wide globals or read from a user's preferences; see
# initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	# Returned by reference so that callers binding the result with =&
	# receive the stored skin itself
	function &getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	# Mutators; each returns whatever wfSetVar() / wfSetRef() returns
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a ParserOptions object initialised from $user.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already declares its parameter by reference,
		# so no call-time "&" is needed (or allowed by newer PHP versions)
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills in every option: the first five from the site-wide $wg* globals,
	# the rest from the user's skin and preferences. A fresh User object is
	# used when $userInput is empty.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}
}
# Regex callback used in Parser::replaceVariables.
# preg_replace_callback() needs a plain function name, so this forwards the
# match array to braceSubstitution() on the parser currently stored in the
# global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}