fixed a couple of minor bugs
[mediawiki.git] / includes / Parser.php
blob435b4061178998c58745e9229141e22290e5d5be
1 <?php
3 include_once('Tokenizer.php');
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
18 # * only within ParserOptions
20 class Parser
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
25 # Temporary:
26 var $mOptions, $mTitle;
# PHP 4 style constructor: every new parser starts from a clean state.
function Parser()
{
	$this->clearState();
}
# Resets all per-parse members (the ones listed under "Cleared with
# clearState()" in the class header) so the parser object can be reused.
function clearState()
{
	$this->mStripState  = false;
	$this->mDTopen      = false;
	$this->mLastSection = "";
	$this->mAutonumber  = 0;
	$this->mOutput      = new ParserOutput;
}
# Entry point: converts wikitext to HTML.
#
# First pass--<nowiki>, <pre> and <math> sections are swapped for
# placeholders by strip(); the rest is passed off to doWikiPass2(), which
# does all the real work; unstrip() then puts the protected sections back.
#
# $text       - wikitext to convert
# $title      - title object of the page being parsed (stored by reference
#               in $this->mTitle)
# $options    - options object for this run (stored in $this->mOptions)
# $linestart  - handed through unchanged to doWikiPass2()
# $clearState - when true, clearState() is called before anything else
#
# Returns a ParserOutput
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	# strip() fills $this->mStripState through its by-reference parameter;
	# the same state is what unstrip() needs to restore the sections.
	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
# Returns two random 31-bit integers, hex-encoded and glued together.
# strip() uses the result as a placeholder prefix.
/* static */ function getRandomString()
{
	$first  = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
74 # Strips <nowiki>, <pre> and <math>
75 # Returns the text, and fills an array with data needed in unstrip()
#
# Each protected section is replaced by a placeholder made of a random
# prefix (one prefix per section kind) followed by an 8-digit upper-case hex
# counter; the section content is stored in $state under the same counter.
#
# $text   - wikitext to process
# $state  - by-reference; overwritten with a fresh array on every call
# $render - true: store the HTML to output (escaped text, rendered math);
#           false: store the section together with its original tags
77 function strip( $text, &$state, $render = true )
79 $state = array(
80 'nwlist' => array(),
81 'nwsecs' => 0,
82 'nwunq' => Parser::getRandomString(),
83 'mathlist' => array(),
84 'mathsecs' => 0,
85 'mathunq' => Parser::getRandomString(),
86 'prelist' => array(),
87 'presecs' => 0,
88 'preunq' => Parser::getRandomString()
# Outputs of the three passes: nowiki -> $stripped, math -> $stripped2,
# pre -> $stripped3.
91 $stripped = "";
92 $stripped2 = "";
93 $stripped3 = "";
95 # Replace any instances of the placeholders
# (i.e. a placeholder prefix that already occurs in the input is rewritten
# with wfHtmlEscapeFirst() before any real placeholder is inserted)
96 $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
97 $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
98 $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );
# Pass 1: <nowiki>. Each round splits at the next opening tag, copies the
# text before it, then splits the remainder at the closing tag.
100 while ( "" != $text ) {
101 $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
102 $stripped .= $p[0];
# No opening tag left, or nothing follows it: done. In the second case the
# opening tag itself is not copied to the output.
103 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
104 $text = "";
105 } else {
106 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
107 ++$state['nwsecs'];
109 if ( $render ) {
110 $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
111 } else {
112 $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
115 $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
# NOTE(review): if the tag is never closed, preg_split() returns a single
# element, so $q[1] is unset here: everything after the opening tag becomes
# the section and the loop ends.
116 $text = $q[1];
# Pass 2: <math>, only when TeX support is enabled in the options;
# otherwise the nowiki output is carried over unchanged.
120 if( $this->mOptions->getUseTeX() ) {
121 while ( "" != $stripped ) {
122 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
123 $stripped2 .= $p[0];
124 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
125 $stripped = "";
126 } else {
127 $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
128 ++$state['mathsecs'];
130 if ( $render ) {
131 $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
132 } else {
133 $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
136 $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
137 $stripped = $q[1];
140 } else {
141 $stripped2 = $stripped;
# Pass 3: <pre>. Same scheme; in render mode the content is escaped and
# wrapped in <pre>...</pre> followed by a newline.
144 while ( "" != $stripped2 ) {
145 $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
146 $stripped3 .= $p[0];
147 if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
148 $stripped2 = "";
149 } else {
150 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
151 ++$state['presecs'];
153 if ( $render ) {
154 $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
155 } else {
156 $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
159 $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
160 $stripped2 = $q[1];
163 return $stripped3;
# Puts back the sections that strip() replaced with placeholders.
#
# $text  - text containing placeholders
# $state - the array filled in by strip()
#
# Placeholders are restored in the order pre, math, nowiki, i.e. the reverse
# of the order in which strip() created them.
function unstrip( $text, &$state )
{
	foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
		$total = $state[$kind . 'secs'];
		for ( $n = 1; $n <= $total; ++$n ) {
			$placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
		}
	}
	return $text;
}
# Builds the HTML that doWikiPass2() appends to a category page: a
# "subcategories" section and an articles section, each a comma-separated
# list of links produced by the skin.
#
# Returns "" when category magic is switched off in the options, or when the
# text of the current title does not start with "<category>:", where
# <category> is ucfirst( wfMsg( "category" ) ).
#
# NOTE(review): this uses $wgUser for the skin although the file header asks
# to keep $wgUser out of the parser -- confirm whether the skin from
# $this->mOptions should be used instead.
function categoryMagic ()
{
	global $wgLang , $wgUser ;
	# Always return a string so both "nothing to add" exits look the same.
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
	$id = $this->mTitle->getArticleID() ;
	$cat = ucfirst ( wfMsg ( "category" ) ) ;
	$ti = $this->mTitle->getText() ;
	$ti = explode ( ":" , $ti , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	# A title that is exactly the category word has no part after the colon.
	$catName = isset ( $ti[1] ) ? $ti[1] : "" ;
	# "clear" is the BR attribute (see getHTMLattrs()); "break" does not exist.
	$r = "<br clear=all>\n" ;

	$articles = array() ;
	$children = array() ;

	# $sk =& $this->mGetSkin();
	$sk =& $wgUser->getSkin() ;

	# Hard-wired to false, so only the brokenlinks query below is ever used.
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		if ( $doesexist ) {
			$t = $x->l_from ;
		} else {
			# Rebuild the prefixed title from namespace text and title.
			$t = $wgLang->getNsText ( $x->cur_namespace ) ;
			if ( $t != "" ) $t .= ":" ;
			$t .= $x->cur_title ;
		}

		# Titles of the form "<category>:xyz" are listed as subcategories,
		# everything else as articles.
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		$h = wfMsg( "category_header", $catName );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
# Returns the whitelist of attribute names that fixTagAttributes() lets
# through on HTML tags. Names are lower-case; the list contains no scripting
# attributes.
function getHTMLattrs ()
{
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", /* BR */ "noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
		/* FONT */ "type", "start", "value", "compact",
		/* For various lists, mostly deprecated but safe */
		"summary", "border", "frame", "rules", # "width" already listed above
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan", /* Tables */
		"id", "class", "name", "style" /* For CSS */
	);
	return $htmlattrs ;
}
# Filters the attribute part of an HTML tag.
#
# $t - raw attribute text as written by the user, e.g. ' border=1 onclick=x'
#
# Attributes whose name is not in getHTMLattrs() are removed; allowed ones
# are kept as name or name=value. If the result still carries a style
# attribute containing "expression", something ending in "tp://" / "tps://",
# or "url(", ALL attributes are dropped.
#
# Returns the trimmed, filtered attribute text (possibly "").
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used to be preg_replace() with the /e modifier, which pastes the
	# user-written attribute value into PHP code and evaluates it; a value
	# such as {${...}} would be executed. A callback sees the matched text
	# only as data.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'filterTagAttribute' ),
		$t);

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (when present) its value including any
# surrounding double quotes. Returns the attribute to keep, or "".
/* private */ function filterTagAttribute ( $m )
{
	if ( !in_array ( strtolower ( $m[1] ), $this->getHTMLattrs() ) ) {
		return "" ;
	}
	if ( isset ( $m[3] ) && $m[3] != "" ) {
		return $m[1] . "=" . $m[3] ;
	}
	return $m[1] ;
}
# Converts wiki table markup to HTML tables, line by line:
#
#   {| attrs     opens a table
#   |}           closes the innermost table
#   |- attrs     starts a new row (any number of dashes)
#   | a || b     data cells       ("attrs | text" sets cell attributes)
#   ! a !! b     header cells
#   |+ text      caption
#
# Tables may be nested; one stack entry per open table is kept in each of
# the four arrays below. Lines outside any table are left untouched.
# Anything still open at the end of the text is closed.
#
# $t - text to convert; returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Take everything after "{|"; fixTagAttributes() trims it, so a
			# separating blank is optional ("{|border=1" used to lose its "b").
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			# Close the cell and row that are open, then remember the
			# attributes for the <tr> that the next cell line will open.
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			# "!!" separates header cells the way "||" separates data cells
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Cells need a row; open one if none is open yet.
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, if any, with its own tag name.
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				# "attributes | content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Pop all four stacks together and close the cell with the tag that
		# actually opened it (TD, TH or CAPTION).
		$l = array_pop ( $ltd ) ;
		array_pop ( $ltr ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	return $t ;
}
# The main conversion pipeline, run on text from which strip() has already
# removed the protected sections. Despite the name this is about 14 passes
# over the text; since all the hard lifting is done inside PHP's regex code,
# a real parser probably wouldn't speed things up much.
#
# $text      - stripped wikitext
# $linestart - handed through to doBlockLevels()
#
# Returns the converted text, with the category listing appended.
function doWikiPass2( $text, $linestart )
{
	$fname = "OutputPage::doWikiPass2";
	wfProfileIn( $fname );

	$text = $this->replaceVariables( $this->removeHTMLtags( $text ) );

	# $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
	$text = str_replace ( "<HR>", "<hr>", $text );

	$text = $this->doBlockLevels( $this->doHeadings( $text ), $linestart );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	# External links must be done before internal ones
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->formatHeadings( $text );

	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text ) . $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
# Turns "== Heading ==" style lines into <h1>..<h6> elements.
# Levels are processed from 6 down to 1 so that longer runs of "=" are
# consumed before shorter ones can match them.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$text = preg_replace(
			"/^{$marks}([^=]+){$marks}(\\s|$)/m",
			"<h{$level}>\\1</h{$level}>\\2",
			$text );
		--$level;
	}
	return $text;
}
# Converts external links for every supported protocol.
#
# Note: external links have to be done before the internal ones, and great
# care is needed with the order of things here, so that no URL ends up being
# interpreted twice.
#
# The flag per protocol is the $autonumber argument of
# subReplaceExternalLinks().
/* private */ function replaceExternalLinks( $text )
{
	$fname = "OutputPage::replaceExternalLinks";
	wfProfileIn( $fname );

	$protocols = array(
		"http"   => true,
		"https"  => true,
		"ftp"    => false,
		"irc"    => false,
		"gopher" => false,
		"news"   => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
# Converts the external links of ONE protocol in $s to HTML.
#
# $s          - text to process
# $protocol   - protocol name without the colon, e.g. "http"
# $autonumber - true: bracketed links without a description are shown as
#               "[n]" using the running counter $this->mAutonumber, and image
#               URLs may become images (if the options allow external
#               images); false: such links show the escaped URL instead
#
# Two phases: first free-standing URLs (not preceded by "["), then
# bracketed links "[protocol:url]" / "[protocol:url description]".
459 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
# Stand-in for the protocol name in the HTML generated by phase one; it is
# turned back into $protocol by the str_replace() further down.
461 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
# Characters allowed inside a URL (regex character-class body)
462 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
464 # this is the list of separators that should be ignored if they
465 # are the last character of an URL but that should be included
466 # if they occur within the URL, e.g. "go to www.foo.com, where .."
467 # in this case, the last comma should not become part of the URL,
468 # but in "www.foo.com/123,2342,32.htm" it should.
469 $sep = ",;\.:";
# Characters allowed in the file-name part of an image URL
470 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
471 $images = "gif|png|jpg|jpeg";
473 # PLEASE NOTE: The curly braces { } are not part of the regex,
474 # they are interpreted as part of the string (used to tell PHP
475 # that the content of the string should be inserted there).
# $e1: free-standing URL whose last path component ends in an image
# extension. Groups: 1 = preceding char, 3 = host/path, 4 = file name,
# 5 = extension, 6 = following char.
476 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
477 "((?i){$images})([^{$uc}]|$)/";
# $e2: any free-standing URL; a separator is only part of the URL when a
# URL character follows it. Groups: 1 = preceding char, 3 = URL body,
# 5 = following char.
479 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
480 $sk =& $this->mOptions->getSkin();
482 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
483 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
484 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
# The skin methods are called once with the literal back-reference text
# ("\\3" etc.); preg_replace() then fills in the real values.
486 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
487 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
488 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
489 "</a>\\5", $s );
490 $s = str_replace( $unique, $protocol, $s );
# Phase two: bracketed links. Split at every "[protocol:"; the blank that
# is prepended here is removed again two lines below.
492 $a = explode( "[{$protocol}:", " " . $s );
493 $s = array_shift( $a );
494 $s = substr( $s, 1 );
# $e1: "url]rest"             -> 1 = url, 2 = rest
# $e2: "url description]rest" -> 1 = url, 2 = description, 3 = rest
496 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
497 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
499 foreach ( $a as $line ) {
500 if ( preg_match( $e1, $line, $m ) ) {
501 $link = "{$protocol}:{$m[1]}";
502 $trail = $m[2];
503 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
504 else { $text = wfEscapeHTML( $link ); }
505 } else if ( preg_match( $e2, $line, $m ) ) {
506 $link = "{$protocol}:{$m[1]}";
507 $text = $m[2];
508 $trail = $m[3];
509 } else {
# Not a well-formed bracketed link: put the piece back as it was.
510 $s .= "[{$protocol}:" . $line;
511 continue;
# In printable mode the URL is shown in parentheses after the link.
513 if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
514 else $paren = "";
515 $la = $sk->getExternalLinkAttributes( $link, $text );
# NOTE(review): $link goes into a single-quoted href without escaping, and
# $uc does not contain "'" -- presumably that is what keeps this safe; confirm.
516 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
519 return $s;
# Handles a ''' (bold) token.
#
# $state - by-reference array; "strong" and "em" hold the token position at
#          which the element was opened, or FALSE when it is not open
# $token - current token; only $token["pos"] is used
#
# Returns the HTML to emit and updates $state["strong"].
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# ''' lala ''lala ''' : <em> was opened inside <strong>, so it has to
	# be closed first and re-opened afterwards.
	$emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
# Handles a '' (italic) token; the mirror image of handle3Quotes().
#
# $state - by-reference array; "strong" and "em" hold the token position at
#          which the element was opened, or FALSE when it is not open
# $token - current token; only $token["pos"] is used
#
# Returns the HTML to emit and updates $state["em"].
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# ''lala'''lala'' ....''' : <strong> was opened inside <em>, so it has
	# to be closed first and re-opened afterwards.
	$strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
# Handles a ''''' (bold + italic) token.
#
# $state - by-reference array; "strong" and "em" hold the token position at
#          which the element was opened, or FALSE when it is not open
# $token - current token; only $token["pos"] is used
#
# Depending on what is open, this closes both elements (innermost first),
# closes the open one and opens the other, or opens both.
# Returns the HTML to emit and updates $state.
/* private */ function handle5Quotes( &$state, $token )
{
	# Every branch assigns $s directly; the original appended to a variable
	# that was never initialised.
	if ( $state["em"] && $state["strong"] ) {
		if ( $state["em"] < $state["strong"] ) {
			# <em> was opened first, so <strong> is the inner element
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
# Token-driven pass that handles [[internal links]], quote markup
# ('' ''' '''''), "----" rules and the RFC / ISBN magic words.
#
# The text is read token by token. While a [[ is open, everything produced
# is pushed onto $tokenStack instead of being appended to the output; at the
# matching ]] the stacked pieces are glued together and handed to
# handleInternalLink().
#
# $str - text to process; returns the converted text.
582 /* private */ function replaceInternalLinks( $str )
584 global $wgLang; # for language specific parser hook
586 $tokenizer=Tokenizer::newFromString( $str );
587 $tokenStack = array();
589 $s="";
# Position at which <em> / <strong> was opened, FALSE when not open
# (see handle2Quotes() and friends).
590 $state["em"] = FALSE;
591 $state["strong"] = FALSE;
# TRUE while at least one [[ is waiting for its ]]
592 $tagIsOpen = FALSE;
594 # The tokenizer splits the text into tokens and returns them one by one.
595 # Every call to the tokenizer returns a new token.
596 while ( $token = $tokenizer->nextToken() )
598 switch ( $token["type"] )
600 case "text":
601 # simple text with no further markup
602 $txt = $token["text"];
603 break;
604 case "[[":
605 # link opening tag.
606 # FIXME : Treat orphaned open tags (stack not empty when text is over)
607 $tagIsOpen = TRUE;
608 array_push( $tokenStack, $token );
609 $txt="";
610 break;
611 case "]]":
612 # link close tag.
613 # get text from stack, glue it together, and call the code to handle a
614 # link
615 if ( count( $tokenStack ) == 0 )
617 # stack empty. Found a ]] without an opening [[
618 $txt = "]]";
619 } else {
620 $linkText = "";
# Pop back to the nearest [[, prepending each piece so the original order
# is restored. Non-"[[" entries are only pushed while a tag is open, so a
# [[ entry lies below them on the stack.
621 $lastToken = array_pop( $tokenStack );
622 while ( $lastToken["type"] != "[[" )
624 if( !empty( $lastToken["text"] ) ) {
625 $linkText = $lastToken["text"] . $linkText;
627 $lastToken = array_pop( $tokenStack );
629 $txt = $linkText ."]]";
# Text carried by the [[ token itself, if any, is passed on as the link
# prefix.
630 if( isset( $lastToken["text"] ) ) {
631 $prefix = $lastToken["text"];
632 } else {
633 $prefix = "";
# Text directly after ]] is fetched too and passed along, so that
# handleInternalLink() can treat it as the link trail.
635 $nextToken = $tokenizer->previewToken();
636 if ( $nextToken["type"] == "text" )
638 # Preview just looks at it. Now we have to fetch it.
639 $nextToken = $tokenizer->nextToken();
640 $txt .= $nextToken["text"];
642 $txt = $this->handleInternalLink( $txt, $prefix );
# Still inside an outer [[ if the stack is not empty
644 $tagIsOpen = (count( $tokenStack ) != 0);
645 break;
646 case "----":
647 $txt = "\n<hr>\n";
648 break;
649 case "'''":
650 # This and the three next ones handle quotes
651 $txt = $this->handle3Quotes( $state, $token );
652 break;
653 case "''":
654 $txt = $this->handle2Quotes( $state, $token );
655 break;
656 case "'''''":
657 $txt = $this->handle5Quotes( $state, $token );
658 break;
659 case "":
660 # empty token
661 $txt="";
662 break;
# Magic words are left as plain text inside a link.
663 case "RFC ":
664 if ( $tagIsOpen ) {
665 $txt = "RFC ";
666 } else {
667 $txt = $this->doMagicRFC( $tokenizer );
669 break;
670 case "ISBN ":
671 if ( $tagIsOpen ) {
672 $txt = "ISBN ";
673 } else {
674 $txt = $this->doMagicISBN( $tokenizer );
676 break;
677 default:
678 # Call language specific Hook.
679 $txt = $wgLang->processToken( $token, $tokenStack );
680 if ( NULL == $txt ) {
681 # An unknown token. Highlight.
682 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
683 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
685 break;
687 # If we're parsing the interior of a link, don't append the interior to $s,
688 # but push it to the stack so it can be processed when a ]] token is found.
689 if ( $tagIsOpen && $txt != "" ) {
690 $token["type"] = "text";
691 $token["text"] = $txt;
692 array_push( $tokenStack, $token );
693 } else {
694 $s .= $txt;
696 } #end while
697 if ( count( $tokenStack ) != 0 )
699 # still objects on stack. opened [[ tag without closing ]] tag.
# Flush them as plain text: text entries contribute their text, any other
# entry its type string (e.g. "[[").
700 $txt = "";
701 while ( $lastToken = array_pop( $tokenStack ) )
703 if ( $lastToken["type"] == "text" )
705 $txt = $lastToken["text"] . $txt;
706 } else {
707 $txt = $lastToken["type"] . $txt;
710 $s .= $txt;
712 return $s;
# Converts the inside of one [[...]] link to HTML.
#
# $line   - everything after the opening "[[": "target|text]]trail"
# $prefix - text that precedes the link and belongs to it
#
# Returns the HTML for prefix + link + trail. Input that does not have the
# expected form, or whose target is not a valid title, is returned as
# literal text ($prefix . "[[" . $line).
#
# Special cases, in this order: inter-language links are recorded in the
# parser output and produce no link; images are embedded; a link to the
# current page without "#" becomes bold text; category links are collected
# and produce no link; Media: and Special: links get their own link types.
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	# $wgInterwikiMagic was read below without being declared global, so the
	# language-link branch could never be taken.
	global $wgNamespacesWithSubpages, $wgLanguageCode, $wgInterwikiMagic;
	static $fname = "OutputPage::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );
	$s = "";

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		# Every exit closes the profiling section opened above.
		wfProfileOut( $fname );
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		wfProfileOut( $fname );
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			$s .= $prefix . $trail;
			wfProfileOut( $fname );
			return $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# Self link. strpos() returns 0 for a "#" in first position, which a
	# loose comparison cannot tell from FALSE ("no # at all").
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
	    ( strpos( $link, "#" ) === FALSE ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		wfProfileOut( $fname );
		return $s;
	}
	# NOTE(review): $category is the message text from wfMsg( "category" )
	# while $ns comes from Title::getNamespace(); if that is a namespace
	# number this comparison does not do what it looks like -- confirm.
	# NOTE(review): mCategoryLinks is not among the members declared on this
	# class; presumably it belongs on $this->mOutput -- confirm.
	if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		# $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		$this->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		wfProfileOut( $fname );
		return $s ;
	}
	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
841 # Some functions here used by doBlockLevels()
# Ends the section recorded in $this->mLastSection and forgets it.
# Returns the closing tag followed by a newline; for "p" (and when no
# section is open) only the newline is returned.
/* private */ function closeParagraph()
{
	$open = $this->mLastSection;
	$this->mLastSection = "";

	$isParagraph = ( 0 == strcmp( "p", $open ) );
	$isNothing   = ( 0 == strcmp( "", $open ) );
	if ( $isParagraph || $isNothing ) {
		return "\n";
	}
	return "</" . $open . ">" . "\n";
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters (bytes) they share.
#
# $st1, $st2 - strings to compare
#
# Returns 0 when either string is empty.
/* private */ function getCommon( $st1, $st2 )
{
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	for ( $i = 0; $i < $shorter; ++$i ) {
		# Square-bracket offsets: the $str{$i} form is no longer accepted by
		# current PHP versions.
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
# openList() closes the current paragraph and opens a new list with its
# first item: "*" bullet, "#" numbered, ":" definition data, ";" definition
# term (which also sets $this->mDTopen). Any other character yields an error
# comment only.
/* private */ function openList( $char )
{
	$html = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$html .= "<ul><li>";
			break;
		case "#":
			$html .= "<ol><li>";
			break;
		case ":":
			$html .= "<dl><dd>";
			break;
		case ";":
			$html .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			# Replaces, not appends: the paragraph end is dropped here
			$html = "<!-- ERR 1 -->";
	}

	return $html;
}
# Closes the current list item and opens the next one on the same level.
# For definition lists the closing tag depends on whether a <dt> is open
# ($this->mDTopen); the flag is then set according to the new item.
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case "*":
		case "#":
			return "</li><li>";

		case ":":
		case ";":
			$closing = $this->mDTopen ? "</dt>" : "</dd>";
			$startsTerm = ( ";" == $char );
			$this->mDTopen = $startsTerm;
			return $closing . ( $startsTerm ? "<dt>" : "<dd>" );
	}
	return "<!-- ERR 2 -->";
}
# Closes the last item and the list itself for the given prefix character.
# Only "*", "#" and ":" are handled; anything else, including ";", yields an
# error comment. The result ends with a newline except in the error case.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$closing = "</li></ul>";
			break;
		case "#":
			$closing = "</li></ol>";
			break;
		case ":":
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$closing = "</dt></dl>";
			} else {
				$closing = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $closing."\n";
}
# Parses the text line by line and handles the block-level elements:
# paragraphs, <pre> for lines starting with a blank, and lists built from
# lines starting with * # : ;
#
# $text      - text to process
# $linestart - when false, the first line is copied through unchanged and
#              block handling starts with the second line
#
# Returns the converted text. Uses $this->mLastSection and $this->mDTopen as
# working state.
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "OutputPage::doBlockLevels";
	wfProfileIn( $fname );

	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;
	# Read by the list-closing loop after the foreach; given a value here so
	# that loop is well defined even when no line is iterated.
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );        # length of the previous prefix
		$npl = strspn( $t, "*#:;" );       # length of this line's prefix
		$pref = substr( $t, 0, $npl );
		$pref2 = str_replace( ";", ":", $pref ); # ";" and ":" share a list
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list as the previous line: just start the next item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			# "; term : definition" on a single line
			if ( ";" == substr( $pref, -1 ) ) {
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Prefix changed: close the lists that ended, continue the common
			# part, open the new ones.
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( $lastPref[$opl-1] );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( $pref[$cpl-1] );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
			  "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				# substr() instead of a string offset: safe on an empty line
				if ( " " == substr( $t, 0, 1 ) ) {
					$newSection = "pre";
					# $t = wfEscapeHTML( $t );
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					# Blank line: paragraph break
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
				  $newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
			  preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close whatever lists the last line left open
	while ( $npl ) {
		$text .= $this->closeList( $pref2[$npl-1] );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
# Replaces magic-word variables in $text: the current date/time words, the
# number-of-articles word and the two parameterised message words.
#
# Returns the text with the variables substituted.
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "OutputPage::replaceVariables";
	wfProfileIn( $fname );

	# Basic variables
	# See Language.php for the definition of each magic word
	# As with sigs, this uses the server's local time -- ensure
	# this is appropriate for your audience!
	$magic = array(
		MAG_CURRENTMONTH        => date( "m" ),
		MAG_CURRENTMONTHNAME    => $wgLang->getMonthName( date("n") ),
		MAG_CURRENTMONTHNAMEGEN => $wgLang->getMonthNameGen( date("n") ),
		MAG_CURRENTDAY          => date("j"),
		MAG_CURRENTDAYNAME      => $wgLang->getWeekdayName( date("w")+1 ),
		MAG_CURRENTYEAR         => date( "Y" ),
		MAG_CURRENTTIME         => $wgLang->time( wfTimestampNow(), false )
	);

	$this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	$mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $mw->match( $text ) ) {
		$articleCount = wfNumberOfArticles();
		$text = $mw->replace( $articleCount, $text );
		if( $mw->getWasModified() ) { $this->mOutput->mContainsOldMagic++; }
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;
	$callbacks = array(
		MAG_MSG   => "wfReplaceMsgVar",
		MAG_MSGNW => "wfReplaceMsgnwVar"
	);
	foreach ( $callbacks as $wordId => $callback ) {
		$mw =& MagicWord::get( $wordId );
		$text = $mw->substituteCallback( $text, $callback );
		# NOTE(review): the old-magic counter above lives on $this->mOutput,
		# this one is written to the parser itself -- confirm it is intended.
		if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }
	}

	wfProfileOut( $fname );
	return $text;
}
1070 # Cleans up HTML, removes dangerous tags and attributes
#
# The text is split at every "<". A piece that starts with a permitted,
# properly nested tag is re-emitted with its attributes filtered by
# fixTagAttributes(); every other piece has its "<" and ">" turned into
# &lt; / &gt;. HTML comments are removed, and tags that are still open at
# the end of the text are closed.
1071 /* private */ function removeHTMLtags( $text )
1073 $fname = "OutputPage::removeHTMLtags";
1074 wfProfileIn( $fname );
1075 $htmlpairs = array( # Tags that must be closed
1076 "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1077 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1078 "strike", "strong", "tt", "var", "div", "center",
1079 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1080 "ruby", "rt" , "rb" , "rp"
# Tags that are not tracked on the tag stack
1082 $htmlsingle = array(
1083 "br", "p", "hr", "li", "dt", "dd"
1085 $htmlnest = array( # Tags that can be nested--??
1086 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1087 "dl", "font", "big", "small", "sub", "sup"
1089 $tabletags = array( # Can only appear inside table
1090 "td", "th", "tr"
# Table tags are treated like single tags (not pushed on the stack);
# $htmlelements is the full list of permitted tag names.
1093 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1094 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
# NOTE(review): $htmlattrs is not read again in the part of this function
# shown here.
1096 $htmlattrs = $this->getHTMLattrs () ;
1098 # Remove HTML comments
1099 $text = preg_replace( "/<!--.*-->/sU", "", $text );
# Everything before the first "<" is plain text.
1101 $bits = explode( "<", $text );
1102 $text = array_shift( $bits );
# $tagstack: open tags of the current table level; $tablestack: the tag
# stacks of the enclosing levels, saved while inside a <table>.
1103 $tagstack = array(); $tablestack = array();
1105 foreach ( $bits as $x ) {
# Notices and warnings are switched off around the match: when the piece
# does not look like a tag, $regs has no elements for list() to unpack.
1106 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
# 1 = "/" of a closing tag, 2 = tag name, 3 = attributes, 4 = ">" or "/>",
# 5 = text up to the end of the piece
1107 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1108 $x, $regs );
1109 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1110 error_reporting( $prev );
1112 $badtag = 0 ;
1113 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1114 # Check our stack
1115 if ( $slash ) {
1116 # Closing a tag...
# A closing tag for a stacked element must match the top of the stack;
# otherwise the popped entry is put back and the tag is rejected.
1117 if ( ! in_array( $t, $htmlsingle ) &&
1118 ( $ot = array_pop( $tagstack ) ) != $t ) {
1119 array_push( $tagstack, $ot );
1120 $badtag = 1;
1121 } else {
# Leaving a table: restore the tag stack of the enclosing level.
1122 if ( $t == "table" ) {
1123 $tagstack = array_pop( $tablestack );
1125 $newparams = "";
1127 } else {
1128 # Keep track for later
# Rejected: table tags outside a table, and a second copy of an already
# open tag that is not allowed to nest.
1129 if ( in_array( $t, $tabletags ) &&
1130 ! in_array( "table", $tagstack ) ) {
1131 $badtag = 1;
1132 } else if ( in_array( $t, $tagstack ) &&
1133 ! in_array ( $t , $htmlnest ) ) {
1134 $badtag = 1 ;
1135 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table: save the current stack and start a fresh one.
1136 if ( $t == "table" ) {
1137 array_push( $tablestack, $tagstack );
1138 $tagstack = array();
1140 array_push( $tagstack, $t );
1142 # Strip non-approved attributes from the tag
1143 $newparams = $this->fixTagAttributes($params);
# Accepted tag: emit it with the filtered attributes; a ">" in the text
# after the tag is escaped.
1146 if ( ! $badtag ) {
1147 $rest = str_replace( ">", "&gt;", $rest );
1148 $text .= "<$slash$t $newparams$brace$rest";
1149 continue;
# Anything else is shown as literal text.
1152 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1154 # Close off any remaining tags
1155 while ( $t = array_pop( $tagstack ) ) {
1156 $text .= "</$t>\n";
1157 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1159 wfProfileOut( $fname );
1160 return $text;
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text: text in which headings appear as <h1>..<h6> elements
 * Returns the text with the rebuilt headings (and the TOC, if shown) inserted
 */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) { $st = 0; }

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );

	# headline counter
	$c = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();

	# Explicit initial values for state the loop below accumulates
	$level = 0;          # level of the current heading
	$prevlevel = 0;      # level of the previous heading, 0 before the first
	$h = array();        # per-level heading counters
	$numbering = "";     # dotted number of the current heading, e.g. "2.1"
	$dot = 0;            # whether $numbering already has a component
	$refer = array();    # canonized headline of each heading
	$refers = array();   # canonized headline => times seen so far
	$refcount = array(); # occurrence index of each heading's headline
	$toclines = 0;       # number of TOC entries

	foreach ( $matches[3] as $headline ) {
		if ( $level ) { $prevlevel = $level; }
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level + 1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		// count number of headlines for each level
		if ( !isset( $h[$level] ) ) { $h[$level] = 0; }
		$h[$level]++;

		if ( $nh || $st ) {
			// Join the non-zero counters of all levels up to this one
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) { $numbering .= "."; }
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace( '"', "", $canonized_headline );
		$canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );
		$refer[$c] = $canonized_headline;
		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) { $refers[$canonized_headline] = 0; }
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text
		if ( $nh || $st ) {
			$tocline = $numbering . " " . $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				// the two are different if the line contains a link
				$headline = $numbering . " " . $headline;
			}
		}

		// Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if ( $refcount[$c] > 1 ) { $anchor .= "_" . $refcount[$c]; }
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		$head[$c] = "";
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink( $c + 1 );
		}

		// Put it all together
		$head[$c] .= "<h" . $level . $matches[2][$c]
			. "<a name=\"" . $anchor . "\">"
			. $headline
			. "</a>"
			. "</h" . $level . ">";

		// Add the edit section link
		if ( $esr ) {
			$head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines
	$blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>" . $full . $toc;
		}

		if ( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
# Handles the magic word "ISBN".
#
# Looks at the token following "ISBN". If it is a text token, it is consumed
# and its leading ISBN (digits, dashes and upper-case letters, after optional
# blanks) is turned into a link to Special:Booksources; the rest of the token
# text is appended unchanged.
#
# $tokenizer: token source; previewToken() and nextToken() are expected to
#             return arrays with a "type" key and, for text tokens, a "text" key
# Returns the replacement text for the magic word
/* private */ function doMagicISBN( &$tokenizer )
{
	# Skip tokens whose type is empty
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}

	# Only a text token can hold the number; leave anything else alone
	if ( $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = $token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Split the text into leading blanks, the ISBN itself and the remainder.
	# strspn() copes with the text running out, which indexing an empty
	# string character by character does not.
	$n = strspn( $x, " " );
	$blank = str_repeat( " ", $n );
	$x = (string)substr( $x, $n );

	$n = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $n );
	$x = (string)substr( $x, $n );

	$num = str_replace( "-", "", $isbn );
	$num = str_replace( " ", "", $num );

	if ( $num === "" ) {
		# Nothing usable followed "ISBN": emit the text as it was
		return "ISBN $blank$x";
	}

	$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
	$text = "<a href=\"" .
		$titleObj->escapeLocalUrl( "isbn={$num}" ) .
		"\" class=\"internal\">ISBN $isbn</a>";
	$text .= $x;
	return $text;
}
# Handles the magic word "RFC".
#
# Looks at the token following "RFC". If it is a text token, it is consumed
# and its leading number (after optional blanks) is turned into an external
# link built from the "rfcurl" message; the rest of the token text is
# appended unchanged.
#
# $tokenizer: token source; previewToken() and nextToken() are expected to
#             return arrays with a "type" key and, for text tokens, a "text" key
# Returns the replacement text for the magic word
/* private */ function doMagicRFC( &$tokenizer )
{
	# Skip tokens whose type is empty
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}

	# Only a text token can hold the number; leave anything else alone
	if ( $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = $token["text"];
	$valid = "0123456789";

	# Split the text into leading blanks, the RFC number and the remainder.
	# strspn() copes with the text running out, which indexing an empty
	# string character by character does not.
	$n = strspn( $x, " " );
	$blank = str_repeat( " ", $n );
	$x = (string)substr( $x, $n );

	$n = strspn( $x, $valid );
	$rfc = (string)substr( $x, 0, $n );
	$x = (string)substr( $x, $n );

	if ( $rfc === "" ) {
		# No number followed "RFC": emit the text as it was
		return "RFC $blank$x";
	}

	# The "rfcurl" message is a URL pattern with $1 standing for the number
	$url = wfMsg( "rfcurl" );
	$url = str_replace( "$1", $rfc, $url );
	$sk =& $this->mOptions->getSkin();
	$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
	return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
# Transforms wikitext before it is saved.
#
# Sections handled by strip() are taken out first (with rendering turned
# off), pstPass2() is applied to what remains, and unstrip() puts the
# stripped sections back.
#
# $text:       wikitext to transform
# $title:      title stored as $this->mTitle for the duration of the call
# $user:       user passed on to pstPass2()
# $options:    stored as $this->mOptions
# $clearState: whether to reset the parser state first
# Returns the transformed wikitext
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	if ( $clearState ) {
		$this->clearState();
	}
	$this->mOptions = $options;
	$this->mTitle = $title;

	# A strip state local to this call; strip() fills it, unstrip() reads it
	$stripState = false;
	$stripped = $this->strip( $text, $stripState, false );
	$transformed = $this->pstPass2( $stripped, $user );

	return $this->unstrip( $transformed, $stripState );
}
# Second pass of the pre-save transform, applied to text from which the
# strip()ped sections have already been removed:
#  - expands ~~~ and ~~~~ signatures
#  - completes "pipe trick" links such as [[page (context)|]] and [[|page]]
#  - substitutes {{SUBST:xxx}} variables
#  - trims trailing whitespace and removes the MAG_END marker
#
# $text: wikitext to transform
# $user: user whose name and "nickname" option are used for signatures
# Returns the transformed wikitext
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( "TZ" );
		putenv( "TZ=$wgLocaltimezone" );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if ( isset( $wgLocaltimezone ) ) { putenv( "TZ=$oldtz" ); }

	# The signature markers are literal strings, so use str_replace():
	# with preg_replace() any "$1" or "\1" inside the user name or nickname
	# would be interpreted as a backreference and mangle the signature.
	# Four tildes must be handled before three.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	  # [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	# Most specific pattern first, so that [[ns:page (cont)|]] is not
	# consumed by the shorter forms
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
# Result object of a parse: the output text together with the language
# links, category links and "old magic" flag that accompany it.
# Parser::parse() stores the final text here via setText().
class ParserOutput
{
	var $mText;             # see getText() / setText()
	var $mLanguageLinks;    # see getLanguageLinks() / setLanguageLinks()
	var $mCategoryLinks;    # see getCategoryLinks() / setCategoryLinks()
	var $mContainsOldMagic; # see containsOldMagic() / setContainsOldMagic()

	# Constructor; every argument is optional and is stored unchanged
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
	}

	# Accessors: return the stored value unchanged
	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# Mutators: delegate to wfSetVar() and hand back whatever it returns
	function setText( $text )
	{
		$result = wfSetVar( $this->mText, $text );
		return $result;
	}

	function setLanguageLinks( $ll )
	{
		$result = wfSetVar( $this->mLanguageLinks, $ll );
		return $result;
	}

	function setCategoryLinks( $cl )
	{
		$result = wfSetVar( $this->mCategoryLinks, $cl );
		return $result;
	}

	function setContainsOldMagic( $com )
	{
		$result = wfSetVar( $this->mContainsOldMagic, $com );
		return $result;
	}
}
# Set of options controlling a parse. The values are normally taken from
# the site-wide $wg* settings and from a user's preferences, see
# initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors: return the stored value unchanged
	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	# Mutators: delegate to wfSetVar() (wfSetRef() for the skin) and return its result
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a ParserOptions object initialised from the given user
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already declares its parameter by reference,
		# so no call-time "&" is needed (later PHP versions reject it)
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills in all options from the global settings and the preferences of
	# $userInput. If $userInput is empty, a fresh User object is used instead.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}
}
1601 # Regex callbacks, used in OutputPage::replaceVariables
# Callback registered with substituteCallback() for MAG_MSG.
#
# Just get rid of the dangerous stuff: the message text is run through
# removeHTMLtags() because message text can come from any user and this
# replacement happens after the page itself has been cleaned.
# Internal links in the message are then expanded with the link cache
# suspended, and the message page itself is added to the link cache.
#
# $matches: regex match array; $matches[1] is the message name
# Returns the replacement text
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;

	$msgName = $matches[1];
	$cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

	$wgLinkCache->suspend();
	$expanded = $wgCurOut->replaceInternalLinks( $cleaned );
	$wgLinkCache->resume();

	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $expanded;
}
# Callback registered with substituteCallback() for MAG_MSGNW.
#
# Effective <nowiki></nowiki>: the message text is escaped with
# wfEscapeWikiText() rather than parsed. It is not a real <nowiki> because
# this is called after nowiki sections are processed. The message page
# itself is added to the link cache.
#
# $matches: regex match array; $matches[1] is the message name
# Returns the escaped message text
function wfReplaceMsgnwVar( $matches ) {
	global $wgLinkCache;

	$msgName = $matches[1];
	$escaped = wfEscapeWikiText( wfMsg( $msgName ) );

	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $escaped;
}