3 include_once('Tokenizer.php');
7 # Converts wikitext to HTML.
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
12 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
13 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*
15 # * only within ParserOptions
19 # Cleared with clearState():
20 var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
23 var $mOptions, $mTitle;
32 $this->mOutput
= new ParserOutput
;
33 $this->mAutonumber
= 0;
34 $this->mLastSection
= "";
35 $this->mDTopen
= false;
38 # First pass--just handle <nowiki> sections, pass the rest off
39 # to doWikiPass2() which does all the real work.
41 # Returns a ParserOutput
# Converts wikitext to HTML and returns the ParserOutput kept in $this->mOutput.
#
# <nowiki> sections, <math> sections (only when the options enable TeX) and
# <pre> sections are cut out of the text and replaced by placeholder tokens of
# the form <marker><n>s. The remaining text goes through doWikiPass2(), and the
# saved sections are substituted back into its result afterwards.
#
# $text       wikitext to convert
# $title      title object, kept by reference in $this->mTitle
# $options    options object, kept in $this->mOptions
# $linestart  handed through unchanged to doWikiPass2()
# $clearState not referenced by the statements shown here -- TODO confirm its use
43 function parse( $text, &$title, $options, $linestart = true, $clearState = true )
45 $fname = "Parser::parse";
46 wfProfileIn( $fname );
# Marker strings that prefix the placeholder tokens for the nowiki, math and
# pre sections respectively.
47 $unique = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
48 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
49 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
64 $this->mOptions
= $options;
65 $this->mTitle
=& $title;
67 # Replace any instances of the placeholders
68 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
69 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
70 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
# Cut out <nowiki>...</nowiki> sections; their tag-escaped content is saved
# in $nwlist and a $unique placeholder is appended to $stripped instead.
72 while ( "" != $text ) {
73 $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
75 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $text = ""; }
77 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
79 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
80 $stripped .= $unique . $nwsecs . "s";
# Cut out <math>...</math> sections when TeX is enabled; the output of
# renderMath() is saved in $mathlist.
85 if( $this->mOptions
->getUseTeX() ) {
86 while ( "" != $stripped ) {
87 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
89 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped = ""; }
91 $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
93 $mathlist[$mathsecs] = renderMath($q[0]);
94 $stripped2 .= $unique2 . $mathsecs . "s";
99 $stripped2 = $stripped;
# Cut out <pre>...</pre> sections; the tag-escaped content, re-wrapped in
# <pre> tags, is saved in $prelist.
102 while ( "" != $stripped2 ) {
103 $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
105 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped2 = ""; }
107 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
109 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
110 $stripped3 .= $unique3 . $presecs . "s";
115 $text = $this->doWikiPass2( $stripped3, $linestart );
# Backslash and dollar sign introduce backreferences in a preg_replace()
# replacement string, so they are escaped in the saved sections before those
# are used as replacements below.
117 $specialChars = array("\\", "$");
118 $escapedChars = array("\\\\", "\\$");
120 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
121 for ( $i = $presecs; $i >= 1; --$i ) {
122 $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
123 $escapedChars, $prelist[$i] ), $text );
126 for ( $i = $mathsecs; $i >= 1; --$i ) {
127 $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
128 $escapedChars, $mathlist[$i] ), $text );
131 for ( $i = $nwsecs; $i >= 1; --$i ) {
132 $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
133 $escapedChars, $nwlist[$i] ), $text );
136 $this->mOutput
->setText( $text );
137 wfProfileOut( $fname );
138 return $this->mOutput
;
# Builds the HTML for a category page in $r: a "subcategories" section and an
# article section, each a comma-separated list of links to the pages returned
# by the database query below.
# Returns nothing when category magic is disabled in the options, and ""
# when the current title's prefix is not the localised "category" name.
141 function categoryMagic ()
144 if ( !$this->mOptions
->getUseCategoryMagic() ) return ;
145 $id = $this->mTitle
->getArticleID() ;
146 $cat = ucfirst ( wfMsg ( "category" ) ) ;
147 $ti = $this->mTitle
->getText() ;
# Split at the first colon: $ti[0] is the prefix, $ti[1] the remainder.
148 $ti = explode ( ":" , $ti , 2 ) ;
149 if ( $cat != $ti[0] ) return "" ;
150 $r = "<br break=all>\n" ;
152 $articles = array() ;
153 $parents = array () ;
154 $children = array() ;
157 $sk =& $this->mGetSkin();
# NOTE(review): two queries are assigned to $sql back to back here; as
# listed, only the second one reaches wfQuery() -- confirm which is intended.
161 $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
163 $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
166 $res = wfQuery ( $sql, DB_READ
) ;
167 while ( $x = wfFetchObject ( $res ) )
170 # $t->newFromDBkey ( $x->l_from ) ;
171 # $t = $t->getText() ;
# Rebuild the prefixed title "Namespace:Title" of the linking page.
175 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
176 if ( $t != "" ) $t .= ":" ;
177 $t .= $x->cur_title
;
# A linking page whose own prefix equals the category name is collected in
# $children, shown with its bare name as the link text.
180 $y = explode ( ":" , $t , 2 ) ;
181 if ( count ( $y ) == 2 && $y[0] == $cat ) {
182 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
184 array_push ( $articles , $sk->makeLink ( $t ) ) ;
187 wfFreeResult ( $res ) ;
190 if ( count ( $children ) > 0 )
192 asort ( $children ) ;
193 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
194 $r .= implode ( ", " , $children ) ;
198 if ( count ( $articles ) > 0 )
200 asort ( $articles ) ;
201 $h = wfMsg( "category_header", $ti[1] );
202 $r .= "<h2>{$h}</h2>\n" ;
203 $r .= implode ( ", " , $articles ) ;
# Defines the whitelist of attribute names that may be kept on user-supplied
# HTML tags. fixTagAttributes() and removeHTMLtags() both assign the result
# of this method to their local $htmlattrs.
# NOTE(review): "width" appears twice in the list; harmless for membership
# lookups, but one entry could be dropped.
210 function getHTMLattrs ()
212 $htmlattrs = array( # Allowed attributes--no scripting, etc.
213 "title", "align", "lang", "dir", "width", "height",
214 "bgcolor", "clear", /* BR */ "noshade", /* HR */
215 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
216 /* FONT */ "type", "start", "value", "compact",
217 /* For various lists, mostly deprecated but safe */
218 "summary", "width", "border", "frame", "rules",
219 "cellspacing", "cellpadding", "valign", "char",
220 "charoff", "colgroup", "col", "span", "abbr", "axis",
221 "headers", "scope", "rowspan", "colspan", /* Tables */
222 "id", "class", "name", "style" /* For CSS */
# Filters the attribute text $t of an HTML tag.
# Blank input yields "". The first pattern matches name[=value] pairs; its
# replacement expression keeps a pair only when in_array() finds the
# lower-cased name in the getHTMLattrs() whitelist. The second pattern looks
# inside a style attribute for "expression", "url(" or "tp"/"tps" followed
# by "://".
# NOTE(review): the first pattern uses the /e (eval) modifier, which PHP
# deprecated in 5.5 and removed in 7.0; preg_replace_callback() is the
# documented replacement if this code is ever ported.
227 function fixTagAttributes ( $t )
229 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
230 $htmlattrs = $this->getHTMLattrs() ;
232 # Strip non-approved attributes from the tag
234 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
235 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
237 # Strip javascript "expression" from stylesheets. Brute force approach:
238 # If anything offensive is found, all attributes of the HTML tag are dropped
241 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
242 wfMungeToUtf8( $t ) ) )
# Translates wiki table markup into HTML table tags, one input line at a time.
# Lines are recognised by their first characters:
#   {|   opens a table (the rest of the line is filtered by fixTagAttributes())
#   |}   closes the open cell/row of the current table
#   |-   starts a new row; leading dashes are skipped and the remainder is
#        stored as the row's attributes
#   |  !  |+   cell, header cell or caption; "||" (and "!!" on header lines)
#        separates several cells on one line
# $td, $ltd, $tr and $ltr are stacks with one entry per currently open table:
# cell-open flag, last cell tag name, row-open flag and pending row attributes.
250 function doTableStuff ( $t )
252 $t = explode ( "\n" , $t ) ;
253 $td = array () ; # Is currently a td tag open?
254 $ltd = array () ; # Was it TD or TH?
255 $tr = array () ; # Is currently a tr tag open?
256 $ltr = array () ; # tr attributes
257 foreach ( $t AS $k => $x )
260 $fc = substr ( $x , 0 , 1 ) ;
261 if ( "{|" == substr ( $x , 0 , 2 ) )
263 $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
264 array_push ( $td , false ) ;
265 array_push ( $ltd , "" ) ;
266 array_push ( $tr , false ) ;
267 array_push ( $ltr , "" ) ;
269 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
270 else if ( "|}" == substr ( $x , 0 , 2 ) )
273 $l = array_pop ( $ltd ) ;
274 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
275 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
279 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
281 $z = trim ( substr ( $x , 2 ) ) ;
282 $t[$k] = "<caption>{$z}</caption>\n" ;
284 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
286 $x = substr ( $x , 1 ) ;
287 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
289 $l = array_pop ( $ltd ) ;
290 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
291 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
294 array_push ( $tr , false ) ;
295 array_push ( $td , false ) ;
296 array_push ( $ltd , "" ) ;
297 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
299 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
301 if ( "|+" == substr ( $x , 0 , 2 ) )
304 $x = substr ( $x , 1 ) ;
306 $after = substr ( $x , 1 ) ;
307 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
308 $after = explode ( "||" , $after ) ;
310 foreach ( $after AS $theline )
315 $tra = array_pop ( $ltr ) ;
316 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
317 array_push ( $tr , true ) ;
318 array_push ( $ltr , "" ) ;
321 $l = array_pop ( $ltd ) ;
322 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
323 if ( $fc == "|" ) $l = "TD" ;
324 else if ( $fc == "!" ) $l = "TH" ;
325 else if ( $fc == "+" ) $l = "CAPTION" ;
327 array_push ( $ltd , $l ) ;
328 $y = explode ( "|" , $theline , 2 ) ;
329 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
330 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
332 array_push ( $td , true ) ;
337 # Closing open td, tr && table
338 while ( count ( $td ) > 0 )
340 if ( array_pop ( $td ) ) $t[] = "</td>" ;
341 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
345 $t = implode ( "\n" , $t ) ;
346 # $t = $this->removeHTMLtags( $t );
350 # Well, OK, it's actually about 14 passes. But since all the
351 # hard lifting is done inside PHP's regex code, it probably
352 # wouldn't speed things up much to add a real parser.
# Runs the main sequence of wikitext-to-HTML passes over $text, in this order:
# HTML tag cleanup, variable replacement, horizontal rules, headings, block
# levels (paragraphs and lists), optional date reformatting, external links,
# internal links, tables, ISBN and RFC magic, heading formatting, the skin's
# transformContent(), and finally the categoryMagic() output is appended.
# $linestart is handed through to doBlockLevels().
# NOTE(review): the profiling label says "OutputPage::" although parse() in
# this file labels itself "Parser::" -- looks like a leftover; confirm before
# changing, since profiling reports may key on the string.
354 function doWikiPass2( $text, $linestart )
356 $fname = "OutputPage::doWikiPass2";
357 wfProfileIn( $fname );
359 $text = $this->removeHTMLtags( $text );
360 $text = $this->replaceVariables( $text );
# A line starting with five or more dashes becomes <hr>.
362 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
363 $text = str_replace ( "<HR>", "<hr>", $text );
365 $text = $this->doHeadings( $text );
366 $text = $this->doBlockLevels( $text, $linestart );
368 if($this->mOptions
->getUseDynamicDates()) {
369 global $wgDateFormatter;
370 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
373 $text = $this->replaceExternalLinks( $text );
374 $text = $this->replaceInternalLinks ( $text );
375 $text = $this->doTableStuff ( $text ) ;
377 $text = $this->magicISBN( $text );
378 $text = $this->magicRFC( $text );
379 $text = $this->formatHeadings( $text );
381 $sk =& $this->mOptions
->getSkin();
382 $text = $sk->transformContent( $text );
383 $text .= $this->categoryMagic () ;
385 wfProfileOut( $fname );
# Turns "=Title=" ... "======Title======" lines into <h1> ... <h6> elements.
# Levels are processed from 6 down to 1 so that longer runs of "=" are
# consumed before shorter ones can match them. The heading text itself may
# not contain "=".
390 /* private */ function doHeadings( $text )
392 for ( $i = 6; $i >= 1; --$i ) {
393 $h = substr( "======", 0, $i );
394 $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
395 "<h{$i}>\\1</h{$i}>\\2", $text );
400 # Note: we have to do external links before the internal ones,
401 # and otherwise take great care in the order of things here, so
402 # that we don't end up interpreting some URLs twice.
# Applies subReplaceExternalLinks() to $text once per supported protocol.
# The third argument ($autonumber) is true for http and https only.
404 /* private */ function replaceExternalLinks( $text )
406 $fname = "OutputPage::replaceExternalLinks";
407 wfProfileIn( $fname );
408 $text = $this->subReplaceExternalLinks( $text, "http", true );
409 $text = $this->subReplaceExternalLinks( $text, "https", true );
410 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
411 $text = $this->subReplaceExternalLinks( $text, "irc", false );
412 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
413 $text = $this->subReplaceExternalLinks( $text, "news", false );
414 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
415 wfProfileOut( $fname );
# Converts URLs of one protocol inside $s into HTML.
#
# Bare URLs (not preceded by "[") are rewritten first: when $autonumber is
# true and the options allow external images, URLs ending in one of the
# $images extensions go to the skin's makeImage(); other bare URLs become
# <a> tags. In these replacements $unique is written in place of the
# protocol name and swapped back by str_replace() afterwards -- presumably
# so that text produced by the first pattern is not matched again by the
# second.
#
# Bracketed links are then handled by splitting $s on "[protocol:": the form
# "[url]" gets either a running number "[n]" (when $autonumber is true) or
# the escaped URL as its text. In printable mode the URL is also appended
# in parentheses.
#
# NOTE(review): $sep is used in the patterns below but is never assigned in
# the statements shown here -- confirm it is defined next to the separator
# comment.
419 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
421 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
422 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
424 # this is the list of separators that should be ignored if they
425 # are the last character of an URL but that should be included
426 # if they occur within the URL, e.g. "go to www.foo.com, where .."
427 # in this case, the last comma should not become part of the URL,
428 # but in "www.foo.com/123,2342,32.htm" it should.
430 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
431 $images = "gif|png|jpg|jpeg";
433 # PLEASE NOTE: The curly braces { } are not part of the regex,
434 # they are interpreted as part of the string (used to tell PHP
435 # that the content of the string should be inserted there).
436 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
437 "((?i){$images})([^{$uc}]|$)/";
439 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
440 $sk =& $this->mOptions
->getSkin();
442 if ( $autonumber and $this->mOptions
->getAllowExternalImages() ) { # Use img tags only for HTTP urls
443 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
444 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
446 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
447 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
448 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
450 $s = str_replace( $unique, $protocol, $s );
# A space is prepended before splitting and stripped again from the first
# piece, which is the text before the first bracketed link.
452 $a = explode( "[{$protocol}:", " " . $s );
453 $s = array_shift( $a );
454 $s = substr( $s, 1 );
# $e1 matches "url]rest"; $e2 matches "url text]rest".
456 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
457 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
459 foreach ( $a as $line ) {
460 if ( preg_match( $e1, $line, $m ) ) {
461 $link = "{$protocol}:{$m[1]}";
463 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
464 else { $text = wfEscapeHTML( $link ); }
465 } else if ( preg_match( $e2, $line, $m ) ) {
466 $link = "{$protocol}:{$m[1]}";
470 $s .= "[{$protocol}:" . $line;
473 if ( $this->mOptions
->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
475 $la = $sk->getExternalLinkAttributes( $link, $text );
476 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
# Handles a ''' (bold) token.
# $state["strong"] and $state["em"] hold the position of the token that
# opened the respective element, or FALSE while it is closed. $state is
# passed by reference and updated here.
# When bold is open and italics were opened after it, the markup
# "</em></strong><em>" is prepared so the elements stay properly nested.
482 /* private */ function handle3Quotes( &$state, $token )
484 if ( $state["strong"] ) {
485 if ( $state["em"] && $state["em"] > $state["strong"] )
487 # ''' lala ''lala '''
488 $s = "</em></strong><em>";
492 $state["strong"] = FALSE;
495 $state["strong"] = $token["pos"];
# Handles a '' (italic) token; the mirror image of handle3Quotes().
# $state["em"] and $state["strong"] hold the position of the opening token
# or FALSE; $state is passed by reference and updated here.
# When italics are open and bold was opened after them, the markup
# "</strong></em><strong>" is prepared so the elements stay properly nested.
500 /* private */ function handle2Quotes( &$state, $token )
502 if ( $state["em"] ) {
503 if ( $state["strong"] && $state["strong"] > $state["em"] )
505 # ''lala'''lala'' ....'''
506 $s = "</strong></em><strong>";
510 $state["em"] = FALSE;
513 $state["em"] = $token["pos"];
# Handles a ''''' (bold + italic) token by toggling both elements at once.
#   both open    -> close both, the one opened later first
#   only em open -> close em, open strong
#   only strong  -> close strong, open em
#   neither open -> open strong, then em
# $state (passed by reference) records the opening token position for each
# element, or FALSE while it is closed.
518 /* private */ function handle5Quotes( &$state, $token )
520 if ( $state["em"] && $state["strong"] ) {
521 if ( $state["em"] < $state["strong"] ) {
522 $s .= "</strong></em>";
524 $s .= "</em></strong>";
526 $state["strong"] = $state["em"] = FALSE;
527 } elseif ( $state["em"] ) {
528 $s .= "</em><strong>";
529 $state["em"] = FALSE;
530 $state["strong"] = $token["pos"];
531 } elseif ( $state["strong"] ) {
532 $s .= "</strong><em>";
533 $state["strong"] = FALSE;
534 $state["em"] = $token["pos"];
535 } else { # not $em and not $strong
536 $s .= "<strong><em>";
537 $state["strong"] = $state["em"] = $token["pos"];
# Walks $str token by token (via Tokenizer) and converts [[...]] links and
# quote markup ('' ''' ''''') into HTML.
# Tokens seen while a [[ is open are pushed onto $tokenStack. When the
# matching ]] arrives, the stacked text is glued back together, a directly
# following text token is appended as well, and the whole string goes to
# handleInternalLink(). Anything left on the stack at the end (a [[ without
# its ]]) is turned back into plain text.
542 /* private */ function replaceInternalLinks( $str )
544 $tokenizer=Tokenizer
::newFromString( $str );
545 $tokenStack = array();
# Quote state shared with handle2Quotes()/handle3Quotes()/handle5Quotes().
548 $state["em"] = FALSE;
549 $state["strong"] = FALSE;
552 # The tokenizer splits the text into tokens and returns them one by one.
553 # Every call to the tokenizer returns a new token.
554 while ( $token = $tokenizer->nextToken() )
556 switch ( $token["type"] )
559 # simple text with no further markup
560 $txt = $token["text"];
564 # FIXME : Treat orphaned open tags (stack not empty when text is over)
566 array_push( $tokenStack, $token );
571 # get text from stack, glue it together, and call the code to handle a
573 if ( count( $tokenStack ) == 0 )
575 # stack empty. Found a ]] without an opening [[
579 $lastToken = array_pop( $tokenStack );
580 while ( $lastToken["type"] != "[[" )
582 $linkText = $lastToken["text"] . $linkText;
583 $lastToken = array_pop( $tokenStack );
585 $txt = $linkText ."]]";
586 $prefix = $lastToken["text"];
587 $nextToken = $tokenizer->previewToken();
588 if ( $nextToken["type"] == "text" )
590 # Preview just looks at it. Now we have to fetch it.
591 $nextToken = $tokenizer->nextToken();
592 $txt .= $nextToken["text"];
594 $txt = $this->handleInternalLink( $txt, $prefix );
596 $tagIsOpen = (count( $tokenStack ) != 0);
599 # This and the three next ones handle quotes
600 $txt = $this->handle3Quotes( $state, $token );
603 $txt = $this->handle2Quotes( $state, $token );
606 $txt = $this->handle5Quotes( $state, $token );
613 # An unknown token. Highlight.
614 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
615 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
618 # If we're parsing the interior of a link, don't append the interior to $s,
619 # but push it to the stack so it can be processed when a ]] token is found.
620 if ( $tagIsOpen && $txt != "" ) {
621 $token["type"] = "text";
622 $token["text"] = $txt;
623 array_push( $tokenStack, $token );
628 if ( count( $tokenStack ) != 0 )
630 # still objects on stack. opened [[ tag without closing ]] tag.
632 while ( $lastToken = array_pop( $tokenStack ) )
634 if ( $lastToken["type"] == "text" )
636 $txt = $lastToken["text"] . $txt;
638 $txt = $lastToken["type"] . $txt;
# Renders one internal link.
# $line is the link source after the opening "[[" (target, optional "|text",
# the closing "]]" and any trailing text); $prefix is the text that preceded
# the "[[". Input that does not match $e1 is emitted unchanged as
# $prefix . "[[" . $line.
#
# Depending on the parsed title the link is treated as: an interlanguage
# link (recorded in $this->mOutput->mLanguageLinks), an image, a link to the
# current page (rendered as <strong>), a category link (recorded in
# $this->mCategoryLinks), a media link, a special-page link, or an ordinary
# page link built by the skin.
#
# NOTE(review): $wgInterwikiMagic and $wgUseCategoryMagic are read below but
# do not appear in the global declarations shown here; if they are not
# declared elsewhere in this function they are unset locals and both
# conditions are always false -- confirm.
646 /* private */ function handleInternalLink( $line, $prefix )
648 global $wgLang, $wgLinkCache;
649 global $wgNamespacesWithSubpages, $wgLanguageCode;
650 static $fname = "OutputPage::replaceInternalLinks" ;
651 wfProfileIn( $fname );
653 wfProfileIn( "$fname-setup" );
655 if ( !$tc ) { $tc = Title
::legalChars() . "#"; }
656 $sk =& $this->mOptions
->getSkin();
658 # Match a link having the form [[namespace:link|alternate]]trail
660 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
661 # Match the end of a line for a word that's not followed by whitespace,
662 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
663 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
664 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
665 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
668 # Special and Media are pseudo-namespaces; no pages actually exist in them
# The namespace ids are looked up once and cached in static locals.
669 static $image = FALSE;
670 static $special = FALSE;
671 static $media = FALSE;
672 static $category = FALSE;
673 if ( !$image ) { $image = Namespace::getImage(); }
674 if ( !$special ) { $special = Namespace::getSpecial(); }
675 if ( !$media ) { $media = Namespace::getMedia(); }
676 if ( !$category ) { $category = wfMsg ( "category" ) ; }
678 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
680 wfProfileOut( "$fname-setup" );
682 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
685 } else { # Invalid form; output directly
686 $s .= $prefix . "[[" . $line ;
692 :Foobar -- override special treatment of prefix (images, language links)
693 /Foobar -- convert to CurrentPage/Foobar
694 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
696 $c = substr($m[1],0,1);
697 $noforce = ($c != ":");
698 if( $c == "/" ) { # subpage
699 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
700 $m[1]=substr($m[1],1,strlen($m[1])-2);
703 $noslash=substr($m[1],1);
705 if($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()]) { # subpages allowed here
706 $link = $this->mTitle
->getPrefixedText(). "/" . trim($noslash);
709 } # this might be changed for ugliness reasons
711 $link = $noslash; # no subpage allowed, use standard link
713 } elseif( $noforce ) { # no subpage
716 $link = substr( $m[1], 1 );
721 $nt = Title
::newFromText( $link );
723 $s .= $prefix . "[[" . $line;
726 $ns = $nt->getNamespace();
727 $iw = $nt->getInterWiki();
729 if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
730 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
731 $s .= $prefix . $trail;
734 if( $ns == $image ) {
735 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
736 $wgLinkCache->addImageLinkObj( $nt );
# NOTE(review): strpos() is compared loosely with FALSE, so a "#" at
# position 0 of $link is treated the same as "no # found" -- confirm whether
# "=== FALSE" was intended.
740 if( ( $nt->getPrefixedText() == $this->mTitle
->getPrefixedText() ) &&
741 ( strpos( $link, "#" ) == FALSE ) ) {
742 $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
745 if ( $ns == $category && $wgUseCategoryMagic ) {
746 $t = explode ( ":" , $nt->getText() ) ;
748 $t = implode ( ":" , $t ) ;
749 $t = $wgLang->ucFirst ( $t ) ;
750 # $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
751 $nnt = Title
::newFromText ( $category.":".$t ) ;
752 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
753 $this->mCategoryLinks
[] = $t ;
754 $s .= $prefix . $trail ;
757 if( $ns == $media ) {
758 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
759 $wgLinkCache->addImageLinkObj( $nt );
761 } elseif( $ns == $special ) {
762 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
765 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
767 wfProfileOut( $fname );
771 # Some functions here used by doBlockLevels()
# Prepares the closing tag for the section recorded in $this->mLastSection,
# unless that section is "p" or empty, and then resets $this->mLastSection
# to "".
773 /* private */ function closeParagraph()
776 if ( 0 != strcmp( "p", $this->mLastSection
) &&
777 0 != strcmp( "", $this->mLastSection
) ) {
778 $result = "</" . $this->mLastSection
. ">";
780 $this->mLastSection
= "";
783 # getCommon() returns the length of the longest common prefix
784 # of both arguments, i.e. the number of leading characters they share.
# Counts how many leading characters $st1 and $st2 have in common: the loop
# walks both strings in step, up to the length of the shorter one, and stops
# at the first position where they differ.
# NOTE(review): the $str{$i} offset syntax was deprecated in PHP 7.4 and
# removed in PHP 8.0; $str[$i] is the equivalent form.
786 /* private */ function getCommon( $st1, $st2 )
788 $fl = strlen( $st1 );
789 $shorter = strlen( $st2 );
790 if ( $fl < $shorter ) { $shorter = $fl; }
792 for ( $i = 0; $i < $shorter; ++
$i ) {
793 if ( $st1{$i} != $st2{$i} ) { break; }
797 # These next three functions open, continue, and close the list
798 # element appropriate to the prefix character passed into them.
# Builds the markup that opens a list for the prefix character $char,
# starting from the output of closeParagraph():
#   "*" -> <ul><li>   "#" -> <ol><li>   ":" -> <dl><dd>   ";" -> <dl><dt>
# Opening a ";" item also sets $this->mDTopen. For any other character the
# result is replaced (not appended to) by the marker "<!-- ERR 1 -->".
800 /* private */ function openList( $char )
802 $result = $this->closeParagraph();
804 if ( "*" == $char ) { $result .= "<ul><li>"; }
805 else if ( "#" == $char ) { $result .= "<ol><li>"; }
806 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
807 else if ( ";" == $char ) {
808 $result .= "<dl><dt>";
809 $this->mDTopen
= true;
811 else { $result = "<!-- ERR 1 -->"; }
# Returns the markup that ends the current list item and starts the next one
# for the prefix character $char.
#   "*" or "#" -> "</li><li>"
#   ";"        -> $close . "<dt>", and $this->mDTopen becomes true
#   ":"        -> $close . "<dd>", and $this->mDTopen becomes false
#   otherwise  -> the marker "<!-- ERR 2 -->"
# $close is "</dt>" when a <dt> is currently open ($this->mDTopen).
816 /* private */ function nextItem( $char )
818 if ( "*" == $char ||
"#" == $char ) { return "</li><li>"; }
819 else if ( ":" == $char ||
";" == $char ) {
821 if ( $this->mDTopen
) { $close = "</dt>"; }
822 if ( ";" == $char ) {
823 $this->mDTopen
= true;
824 return $close . "<dt>";
826 $this->mDTopen
= false;
827 return $close . "<dd>";
830 return "<!-- ERR 2 -->";
# Builds the markup that closes the list belonging to the prefix character
# $char: "*" -> </li></ul>, "#" -> </li></ol>, ":" -> </dt></dl> when a <dt>
# is still open (clearing $this->mDTopen), otherwise </dd></dl>.
# Any other character returns the marker "<!-- ERR 3 -->".
833 /* private */function closeList( $char )
835 if ( "*" == $char ) { $text = "</li></ul>"; }
836 else if ( "#" == $char ) { $text = "</li></ol>"; }
837 else if ( ":" == $char ) {
838 if ( $this->mDTopen
) {
839 $this->mDTopen
= false;
840 $text = "</dt></dl>";
842 $text = "</dd></dl>";
845 else { return "<!-- ERR 3 -->"; }
# Processes $text line by line to produce block-level structure.
# Each line's leading run of "*", "#", ":" and ";" characters is its list
# prefix. Comparing it with the previous line's prefix decides whether to
# continue the current item list (nextItem), close lists (closeList) or open
# new ones (openList). For ";" items, text up to the first ":" is emitted as
# the term and the rest becomes the definition.
# Lines without a prefix are handled in paragraph mode: a line starting with
# a space selects a different section type than "p", and table, blockquote
# and heading tags switch block-element mode on and off.
# When $linestart is false the first line is copied to the output untouched.
849 /* private */ function doBlockLevels( $text, $linestart )
851 $fname = "OutputPage::doBlockLevels";
852 wfProfileIn( $fname );
853 # Parsing through the text line by line. The main thing
854 # happening here is handling of block-level elements p, pre,
855 # and making lists from lines starting with * # : etc.
857 $a = explode( "\n", $text );
858 $text = $lastPref = "";
859 $this->mDTopen
= $inBlockElem = false;
861 if ( ! $linestart ) { $text .= array_shift( $a ); }
862 foreach ( $a as $t ) {
863 if ( "" != $text ) { $text .= "\n"; }
# $opl / $npl: length of the previous / current list prefix. $pref2 is the
# prefix with ";" normalised to ":" for comparison with the previous line.
866 $opl = strlen( $lastPref );
867 $npl = strspn( $t, "*#:;" );
868 $pref = substr( $t, 0, $npl );
869 $pref2 = str_replace( ";", ":", $pref );
870 $t = substr( $t, $npl );
872 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
873 $text .= $this->nextItem( substr( $pref, -1 ) );
875 if ( ";" == substr( $pref, -1 ) ) {
876 $cpos = strpos( $t, ":" );
877 if ( ! ( false === $cpos ) ) {
878 $term = substr( $t, 0, $cpos );
879 $text .= $term . $this->nextItem( ":" );
880 $t = substr( $t, $cpos +
1 );
883 } else if (0 != $npl ||
0 != $opl) {
# $cpl: number of leading prefix characters shared with the previous line.
884 $cpl = $this->getCommon( $pref, $lastPref );
886 while ( $cpl < $opl ) {
887 $text .= $this->closeList( $lastPref{$opl-1} );
890 if ( $npl <= $cpl && $cpl > 0 ) {
891 $text .= $this->nextItem( $pref{$cpl-1} );
893 while ( $npl > $cpl ) {
894 $char = substr( $pref, $cpl, 1 );
895 $text .= $this->openList( $char );
897 if ( ";" == $char ) {
898 $cpos = strpos( $t, ":" );
899 if ( ! ( false === $cpos ) ) {
900 $term = substr( $t, 0, $cpos );
901 $text .= $term . $this->nextItem( ":" );
902 $t = substr( $t, $cpos +
1 );
909 if ( 0 == $npl ) { # No prefix--go to paragraph mode
911 "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
912 $text .= $this->closeParagraph();
915 if ( ! $inBlockElem ) {
916 if ( " " == $t{0} ) {
918 # $t = wfEscapeHTML( $t );
920 else { $newSection = "p"; }
922 if ( 0 == strcmp( "", trim( $oLine ) ) ) {
923 $text .= $this->closeParagraph();
924 $text .= "<" . $newSection . ">";
925 } else if ( 0 != strcmp( $this->mLastSection
,
927 $text .= $this->closeParagraph();
928 if ( 0 != strcmp( "p", $newSection ) ) {
929 $text .= "<" . $newSection . ">";
932 $this->mLastSection
= $newSection;
935 preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
936 $inBlockElem = false;
942 $text .= $this->closeList( $pref2{$npl-1} );
945 if ( "" != $this->mLastSection
) {
946 if ( "p" != $this->mLastSection
) {
947 $text .= "</" . $this->mLastSection
. ">";
949 $this->mLastSection
= "";
951 wfProfileOut( $fname );
# Substitutes magic-word "variables" in $text.
# Date and time values are collected in $magic, keyed by MAG_* constants, and
# replaced in one go by MagicWord::replaceMultiple(). The article count and
# the MSG / MSGNW words are handled individually afterwards. The counters
# $this->mContainsOldMagic and $this->mContainsNewMagic are increased when
# the corresponding words were found.
955 /* private */ function replaceVariables( $text )
957 global $wgLang, $wgCurOut;
958 $fname = "OutputPage::replaceVariables";
959 wfProfileIn( $fname );
964 # See Language.php for the definition of each magic word
965 # As with sigs, this uses the server's local time -- ensure
966 # this is appropriate for your audience!
968 $magic[MAG_CURRENTMONTH
] = date( "m" );
969 $magic[MAG_CURRENTMONTHNAME
] = $wgLang->getMonthName( date("n") );
970 $magic[MAG_CURRENTMONTHNAMEGEN
] = $wgLang->getMonthNameGen( date("n") );
971 $magic[MAG_CURRENTDAY
] = date("j");
# date("w") is 0-based (0 = Sunday); 1 is added before the weekday lookup.
972 $magic[MAG_CURRENTDAYNAME
] = $wgLang->getWeekdayName( date("w")+
1 );
973 $magic[MAG_CURRENTYEAR
] = date( "Y" );
974 $magic[MAG_CURRENTTIME
] = $wgLang->time( wfTimestampNow(), false );
976 $this->mContainsOldMagic +
= MagicWord
::replaceMultiple($magic, $text, $text);
978 $mw =& MagicWord
::get( MAG_NUMBEROFARTICLES
);
979 if ( $mw->match( $text ) ) {
980 $v = wfNumberOfArticles();
981 $text = $mw->replace( $v, $text );
982 if( $mw->getWasModified() ) { $this->mContainsOldMagic++
; }
985 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
986 # The callbacks are at the bottom of this file
988 $mw =& MagicWord
::get( MAG_MSG
);
989 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
990 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
992 $mw =& MagicWord
::get( MAG_MSGNW
);
993 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
994 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
996 wfProfileOut( $fname );
1000 # Cleans up HTML, removes dangerous tags and attributes
# Sanitises HTML embedded in $text.
# HTML comments are removed first. The text is then split at every "<" and
# each piece is parsed as "tag name, attributes, closing bracket, trailing
# text". Only tag names found in $htmlelements are considered; $tagstack
# tracks the currently open tags (saved and restored around nested tables via
# $tablestack), and the attributes of accepted tags are filtered through
# fixTagAttributes(). Tags still open at the end are popped off the stack.
1001 /* private */ function removeHTMLtags( $text )
1003 $fname = "OutputPage::removeHTMLtags";
1004 wfProfileIn( $fname );
1005 $htmlpairs = array( # Tags that must be closed
1006 "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1007 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1008 "strike", "strong", "tt", "var", "div", "center",
1009 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1010 "ruby", "rt" , "rb" , "rp"
1012 $htmlsingle = array(
1013 "br", "p", "hr", "li", "dt", "dd"
1015 $htmlnest = array( # Tags that can be nested--??
1016 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1017 "dl", "font", "big", "small", "sub", "sup"
1019 $tabletags = array( # Can only appear inside table
# Table-only tags are added to the "single" list, so they are not tracked on
# $tagstack; $htmlelements is the full set of permitted tag names.
1023 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1024 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1026 $htmlattrs = $this->getHTMLattrs () ;
1028 # Remove HTML comments
1029 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1031 $bits = explode( "<", $text );
1032 $text = array_shift( $bits );
1033 $tagstack = array(); $tablestack = array();
1035 foreach ( $bits as $x ) {
# Notices and warnings are silenced while the piece is matched and unpacked,
# then the previous error_reporting level is restored.
1036 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
1037 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1039 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1040 error_reporting( $prev );
1043 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1047 if ( ! in_array( $t, $htmlsingle ) &&
1048 ( $ot = array_pop( $tagstack ) ) != $t ) {
1049 array_push( $tagstack, $ot );
1052 if ( $t == "table" ) {
1053 $tagstack = array_pop( $tablestack );
1058 # Keep track for later
1059 if ( in_array( $t, $tabletags ) &&
1060 ! in_array( "table", $tagstack ) ) {
1062 } else if ( in_array( $t, $tagstack ) &&
1063 ! in_array ( $t , $htmlnest ) ) {
1065 } else if ( ! in_array( $t, $htmlsingle ) ) {
1066 if ( $t == "table" ) {
1067 array_push( $tablestack, $tagstack );
1068 $tagstack = array();
1070 array_push( $tagstack, $t );
1072 # Strip non-approved attributes from the tag
1073 $newparams = $this->fixTagAttributes($params);
# NOTE(review): as written, search and replacement strings are identical in
# the two str_replace() calls below, so they change nothing; the replacement
# was presumably an HTML entity for ">" that got decoded somewhere -- confirm
# against the authoritative copy of this file.
1077 $rest = str_replace( ">", ">", $rest );
1078 $text .= "<$slash$t $newparams$brace$rest";
1082 $text .= "<" . str_replace( ">", ">", $x);
1084 # Close off any remaining tags
1085 while ( $t = array_pop( $tagstack ) ) {
1087 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1089 wfProfileOut( $fname );
1095 * This function accomplishes several tasks:
1096 * 1) Auto-number headings if that option is enabled
1097 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1098 * 3) Add a Table of contents on the top for users who have enabled the option
1099 * 4) Auto-anchor headings
1101 * It loops through all headlines, collects the necessary data, then splits up the
1102 * string and re-inserts the newly formatted headlines.
# Post-processes the <h1>..<h6> headings already present in $text.
# Local flags, read from the parser options:
#   $nh   number headings          $st   show table of contents
#   $es   show edit-section links  $esr  edit section on right click
# The NOEDITSECTION and NOTOC magic words are matched and removed from the
# text, and the TOC is never shown on the page named by wfMsg("mainpage").
# All headings are collected with preg_match_all(); for each one a hierarchical
# number, a link-safe anchor (duplicates get a "_<n>" suffix) and a TOC line
# are built. The text is then split at the headings and reassembled from the
# rebuilt headings and the blocks in between.
1105 /* private */ function formatHeadings( $text )
1107 $nh=$this->mOptions
->getNumberHeadings();
1108 $st=$this->mOptions
->getShowToc();
1109 if(!$this->mTitle
->userCanEdit()) {
1113 $es=$this->mOptions
->getEditSection();
1114 $esr=$this->mOptions
->getEditSectionOnRightClick();
1117 # Inhibit editsection links if requested in the page
1118 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1119 if ($esw->matchAndRemove( $text )) {
1122 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1124 $mw =& MagicWord
::get( MAG_NOTOC
);
1125 if ($mw->matchAndRemove( $text ))
1130 # never add the TOC to the Main Page. This is an entry page that should not
1131 # be more than 1-2 screens large anyway
1132 if($this->mTitle
->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1134 # We need this to perform operations on the HTML
1135 $sk =& $this->mOptions
->getSkin();
1137 # Get all headlines for numbering them and adding funky stuff like [edit]
# $matches[1] = heading level, [2] = rest of the opening tag, [3] = heading text.
1139 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1144 # Ugh .. the TOC should have neat indentation levels which can be
1145 # passed to the skin functions. These are determined here
1146 foreach($matches[3] as $headline) {
1147 if($level) { $prevlevel=$level;}
1148 $level=$matches[1][$c];
1149 if(($nh||
$st) && $prevlevel && $level>$prevlevel) {
1151 $h[$level]=0; // reset when we enter a new level
1152 $toc.=$sk->tocIndent($level-$prevlevel);
1153 $toclevel+
=$level-$prevlevel;
1156 if(($nh||
$st) && $level<$prevlevel) {
1157 $h[$level+
1]=0; // reset when we step back a level
1158 $toc.=$sk->tocUnindent($prevlevel-$level);
1159 $toclevel-=$prevlevel-$level;
1162 $h[$level]++
; // count number of headlines for each level
1165 for($i=1;$i<=$level;$i++
) {
1167 if($dot) {$numbering.=".";}
1174 // The canonized header is a version of the header text safe to use for links
1176 $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1177 $tocline = trim( $canonized_headline );
1178 $canonized_headline=str_replace('"',"",$canonized_headline);
1179 $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1180 $refer[$c]=$canonized_headline;
1181 $refers[$canonized_headline]++
; // count how many in assoc. array so we can track dupes in anchors
1182 $refcount[$c]=$refers[$canonized_headline];
1184 // Prepend the number to the heading text
1187 $tocline=$numbering ." ". $tocline;
1189 // Don't number the heading if it is the only one (looks silly)
1190 if($nh && count($matches[3]) > 1) {
1191 $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1195 // Create the anchor for linking from the TOC to the section
1197 $anchor=$canonized_headline;
1198 if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1200 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
1203 $head[$c].=$sk->editSectionLink($c+
1);
1206 // Put it all together
1208 $head[$c].="<h".$level.$matches[2][$c]
1209 ."<a name=\"".$anchor."\">"
1214 // Add the edit section link
1217 $head[$c]=$sk->editSectionScript($c+
1,$head[$c]);
1227 $toc.=$sk->tocUnindent($toclevel);
1228 $toc=$sk->tocTable($toc);
1231 // split up and insert constructed headlines
1233 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1236 foreach($blocks as $block) {
1237 if(($es) && $c>0 && $i==0) {
1238 # This is the [edit] link that appears for the top block of text when
1239 # section editing is enabled
1240 $full.=$sk->editSectionLink(0);
1243 if($st && $toclines>3 && !$i) {
1244 # Let's add a top anchor just in case we want to link to the top of the page
1245 $full="<a name=\"top\"></a>".$full.$toc;
# Turn each "ISBN <number>" occurrence in $text into a hyperlink.
#
# The text after every "ISBN " marker is scanned for a run of spaces followed
# by a run of ISBN characters (digits, hyphens, upper-case letters). If that
# run contains no characters besides hyphens, the marker and everything
# consumed are put back unchanged. Otherwise the marker and number become a
# link whose URL is built by wfLocalUrlE() for the page named by
# $wgLang->specialPage( "Booksources" ), with the hyphen-free number passed
# as the "isbn" query argument.
#
# @param string $text text to scan
# @return string the text with ISBN references linked
/* private */ function magicISBN( $text )
{
    global $wgLang;

    # explode() rather than split(): the separator is a plain literal and the
    # POSIX-regex split() is gone from current PHP. The space prepended to
    # $text keeps the first piece non-empty even when $text starts with
    # the marker; it is removed again right below.
    $a = explode( "ISBN ", " $text" );
    if ( count( $a ) < 2 ) return $text;
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    foreach ( $a as $x ) {
        # strspn() measures each run without ever indexing past the end of
        # the string, so a marker at the very end of the text is handled
        # cleanly. The casts cover substr() returning false on old PHP.
        $len = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        $len = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $len );
        $x = (string)substr( $x, $len );

        $num = str_replace( "-", "", $isbn );
        $num = str_replace( " ", "", $num );

        if ( $num === "" ) {
            # Nothing linkable: restore the input, including any hyphens
            # that were consumed while looking for a number.
            $text .= "ISBN $blank$isbn$x";
        } else {
            $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
                "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    return $text;
}
1288 /* private */ function magicRFC( $text )
# Result members: rendered text, language links, category links and the
# "contains old magic" flag.
var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

# PHP 4-style constructor (method named after the class).
# Every argument is optional and is stored verbatim in the member of the
# same name; the defaults give an empty text, two empty link arrays and a
# false flag.
function ParserOutput(
    $text = "",
    $languageLinks = array(),
    $categoryLinks = array(),
    $containsOldMagic = false
) {
    $this->mContainsOldMagic = $containsOldMagic;
    $this->mCategoryLinks = $categoryLinks;
    $this->mLanguageLinks = $languageLinks;
    $this->mText = $text;
}
# Read accessors: each one returns the matching member unchanged.

function getText()
{
    return $this->mText;
}

function getLanguageLinks()
{
    return $this->mLanguageLinks;
}

function getCategoryLinks()
{
    return $this->mCategoryLinks;
}

function containsOldMagic()
{
    return $this->mContainsOldMagic;
}
# Write accessors: each one hands the member and the new value to
# wfSetVar() and passes that function's result back to the caller.

function setText( $text )
{
    return wfSetVar( $this->mText, $text );
}

function setLanguageLinks( $ll )
{
    return wfSetVar( $this->mLanguageLinks, $ll );
}

function setCategoryLinks( $cl )
{
    return wfSetVar( $this->mCategoryLinks, $cl );
}

function setContainsOldMagic( $com )
{
    return wfSetVar( $this->mContainsOldMagic, $com );
}
# All variables are private

# Use texvc to expand <math> tags
var $mUseTeX;
# Treat [[Category:xxxx]] tags specially
var $mUseCategoryMagic;
# Use $wgDateFormatter to format dates
var $mUseDynamicDates;
# Interlanguage links are removed and returned in an array
var $mInterwikiMagic;
# Allow external images inline
var $mAllowExternalImages;
# Reference to the preferred skin
var $mSkin;
# Date format index
var $mDateFormat;
# Create "edit section" links
var $mEditSection;
# Generate JavaScript to edit section on right click
var $mEditSectionOnRightClick;
# Generate printable output
var $mPrintable;
# Automatically number headings
var $mNumberHeadings;
# Show table of contents
var $mShowToc;
# Read accessors: each one returns the matching option member unchanged.

function getUseTeX()
{
    return $this->mUseTeX;
}

function getUseCategoryMagic()
{
    return $this->mUseCategoryMagic;
}

function getUseDynamicDates()
{
    return $this->mUseDynamicDates;
}

function getInterwikiMagic()
{
    return $this->mInterwikiMagic;
}

function getAllowExternalImages()
{
    return $this->mAllowExternalImages;
}

function getSkin()
{
    return $this->mSkin;
}

function getDateFormat()
{
    return $this->mDateFormat;
}

function getEditSection()
{
    return $this->mEditSection;
}

function getEditSectionOnRightClick()
{
    return $this->mEditSectionOnRightClick;
}

function getPrintable()
{
    return $this->mPrintable;
}

function getNumberHeadings()
{
    return $this->mNumberHeadings;
}

function getShowToc()
{
    return $this->mShowToc;
}
# Write accessors: each one hands the option member and the new value to
# wfSetVar() and passes that function's result back to the caller.
# setSkin() is the exception: it goes through wfSetRef() instead.

function setUseTeX( $x )
{
    return wfSetVar( $this->mUseTeX, $x );
}

function setUseCategoryMagic( $x )
{
    return wfSetVar( $this->mUseCategoryMagic, $x );
}

function setUseDynamicDates( $x )
{
    return wfSetVar( $this->mUseDynamicDates, $x );
}

function setInterwikiMagic( $x )
{
    return wfSetVar( $this->mInterwikiMagic, $x );
}

function setAllowExternalImages( $x )
{
    return wfSetVar( $this->mAllowExternalImages, $x );
}

function setSkin( $x )
{
    return wfSetRef( $this->mSkin, $x );
}

function setDateFormat( $x )
{
    return wfSetVar( $this->mDateFormat, $x );
}

function setEditSection( $x )
{
    return wfSetVar( $this->mEditSection, $x );
}

function setEditSectionOnRightClick( $x )
{
    return wfSetVar( $this->mEditSectionOnRightClick, $x );
}

function setPrintable( $x )
{
    return wfSetVar( $this->mPrintable, $x );
}

function setNumberHeadings( $x )
{
    return wfSetVar( $this->mNumberHeadings, $x );
}

function setShowToc( $x )
{
    return wfSetVar( $this->mShowToc, $x );
}
# Factory: create a ParserOptions object and initialise it from $user via
# initialiseFromUser().
#
# @param object $user user passed on to initialiseFromUser() (by reference)
# @return ParserOptions the newly initialised options object
/* static */ function newFromUser( &$user )
{
    $popts = new ParserOptions;
    # initialiseFromUser() declares its parameter by reference, so the
    # argument is passed plainly here. A call-time "&" is redundant and is a
    # fatal error from PHP 5.4 onwards.
    $popts->initialiseFromUser( $user );
    return $popts;
}
# Fill every option member of this object.
# Five members are copied from the site-wide globals declared below; the
# skin and the remaining preferences are read from a user object, and
# mPrintable is always set to false.
#
# $userInput is taken by reference and may be empty (see the check below).
1368 function initialiseFromUser( &$userInput )
1370 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
# NOTE(review): the body of this empty-$userInput branch is not visible in
# this excerpt; presumably a fallback user is assigned to $user there and the
# reference assignment below belongs to the other branch - confirm.
1372 if ( !$userInput ) {
1375 $user =& $userInput;
# Site-wide settings, copied from the globals.
1378 $this->mUseTeX
= $wgUseTeX;
1379 $this->mUseCategoryMagic
= $wgUseCategoryMagic;
1380 $this->mUseDynamicDates
= $wgUseDynamicDates;
1381 $this->mInterwikiMagic
= $wgInterwikiMagic;
1382 $this->mAllowExternalImages
= $wgAllowExternalImages;
# Per-user settings: the skin is stored by reference, the rest come from
# $user->getOption() under the option names given as string arguments.
1383 $this->mSkin
=& $user->getSkin();
1384 $this->mDateFormat
= $user->getOption( "date" );
1385 $this->mEditSection
= $user->getOption( "editsection" );
1386 $this->mEditSectionOnRightClick
= $user->getOption( "editsectiononrightclick" );
# Printable output is switched off here regardless of the user.
1387 $this->mPrintable
= false;
1388 $this->mNumberHeadings
= $user->getOption( "numberheadings" );
1389 $this->mShowToc
= $user->getOption( "showtoc" );
1393 # Regex callbacks, used in OutputPage::replaceVariables
1395 # Just get rid of the dangerous stuff
1396 # Necessary because replaceVariables is called after removeHTMLtags,
1397 # and message text can come from any user
# Regex callback that expands a message reference.
# $matches[1] is used as the message name: it is passed to wfMsg() and is
# also the title text registered with the link cache at the end.
# NOTE(review): no return statement or closing brace is visible in this
# excerpt; as a replacement callback it presumably returns $text - confirm.
1398 function wfReplaceMsgVar( $matches ) {
1399 global $wgCurOut, $wgLinkCache;
# Fetch the message text and run it through the output object's HTML tag
# removal.
1400 $text = $wgCurOut->removeHTMLtags( wfMsg( $matches[1] ) );
# Internal links are replaced while the link cache is suspended, and the
# cache is resumed straight afterwards.
# NOTE(review): presumably this keeps links inside the message from being
# recorded for the page being rendered - confirm against LinkCache.
1401 $wgLinkCache->suspend();
1402 $text = $wgCurOut->replaceInternalLinks( $text );
1403 $wgLinkCache->resume();
# Register the message's own page (NS_MEDIAWIKI namespace) with the link cache.
1404 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );
1408 # Effective <nowiki></nowiki>
1409 # Not real <nowiki> because this is called after nowiki sections are processed
1410 function wfReplaceMsgnwVar( $matches ) {
1411 global $wgCurOut, $wgLinkCache;
1412 $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
1413 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );