3 include_once('Tokenizer.php');
7 # Converts wikitext to HTML.
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
12 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
13 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*
15 # * only within ParserOptions
19 # Cleared with clearState():
# Parser state. The file comment directly above marks these four fields as cleared by clearState().
20 var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
# Per-call inputs; parse() assigns both of them from its arguments.
23 var $mOptions, $mTitle;
# Reset of the four state fields: a fresh ParserOutput, counter 0, empty section name, flag false.
# NOTE(review): the enclosing function header is not visible in this listing -- presumably clearState(); confirm.
32 $this->mOutput
= new ParserOutput
;
33 $this->mAutonumber
= 0;
34 $this->mLastSection
= "";
35 $this->mDTopen
= false;
38 # First pass--just handle <nowiki> sections, pass the rest off
39 # to doWikiPass2() which does all the real work.
41 # Returns a ParserOutput
# Entry point: converts the wikitext in $text to HTML, stores it with setText() on
# $this->mOutput and returns that ParserOutput.
#   $text       wikitext to convert
#   $title      stored by reference in $this->mTitle
#   $options    stored in $this->mOptions (getUseTeX() is consulted here)
#   $linestart  forwarded unchanged to doWikiPass2()
#   $clearState NOTE(review): not used on any visible line; presumably gates a
#               clearState() call on an omitted line -- confirm.
# Visible flow: nowiki, math (only when getUseTeX() is on) and pre sections are cut out
# and replaced by placeholder tokens, the remainder goes through doWikiPass2(), and the
# saved sections are substituted back in afterwards.
# NOTE(review): this listing omits some original lines (see the gaps in the embedded
# line numbers), including the initialisation of the counters and accumulators.
43 function parse( $text, &$title, $options, $linestart = true, $clearState = true )
45 $fname = "Parser::parse";
46 wfProfileIn( $fname );
# Marker strings used to build the placeholder tokens: one each for nowiki, math and pre.
47 $unique = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
48 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
49 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
64 $this->mOptions
= $options;
65 $this->mTitle
=& $title;
67 # Replace any instances of the placeholders
68 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
69 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
70 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
# Pass 1: split at each opening nowiki tag, then at the closing tag; the section body is
# saved in $nwlist and a token made of $unique, the section number and the letter s is
# appended to $stripped.
72 while ( "" != $text ) {
73 $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
75 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $text = ""; }
77 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
79 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
80 $stripped .= $unique . $nwsecs . "s";
# Pass 2: same scheme for math sections, rendered through renderMath(); runs only with TeX enabled.
85 if( $this->mOptions
->getUseTeX() ) {
86 while ( "" != $stripped ) {
87 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
89 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped = ""; }
91 $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
93 $mathlist[$mathsecs] = renderMath($q[0]);
94 $stripped2 .= $unique2 . $mathsecs . "s";
# NOTE(review): presumably the else branch taken when TeX is disabled -- confirm.
99 $stripped2 = $stripped;
# Pass 3: same scheme for pre sections; the saved text is re-wrapped in pre tags.
102 while ( "" != $stripped2 ) {
103 $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
105 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped2 = ""; }
107 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
109 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
110 $stripped3 .= $unique3 . $presecs . "s";
115 $text = $this->doWikiPass2( $stripped3, $linestart );
# Backslash and dollar sign are escaped because the saved sections are passed to
# preg_replace() as the replacement argument.
117 $specialChars = array("\\", "$");
118 $escapedChars = array("\\\\", "\\$");
120 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
121 for ( $i = $presecs; $i >= 1; --$i ) {
122 $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
123 $escapedChars, $prelist[$i] ), $text );
126 for ( $i = $mathsecs; $i >= 1; --$i ) {
127 $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
128 $escapedChars, $mathlist[$i] ), $text );
131 for ( $i = $nwsecs; $i >= 1; --$i ) {
132 $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
133 $escapedChars, $nwlist[$i] ), $text );
136 $this->mOutput
->setText( $text );
137 wfProfileOut( $fname );
138 return $this->mOutput
;
# Builds the HTML block listing sub-categories and articles for a category page.
# Returns nothing when getUseCategoryMagic() is off, and an empty string when the part of
# the title text before the first colon differs from the ucfirst-ed category message.
# NOTE(review): the two early exits return different values (none vs. empty string).
# NOTE(review): the final return is not visible here; doWikiPass2() appends the result to
# the page text, so presumably $r is returned -- confirm.
141 function categoryMagic ()
143 global $wgLang , $wgUser ;
144 if ( !$this->mOptions
->getUseCategoryMagic() ) return ;
145 $id = $this->mTitle
->getArticleID() ;
146 $cat = ucfirst ( wfMsg ( "category" ) ) ;
147 $ti = $this->mTitle
->getText() ;
148 $ti = explode ( ":" , $ti , 2 ) ;
149 if ( $cat != $ti[0] ) return "" ;
150 $r = "<br break=all>\n" ;
152 $articles = array() ;
# $parents is initialised but never read on the visible lines.
153 $parents = array () ;
154 $children = array() ;
157 # $sk =& $this->mGetSkin();
158 $sk =& $wgUser->getSkin() ;
# Two alternative queries are visible; the lines choosing between them are omitted.
# NOTE(review): $id is interpolated directly into the SQL text; it comes from getArticleID().
162 $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
164 $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
167 $res = wfQuery ( $sql, DB_READ
) ;
168 while ( $x = wfFetchObject ( $res ) )
171 # $t->newFromDBkey ( $x->l_from ) ;
172 # $t = $t->getText() ;
# Rebuild the prefixed title: namespace text, a colon when the namespace is non-empty, then the title.
176 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
177 if ( $t != "" ) $t .= ":" ;
178 $t .= $x->cur_title
;
# Titles whose prefix equals $cat are collected as sub-categories, the others as articles.
181 $y = explode ( ":" , $t , 2 ) ;
182 if ( count ( $y ) == 2 && $y[0] == $cat ) {
183 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
185 array_push ( $articles , $sk->makeLink ( $t ) ) ;
188 wfFreeResult ( $res ) ;
# Each non-empty list is sorted and emitted under its own h2 heading, comma separated.
191 if ( count ( $children ) > 0 )
193 asort ( $children ) ;
194 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
195 $r .= implode ( ", " , $children ) ;
199 if ( count ( $articles ) > 0 )
201 asort ( $articles ) ;
202 $h = wfMsg( "category_header", $ti[1] );
203 $r .= "<h2>{$h}</h2>\n" ;
204 $r .= implode ( ", " , $articles ) ;
# Defines the whitelist of HTML attribute names that user-supplied tags may keep.
# NOTE(review): the return statement is not visible; fixTagAttributes() and
# removeHTMLtags() both assign the call result to $htmlattrs, so presumably the array
# is returned -- confirm.
# The name width appears twice in the list (general attributes and table attributes).
211 function getHTMLattrs ()
213 $htmlattrs = array( # Allowed attributes--no scripting, etc.
214 "title", "align", "lang", "dir", "width", "height",
215 "bgcolor", "clear", /* BR */ "noshade", /* HR */
216 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
217 /* FONT */ "type", "start", "value", "compact",
218 /* For various lists, mostly deprecated but safe */
219 "summary", "width", "border", "frame", "rules",
220 "cellspacing", "cellpadding", "valign", "char",
221 "charoff", "colgroup", "col", "span", "abbr", "axis",
222 "headers", "scope", "rowspan", "colspan", /* Tables */
223 "id", "class", "name", "style" /* For CSS */
# Filters the attribute text $t of an HTML tag against the getHTMLattrs() whitelist.
# Returns an empty string straight away when $t is blank.
# NOTE(review): the calls wrapping the two patterns below and the final return are on
# omitted lines; the descriptions here rely on the existing comments -- confirm.
228 function fixTagAttributes ( $t )
230 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
231 $htmlattrs = $this->getHTMLattrs() ;
233 # Strip non-approved attributes from the tag
# The pattern captures an attribute name and an optional value; the replacement
# expression keeps the pair only when the lower-cased name is in $htmlattrs.
# NOTE(review): the pattern uses the e (eval) modifier, which was deprecated in PHP 5.5
# and removed in PHP 7.0; preg_replace_callback() is the documented replacement.
235 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
236 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
238 # Strip javascript "expression" from stylesheets. Brute force approach:
239 # If anything offensive is found, all attributes of the HTML tag are dropped
# The pattern looks inside a style attribute for expression, for the letters tp
# followed by any number of s and then a colon and two slashes, or for url followed by
# an opening parenthesis. It is applied to the wfMungeToUtf8() form of $t.
242 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
243 wfMungeToUtf8( $t ) ) )
# Converts wiki table markup in $t into HTML table markup, one input line at a time.
# Line prefixes handled on the visible lines:
#   {|  opens a table (the rest of the line goes through fixTagAttributes())
#   |}  closes the current table
#   |-  starts a new row (extra dashes are skipped, the rest is kept as row attributes)
#   |   data cell,  !  header cell,  |+  caption
# Four parallel stacks get one entry pushed per opened table; their meaning is given by
# the trailing comments on their declarations.
# NOTE(review): this listing omits lines (braces, initialisation of $z, assignments back
# into $t[$k], the return); presumably the joined text is returned, since doWikiPass2()
# assigns the call result to $text -- confirm.
251 function doTableStuff ( $t )
253 $t = explode ( "\n" , $t ) ;
254 $td = array () ; # Is currently a td tag open?
255 $ltd = array () ; # Was it TD or TH?
256 $tr = array () ; # Is currently a tr tag open?
257 $ltr = array () ; # tr attributes
258 foreach ( $t AS $k => $x )
261 $fc = substr ( $x , 0 , 1 ) ;
262 if ( "{|" == substr ( $x , 0 , 2 ) )
# Attributes are taken from offset 3, i.e. one character after the two-character prefix.
264 $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
265 array_push ( $td , false ) ;
266 array_push ( $ltd , "" ) ;
267 array_push ( $tr , false ) ;
268 array_push ( $ltr , "" ) ;
270 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
271 else if ( "|}" == substr ( $x , 0 , 2 ) )
274 $l = array_pop ( $ltd ) ;
275 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
276 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
280 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
282 $z = trim ( substr ( $x , 2 ) ) ;
283 $t[$k] = "<caption>{$z}</caption>\n" ;
285 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
287 $x = substr ( $x , 1 ) ;
288 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
290 $l = array_pop ( $ltd ) ;
291 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
292 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
295 array_push ( $tr , false ) ;
296 array_push ( $td , false ) ;
297 array_push ( $ltd , "" ) ;
298 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
300 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
302 if ( "|+" == substr ( $x , 0 , 2 ) )
305 $x = substr ( $x , 1 ) ;
# Several cells may share one line, separated by a double pipe; for header lines a
# double exclamation mark is first rewritten to a double pipe.
307 $after = substr ( $x , 1 ) ;
308 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
309 $after = explode ( "||" , $after ) ;
311 foreach ( $after AS $theline )
# Open a row with the stored row attributes when none is open yet.
316 $tra = array_pop ( $ltr ) ;
317 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
318 array_push ( $tr , true ) ;
319 array_push ( $ltr , "" ) ;
# Close the previous cell if one is open, then pick the tag name for the new one.
322 $l = array_pop ( $ltd ) ;
323 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
324 if ( $fc == "|" ) $l = "TD" ;
325 else if ( $fc == "!" ) $l = "TH" ;
326 else if ( $fc == "+" ) $l = "CAPTION" ;
328 array_push ( $ltd , $l ) ;
# A single pipe inside the cell text separates cell attributes from cell content.
329 $y = explode ( "|" , $theline , 2 ) ;
330 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
# NOTE(review): the doubled assignment on the next line is redundant.
331 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
333 array_push ( $td , true ) ;
338 # Closing open td, tr && table
339 while ( count ( $td ) > 0 )
341 if ( array_pop ( $td ) ) $t[] = "</td>" ;
342 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
346 $t = implode ( "\n" , $t ) ;
347 # $t = $this->removeHTMLtags( $t );
351 # Well, OK, it's actually about 14 passes. But since all the
352 # hard lifting is done inside PHP's regex code, it probably
353 # wouldn't speed things up much to add a real parser.
# Runs the main conversion passes over $text in a fixed order: HTML clean-up, variable
# replacement, headings, block levels, optional date reformatting, external links,
# internal links, tables, ISBN and RFC magic, heading formatting, the skin hook
# transformContent(), and finally the category listing from categoryMagic().
# $linestart is forwarded to doBlockLevels().
# NOTE(review): the return statement is not visible; parse() assigns the call result to
# $text, so presumably $text is returned -- confirm.
# The profiling label still carries the OutputPage prefix, whereas parse() uses Parser.
355 function doWikiPass2( $text, $linestart )
357 $fname = "OutputPage::doWikiPass2";
358 wfProfileIn( $fname );
360 $text = $this->removeHTMLtags( $text );
361 $text = $this->replaceVariables( $text );
363 # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
364 $text = str_replace ( "<HR>", "<hr>", $text );
366 $text = $this->doHeadings( $text );
367 $text = $this->doBlockLevels( $text, $linestart );
# Date reformatting runs only when the parser options enable dynamic dates.
369 if($this->mOptions
->getUseDynamicDates()) {
370 global $wgDateFormatter;
371 $text = $wgDateFormatter->reformat( $this->mOptions
->getDateFormat(), $text );
# External links are replaced before internal ones (see the note above replaceExternalLinks()).
374 $text = $this->replaceExternalLinks( $text );
375 $text = $this->replaceInternalLinks ( $text );
376 $text = $this->doTableStuff ( $text ) ;
378 $text = $this->magicISBN( $text );
379 $text = $this->magicRFC( $text );
380 $text = $this->formatHeadings( $text );
382 $sk =& $this->mOptions
->getSkin();
383 $text = $sk->transformContent( $text );
384 $text .= $this->categoryMagic () ;
386 wfProfileOut( $fname );
# Rewrites heading lines: text wrapped in N equals signs on each side at the start of a
# line (N from 6 down to 1) becomes an hN element; the whitespace or line end that
# follows is kept. The text between the markers may not contain an equals sign.
# Levels are processed from 6 to 1, presumably so that longer runs are consumed first.
# NOTE(review): the return statement is not visible; doWikiPass2() uses the result as
# the new text -- confirm.
391 /* private */ function doHeadings( $text )
393 for ( $i = 6; $i >= 1; --$i ) {
394 $h = substr( "======", 0, $i );
395 $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
396 "<h{$i}>\\1</h{$i}>\\2", $text );
401 # Note: we have to do external links before the internal ones,
402 # and otherwise take great care in the order of things here, so
403 # that we don't end up interpreting some URLs twice.
# Applies subReplaceExternalLinks() to $text once per supported protocol, in the order
# http, https, ftp, irc, gopher, news, mailto. Only http and https pass true as the
# third argument ($autonumber).
# NOTE(review): the return statement is not visible; doWikiPass2() uses the result as
# the new text -- confirm.
405 /* private */ function replaceExternalLinks( $text )
407 $fname = "OutputPage::replaceExternalLinks";
408 wfProfileIn( $fname );
409 $text = $this->subReplaceExternalLinks( $text, "http", true );
410 $text = $this->subReplaceExternalLinks( $text, "https", true );
411 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
412 $text = $this->subReplaceExternalLinks( $text, "irc", false );
413 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
414 $text = $this->subReplaceExternalLinks( $text, "news", false );
415 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
416 wfProfileOut( $fname );
# Replaces external links of one protocol in $s.
#   $s          text to process
#   $protocol   scheme name without the colon, e.g. http
#   $autonumber when true, bracketed links without a label get a running number taken
#               from $this->mAutonumber, and (together with getAllowExternalImages())
#               bare image URLs are turned into images
# Two stages are visible: (1) bare URLs are rewritten by regex, with the protocol
# temporarily swapped for the marker $unique and restored by str_replace afterwards;
# (2) the text is split at each opening bracket followed by the protocol, and each
# piece is matched as either an unlabelled or a labelled bracketed link.
# NOTE(review): $sep is used below but its definition is on an omitted line, as are the
# initialisation of $paren and $trail and the return statement -- confirm.
420 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
422 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
# Character class body for characters allowed inside a URL.
423 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
425 # this is the list of separators that should be ignored if they
426 # are the last character of an URL but that should be included
427 # if they occur within the URL, e.g. "go to www.foo.com, where .."
428 # in this case, the last comma should not become part of the URL,
429 # but in "www.foo.com/123,2342,32.htm" it should.
# Character class body for file names, and the image extensions treated as inline images.
431 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
432 $images = "gif|png|jpg|jpeg";
434 # PLEASE NOTE: The curly braces { } are not part of the regex,
435 # they are interpreted as part of the string (used to tell PHP
436 # that the content of the string should be inserted there).
# $e1: bare URL ending in an image extension. $e2: any other bare URL. Both require
# that the URL is not directly preceded by an opening square bracket.
437 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
438 "((?i){$images})([^{$uc}]|$)/";
440 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
441 $sk =& $this->mOptions
->getSkin();
443 if ( $autonumber and $this->mOptions
->getAllowExternalImages() ) { # Use img tags only for HTTP urls
444 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
445 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
447 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
448 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
449 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
# Swap the marker back to the real protocol name.
451 $s = str_replace( $unique, $protocol, $s );
# Stage 2: bracketed links. A space is prepended before splitting and removed again
# from the first piece, which is the text before the first bracketed link.
453 $a = explode( "[{$protocol}:", " " . $s );
454 $s = array_shift( $a );
455 $s = substr( $s, 1 );
# $e1: URL directly followed by the closing bracket. $e2: URL, whitespace, label, closing bracket.
457 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
458 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
460 foreach ( $a as $line ) {
461 if ( preg_match( $e1, $line, $m ) ) {
462 $link = "{$protocol}:{$m[1]}";
464 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
465 else { $text = wfEscapeHTML( $link ); }
466 } else if ( preg_match( $e2, $line, $m ) ) {
467 $link = "{$protocol}:{$m[1]}";
# Neither form matched: the piece is put back unchanged, with the prefix removed by explode() restored.
471 $s .= "[{$protocol}:" . $line;
# In printable mode the escaped URL is added in parentheses after the link.
474 if ( $this->mOptions
->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
476 $la = $sk->getExternalLinkAttributes( $link, $text );
477 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
# Handles a bold toggle token. $state is taken by reference; its strong entry is set
# to the token position when bold opens and to FALSE when it closes. When emphasis was
# opened after the currently open bold, the emitted markup closes em and strong and
# then reopens em so that the tags stay properly nested.
# NOTE(review): the other assignments to $s and the return are on omitted lines;
# replaceInternalLinks() uses the call result as output text -- confirm.
483 /* private */ function handle3Quotes( &$state, $token )
485 if ( $state["strong"] ) {
486 if ( $state["em"] && $state["em"] > $state["strong"] )
488 # ''' lala ''lala '''
489 $s = "</em></strong><em>";
493 $state["strong"] = FALSE;
496 $state["strong"] = $token["pos"];
# Handles an emphasis toggle token; mirror image of handle3Quotes(). $state is taken by
# reference; its em entry is set to the token position when emphasis opens and to FALSE
# when it closes. When bold was opened after the currently open emphasis, the emitted
# markup closes strong and em and then reopens strong.
# NOTE(review): the other assignments to $s and the return are on omitted lines;
# replaceInternalLinks() uses the call result as output text -- confirm.
501 /* private */ function handle2Quotes( &$state, $token )
503 if ( $state["em"] ) {
504 if ( $state["strong"] && $state["strong"] > $state["em"] )
506 # ''lala'''lala'' ....'''
507 $s = "</strong></em><strong>";
511 $state["em"] = FALSE;
514 $state["em"] = $token["pos"];
# Handles a combined bold-and-emphasis token. $state is taken by reference. Four cases:
#   both open    close both, the one with the larger stored position first
#   only em      close em, open strong
#   only strong  close strong, open em
#   neither      open strong then em, both recorded at the token position
# NOTE(review): the initialisation of $s and the return are on omitted lines;
# replaceInternalLinks() uses the call result as output text -- confirm.
519 /* private */ function handle5Quotes( &$state, $token )
521 if ( $state["em"] && $state["strong"] ) {
522 if ( $state["em"] < $state["strong"] ) {
523 $s .= "</strong></em>";
525 $s .= "</em></strong>";
527 $state["strong"] = $state["em"] = FALSE;
528 } elseif ( $state["em"] ) {
529 $s .= "</em><strong>";
530 $state["em"] = FALSE;
531 $state["strong"] = $token["pos"];
532 } elseif ( $state["strong"] ) {
533 $s .= "</strong><em>";
534 $state["strong"] = FALSE;
535 $state["em"] = $token["pos"];
536 } else { # not $em and not $strong
537 $s .= "<strong><em>";
538 $state["strong"] = $state["em"] = $token["pos"];
# Token-driven pass over $str that handles internal links and the quote-based bold and
# emphasis markup. A Tokenizer built from $str yields tokens one by one; while a link is
# open, produced text is pushed on $tokenStack instead of being emitted, and when the
# closing token arrives the stacked pieces are glued together and handed to
# handleInternalLink(). Token types not handled here go to $wgLang->processToken(), and
# a NULL answer makes the token show up highlighted in the output.
# NOTE(review): the case labels, the output accumulator and the return are on omitted
# lines; which token type maps to which branch is inferred from the existing comments
# -- confirm.
543 /* private */ function replaceInternalLinks( $str )
545 global $wgLang; # for language specific parser hook
547 $tokenizer=Tokenizer
::newFromString( $str );
548 $tokenStack = array();
# Open-tag bookkeeping shared with the handleNQuotes() helpers: FALSE means closed.
551 $state["em"] = FALSE;
552 $state["strong"] = FALSE;
555 # The tokenizer splits the text into tokens and returns them one by one.
556 # Every call to the tokenizer returns a new token.
557 while ( $token = $tokenizer->nextToken() )
559 switch ( $token["type"] )
562 # simple text with no further markup
563 $txt = $token["text"];
567 # FIXME : Treat orphaned open tags (stack not empty when text is over)
569 array_push( $tokenStack, $token );
574 # get text from stack, glue it together, and call the code to handle a
576 if ( count( $tokenStack ) == 0 )
578 # stack empty. Found a ]] without an opening [[
# Pop back to the matching opening token, prepending each popped text to $linkText.
582 $lastToken = array_pop( $tokenStack );
583 while ( $lastToken["type"] != "[[" )
585 $linkText = $lastToken["text"] . $linkText;
586 $lastToken = array_pop( $tokenStack );
588 $txt = $linkText ."]]";
589 $prefix = $lastToken["text"];
# A text token right after the link is consumed too, so the link handler can see the trail.
590 $nextToken = $tokenizer->previewToken();
591 if ( $nextToken["type"] == "text" )
593 # Preview just looks at it. Now we have to fetch it.
594 $nextToken = $tokenizer->nextToken();
595 $txt .= $nextToken["text"];
597 $txt = $this->handleInternalLink( $txt, $prefix );
599 $tagIsOpen = (count( $tokenStack ) != 0);
605 # This and the three next ones handle quotes
606 $txt = $this->handle3Quotes( $state, $token );
609 $txt = $this->handle2Quotes( $state, $token );
612 $txt = $this->handle5Quotes( $state, $token );
619 # Call language specific Hook.
620 $txt = $wgLang->processToken( $token, $tokenStack );
621 if ( NULL == $txt ) {
622 # An unknown token. Highlight.
623 $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
624 $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
628 # If we're parsing the interior of a link, don't append the interior to $s,
629 # but push it to the stack so it can be processed when a ]] token is found.
630 if ( $tagIsOpen && $txt != "" ) {
631 $token["type"] = "text";
632 $token["text"] = $txt;
633 array_push( $tokenStack, $token );
638 if ( count( $tokenStack ) != 0 )
640 # still objects on stack. opened [[ tag without closing ]] tag.
# Leftovers are flushed as plain text: text tokens by their text, others by their type string.
642 while ( $lastToken = array_pop( $tokenStack ) )
644 if ( $lastToken["type"] == "text" )
646 $txt = $lastToken["text"] . $txt;
648 $txt = $lastToken["type"] . $txt;
# Renders one internal link collected by replaceInternalLinks().
#   $line    link content followed by the closing brackets and any trailing text
#   $prefix  text carried by the opening token
# Input that does not match the link pattern is emitted unchanged behind the opening
# brackets. Otherwise the target is resolved (leading colon, leading slash for
# subpages) into a Title and dispatched by kind: interwiki language link, image,
# link to the current page, category, media, special page, or ordinary link.
# NOTE(review): many lines are omitted here (static declarations of $tc and $e1, the
# assignments of $text and $trail, early returns, the final return) -- confirm details
# against the full file.
# NOTE(review): Namespace is a reserved word from PHP 5.3 on, so the Namespace:: calls
# below cannot be parsed by current PHP versions.
656 /* private */ function handleInternalLink( $line, $prefix )
658 global $wgLang, $wgLinkCache;
659 global $wgNamespacesWithSubpages, $wgLanguageCode;
# The profiling label still names the caller, with the OutputPage prefix.
660 static $fname = "OutputPage::replaceInternalLinks" ;
661 wfProfileIn( $fname );
663 wfProfileIn( "$fname-setup" );
# The patterns and namespace values below are built once and kept in static variables.
665 if ( !$tc ) { $tc = Title
::legalChars() . "#"; }
666 $sk =& $this->mOptions
->getSkin();
668 # Match a link having the form [[namespace:link|alternate]]trail
670 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
671 # Match the end of a line for a word that's not followed by whitespace,
672 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
673 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
674 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
675 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
678 # Special and Media are pseudo-namespaces; no pages actually exist in them
679 static $image = FALSE;
680 static $special = FALSE;
681 static $media = FALSE;
682 static $category = FALSE;
683 if ( !$image ) { $image = Namespace::getImage(); }
684 if ( !$special ) { $special = Namespace::getSpecial(); }
685 if ( !$media ) { $media = Namespace::getMedia(); }
# NOTE(review): $category holds a message text, yet it is later compared with the value
# of getNamespace() -- confirm that the two can actually be equal.
686 if ( !$category ) { $category = wfMsg ( "category" ) ; }
688 $nottalk = !Namespace::isTalk( $this->mTitle
->getNamespace() );
690 wfProfileOut( "$fname-setup" );
692 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
695 } else { # Invalid form; output directly
696 $s .= $prefix . "[[" . $line ;
702 :Foobar -- override special treatment of prefix (images, language links)
703 /Foobar -- convert to CurrentPage/Foobar
704 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
706 $c = substr($m[1],0,1);
707 $noforce = ($c != ":");
708 if( $c == "/" ) { # subpage
709 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
710 $m[1]=substr($m[1],1,strlen($m[1])-2);
713 $noslash=substr($m[1],1);
715 if($wgNamespacesWithSubpages[$this->mTitle
->getNamespace()]) { # subpages allowed here
716 $link = $this->mTitle
->getPrefixedText(). "/" . trim($noslash);
719 } # this might be changed for ugliness reasons
721 $link = $noslash; # no subpage allowed, use standard link
723 } elseif( $noforce ) { # no subpage
# Presumably the forced (leading colon) case: the first character is dropped -- confirm.
726 $link = substr( $m[1], 1 );
731 $nt = Title
::newFromText( $link );
# Fallback that re-emits the raw link text; presumably taken when no Title could be built -- confirm.
733 $s .= $prefix . "[[" . $line;
736 $ns = $nt->getNamespace();
737 $iw = $nt->getInterWiki();
# Interwiki prefix naming a known language, outside talk pages: record a language link
# on the output object and emit only prefix and trail.
# NOTE(review): $wgInterwikiMagic is not among the globals declared on the visible lines.
739 if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
740 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
741 $s .= $prefix . $trail;
744 if( $ns == $image ) {
745 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
746 $wgLinkCache->addImageLinkObj( $nt );
# A link to the current page is shown in bold instead of as a link.
# NOTE(review): the loose comparison with FALSE is also true when strpos() returns 0,
# i.e. when the hash sign is the first character of $link.
750 if( ( $nt->getPrefixedText() == $this->mTitle
->getPrefixedText() ) &&
751 ( strpos( $link, "#" ) == FALSE ) ) {
752 $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
# Category links are collected separately and leave only prefix and trail in the text.
# NOTE(review): they are stored in $this->mCategoryLinks on the parser, whereas the
# language links above go to $this->mOutput -- confirm this asymmetry is intended.
755 if ( $ns == $category && $this->mOptions
->getUseCategoryMagic() ) {
756 $t = explode ( ":" , $nt->getText() ) ;
758 $t = implode ( ":" , $t ) ;
759 $t = $wgLang->ucFirst ( $t ) ;
760 # $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
761 $nnt = Title
::newFromText ( $category.":".$t ) ;
762 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
763 $this->mCategoryLinks
[] = $t ;
764 $s .= $prefix . $trail ;
767 if( $ns == $media ) {
768 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
769 $wgLinkCache->addImageLinkObj( $nt );
771 } elseif( $ns == $special ) {
772 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
775 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
777 wfProfileOut( $fname );
781 # Some functions here used by doBlockLevels()
# Ends the section currently recorded in $this->mLastSection. A closing tag is built
# only when that section is neither p nor empty; the field is then reset to empty.
# NOTE(review): the initialisation of $result and the return are on omitted lines;
# callers append the call result to their output -- confirm.
783 /* private */ function closeParagraph()
786 if ( 0 != strcmp( "p", $this->mLastSection
) &&
787 0 != strcmp( "", $this->mLastSection
) ) {
788 $result = "</" . $this->mLastSection
. ">";
790 $this->mLastSection
= "";
793 # getCommon() returns the length of the longest common prefix of both
794 # arguments, i.e. the number of leading characters they share.
# Compares $st1 and $st2 character by character from the start, up to the length of the
# shorter string, and stops at the first position where they differ.
# NOTE(review): the return statement is not visible; per the comment above the function
# and its use in doBlockLevels(), presumably the loop index $i is returned -- confirm.
# NOTE(review): the curly-brace string offset syntax used in the loop was deprecated in
# PHP 7.4 and removed in PHP 8.0; square brackets are the supported form.
796 /* private */ function getCommon( $st1, $st2 )
798 $fl = strlen( $st1 );
799 $shorter = strlen( $st2 );
800 if ( $fl < $shorter ) { $shorter = $fl; }
802 for ( $i = 0; $i < $shorter; ++
$i ) {
803 if ( $st1{$i} != $st2{$i} ) { break; }
807 # These next three functions open, continue, and close the list
808 # element appropriate to the prefix character passed into them.
# Builds the markup that opens a new list for the prefix character $char, after closing
# any open paragraph section: asterisk gives ul/li, hash gives ol/li, colon gives dl/dd,
# semicolon gives dl/dt and also sets $this->mDTopen. Any other character replaces the
# result with an HTML comment marking error 1.
# NOTE(review): the return statement is not visible; doBlockLevels() appends the call
# result to its output -- confirm.
810 /* private */ function openList( $char )
812 $result = $this->closeParagraph();
814 if ( "*" == $char ) { $result .= "<ul><li>"; }
815 else if ( "#" == $char ) { $result .= "<ol><li>"; }
816 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
817 else if ( ";" == $char ) {
818 $result .= "<dl><dt>";
819 $this->mDTopen
= true;
821 else { $result = "<!-- ERR 1 -->"; }
# Returns the markup that moves on to the next item of the list type given by $char.
# Asterisk and hash close and reopen li. For colon and semicolon, an open dt is closed
# first; semicolon then opens a new dt and sets $this->mDTopen, colon opens a dd and
# clears it. Any other character yields an HTML comment marking error 2.
# NOTE(review): the assignment of $close for the case where no dt is open is on an
# omitted line -- confirm.
826 /* private */ function nextItem( $char )
828 if ( "*" == $char ||
"#" == $char ) { return "</li><li>"; }
829 else if ( ":" == $char ||
";" == $char ) {
831 if ( $this->mDTopen
) { $close = "</dt>"; }
832 if ( ";" == $char ) {
833 $this->mDTopen
= true;
834 return $close . "<dt>";
836 $this->mDTopen
= false;
837 return $close . "<dd>";
840 return "<!-- ERR 2 -->";
# Builds the markup that closes the list type given by $char: li plus ul for asterisk,
# li plus ol for hash, and for colon either dt plus dl (when $this->mDTopen is set,
# which is then cleared) or dd plus dl. Any other character returns an HTML comment
# marking error 3.
# NOTE(review): no semicolon branch is visible; doBlockLevels() passes characters taken
# from prefixes in which semicolons were replaced by colons.
# NOTE(review): the return of $text is on an omitted line -- confirm.
843 /* private */function closeList( $char )
845 if ( "*" == $char ) { $text = "</li></ul>"; }
846 else if ( "#" == $char ) { $text = "</li></ol>"; }
847 else if ( ":" == $char ) {
848 if ( $this->mDTopen
) {
849 $this->mDTopen
= false;
850 $text = "</dt></dl>";
852 $text = "</dd></dl>";
855 else { return "<!-- ERR 3 -->"; }
# Line-by-line block pass. $text is split on newlines and rebuilt; for every line the
# run of leading list characters (asterisk, hash, colon, semicolon) is compared with the
# prefix of the previous line to decide whether to continue an item, close lists, or
# open new ones. Lines without a prefix are handled as paragraph or preformatted
# sections unless a block element such as table, blockquote or a heading is open.
# When $linestart is false the first line is appended untouched.
# NOTE(review): many lines are omitted here (saving of the original line in $oLine, the
# updates of $opl, $cpl and $lastPref inside the loops, appending $t, the return)
# -- confirm details against the full file.
# NOTE(review): the curly-brace string offsets used below were removed in PHP 8.0.
859 /* private */ function doBlockLevels( $text, $linestart )
861 $fname = "OutputPage::doBlockLevels";
862 wfProfileIn( $fname );
863 # Parsing through the text line by line. The main thing
864 # happening here is handling of block-level elements p, pre,
865 # and making lists from lines starting with * # : etc.
867 $a = explode( "\n", $text );
868 $text = $lastPref = "";
869 $this->mDTopen
= $inBlockElem = false;
871 if ( ! $linestart ) { $text .= array_shift( $a ); }
872 foreach ( $a as $t ) {
873 if ( "" != $text ) { $text .= "\n"; }
# $opl and $npl are the old and new prefix lengths; $pref2 is the prefix with every
# semicolon replaced by a colon, used for comparison with the previous prefix.
876 $opl = strlen( $lastPref );
877 $npl = strspn( $t, "*#:;" );
878 $pref = substr( $t, 0, $npl );
879 $pref2 = str_replace( ";", ":", $pref );
880 $t = substr( $t, $npl );
# Same prefix as the previous line: just move to the next item.
882 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
883 $text .= $this->nextItem( substr( $pref, -1 ) );
# For a definition term, text after the first colon on the same line becomes the definition.
885 if ( ";" == substr( $pref, -1 ) ) {
886 $cpos = strpos( $t, ":" );
887 if ( ! ( false === $cpos ) ) {
888 $term = substr( $t, 0, $cpos );
889 $text .= $term . $this->nextItem( ":" );
890 $t = substr( $t, $cpos +
1 );
# Prefix changed: close lists down to the common part, then open the new levels.
893 } else if (0 != $npl ||
0 != $opl) {
894 $cpl = $this->getCommon( $pref, $lastPref );
896 while ( $cpl < $opl ) {
897 $text .= $this->closeList( $lastPref{$opl-1} );
900 if ( $npl <= $cpl && $cpl > 0 ) {
901 $text .= $this->nextItem( $pref{$cpl-1} );
903 while ( $npl > $cpl ) {
904 $char = substr( $pref, $cpl, 1 );
905 $text .= $this->openList( $char );
907 if ( ";" == $char ) {
908 $cpos = strpos( $t, ":" );
909 if ( ! ( false === $cpos ) ) {
910 $term = substr( $t, 0, $cpos );
911 $text .= $term . $this->nextItem( ":" );
912 $t = substr( $t, $cpos +
1 );
919 if ( 0 == $npl ) { # No prefix--go to paragraph mode
# An opening table, blockquote or heading tag ends the current paragraph section.
921 "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
922 $text .= $this->closeParagraph();
925 if ( ! $inBlockElem ) {
# A leading space selects a different section type than p; presumably pre -- confirm.
926 if ( " " == $t{0} ) {
928 # $t = wfEscapeHTML( $t );
930 else { $newSection = "p"; }
# A blank original line closes the current section and opens the new one.
932 if ( 0 == strcmp( "", trim( $oLine ) ) ) {
933 $text .= $this->closeParagraph();
934 $text .= "<" . $newSection . ">";
935 } else if ( 0 != strcmp( $this->mLastSection
,
937 $text .= $this->closeParagraph();
938 if ( 0 != strcmp( "p", $newSection ) ) {
939 $text .= "<" . $newSection . ">";
942 $this->mLastSection
= $newSection;
# A closing table, blockquote or heading tag ends block-element mode.
945 preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
946 $inBlockElem = false;
# After the last line: close any list that is still open, then any open section other than p.
952 $text .= $this->closeList( $pref2{$npl-1} );
955 if ( "" != $this->mLastSection
) {
956 if ( "p" != $this->mLastSection
) {
957 $text .= "</" . $this->mLastSection
. ">";
959 $this->mLastSection
= "";
961 wfProfileOut( $fname );
# Substitutes magic words in $text: the current month, month name, genitive month name,
# day, weekday name, year and time; the number-of-articles word; and the MSG and MSGNW
# words, which are handled through the callbacks wfReplaceMsgVar and wfReplaceMsgnwVar.
# Counters on $this record whether old-style or new-style magic was found.
# NOTE(review): $this->mContainsOldMagic and $this->mContainsNewMagic are not among the
# fields declared on the visible var lines of this class -- confirm.
# NOTE(review): $wgCurOut is declared global but not used on the visible lines, and the
# return statement is on an omitted line; doWikiPass2() uses the result as the new text.
965 /* private */ function replaceVariables( $text )
967 global $wgLang, $wgCurOut;
968 $fname = "OutputPage::replaceVariables";
969 wfProfileIn( $fname );
974 # See Language.php for the definition of each magic word
975 # As with sigs, this uses the server's local time -- ensure
976 # this is appropriate for your audience!
978 $magic[MAG_CURRENTMONTH
] = date( "m" );
979 $magic[MAG_CURRENTMONTHNAME
] = $wgLang->getMonthName( date("n") );
980 $magic[MAG_CURRENTMONTHNAMEGEN
] = $wgLang->getMonthNameGen( date("n") );
981 $magic[MAG_CURRENTDAY
] = date("j");
# date with format w counts from 0 for Sunday; one is added before the weekday name lookup.
982 $magic[MAG_CURRENTDAYNAME
] = $wgLang->getWeekdayName( date("w")+
1 );
983 $magic[MAG_CURRENTYEAR
] = date( "Y" );
984 $magic[MAG_CURRENTTIME
] = $wgLang->time( wfTimestampNow(), false );
# $text is passed both as the input and as the third argument of replaceMultiple().
986 $this->mContainsOldMagic +
= MagicWord
::replaceMultiple($magic, $text, $text);
# The article count is fetched only when the word actually occurs in the text.
988 $mw =& MagicWord
::get( MAG_NUMBEROFARTICLES
);
989 if ( $mw->match( $text ) ) {
990 $v = wfNumberOfArticles();
991 $text = $mw->replace( $v, $text );
992 if( $mw->getWasModified() ) { $this->mContainsOldMagic++
; }
995 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
996 # The callbacks are at the bottom of this file
998 $mw =& MagicWord
::get( MAG_MSG
);
999 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
1000 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
1002 $mw =& MagicWord
::get( MAG_MSGNW
);
1003 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
1004 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
1006 wfProfileOut( $fname );
1010 # Cleans up HTML, removes dangerous tags and attributes
# Cleans user-supplied HTML in $text. HTML comments are removed, the text is split at
# every opening angle bracket, and each piece is matched as a tag. Tags from the allowed
# lists are kept, with their attributes filtered by fixTagAttributes(); a tag stack
# tracks elements that must be closed, and a separate stack saves the tag stack whenever
# a table is entered. Tags still open at the end are closed.
# NOTE(review): many lines are omitted here (the contents of $tabletags, the handling of
# rejected tags, the emission of closing tags, the return) -- confirm details against
# the full file.
1011 /* private */ function removeHTMLtags( $text )
1013 $fname = "OutputPage::removeHTMLtags";
1014 wfProfileIn( $fname );
1015 $htmlpairs = array( # Tags that must be closed
1016 "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
1017 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1018 "strike", "strong", "tt", "var", "div", "center",
1019 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1020 "ruby", "rt" , "rb" , "rp"
# Tags that are accepted without being pushed on the tag stack.
1022 $htmlsingle = array(
1023 "br", "p", "hr", "li", "dt", "dd"
1025 $htmlnest = array( # Tags that can be nested--??
1026 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1027 "dl", "font", "big", "small", "sub", "sup"
1029 $tabletags = array( # Can only appear inside table
# Table-only tags are treated like single tags; the full allowed set is singles plus pairs.
1033 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1034 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
# $htmlattrs is not read again on the visible lines of this function.
1036 $htmlattrs = $this->getHTMLattrs () ;
1038 # Remove HTML comments
1039 $text = preg_replace( "/<!--.*-->/sU", "", $text );
1041 $bits = explode( "<", $text );
1042 $text = array_shift( $bits );
1043 $tagstack = array(); $tablestack = array();
1045 foreach ( $bits as $x ) {
# Notices and warnings are switched off around the match and the list() unpacking, and
# the previous reporting level is restored afterwards.
1046 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
# Captures: optional slash, tag name, parameters, closing bracket, text up to the next tag.
1047 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1049 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1050 error_reporting( $prev );
1053 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
# Closing tag of a paired element that does not match the top of the stack: the popped
# entry is pushed back.
1057 if ( ! in_array( $t, $htmlsingle ) &&
1058 ( $ot = array_pop( $tagstack ) ) != $t ) {
1059 array_push( $tagstack, $ot );
# Leaving a table restores the tag stack saved when the table was entered.
1062 if ( $t == "table" ) {
1063 $tagstack = array_pop( $tablestack );
1068 # Keep track for later
1069 if ( in_array( $t, $tabletags ) &&
1070 ! in_array( "table", $tagstack ) ) {
1072 } else if ( in_array( $t, $tagstack ) &&
1073 ! in_array ( $t , $htmlnest ) ) {
1075 } else if ( ! in_array( $t, $htmlsingle ) ) {
1076 if ( $t == "table" ) {
1077 array_push( $tablestack, $tagstack );
1078 $tagstack = array();
1080 array_push( $tagstack, $t );
1082 # Strip non-approved attributes from the tag
1083 $newparams = $this->fixTagAttributes($params);
# NOTE(review): as shown, the two str_replace() calls below replace a closing angle
# bracket with itself, which changes nothing; presumably the replacement was the
# escaped entity and was decoded when this listing was extracted -- confirm.
1087 $rest = str_replace( ">", ">", $rest );
1088 $text .= "<$slash$t $newparams$brace$rest";
1092 $text .= "<" . str_replace( ">", ">", $x);
1094 # Close off any remaining tags
1095 while ( $t = array_pop( $tagstack ) ) {
1097 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1099 wfProfileOut( $fname );
1105 * This function accomplishes several tasks:
1106 * 1) Auto-number headings if that option is enabled
1107 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1108 * 3) Add a Table of contents on the top for users who have enabled the option
1109 * 4) Auto-anchor headings
1111 * It loops through all headlines, collects the necessary data, then splits up the
1112 * string and re-inserts the newly formatted headlines.
# Post-processes the headings already present as h1..h6 elements in $text. Option values
# read here: $nh number headings, $st show table of contents, $es edit-section links,
# $esr edit section on right click. The visible flow: collect all headings with
# preg_match_all, build per-heading numbering, anchor and TOC line, then split the text
# at the headings and reassemble it with the rebuilt headings, the optional top-level
# edit link and the TOC.
# NOTE(review): many lines are omitted here (initialisation of counters such as $c,
# $toclines and $i, the bodies of several conditions, the reassembly loop body and the
# return) -- confirm details against the full file.
1115 /* private */ function formatHeadings( $text )
1117 $nh=$this->mOptions
->getNumberHeadings();
1118 $st=$this->mOptions
->getShowToc();
# The edit-section options are read next to a userCanEdit() check; the branch bodies
# are on omitted lines.
1119 if(!$this->mTitle
->userCanEdit()) {
1123 $es=$this->mOptions
->getEditSection();
1124 $esr=$this->mOptions
->getEditSectionOnRightClick();
1127 # Inhibit editsection links if requested in the page
1128 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1129 if ($esw->matchAndRemove( $text )) {
1132 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1134 $mw =& MagicWord
::get( MAG_NOTOC
);
1135 if ($mw->matchAndRemove( $text ))
1140 # never add the TOC to the Main Page. This is an entry page that should not
1141 # be more than 1-2 screens large anyway
1142 if($this->mTitle
->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1144 # We need this to perform operations on the HTML
1145 $sk =& $this->mOptions
->getSkin();
1147 # Get all headlines for numbering them and adding funky stuff like [edit]
# Captures: 1 = heading level digit, 2 = rest of the opening tag, 3 = heading content.
1149 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1154 # Ugh .. the TOC should have neat indentation levels which can be
1155 # passed to the skin functions. These are determined here
1156 foreach($matches[3] as $headline) {
1157 if($level) { $prevlevel=$level;}
1158 $level=$matches[1][$c];
# Going deeper: reset the counter of the new level and indent the TOC accordingly.
1159 if(($nh||
$st) && $prevlevel && $level>$prevlevel) {
1161 $h[$level]=0; // reset when we enter a new level
1162 $toc.=$sk->tocIndent($level-$prevlevel);
1163 $toclevel+
=$level-$prevlevel;
# Coming back up: reset the counter below the new level and unindent the TOC.
1166 if(($nh||
$st) && $level<$prevlevel) {
1167 $h[$level+
1]=0; // reset when we step back a level
1168 $toc.=$sk->tocUnindent($prevlevel-$level);
1169 $toclevel-=$prevlevel-$level;
1172 $h[$level]++
; // count number of headlines for each level
# Build the dotted section number from the per-level counters.
1175 for($i=1;$i<=$level;$i++
) {
1177 if($dot) {$numbering.=".";}
1184 // The canonized header is a version of the header text safe to use for links
1186 $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1187 $tocline = trim( $canonized_headline );
1188 $canonized_headline=str_replace('"',"",$canonized_headline);
1189 $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1190 $refer[$c]=$canonized_headline;
1191 $refers[$canonized_headline]++
; // count how many in assoc. array so we can track dupes in anchors
1192 $refcount[$c]=$refers[$canonized_headline];
1194 // Prepend the number to the heading text
1197 $tocline=$numbering ." ". $tocline;
1199 // Don't number the heading if it is the only one (looks silly)
1200 if($nh && count($matches[3]) > 1) {
1201 $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1205 // Create the anchor for linking from the TOC to the section
# Repeated heading texts get an underscore and their occurrence count appended.
1207 $anchor=$canonized_headline;
1208 if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1210 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
# Section numbers passed to the skin are one-based; zero is used further down for the
# text before the first heading.
1213 $head[$c].=$sk->editSectionLink($c+
1);
1216 // Put it all together
1218 $head[$c].="<h".$level.$matches[2][$c]
1219 ."<a name=\"".$anchor."\">"
1224 // Add the edit section link
1227 $head[$c]=$sk->editSectionScript($c+
1,$head[$c]);
# Close the remaining TOC indentation and wrap the TOC through the skin.
1237 $toc.=$sk->tocUnindent($toclevel);
1238 $toc=$sk->tocTable($toc);
1241 // split up and insert constructed headlines
1243 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1246 foreach($blocks as $block) {
1247 if(($es) && $c>0 && $i==0) {
1248 # This is the [edit] link that appears for the top block of text when
1249 # section editing is enabled
1250 $full.=$sk->editSectionLink(0);
# The TOC is inserted only on the first pass, when enabled and $toclines exceeds 3.
1253 if($st && $toclines>3 && !$i) {
1254 # Let's add a top anchor just in case we want to link to the top of the page
1255 $full="<a name=\"top\"></a>".$full.$toc;
# Rewrites every "ISBN <number>" occurrence in $text as a link to the
# Booksources special page and returns the resulting text.
#
# $text   - text to scan.
# Returns - $text unchanged when it holds no "ISBN " marker. Otherwise each
#           marker followed by at least one digit or capital letter becomes
#           an <a class="internal"> link whose query is
#           "isbn=<number without hyphens>"; markers without a number are
#           copied through verbatim.
#
# NOTE(review): the global declaration, the two accumulator statements, the
# empty-number test and the final return were not visible in the reviewed
# listing; they are restored from what the visible statements require.
# Confirm against the full file.
/* private */ function magicISBN( $text )
{
	global $wgLang;

	# The leading space guarantees a non-empty first element even when the
	# text starts with "ISBN "; it is stripped again right below.
	# explode() replaces split(): the delimiter has no regex metacharacters,
	# so the pieces are identical, and split() no longer exists in PHP 7+.
	$a = explode( "ISBN ", " $text" );
	if ( count( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# Blanks between the marker and the number. strspn() measures the
		# run without touching offset 0 of an empty string, which the old
		# character-by-character loops did when the text ended here.
		$skip = strspn( $x, " " );
		$blank = str_repeat( " ", $skip );
		$x = (string)substr( $x, $skip );

		# Longest leading run of characters allowed in an ISBN.
		$len = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		# The link target uses the number without separators.
		$num = str_replace( "-", "", $isbn );
		$num = str_replace( " ", "", $num );

		if ( "" === $num ) {
			# Nothing to link. $isbn can only hold hyphens here; emit it too
			# so the input is reproduced exactly instead of losing them.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
			  "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
			# Remainder of the piece after the number.
			$text .= $x;
		}
	}
	return $text;
}
1298 /* private */ function magicRFC( $text )
# Result fields of a ParserOutput object: the text, the language-link and
# category-link lists, and the "contains old magic" flag. The ParserOutput()
# constructor initialises all four (defaults: "", array(), array(), false)
# and each one has a getter/setter pair.
1308 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
# Constructor. Every argument is optional; the defaults describe an empty
# result.
#
# $text             - stored in mText (default "").
# $languageLinks    - stored in mLanguageLinks (default empty array).
# $categoryLinks    - stored in mCategoryLinks (default empty array).
# $containsOldMagic - stored in mContainsOldMagic (default false).
function ParserOutput(
	$text = "",
	$languageLinks = array(),
	$categoryLinks = array(),
	$containsOldMagic = false
) {
	# Plain copies of the arguments; no validation is performed.
	$this->mContainsOldMagic = $containsOldMagic;
	$this->mCategoryLinks = $categoryLinks;
	$this->mLanguageLinks = $languageLinks;
	$this->mText = $text;
}
# Read accessors for the four result fields. Each returns the field as
# currently stored, without copying or converting it.

# Value of mText.
function getText()
{
	return $this->mText;
}

# Value of mLanguageLinks.
function getLanguageLinks()
{
	return $this->mLanguageLinks;
}

# Value of mCategoryLinks.
function getCategoryLinks()
{
	return $this->mCategoryLinks;
}

# Value of mContainsOldMagic.
function containsOldMagic()
{
	return $this->mContainsOldMagic;
}
# Write accessors for the four result fields. Each hands the field and the
# new value to wfSetVar() and passes that helper's return value back to the
# caller (presumably the previous value - confirm in wfSetVar()).

# Replaces mText.
function setText( $text )
{
	return wfSetVar( $this->mText, $text );
}

# Replaces mLanguageLinks.
function setLanguageLinks( $ll )
{
	return wfSetVar( $this->mLanguageLinks, $ll );
}

# Replaces mCategoryLinks.
function setCategoryLinks( $cl )
{
	return wfSetVar( $this->mCategoryLinks, $cl );
}

# Replaces mContainsOldMagic.
function setContainsOldMagic( $com )
{
	return wfSetVar( $this->mContainsOldMagic, $com );
}
# Option fields of a ParserOptions object. initialiseFromUser() fills every
# one of them, the first five from the $wg* globals and the rest from the
# user object, and each has a get*/set* accessor pair.
# "Private" below is a convention only: `var` applies no access control.
1331 # All variables are private
1332 var $mUseTeX; # Use texvc to expand <math> tags
1333 var $mUseCategoryMagic; # Treat [[Category:xxxx]] tags specially
1334 var $mUseDynamicDates; # Use $wgDateFormatter to format dates
1335 var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
1336 var $mAllowExternalImages; # Allow external images inline
1337 var $mSkin; # Reference to the preferred skin
1338 var $mDateFormat; # Date format index
1339 var $mEditSection; # Create "edit section" links
1340 var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
1341 var $mPrintable; # Generate printable output
1342 var $mNumberHeadings; # Automatically number headings
1343 var $mShowToc; # Show table of contents
# Read accessors, one per option field. Each returns the field exactly as
# stored.

# Whether <math> tags are expanded with texvc.
function getUseTeX()
{
	return $this->mUseTeX;
}

# Whether [[Category:xxxx]] tags are treated specially.
function getUseCategoryMagic()
{
	return $this->mUseCategoryMagic;
}

# Whether $wgDateFormatter is used to format dates.
function getUseDynamicDates()
{
	return $this->mUseDynamicDates;
}

# Whether interlanguage links are removed and returned in an array.
function getInterwikiMagic()
{
	return $this->mInterwikiMagic;
}

# Whether external images may appear inline.
function getAllowExternalImages()
{
	return $this->mAllowExternalImages;
}

# The preferred skin.
function getSkin()
{
	return $this->mSkin;
}

# Date format index.
function getDateFormat()
{
	return $this->mDateFormat;
}

# Whether "edit section" links are created.
function getEditSection()
{
	return $this->mEditSection;
}

# Whether JavaScript for editing a section on right click is generated.
function getEditSectionOnRightClick()
{
	return $this->mEditSectionOnRightClick;
}

# Whether printable output is generated.
function getPrintable()
{
	return $this->mPrintable;
}

# Whether headings are numbered automatically.
function getNumberHeadings()
{
	return $this->mNumberHeadings;
}

# Whether the table of contents is shown.
function getShowToc()
{
	return $this->mShowToc;
}
# Write accessors, one per option field. Each passes the field and the new
# value to wfSetVar() and returns what that helper returns (presumably the
# previous value - confirm in wfSetVar()). setSkin() is the exception: it
# goes through wfSetRef() instead.

# Replaces mUseTeX.
function setUseTeX( $x )
{
	return wfSetVar( $this->mUseTeX, $x );
}

# Replaces mUseCategoryMagic.
function setUseCategoryMagic( $x )
{
	return wfSetVar( $this->mUseCategoryMagic, $x );
}

# Replaces mUseDynamicDates.
function setUseDynamicDates( $x )
{
	return wfSetVar( $this->mUseDynamicDates, $x );
}

# Replaces mInterwikiMagic.
function setInterwikiMagic( $x )
{
	return wfSetVar( $this->mInterwikiMagic, $x );
}

# Replaces mAllowExternalImages.
function setAllowExternalImages( $x )
{
	return wfSetVar( $this->mAllowExternalImages, $x );
}

# Replaces mSkin via wfSetRef() rather than wfSetVar().
function setSkin( $x )
{
	return wfSetRef( $this->mSkin, $x );
}

# Replaces mDateFormat.
function setDateFormat( $x )
{
	return wfSetVar( $this->mDateFormat, $x );
}

# Replaces mEditSection.
function setEditSection( $x )
{
	return wfSetVar( $this->mEditSection, $x );
}

# Replaces mEditSectionOnRightClick.
function setEditSectionOnRightClick( $x )
{
	return wfSetVar( $this->mEditSectionOnRightClick, $x );
}

# Replaces mPrintable.
function setPrintable( $x )
{
	return wfSetVar( $this->mPrintable, $x );
}

# Replaces mNumberHeadings.
function setNumberHeadings( $x )
{
	return wfSetVar( $this->mNumberHeadings, $x );
}

# Replaces mShowToc.
function setShowToc( $x )
{
	return wfSetVar( $this->mShowToc, $x );
}
# Factory: builds a ParserOptions object and initialises it from a user.
#
# $user   - user object, taken by reference and handed on to
#           initialiseFromUser().
# Returns - the initialised ParserOptions object.
#
# NOTE(review): the return statement was not visible in the reviewed
# listing; it is restored because a factory has to hand back the object it
# built. Confirm against the full file.
/* static */ function newFromUser( &$user )
{
	$popts = new ParserOptions;
	# No "&" at the call site: initialiseFromUser() already declares its
	# parameter by reference, and call-time pass-by-reference is a fatal
	# error from PHP 5.4 on.
	$popts->initialiseFromUser( $user );
	return $popts;
}
# Fills every option field of this object from the site-wide $wg* settings
# and from a user's preferences.
#
# $userInput - user object, taken by reference. The object that ends up in
#              $user must provide getSkin() and getOption(), the only two
#              calls made on it here.
#
# NOTE(review): the listing shows the `if ( !$userInput )` test followed
# directly by the `$user =& $userInput` alias; the statement that runs when
# $userInput is empty is not visible. Presumably a fallback user object is
# created there - confirm against the full file.
1378 function initialiseFromUser( &$userInput )
1380 global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;
1382 if ( !$userInput ) {
# Alias the caller's object instead of copying it.
1385 $user =& $userInput;
# Site-wide switches: copied unchanged from the globals declared above.
1388 $this->mUseTeX
= $wgUseTeX;
1389 $this->mUseCategoryMagic
= $wgUseCategoryMagic;
1390 $this->mUseDynamicDates
= $wgUseDynamicDates;
1391 $this->mInterwikiMagic
= $wgInterwikiMagic;
1392 $this->mAllowExternalImages
= $wgAllowExternalImages;
# The skin is bound by reference to what the user object returns.
1393 $this->mSkin
=& $user->getSkin();
# Per-user preferences, looked up by option key.
1394 $this->mDateFormat
= $user->getOption( "date" );
1395 $this->mEditSection
= $user->getOption( "editsection" );
1396 $this->mEditSectionOnRightClick
= $user->getOption( "editsectiononrightclick" );
# Printable output is always off after initialisation; setPrintable() can
# change it afterwards.
1397 $this->mPrintable
= false;
1398 $this->mNumberHeadings
= $user->getOption( "numberheadings" );
1399 $this->mShowToc
= $user->getOption( "showtoc" );
1403 # Regex callbacks, used in OutputPage::replaceVariables
1405 # Just get rid of the dangerous stuff
1406 # Necessary because replaceVariables is called after removeHTMLtags,
1407 # and message text can come from any user
# $matches - regex match array; $matches[1] is the message name.
#
# NOTE(review): no return statement is visible in this listing. As a regex
# callback the function presumably returns $text - confirm against the full
# file.
1408 function wfReplaceMsgVar( $matches ) {
1409 global $wgCurOut, $wgLinkCache;
# Fetch the message via wfMsg() and strip its HTML before anything else
# touches it.
1410 $text = $wgCurOut->removeHTMLtags( wfMsg( $matches[1] ) );
# The link cache is suspended only for the duration of the internal-link
# replacement, presumably so that links inside the message text are not
# recorded - confirm in LinkCache.
1411 $wgLinkCache->suspend();
1412 $text = $wgCurOut->replaceInternalLinks( $text );
1413 $wgLinkCache->resume();
# With the cache active again, register the message's own title in the
# NS_MEDIAWIKI namespace.
1414 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );
1418 # Effective <nowiki></nowiki>
1419 # Not real <nowiki> because this is called after nowiki sections are processed
1420 function wfReplaceMsgnwVar( $matches ) {
1421 global $wgCurOut, $wgLinkCache;
1422 $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
1423 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );