4 # major: $wgUser, $wgTitle,
9 var $mOutput, $mAutonumber, $mLastSection, $mDTopen;
18 $this->mOutput
= new ParserOutput
;
19 $this->mAutonumber
= 0;
20 $this->mLastSection
= "";
21 $this->mDTopen
= false;
24 # First pass--just handle <nowiki> sections, pass the rest off
25 # to doWikiPass2() which does all the real work.
27 # Returns a ParserOutput
#
# parse(): converts wiki markup into the ParserOutput held in mOutput.
#   $text       - wiki markup to convert
#   $linestart  - forwarded unchanged to doWikiPass2()
#   $clearState - not referenced in the lines visible in this listing
# NOTE(review): this listing is missing lines (braces, some assignments),
# so the comments below describe only what the visible statements do.
29 function parse( $text, $linestart = true, $clearState = true )
32 $fname = "Parser::parse";
33 wfProfileIn( $fname );
# Placeholder tokens: $unique marks <nowiki> sections, $unique2 marks <math>
# sections and $unique3 marks <pre> sections while the rest is processed.
34 $unique = "3iyZiyA7iMwg5rhxP0Dcc9oTnj8qD1jm1Sfv4";
35 $unique2 = "4LIQ9nXtiYFPCSfitVwDw7EYwQlL4GeeQ7qSO";
36 $unique3 = "fPaA8gDfdLBqzj68Yjg9Hil3qEF8JGO0uszIp";
51 # Replace any instances of the placeholders
# (a placeholder already present in the input is rewritten through
# wfHtmlEscapeFirst() so it cannot collide with the ones generated below)
52 $text = str_replace( $unique, wfHtmlEscapeFirst( $unique ), $text );
53 $text = str_replace( $unique2, wfHtmlEscapeFirst( $unique2 ), $text );
54 $text = str_replace( $unique3, wfHtmlEscapeFirst( $unique3 ), $text );
57 global $wgEnableParserCache;
# Parser-cache conditions: caching enabled, $action is "view", the user's
# "stubthreshold" option is 0, and $article is an object with a positive ID.
# NOTE(review): presumably this expression is assigned to $use_parser_cache;
# the assignment line is not visible here.
59 $wgEnableParserCache && $action == "view" &&
60 intval($wgUser->getOption( "stubthreshold" )) == 0 &&
61 is_object($article) && $article->getID() > 0;
63 if( $use_parser_cache ){
64 if( $this->fillFromParserCache() ){
65 wfProfileOut( $fname );
# Pass 1: cut out <nowiki> sections. The section body is escaped with
# wfEscapeHTMLTagsOnly() and kept in $nwlist; a placeholder
# ($unique . index . "s") is appended to $stripped in its place.
71 while ( "" != $text ) {
72 $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
74 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $text = ""; }
76 $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
78 $nwlist[$nwsecs] = wfEscapeHTMLTagsOnly($q[0]);
79 $stripped .= $unique . $nwsecs . "s";
# Pass 2: same scheme for <math> sections; the body goes through
# renderMath() into $mathlist and the placeholder into $stripped2.
85 while ( "" != $stripped ) {
86 $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
88 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped = ""; }
90 $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
92 $mathlist[$mathsecs] = renderMath($q[0]);
93 $stripped2 .= $unique2 . $mathsecs . "s";
# NOTE(review): presumably the alternative branch in which <math> is not
# processed and the text is passed on untouched; the condition is not visible.
98 $stripped2 = $stripped;
# Pass 3: same scheme for <pre> sections; the escaped body is re-wrapped in
# <pre>...</pre> and stored in $prelist, the placeholder goes into $stripped3.
101 while ( "" != $stripped2 ) {
102 $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
104 if ( ( count( $p ) < 2 ) ||
( "" == $p[1] ) ) { $stripped2 = ""; }
106 $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
108 $prelist[$presecs] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
109 $stripped3 .= $unique3 . $presecs . "s";
# Everything outside the stripped sections goes through the main wiki pass.
114 $text = $this->doWikiPass2( $stripped3, $linestart );
# Backslash and dollar sign have special meaning in a preg_replace()
# replacement string, so they are escaped in the saved sections before
# those sections are substituted back for their placeholders.
116 $specialChars = array("\\", "$");
117 $escapedChars = array("\\\\", "\\$");
119 # Go backwards so that {$unique1}1 doesn't overwrite {$unique1}10
120 for ( $i = $presecs; $i >= 1; --$i ) {
121 $text = preg_replace( "/{$unique3}{$i}s/", str_replace( $specialChars,
122 $escapedChars, $prelist[$i] ), $text );
125 for ( $i = $mathsecs; $i >= 1; --$i ) {
126 $text = preg_replace( "/{$unique2}{$i}s/", str_replace( $specialChars,
127 $escapedChars, $mathlist[$i] ), $text );
130 for ( $i = $nwsecs; $i >= 1; --$i ) {
131 $text = preg_replace( "/{$unique}{$i}s/", str_replace( $specialChars,
132 $escapedChars, $nwlist[$i] ), $text );
# Store the finished HTML in the parser cache when caching applies.
136 if($use_parser_cache ){
137 $this->saveParserCache( $text );
# The finished HTML is stored on the ParserOutput, which is the return value.
141 $this->mOutput
->setText( $text );
142 wfProfileOut( $fname );
143 return $this->mOutput
;
# categoryMagic(): builds the HTML listing shown on a category page.
# Returns nothing when $wgUseCategoryMagic is unset or false, and "" when the
# part of the current title before the first ":" differs from
# ucfirst( wfMsg( "category" ) ). Otherwise the query results are sorted into
# sub-categories (titles whose prefix equals the category word) and ordinary
# articles, each rendered as a comma-separated list of links under an <h2>.
# NOTE(review): $wgUser is used below but is not in the visible global
# statement; a separate declaration may be on a line missing from this listing.
146 function categoryMagic ()
148 global $wgTitle , $wgUseCategoryMagic, $wgLang ;
149 if ( !isset ( $wgUseCategoryMagic ) ||
!$wgUseCategoryMagic ) return ;
150 $id = $wgTitle->getArticleID() ;
151 $cat = ucfirst ( wfMsg ( "category" ) ) ;
152 $ti = $wgTitle->getText() ;
153 $ti = explode ( ":" , $ti , 2 ) ;
154 if ( $cat != $ti[0] ) return "" ;
155 $r = "<br break=all>\n" ;
# $parents is initialised here but not referenced in the visible lines.
157 $articles = array() ;
158 $parents = array () ;
159 $children = array() ;
163 $sk = $wgUser->getSkin() ;
# Two alternative queries are assigned to $sql; the logic that selects
# between them is not visible in this listing.
167 $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
169 $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
172 $res = wfQuery ( $sql, DB_READ
) ;
173 while ( $x = wfFetchObject ( $res ) )
176 # $t->newFromDBkey ( $x->l_from ) ;
177 # $t = $t->getText() ;
# Build the prefixed title text "Namespace:Title" ("Title" when the
# namespace text is empty).
181 $t = $wgLang->getNsText ( $x->cur_namespace
) ;
182 if ( $t != "" ) $t .= ":" ;
183 $t .= $x->cur_title
;
# A title whose prefix equals the category word is a sub-category and is
# linked with the text after the prefix; everything else is an article.
186 $y = explode ( ":" , $t , 2 ) ;
187 if ( count ( $y ) == 2 && $y[0] == $cat ) {
188 array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
190 array_push ( $articles , $sk->makeLink ( $t ) ) ;
193 wfFreeResult ( $res ) ;
# Sub-category section, emitted only when at least one was found.
196 if ( count ( $children ) > 0 )
198 asort ( $children ) ;
199 $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
200 $r .= implode ( ", " , $children ) ;
# Article section; the heading message receives the category name ($ti[1]).
204 if ( count ( $articles ) > 0 )
206 asort ( $articles ) ;
207 $h = wfMsg( "category_header", $ti[1] );
208 $r .= "<h2>{$h}</h2>\n" ;
209 $r .= implode ( ", " , $articles ) ;
# getHTMLattrs(): defines $htmlattrs, the whitelist of HTML attribute names.
# fixTagAttributes() and removeHTMLtags() both assign this function's result
# to their own $htmlattrs; fixTagAttributes() tests attribute names against it.
# "width" appears twice in the list; the duplicate has no effect on a
# membership test such as in_array().
216 function getHTMLattrs ()
218 $htmlattrs = array( # Allowed attributes--no scripting, etc.
219 "title", "align", "lang", "dir", "width", "height",
220 "bgcolor", "clear", /* BR */ "noshade", /* HR */
221 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
222 /* FONT */ "type", "start", "value", "compact",
223 /* For various lists, mostly deprecated but safe */
224 "summary", "width", "border", "frame", "rules",
225 "cellspacing", "cellpadding", "valign", "char",
226 "charoff", "colgroup", "col", "span", "abbr", "axis",
227 "headers", "scope", "rowspan", "colspan", /* Tables */
228 "id", "class", "name", "style" /* For CSS */
# fixTagAttributes(): sanitises $t, the attribute portion of an HTML tag.
# Returns "" straight away when $t is blank.
# The first pattern matches each `name` or `name=value` pair; its /e
# replacement keeps the pair only when the lower-cased name is found in the
# getHTMLattrs() whitelist, and drops it otherwise.
# NOTE(review): the /e modifier evaluates the replacement as PHP code; it was
# deprecated in PHP 5.5 and removed in PHP 7.0 (preg_replace_callback() is the
# documented replacement) -- confirm the target PHP version.
233 function fixTagAttributes ( $t )
235 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
236 $htmlattrs = $this->getHTMLattrs() ;
238 # Strip non-approved attributes from the tag
240 "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
241 "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
243 # Strip javascript "expression" from stylesheets. Brute force approach:
244 # If anything offensive is found, all attributes of the HTML tag are dropped
# The pattern looks, after "style=", for "expression", for "tp" followed by
# any number of "s" and "://", or for "url(".
# NOTE(review): "tps*" presumably targets http/https URLs -- confirm intent.
247 "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
248 wfMungeToUtf8( $t ) ) )
# doTableStuff(): converts wiki table markup in $t to HTML, line by line.
# Recognised line prefixes, as tested below:
#   {|  open table        |}  close table      |-  new row
#   |   data cell         !   header cell      |+  caption
# Four parallel stacks ($td, $ltd, $tr, $ltr) hold the state of each table
# nesting level; an entry is pushed on "{|" and popped on "|}".
256 function doTableStuff ( $t )
258 $t = explode ( "\n" , $t ) ;
259 $td = array () ; # Is currently a td tag open?
260 $ltd = array () ; # Was it TD or TH?
261 $tr = array () ; # Is currently a tr tag open?
262 $ltr = array () ; # tr attributes
263 foreach ( $t AS $k => $x )
266 $fc = substr ( $x , 0 , 1 ) ;
# "{|" opens a table. The attribute text is taken from offset 3, so the
# character right after "{|" is skipped.
# NOTE(review): presumably that character is expected to be a space.
267 if ( "{|" == substr ( $x , 0 , 2 ) )
269 $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
270 array_push ( $td , false ) ;
271 array_push ( $ltd , "" ) ;
272 array_push ( $tr , false ) ;
273 array_push ( $ltr , "" ) ;
275 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
# "|}" closes the table: pop this level's state and prepend the closing
# tags for a still-open row and cell to $z.
276 else if ( "|}" == substr ( $x , 0 , 2 ) )
279 $l = array_pop ( $ltd ) ;
280 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
281 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
285 /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
287 $z = trim ( substr ( $x , 2 ) ) ;
288 $t[$k] = "<caption>{$z}</caption>\n" ;
290 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
292 $x = substr ( $x , 1 ) ;
293 while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
295 $l = array_pop ( $ltd ) ;
296 if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
297 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
# Start the new row closed; what followed the dashes is sanitised and
# remembered in $ltr as the attributes of the next <tr>.
300 array_push ( $tr , false ) ;
301 array_push ( $td , false ) ;
302 array_push ( $ltd , "" ) ;
303 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
305 else if ( "|" == $fc ||
"!" == $fc ||
"|+" == substr ( $x , 0 , 2 ) ) # Caption
307 if ( "|+" == substr ( $x , 0 , 2 ) )
310 $x = substr ( $x , 1 ) ;
# Several cells may share one line, separated by "||"; in a header line
# "!!" is accepted as the separator as well.
312 $after = substr ( $x , 1 ) ;
313 if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
314 $after = explode ( "||" , $after ) ;
316 foreach ( $after AS $theline )
321 $tra = array_pop ( $ltr ) ;
322 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
323 array_push ( $tr , true ) ;
324 array_push ( $ltr , "" ) ;
327 $l = array_pop ( $ltd ) ;
328 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
329 if ( $fc == "|" ) $l = "TD" ;
330 else if ( $fc == "!" ) $l = "TH" ;
331 else if ( $fc == "+" ) $l = "CAPTION" ;
333 array_push ( $ltd , $l ) ;
# Text before the first single "|" inside a cell is its attribute list.
# NOTE(review): the doubled assignment "$y = $y =" below is redundant.
334 $y = explode ( "|" , $theline , 2 ) ;
335 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
336 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
338 array_push ( $td , true ) ;
343 # Closing open td, tr && table
# NOTE(review): an open cell is always closed with "</td>" here; $ltd is not
# consulted, so an open TH or CAPTION would get a mismatched closing tag.
344 while ( count ( $td ) > 0 )
346 if ( array_pop ( $td ) ) $t[] = "</td>" ;
347 if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
351 $t = implode ( "\n" , $t ) ;
352 # $t = $this->removeHTMLtags( $t );
356 # Well, OK, it's actually about 14 passes. But since all the
357 # hard lifting is done inside PHP's regex code, it probably
358 # wouldn't speed things up much to add a real parser.
#
# doWikiPass2(): runs the main chain of text transformations on $text.
#   $text      - wiki text with the nowiki/math/pre sections already replaced
#                by placeholders (see the call in parse())
#   $linestart - forwarded to doBlockLevels()
# Each step takes the text produced by the previous one, so the order of the
# calls below is significant.
# NOTE(review): the profiling label says "OutputPage::" although parse() in
# this file labels itself "Parser::" -- presumably left over from a move.
360 function doWikiPass2( $text, $linestart )
362 global $wgUser, $wgLang, $wgUseDynamicDates;
363 $fname = "OutputPage::doWikiPass2";
364 wfProfileIn( $fname );
# Sanitise HTML and expand magic variables before interpreting any markup.
366 $text = $this->removeHTMLtags( $text );
367 $text = $this->replaceVariables( $text );
# Four or more dashes at the start of a line become a horizontal rule;
# an upper-case <HR> is normalised to lower case.
369 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
370 $text = str_replace ( "<HR>", "<hr>", $text );
372 $text = $this->doAllQuotes( $text );
373 $text = $this->doHeadings( $text );
374 $text = $this->doBlockLevels( $text, $linestart );
# Optional date reformatting according to the user's "date" preference.
376 if($wgUseDynamicDates) {
377 global $wgDateFormatter;
378 $text = $wgDateFormatter->reformat( $wgUser->getOption("date"), $text );
381 $text = $this->replaceExternalLinks( $text );
382 $text = $this->replaceInternalLinks ( $text );
383 $text = $this->doTableStuff ( $text ) ;
385 $text = $this->magicISBN( $text );
386 $text = $this->magicRFC( $text );
387 $text = $this->formatHeadings( $text );
# Skin-specific post-processing, then the category listing is appended.
389 $sk = $wgUser->getSkin();
390 $text = $sk->transformContent( $text );
391 $text .= $this->categoryMagic () ;
393 wfProfileOut( $fname );
# doAllQuotes(): applies doQuotes() to every line of $text separately.
# The text is split on "\r\n"; each converted line is appended to $outtext
# followed by "\r\n". Each line starts with an empty prefix and mode, so
# quote state does not carry over from one line to the next.
397 /* private */ function doAllQuotes( $text )
400 $lines = explode( "\r\n", $text );
401 foreach ( $lines as $line ) {
402 $outtext .= $this->doQuotes ( "", $line, "" ) . "\r\n";
# doQuotes(): recursively converts '' and ''' quote markup to <em>/<strong>.
#   $pre  - text carried over from an earlier step, placed inside the tag
#           that is emitted later
#   $text - remaining text to scan
#   $mode - which markup is open: "", "em", "strong", "emstrong", "strongem"
#           or "both" (these are the values compared and passed below)
# Each call consumes the text up to the next quote marker and recurses on the
# remainder with the updated mode.
407 /* private */ function doQuotes( $pre, $text, $mode )
# Ungreedy match: $m[1] is the text before the first '' and $m[2] the rest.
409 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
410 $m1_strong = ($m[1] == "") ?
"" : "<strong>{$m[1]}</strong>";
411 $m1_em = ($m[1] == "") ?
"" : "<em>{$m[1]}</em>";
# A third apostrophe right after the '' makes the marker ''' (strong);
# it is removed from $m[2] before the mode is evaluated.
412 if ( substr ($m[2], 0, 1) == "'" ) {
413 $m[2] = substr ($m[2], 1);
415 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "emstrong" );
416 } else if ($mode == "strong") {
417 return $m1_strong . $this->doQuotes ( "", $m[2], "" );
418 } else if (($mode == "emstrong") ||
($mode == "both")) {
419 return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
420 } else if ($mode == "strongem") {
421 return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
423 return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
# NOTE(review): presumably the branch for a plain '' marker (em); it mirrors
# the chain above with the roles of em and strong swapped.
426 if ($mode == "strong") {
427 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ?
"both" : "strongem" );
428 } else if ($mode == "em") {
429 return $m1_em . $this->doQuotes ( "", $m[2], "" );
430 } else if ($mode == "emstrong") {
431 return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
432 } else if (($mode == "strongem") ||
($mode == "both")) {
433 return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
435 return $m[1] . $this->doQuotes ( "", $m[2], "em" );
# No quote marker left in $text: wrap the remainder according to whatever
# mode is still open, so that unclosed markup is closed by the end of $text.
439 $text_strong = ($text == "") ?
"" : "<strong>{$text}</strong>";
440 $text_em = ($text == "") ?
"" : "<em>{$text}</em>";
443 } else if ($mode == "em") {
444 return $pre . $text_em;
445 } else if ($mode == "strong") {
446 return $pre . $text_strong;
447 } else if ($mode == "strongem") {
448 return (($pre == "") && ($text == "")) ?
"" : "<strong>{$pre}{$text_em}</strong>";
450 return (($pre == "") && ($text == "")) ?
"" : "<em>{$pre}{$text_strong}</em>";
# doHeadings(): turns "== Heading ==" style lines into <h1>..<h6> elements.
# The loop runs from six "=" down to one so that longer markers are replaced
# before shorter ones could match part of them. The heading text may not
# contain "=", and the closing marker must be followed by whitespace or the
# end of the line (the pattern uses /m, so ^ and $ work per line).
455 /* private */ function doHeadings( $text )
457 for ( $i = 6; $i >= 1; --$i ) {
458 $h = substr( "======", 0, $i );
459 $text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
460 "<h{$i}>\\1</h{$i}>\\2", $text );
465 # Note: we have to do external links before the internal ones,
466 # and otherwise take great care in the order of things here, so
467 # that we don't end up interpreting some URLs twice.
#
# replaceExternalLinks(): runs subReplaceExternalLinks() over $text once per
# supported protocol. The third argument ($autonumber) is true only for
# http and https.
469 /* private */ function replaceExternalLinks( $text )
471 $fname = "OutputPage::replaceExternalLinks";
472 wfProfileIn( $fname );
473 $text = $this->subReplaceExternalLinks( $text, "http", true );
474 $text = $this->subReplaceExternalLinks( $text, "https", true );
475 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
476 $text = $this->subReplaceExternalLinks( $text, "irc", false );
477 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
478 $text = $this->subReplaceExternalLinks( $text, "news", false );
479 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
480 wfProfileOut( $fname );
# subReplaceExternalLinks(): converts the external links of one protocol.
#   $s          - text to process
#   $protocol   - scheme name without the colon, e.g. "http"
#   $autonumber - when true, bracketed links without a label are shown as
#                 "[n]" using the running counter mAutonumber; together with
#                 $wgAllowExternalImages it also enables inline images
# Two stages: free-standing URLs are handled with preg_replace(), then the
# text is split on "[{$protocol}:" to handle bracketed links.
# NOTE(review): $sep is used in the patterns below but its definition is not
# visible in this listing.
484 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
486 global $wgUser, $printable;
487 global $wgAllowExternalImages;
# $unique stands in for the protocol inside generated markup and is swapped
# back to $protocol once the free-URL replacements are done.
# NOTE(review): presumably this keeps already-converted URLs from being
# matched a second time -- confirm.
490 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
491 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
493 # this is the list of separators that should be ignored if they
494 # are the last character of an URL but that should be included
495 # if they occur within the URL, e.g. "go to www.foo.com, where .."
496 # in this case, the last comma should not become part of the URL,
497 # but in "www.foo.com/123,2342,32.htm" it should.
499 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
500 $images = "gif|png|jpg|jpeg";
502 # PLEASE NOTE: The curly braces { } are not part of the regex,
503 # they are interpreted as part of the string (used to tell PHP
504 # that the content of the string should be inserted there).
# $e1 matches a free URL ending in one of the $images file extensions.
505 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
506 "((?i){$images})([^{$uc}]|$)/";
# $e2 matches any other free URL of this protocol not preceded by "[".
508 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
509 $sk = $wgUser->getSkin();
511 if ( $autonumber and $wgAllowExternalImages) { # Use img tags only for HTTP urls
512 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
513 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
515 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
516 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
517 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
519 $s = str_replace( $unique, $protocol, $s );
# Second stage: bracketed links. A space is prepended before splitting and
# removed again afterwards, so a link at offset 0 still yields a first chunk.
521 $a = explode( "[{$protocol}:", " " . $s );
522 $s = array_shift( $a );
523 $s = substr( $s, 1 );
# $e1: "[url]" with no label; $e2: "[url label]".
525 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
526 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
528 foreach ( $a as $line ) {
529 if ( preg_match( $e1, $line, $m ) ) {
530 $link = "{$protocol}:{$m[1]}";
532 if ( $autonumber ) { $text = "[" . ++
$this->mAutonumber
. "]"; }
533 else { $text = wfEscapeHTML( $link ); }
534 } else if ( preg_match( $e2, $line, $m ) ) {
535 $link = "{$protocol}:{$m[1]}";
# Neither form matched: the chunk is put back unchanged.
539 $s .= "[{$protocol}:" . $line;
# When $printable is "yes", the URL itself is shown in parentheses.
542 if ( $printable == "yes") $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
544 $la = $sk->getExternalLinkAttributes( $link, $text );
545 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
# replaceInternalLinks(): converts [[...]] links in $s to HTML.
# The text is split on "[["; every chunk after the first is matched against
# $e1 (target, optional "|label", "]]", trailing text) and then dispatched by
# kind: interlanguage link, image, self link, category, media, special page,
# or ordinary page. Chunks that do not match are emitted unchanged with their
# "[[" restored.
551 /* private */ function replaceInternalLinks( $s )
553 global $wgTitle, $wgUser, $wgLang;
554 global $wgLinkCache, $wgInterwikiMagic, $wgUseCategoryMagic;
555 global $wgNamespacesWithSubpages, $wgLanguageCode;
556 global $wgUseLinkPrefixCombination;
557 wfProfileIn( $fname = "OutputPage::replaceInternalLinks" );
559 wfProfileIn( "$fname-setup" );
560 $tc = Title
::legalChars() . "#";
561 $sk = $wgUser->getSkin();
# A space is prepended before splitting and removed again afterwards, so a
# link at offset 0 still yields a (possibly empty) first chunk.
563 $a = explode( "[[", " " . $s );
564 $s = array_shift( $a );
565 $s = substr( $s, 1 );
567 # Match a link having the form [[namespace:link|alternate]]trail
568 $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD";
569 # Match the end of a line for a word that's not followed by whitespace,
570 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
571 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
572 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
573 $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
576 # Special and Media are pseudo-namespaces; no pages actually exist in them
577 $image = Namespace::getImage();
578 $special = Namespace::getSpecial();
579 $media = Namespace::getMedia();
580 $category = wfMsg ( "category" ) ;
581 $nottalk = !Namespace::isTalk( $wgTitle->getNamespace() );
583 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
590 wfProfileOut( "$fname-setup" );
592 foreach ( $a as $line ) {
593 $prefix = $new_prefix;
594 if ( $wgUseLinkPrefixCombination && preg_match( $e2, $line, $m ) ) {
600 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
603 } else { # Invalid form; output directly
604 $s .= $prefix . "[[" . $line ;
610 :Foobar -- override special treatment of prefix (images, language links)
611 /Foobar -- convert to CurrentPage/Foobar
612 /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
# The first character of the target selects the form: ":" forces a plain
# link ($noforce false), "/" marks a subpage link.
614 $c = substr($m[1],0,1);
615 $noforce = ($c != ":");
616 if( $c == "/" ) { # subpage
617 if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
618 $m[1]=substr($m[1],1,strlen($m[1])-2);
621 $noslash=substr($m[1],1);
623 if($wgNamespacesWithSubpages[$wgTitle->getNamespace()]) { # subpages allowed here
624 $link = $wgTitle->getPrefixedText(). "/" . trim($noslash);
627 } # this might be changed for ugliness reasons
629 $link = $noslash; # no subpage allowed, use standard link
631 } elseif( $noforce ) { # no subpage
634 $link = substr( $m[1], 1 );
639 $nt = Title
::newFromText( $link );
641 $s .= $prefix . "[[" . $line;
644 $ns = $nt->getNamespace();
645 $iw = $nt->getInterWiki();
# Interlanguage link: recorded on the ParserOutput instead of being
# rendered inline; only prefix and trail are emitted here.
647 if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
648 array_push( $this->mOutput
->mLanguageLinks
, $nt->getPrefixedText() );
649 $s .= $prefix . $trail;
# Image link: rendered through the skin and registered in the link cache.
652 if( $ns == $image ) {
653 $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
654 $wgLinkCache->addImageLinkObj( $nt );
# Link to the current page itself: shown as bold text, not as a link.
# NOTE(review): strpos() returns 0 when "#" is the first character and
# 0 == FALSE is true under loose comparison, so such a link is treated like
# one without "#"; confirm whether a strict comparison was intended.
658 if( ( $nt->getPrefixedText() == $wgTitle->getPrefixedText() ) &&
659 ( strpos( $link, "#" ) == FALSE ) ) {
660 $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
# Category link: the rendered link is collected in mCategoryLinks and only
# prefix and trail are emitted inline.
# NOTE(review): $ns comes from getNamespace() while $category is the message
# text from wfMsg( "category" ) -- confirm that this comparison can be true.
663 if ( $ns == $category && $wgUseCategoryMagic ) {
664 $t = explode ( ":" , $nt->getText() ) ;
666 $t = implode ( ":" , $t ) ;
667 $t = $wgLang->ucFirst ( $t ) ;
668 # $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
669 $nnt = Title
::newFromText ( $category.":".$t ) ;
670 $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
671 $this->mCategoryLinks
[] = $t ;
672 $s .= $prefix . $trail ;
# Media and Special targets get their own skin helpers; any other target
# becomes an ordinary page link.
675 if( $ns == $media ) {
676 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
677 $wgLinkCache->addImageLinkObj( $nt );
679 } elseif( $ns == $special ) {
680 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
683 $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
685 wfProfileOut( $fname );
689 # Some functions here used by doBlockLevels()
#
# closeParagraph(): when mLastSection is neither "p" nor "", puts the closing
# tag for that section into $result; mLastSection is then set back to "".
# A "p" section therefore never receives an explicit closing tag here.
691 /* private */ function closeParagraph()
694 if ( 0 != strcmp( "p", $this->mLastSection
) &&
695 0 != strcmp( "", $this->mLastSection
) ) {
696 $result = "</" . $this->mLastSection
. ">";
698 $this->mLastSection
= "";
701 # getCommon() returns the length of the longest common prefix
702 # of both arguments, i.e. the number of leading characters they share.
# The scan is bounded by the shorter of the two strings and stops at the
# first position where the characters differ.
704 /* private */ function getCommon( $st1, $st2 )
706 $fl = strlen( $st1 );
707 $shorter = strlen( $st2 );
708 if ( $fl < $shorter ) { $shorter = $fl; }
710 for ( $i = 0; $i < $shorter; ++
$i ) {
711 if ( $st1{$i} != $st2{$i} ) { break; }
715 # These next three functions open, continue, and close the list
716 # element appropriate to the prefix character passed into them.
#
# openList(): closes any open paragraph, then appends the opening tags for
# the list type selected by $char ("*" ul, "#" ol, ":" dl/dd, ";" dl/dt).
# For ";" the flag mDTopen is also set. Any other character replaces (not
# appends to) $result with the marker "<!-- ERR 1 -->".
718 /* private */ function openList( $char )
720 $result = $this->closeParagraph();
722 if ( "*" == $char ) { $result .= "<ul><li>"; }
723 else if ( "#" == $char ) { $result .= "<ol><li>"; }
724 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
725 else if ( ";" == $char ) {
726 $result .= "<dl><dt>";
727 $this->mDTopen
= true;
729 else { $result = "<!-- ERR 1 -->"; }
# nextItem(): returns the markup that ends the current list item and starts
# the next one. "*" and "#" give "</li><li>". For ":" and ";" the closing tag
# in $close is followed by "<dd>" or "<dt>", and mDTopen is updated to record
# whether a <dt> is now open. Any other character gives "<!-- ERR 2 -->".
# NOTE(review): only the mDTopen case of $close is visible in this listing.
734 /* private */ function nextItem( $char )
736 if ( "*" == $char ||
"#" == $char ) { return "</li><li>"; }
737 else if ( ":" == $char ||
";" == $char ) {
739 if ( $this->mDTopen
) { $close = "</dt>"; }
740 if ( ";" == $char ) {
741 $this->mDTopen
= true;
742 return $close . "<dt>";
744 $this->mDTopen
= false;
745 return $close . "<dd>";
748 return "<!-- ERR 2 -->";
# closeList(): builds the closing tags for the list type selected by $char
# ("*" ul, "#" ol, ":" dl). For ":" the item closed is a <dt> when mDTopen is
# set (the flag is cleared) and a <dd> otherwise. Any other character returns
# the marker "<!-- ERR 3 -->".
751 /* private */function closeList( $char )
753 if ( "*" == $char ) { $text = "</li></ul>"; }
754 else if ( "#" == $char ) { $text = "</li></ol>"; }
755 else if ( ":" == $char ) {
756 if ( $this->mDTopen
) {
757 $this->mDTopen
= false;
758 $text = "</dt></dl>";
760 $text = "</dd></dl>";
763 else { return "<!-- ERR 3 -->"; }
# doBlockLevels(): builds block-level structure (lists and paragraph-type
# sections) from $text, one line at a time.
#   $text      - text to process; it is rebuilt in the same variable
#   $linestart - when false, the first line is copied through unprocessed
# State kept across lines: $lastPref (list prefix of the previous line),
# mLastSection (currently open section tag), mDTopen and $inBlockElem.
767 /* private */ function doBlockLevels( $text, $linestart )
769 $fname = "OutputPage::doBlockLevels";
770 wfProfileIn( $fname );
771 # Parsing through the text line by line. The main thing
772 # happening here is handling of block-level elements p, pre,
773 # and making lists from lines starting with * # : etc.
775 $a = explode( "\n", $text );
776 $text = $lastPref = "";
777 $this->mDTopen
= $inBlockElem = false;
779 if ( ! $linestart ) { $text .= array_shift( $a ); }
780 foreach ( $a as $t ) {
781 if ( "" != $text ) { $text .= "\n"; }
# Separate the list prefix (leading run of * # : ;) from the line content.
# $pref2 is the prefix with ";" mapped to ":", so a term line and a
# definition line compare as the same list.
784 $opl = strlen( $lastPref );
785 $npl = strspn( $t, "*#:;" );
786 $pref = substr( $t, 0, $npl );
787 $pref2 = str_replace( ";", ":", $pref );
788 $t = substr( $t, $npl );
# Same prefix as the previous line: continue the current list.
790 if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
791 $text .= $this->nextItem( substr( $pref, -1 ) );
# "; term : definition" on a single line: split at the first ":".
793 if ( ";" == substr( $pref, -1 ) ) {
794 $cpos = strpos( $t, ":" );
795 if ( ! ( false === $cpos ) ) {
796 $term = substr( $t, 0, $cpos );
797 $text .= $term . $this->nextItem( ":" );
798 $t = substr( $t, $cpos +
1 );
# Prefix changed: close the list levels that are no longer shared with the
# previous line, then open the new levels one character at a time.
801 } else if (0 != $npl ||
0 != $opl) {
802 $cpl = $this->getCommon( $pref, $lastPref );
804 while ( $cpl < $opl ) {
805 $text .= $this->closeList( $lastPref{$opl-1} );
808 if ( $npl <= $cpl && $cpl > 0 ) {
809 $text .= $this->nextItem( $pref{$cpl-1} );
811 while ( $npl > $cpl ) {
812 $char = substr( $pref, $cpl, 1 );
813 $text .= $this->openList( $char );
815 if ( ";" == $char ) {
816 $cpos = strpos( $t, ":" );
817 if ( ! ( false === $cpos ) ) {
818 $term = substr( $t, 0, $cpos );
819 $text .= $term . $this->nextItem( ":" );
820 $t = substr( $t, $cpos +
1 );
827 if ( 0 == $npl ) { # No prefix--go to paragraph mode
829 "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
830 $text .= $this->closeParagraph();
# Outside block elements the section type is chosen per line; a line that
# does not start with a space gets "p".
# NOTE(review): the assignment for the leading-space case is not visible;
# presumably it selects a preformatted section.
833 if ( ! $inBlockElem ) {
834 if ( " " == $t{0} ) {
836 # $t = wfEscapeHTML( $t );
838 else { $newSection = "p"; }
840 if ( 0 == strcmp( "", trim( $oLine ) ) ) {
841 $text .= $this->closeParagraph();
842 $text .= "<" . $newSection . ">";
843 } else if ( 0 != strcmp( $this->mLastSection
,
845 $text .= $this->closeParagraph();
846 if ( 0 != strcmp( "p", $newSection ) ) {
847 $text .= "<" . $newSection . ">";
850 $this->mLastSection
= $newSection;
853 preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
854 $inBlockElem = false;
860 $text .= $this->closeList( $pref2{$npl-1} );
# Close the section still open at the end; "p" gets no explicit closing tag.
863 if ( "" != $this->mLastSection
) {
864 if ( "p" != $this->mLastSection
) {
865 $text .= "</" . $this->mLastSection
. ">";
867 $this->mLastSection
= "";
869 wfProfileOut( $fname );
# replaceVariables(): substitutes magic words in $text.
# Date/time words are collected in $magic and replaced in a single
# MagicWord::replaceMultiple() call. The article count and the MSG / MSGNW
# forms are handled one by one afterwards. The counters mContainsOldMagic and
# mContainsNewMagic are increased when replacements took place.
873 /* private */ function replaceVariables( $text )
875 global $wgLang, $wgCurOut;
876 $fname = "OutputPage::replaceVariables";
877 wfProfileIn( $fname );
882 # See Language.php for the definition of each magic word
883 # As with sigs, this uses the server's local time -- ensure
884 # this is appropriate for your audience!
886 $magic[MAG_CURRENTMONTH
] = date( "m" );
887 $magic[MAG_CURRENTMONTHNAME
] = $wgLang->getMonthName( date("n") );
888 $magic[MAG_CURRENTMONTHNAMEGEN
] = $wgLang->getMonthNameGen( date("n") );
889 $magic[MAG_CURRENTDAY
] = date("j");
# date("w") is 0 (Sunday) to 6; 1 is added before the name lookup.
# NOTE(review): presumably getWeekdayName() expects a 1-based day -- confirm.
890 $magic[MAG_CURRENTDAYNAME
] = $wgLang->getWeekdayName( date("w")+
1 );
891 $magic[MAG_CURRENTYEAR
] = date( "Y" );
892 $magic[MAG_CURRENTTIME
] = $wgLang->time( wfTimestampNow(), false );
894 $this->mContainsOldMagic +
= MagicWord
::replaceMultiple($magic, $text, $text);
# The article count is computed only when its magic word occurs in the text.
896 $mw =& MagicWord
::get( MAG_NUMBEROFARTICLES
);
897 if ( $mw->match( $text ) ) {
898 $v = wfNumberOfArticles();
899 $text = $mw->replace( $v, $text );
900 if( $mw->getWasModified() ) { $this->mContainsOldMagic++
; }
903 # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
904 # The callbacks are at the bottom of this file
906 $mw =& MagicWord
::get( MAG_MSG
);
907 $text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
908 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
910 $mw =& MagicWord
::get( MAG_MSGNW
);
911 $text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
912 if( $mw->getWasModified() ) { $this->mContainsNewMagic++
; }
914 wfProfileOut( $fname );
918 # Cleans up HTML, removes dangerous tags and attributes
#
# removeHTMLtags(): the text is split on "<" and each piece is examined as a
# possible tag. Tags whose lower-cased name is in $htmlelements are kept,
# with their attributes filtered through fixTagAttributes(); $tagstack tracks
# open tags so that nesting can be checked and leftovers closed at the end.
# $tablestack saves the tag stack whenever a <table> is entered.
919 /* private */ function removeHTMLtags( $text )
921 $fname = "OutputPage::removeHTMLtags";
922 wfProfileIn( $fname );
923 $htmlpairs = array( # Tags that must be closed
924 "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
925 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
926 "strike", "strong", "tt", "var", "div", "center",
927 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
928 "ruby", "rt" , "rb" , "rp"
931 "br", "p", "hr", "li", "dt", "dd"
933 $htmlnest = array( # Tags that can be nested--??
934 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
935 "dl", "font", "big", "small", "sub", "sup"
937 $tabletags = array( # Can only appear inside table
# Table tags are treated like single tags; $htmlelements is every allowed tag.
941 $htmlsingle = array_merge( $tabletags, $htmlsingle );
942 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
944 $htmlattrs = $this->getHTMLattrs () ;
946 # Remove HTML comments
947 $text = preg_replace( "/<!--.*-->/sU", "", $text );
# Everything before the first "<" is plain text and is kept as is.
949 $bits = explode( "<", $text );
950 $text = array_shift( $bits );
951 $tagstack = array(); $tablestack = array();
953 foreach ( $bits as $x ) {
# Notices and warnings are silenced while the piece is parsed and the
# match is unpacked; the previous reporting level is restored afterwards.
954 $prev = error_reporting( E_ALL
& ~
( E_NOTICE | E_WARNING
) );
955 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
957 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
958 error_reporting( $prev );
961 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
965 if ( ! in_array( $t, $htmlsingle ) &&
966 ( $ot = array_pop( $tagstack ) ) != $t ) {
967 array_push( $tagstack, $ot );
970 if ( $t == "table" ) {
971 $tagstack = array_pop( $tablestack );
976 # Keep track for later
977 if ( in_array( $t, $tabletags ) &&
978 ! in_array( "table", $tagstack ) ) {
980 } else if ( in_array( $t, $tagstack ) &&
981 ! in_array ( $t , $htmlnest ) ) {
983 } else if ( ! in_array( $t, $htmlsingle ) ) {
984 if ( $t == "table" ) {
985 array_push( $tablestack, $tagstack );
988 array_push( $tagstack, $t );
990 # Strip non-approved attributes from the tag
991 $newparams = $this->fixTagAttributes($params);
# NOTE(review): as written, the two str_replace() calls below replace ">"
# with itself. Presumably the replacement was an HTML entity that was decoded
# when this listing was produced -- confirm against the original file.
995 $rest = str_replace( ">", ">", $rest );
996 $text .= "<$slash$t $newparams$brace$rest";
1000 $text .= "<" . str_replace( ">", ">", $x);
1002 # Close off any remaining tags
1003 while ( $t = array_pop( $tagstack ) ) {
1005 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1007 wfProfileOut( $fname );
1013 * This function accomplishes several tasks:
1014 * 1) Auto-number headings if that option is enabled
1015 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1016 * 3) Add a Table of contents on the top for users who have enabled the option
1017 * 4) Auto-anchor headings
1019 * It loops through all headlines, collects the necessary data, then splits up the
1020 * string and re-inserts the newly formatted headlines.
# formatHeadings(): post-processes the <h1>..<h6> elements found in $text.
# User options read below: "numberheadings" ($nh), "showtoc" ($st),
# "editsection" ($es) and "editsectiononrightclick" ($esr); the latter two
# count only for users with a non-zero ID. All headlines are collected first,
# a replacement is built for each in $head, and the text is then split at the
# headlines and reassembled around the new versions.
1023 /* private */ function formatHeadings( $text )
1025 global $wgUser,$wgArticle,$wgTitle,$wpPreview;
1026 $nh=$wgUser->getOption( "numberheadings" );
1027 $st=$wgUser->getOption( "showtoc" );
1028 if(!$wgTitle->userCanEdit()) {
1032 $es=$wgUser->getID() && $wgUser->getOption( "editsection" );
1033 $esr=$wgUser->getID() && $wgUser->getOption( "editsectiononrightclick" );
1036 # Inhibit editsection links if requested in the page
1037 $esw =& MagicWord
::get( MAG_NOEDITSECTION
);
1038 if ($esw->matchAndRemove( $text )) {
1041 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1043 $mw =& MagicWord
::get( MAG_NOTOC
);
1044 if ($mw->matchAndRemove( $text ))
1049 # never add the TOC to the Main Page. This is an entry page that should not
1050 # be more than 1-2 screens large anyway
1051 if($wgTitle->getPrefixedText()==wfMsg("mainpage")) {$st=0;}
1053 # We need this to perform operations on the HTML
1054 $sk=$wgUser->getSkin();
1056 # Get all headlines for numbering them and adding funky stuff like [edit]
# $matches[1] holds the heading level, $matches[2] the rest of the opening
# tag up to and including ">", and $matches[3] the headline content.
1058 preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);
1063 # Ugh .. the TOC should have neat indentation levels which can be
1064 # passed to the skin functions. These are determined here
1065 foreach($matches[3] as $headline) {
1066 if($level) { $prevlevel=$level;}
1067 $level=$matches[1][$c];
1068 if(($nh||
$st) && $prevlevel && $level>$prevlevel) {
1070 $h[$level]=0; // reset when we enter a new level
1071 $toc.=$sk->tocIndent($level-$prevlevel);
1072 $toclevel+
=$level-$prevlevel;
1075 if(($nh||
$st) && $level<$prevlevel) {
1076 $h[$level+
1]=0; // reset when we step back a level
1077 $toc.=$sk->tocUnindent($prevlevel-$level);
1078 $toclevel-=$prevlevel-$level;
1081 $h[$level]++
; // count number of headlines for each level
1084 for($i=1;$i<=$level;$i++
) {
1086 if($dot) {$numbering.=".";}
1093 // The canonized header is a version of the header text safe to use for links
# Tags and double quotes are removed and spaces become underscores.
1095 $canonized_headline=preg_replace("/<.*?>/","",$headline); // strip out HTML
1096 $tocline = trim( $canonized_headline );
1097 $canonized_headline=str_replace('"',"",$canonized_headline);
1098 $canonized_headline=str_replace(" ","_",trim($canonized_headline));
1099 $refer[$c]=$canonized_headline;
1100 $refers[$canonized_headline]++
; // count how many in assoc. array so we can track dupes in anchors
1101 $refcount[$c]=$refers[$canonized_headline];
1103 // Prepend the number to the heading text
1106 $tocline=$numbering ." ". $tocline;
1108 // Don't number the heading if it is the only one (looks silly)
1109 if($nh && count($matches[3]) > 1) {
1110 $headline=$numbering . " " . $headline; // the two are different if the line contains a link
1114 // Create the anchor for linking from the TOC to the section
# A repeated headline gets "_<n>" appended so that anchors stay unique.
1116 $anchor=$canonized_headline;
1117 if($refcount[$c]>1) {$anchor.="_".$refcount[$c];}
1119 $toc.=$sk->tocLine($anchor,$tocline,$toclevel);
# Edit links are left out while previewing ($wpPreview is set).
1121 if($es && !isset($wpPreview)) {
1122 $head[$c].=$sk->editSectionLink($c+
1);
1125 // Put it all together
1127 $head[$c].="<h".$level.$matches[2][$c]
1128 ."<a name=\"".$anchor."\">"
1133 // Add the edit section link
1135 if($esr && !isset($wpPreview)) {
1136 $head[$c]=$sk->editSectionScript($c+
1,$head[$c]);
1146 $toc.=$sk->tocUnindent($toclevel);
1147 $toc=$sk->tocTable($toc);
1150 // split up and insert constructed headlines
1152 $blocks=preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
1155 foreach($blocks as $block) {
1156 if(($es) && !isset($wpPreview) && $c>0 && $i==0) {
1157 # This is the [edit] link that appears for the top block of text when
1158 # section editing is enabled
1159 $full.=$sk->editSectionLink(0);
# The table of contents is added after the first block only, and only when
# $toclines exceeds 3.
1162 if($st && $toclines>3 && !$i) {
1163 # Let's add a top anchor just in case we want to link to the top of the page
1164 $full="<a name=\"top\"></a>".$full.$toc;
# magicISBN(): turns "ISBN <number>" occurrences into links to the
# "Booksources" special page. The text is split on "ISBN "; for each piece
# the leading spaces and then the leading run of characters from $valid are
# consumed, and the link parameter $num is that run with dashes and spaces
# removed. Text without any "ISBN " is returned unchanged.
# NOTE(review): split() was deprecated in PHP 5.3 and removed in PHP 7.0;
# confirm the target PHP version. $wgLang is used below, but its global
# declaration is not visible in this listing.
1174 /* private */ function magicISBN( $text )
# A space is prepended before splitting and removed again afterwards, so a
# match at offset 0 still yields a first chunk.
1178 $a = split( "ISBN ", " $text" );
1179 if ( count ( $a ) < 2 ) return $text;
1180 $text = substr( array_shift( $a ), 1);
1181 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1183 foreach ( $a as $x ) {
1184 $isbn = $blank = "" ;
1185 while ( " " == $x{0} ) {
1187 $x = substr( $x, 1 );
1189 while ( strstr( $valid, $x{0} ) != false ) {
1191 $x = substr( $x, 1 );
1193 $num = str_replace( "-", "", $isbn );
1194 $num = str_replace( " ", "", $num );
1197 $text .= "ISBN $blank$x";
1199 $text .= "<a href=\"" . wfLocalUrlE( $wgLang->specialPage(
1200 "Booksources"), "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn</a>";
1207 /* private */ function magicRFC( $text )
# Members of ParserOutput, the value object returned by the parser:
#   mText             - the rendered text
#   mLanguageLinks    - array of language links
#   mCategoryLinks    - array of category links
#   mContainsOldMagic - old-style magic word flag/counter
1217 var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
# Constructor: every argument is optional and is copied straight into the
# member of the same name.
1219 function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
1220 $containsOldMagic = false )
1222 $this->mText
= $text;
1223 $this->mLanguageLinks
= $languageLinks;
1224 $this->mCategoryLinks
= $categoryLinks;
1225 $this->mContainsOldMagic
= $containsOldMagic;
# Getters return the member unchanged.
1228 function getText() { return $this->mText
; }
1229 function getLanguageLinks() { return $this->mLanguageLinks
; }
1230 function getCategoryLinks() { return $this->mCategoryLinks
; }
1231 function containsOldMagic() { return $this->mContainsOldMagic
; }
# Setters delegate to wfSetVar() and return whatever it returns.
# NOTE(review): presumably wfSetVar() returns the previous value -- confirm.
1232 function setText( $text ) { return wfSetVar( $this->mText
, $text ); }
1233 function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks
, $ll ); }
1234 function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks
, $cl ); }
1235 function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic
, $com ); }
1238 # Regex callbacks, used in OutputPage::replaceVariables
1240 # Just get rid of the dangerous stuff
1241 # Necessary because replaceVariables is called after removeHTMLtags,
1242 # and message text can come from any user
#
# wfReplaceMsgVar(): callback for the MSG magic word. $matches[1] is the
# message name. The message text is sanitised with removeHTMLtags() and its
# internal links are expanded while the link cache is suspended; afterwards
# the message page itself (NS_MEDIAWIKI namespace) is added to the link cache.
1243 function wfReplaceMsgVar( $matches ) {
1244 global $wgCurOut, $wgLinkCache;
1245 $text = $wgCurOut->removeHTMLtags( wfMsg( $matches[1] ) );
1246 $wgLinkCache->suspend();
1247 $text = $wgCurOut->replaceInternalLinks( $text );
1248 $wgLinkCache->resume();
1249 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );
1253 # Effective <nowiki></nowiki>
1254 # Not real <nowiki> because this is called after nowiki sections are processed
#
# wfReplaceMsgnwVar(): callback for the MSGNW magic word. $matches[1] is the
# message name. The message text is passed through wfEscapeWikiText() and
# the message page (NS_MEDIAWIKI namespace) is added to the link cache.
1255 function wfReplaceMsgnwVar( $matches ) {
1256 global $wgCurOut, $wgLinkCache;
1257 $text = wfEscapeWikiText( wfMsg( $matches[1] ) );
1258 $wgLinkCache->addLinkObj( Title
::makeTitle( NS_MEDIAWIKI
, $matches[1] ) );